def test_requires(self, connect_s3_mock):
    """Check requires() against a mocked S3 bucket listing.

    The mocked bucket lists one fake key per entry of SAMPLE_KEY_PATHS.
    For the March 2014 interval the task must require exactly one
    UncheckedExternalURL per (expected path, source) combination,
    in any order.
    """

    class FakeKey(object):
        """A test double of the structure returned by boto when listing keys in an S3 bucket."""

        def __init__(self, path):
            self.key = path
            self.size = 10

    # The connection is whatever the patched callable returns.
    bucket = connect_s3_mock.return_value.get_bucket.return_value
    bucket.list.return_value = [FakeKey(key_path) for key_path in self.SAMPLE_KEY_PATHS]

    date_pattern = r'.*?FakeServerGroup/tracking.log-(?P<date>\d{8}).*\.gz'
    task = PathSelectionByDateIntervalTask(
        source=self.SOURCE,
        interval=Month.parse('2014-03'),
        pattern=[date_pattern],
        expand_interval=datetime.timedelta(days=0),
    )

    expected_paths = [
        'FakeServerGroup/tracking.log-20140318.gz',
        'FakeServerGroup/tracking.log-20140319-1395256622.gz',
    ]
    required = task.requires()

    expected_urls = []
    for path in expected_paths:
        for source in self.SOURCE:
            expected_urls.append(UncheckedExternalURL(source + path))

    self.assertItemsEqual(required, expected_urls)
def test_requires(self, connect_s3_mock):
    """Check requires() against a mocked S3 bucket listing.

    The mocked bucket lists one fake key per entry of SAMPLE_KEY_PATHS.
    For the March 2014 interval the task must require exactly one
    UncheckedExternalURL per (expected path, source) combination,
    in any order.
    """

    class FakeKey(object):
        """A test double of the structure returned by boto when listing keys in an S3 bucket."""

        def __init__(self, path):
            self.key = path
            self.size = 10

    # get_bucket is looked up on the patched object itself (not on its
    # return_value), i.e. the patched object stands in for the connection.
    bucket = connect_s3_mock.get_bucket.return_value
    bucket.list.return_value = [FakeKey(key_path) for key_path in self.SAMPLE_KEY_PATHS]

    date_pattern = r'.*?FakeServerGroup/tracking.log-(?P<date>\d{8}).*\.gz'
    task = PathSelectionByDateIntervalTask(
        source=self.SOURCE,
        interval=Month.parse('2014-03'),
        pattern=[date_pattern],
        expand_interval=datetime.timedelta(days=0),
    )

    expected_paths = [
        'FakeServerGroup/tracking.log-20140318.gz',
        'FakeServerGroup/tracking.log-20140319-1395256622.gz',
    ]
    required = task.requires()

    expected_urls = []
    for path in expected_paths:
        for source in self.SOURCE:
            expected_urls.append(UncheckedExternalURL(source + path))

    self.assertItemsEqual(required, expected_urls)
def test_timestamped_urls(self):
    """Select paths via the ``timestamp`` regex group, with and without expansion.

    The same pattern is run twice over March 2014: once with no interval
    expansion and once expanded by one day. Each run is checked through
    the assert_only_matched helper against its own expected path list.
    """
    timestamp_regex = r'.*?FakeServerGroup/tracking.log-.*-(?P<timestamp>\d{10})\.gz'

    # (expand_interval, paths expected to be the only matches)
    scenarios = [
        (
            datetime.timedelta(days=0),
            [
                'FakeServerGroup/tracking.log-20140319-1395256622.gz',
            ],
        ),
        (
            datetime.timedelta(days=1),
            [
                'FakeServerGroup/tracking.log-20140319-1395256622.gz',
                'FakeServerGroup/tracking.log-20140401-1396379384.gz',
            ],
        ),
    ]

    for expansion, expected in scenarios:
        task = PathSelectionByDateIntervalTask(
            source=self.SOURCE,
            interval=Month.parse('2014-03'),
            pattern=[timestamp_regex],
            expand_interval=expansion,
        )
        self.assert_only_matched(task, expected)
def test_edge_urls(self):
    """Only the FakeEdgeServerGroup log dated within March 2014 should match."""
    edge_regex = r'.*?FakeEdgeServerGroup/tracking.log-(?P<date>\d{8}).*\.gz'
    no_expansion = datetime.timedelta(days=0)

    task = PathSelectionByDateIntervalTask(
        source=self.SOURCE,
        interval=Month.parse('2014-03'),
        pattern=[edge_regex],
        expand_interval=no_expansion,
    )

    expected = ['FakeEdgeServerGroup/tracking.log-20140324-1395670621.gz']
    self.assert_only_matched(task, expected)
def test_filtering_of_urls(self):
    """A single date pattern (passed as a plain string) selects the March 2014 logs."""
    date_regex = r'.*?FakeServerGroup/tracking.log-(?P<date>\d{8}).*\.gz'
    no_expansion = datetime.timedelta(days=0)

    task = EventLogSelectionTask(
        source=self.SOURCE,
        interval=Month.parse('2014-03'),
        pattern=date_regex,
        expand_interval=no_expansion,
    )

    expected = [
        'FakeServerGroup/tracking.log-20140318.gz',
        'FakeServerGroup/tracking.log-20140319-1395256622.gz',
    ]
    self.assert_only_matched(task, expected)
def test_filtering_of_urls(self):
    """A one-element pattern list selects the March 2014 FakeServerGroup logs."""
    date_regex = r'.*?FakeServerGroup/tracking.log-(?P<date>\d{8}).*\.gz'
    no_expansion = datetime.timedelta(days=0)

    task = EventLogSelectionTask(
        source=self.SOURCE,
        interval=Month.parse('2014-03'),
        pattern=[date_regex],
        expand_interval=no_expansion,
    )

    expected = [
        'FakeServerGroup/tracking.log-20140318.gz',
        'FakeServerGroup/tracking.log-20140319-1395256622.gz',
    ]
    self.assert_only_matched(task, expected)
def test_expanded_interval(self):
    """Expanding March 2014 by one day also admits the 02-28 and 04-01 logs."""
    date_regex = r'.*?FakeServerGroup/tracking.log-(?P<date>\d{8}).*\.gz'
    one_day = datetime.timedelta(days=1)

    task = PathSelectionByDateIntervalTask(
        source=self.SOURCE,
        interval=Month.parse('2014-03'),
        pattern=[date_regex],
        expand_interval=one_day,
    )

    expected = [
        'FakeServerGroup/tracking.log-20140228.gz',
        'FakeServerGroup/tracking.log-20140318.gz',
        'FakeServerGroup/tracking.log-20140319-1395256622.gz',
        'FakeServerGroup/tracking.log-20140401-1395254574.gz',
    ]
    self.assert_only_matched(task, expected)
def test_multiple_filtering_of_urls(self):
    """Several patterns at once: two dated ones plus one without a date group."""
    patterns = [
        r'.*?FakeServerGroup/tracking.log-(?P<date>\d{8}).*\.gz',
        r'.*?FakeEdgeServerGroup/tracking.log-(?P<date>\d{8}).*\.gz',
        # This pattern has no named date/timestamp group.
        r'.*tracking_\d{3,5}\.log\.gz$',
    ]
    no_expansion = datetime.timedelta(days=0)

    task = PathSelectionByDateIntervalTask(
        source=self.SOURCE,
        interval=Month.parse('2014-03'),
        pattern=patterns,
        expand_interval=no_expansion,
    )

    expected = [
        'FakeServerGroup/tracking.log-20140318.gz',
        'FakeServerGroup/tracking.log-20140319-1395256622.gz',
        'FakeEdgeServerGroup/tracking.log-20140324-1395670621.gz',
        'FakeOldServerGroup3/tracking_14602.log.gz',
    ]
    self.assert_only_matched(task, expected)
def test_pattern_from_config(self):
    """With no explicit pattern, the task's pattern is the tuple ('foobar',).

    NOTE(review): 'foobar' is presumably supplied by test configuration
    set up outside this method -- confirm against the fixture.
    """
    task = PathSelectionByDateIntervalTask(
        interval=Month.parse('2014-03')
    )
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(task.pattern, ('foobar',))
def test_pattern_override(self):
    """An explicitly passed pattern list is exposed on the task as a tuple."""
    task = PathSelectionByDateIntervalTask(
        interval=Month.parse('2014-03'),
        pattern=['baz']
    )
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(task.pattern, ('baz',))
def test_pattern_from_config(self):
    """With no explicit pattern, the task's pattern is the tuple ('foobar',).

    NOTE(review): 'foobar' is presumably supplied by test configuration
    set up outside this method -- confirm against the fixture.
    """
    task = EventLogSelectionTask(interval=Month.parse('2014-03'))
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(task.pattern, ('foobar', ))
def test_pattern_override(self):
    """An explicitly passed pattern list is exposed on the task as a tuple."""
    task = EventLogSelectionTask(interval=Month.parse('2014-03'), pattern=['baz'])
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(task.pattern, ('baz', ))
def test_default_pattern(self):
    """With no explicit pattern, the task exposes the two expected default regexes.

    NOTE(review): the defaults presumably come from configuration set up
    outside this method -- confirm against the fixture.
    """
    task = PathSelectionByDateIntervalTask(interval=Month.parse('2014-03'))
    expected_patterns = (
        r'.*tracking.log-(?P<date>\d{8}).*\.gz',
        r'.*tracking.notalog-(?P<date>\d{8}).*\.gz',
    )
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(task.pattern, expected_patterns)
def test_default_source(self):
    """With no explicit source, the task exposes the two expected default S3 URLs.

    NOTE(review): the defaults presumably come from configuration set up
    outside this method -- confirm against the fixture.
    """
    task = PathSelectionByDateIntervalTask(interval=Month.parse('2014-03'))
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(task.source, ('s3://fake/input/', 's3://fake/input2/'))
def test_pattern_from_config(self):
    """With no explicit pattern, the task's pattern is the string 'foobar'.

    NOTE(review): 'foobar' is presumably supplied by test configuration
    set up outside this method -- confirm against the fixture.
    """
    task = EventLogSelectionTask(
        interval=Month.parse('2014-03')
    )
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(task.pattern, 'foobar')
def test_pattern_override(self):
    """An explicitly passed pattern string is exposed unchanged on the task."""
    task = EventLogSelectionTask(
        interval=Month.parse('2014-03'),
        pattern='baz'
    )
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(task.pattern, 'baz')
def test_default_pattern(self):
    """With no explicit pattern, the task exposes the two expected default regexes.

    NOTE(review): the defaults presumably come from configuration set up
    outside this method -- confirm against the fixture.
    """
    task = PathSelectionByDateIntervalTask(interval=Month.parse('2014-03'))
    # NOTE(review): these are raw strings containing doubled backslashes, so
    # each expected value holds two literal backslash characters before 'd'
    # and '.'. Confirm this matches how the default value is actually stored.
    expected_patterns = (
        r'.*tracking.log-(?P<date>\\d{8}).*\\.gz',
        r'.*tracking.notalog-(?P<date>\\d{8}).*\\.gz',
    )
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(task.pattern, expected_patterns)