def test_requires(self, connect_s3_mock):
    """Check ``requires()`` against a faked S3 bucket listing.

    The bucket returned through ``connect_s3_mock`` lists one fake key per
    entry in ``self.SAMPLE_KEY_PATHS``. The task is built for March 2014
    with a zero ``expand_interval``, and its requirements must be exactly
    one ``UncheckedExternalURL`` per expected path per configured source.

    Args:
        connect_s3_mock: mock standing in for the S3 connection factory
            (presumably injected by a patch decorator outside this view).
    """
    s3_conn_mock = connect_s3_mock.return_value
    bucket_mock = s3_conn_mock.get_bucket.return_value

    class FakeKey(object):
        """A test double of the structure returned by boto when listing keys in an S3 bucket."""

        def __init__(self, path):
            self.key = path
            self.size = 10

    bucket_mock.list.return_value = [FakeKey(path) for path in self.SAMPLE_KEY_PATHS]

    task = EventLogSelectionTask(
        source=self.SOURCE,
        interval=Month.parse('2014-03'),
        # Patterns are supplied as a list of regular expressions rather
        # than a bare string, matching how the parameter is used elsewhere.
        pattern=[r'.*?FakeServerGroup/tracking.log-(?P<date>\d{8}).*\.gz'],
        expand_interval=datetime.timedelta(0),
    )

    expected_paths = [
        'FakeServerGroup/tracking.log-20140318.gz',
        'FakeServerGroup/tracking.log-20140319-1395256622.gz',
    ]

    # Order is irrelevant: every expected path must appear under every source.
    self.assertItemsEqual(
        task.requires(),
        [UncheckedExternalURL(source + path) for path in expected_paths for source in self.SOURCE]
    )
def test_requires(self, connect_s3_mock):
    """Verify the URLs returned by ``requires()`` for a mocked bucket listing.

    Fake keys built from ``self.SAMPLE_KEY_PATHS`` are served by the mocked
    bucket's ``list()``. For a March 2014 interval with no expansion, the
    task must require one ``UncheckedExternalURL`` for each expected path
    under each entry of ``self.SOURCE`` (compared without regard to order).

    Args:
        connect_s3_mock: mock standing in for the S3 connection factory
            (presumably injected by a patch decorator outside this view).
    """

    class FakeKey(object):
        """Minimal stand-in for the key objects boto yields when listing an S3 bucket."""

        def __init__(self, path):
            self.key = path
            self.size = 10

    # Wire the fake listing into connection -> bucket -> list().
    fake_bucket = connect_s3_mock.return_value.get_bucket.return_value
    fake_listing = []
    for sample_path in self.SAMPLE_KEY_PATHS:
        fake_listing.append(FakeKey(sample_path))
    fake_bucket.list.return_value = fake_listing

    task_kwargs = dict(
        source=self.SOURCE,
        interval=Month.parse('2014-03'),
        pattern=[r'.*?FakeServerGroup/tracking.log-(?P<date>\d{8}).*\.gz'],
        expand_interval=datetime.timedelta(0),
    )
    task = EventLogSelectionTask(**task_kwargs)

    actual_requirements = task.requires()

    # Build the expectation path-major: each path paired with every source.
    expected_requirements = []
    for relative_path in (
        'FakeServerGroup/tracking.log-20140318.gz',
        'FakeServerGroup/tracking.log-20140319-1395256622.gz',
    ):
        for source in self.SOURCE:
            expected_requirements.append(UncheckedExternalURL(source + relative_path))

    self.assertItemsEqual(actual_requirements, expected_requirements)
def test_filtering_of_urls(self):
    """Run the single-pattern task through ``assert_only_matched``.

    The task covers March 2014 with a zero ``expand_interval`` and one
    ``FakeServerGroup`` regex; the helper receives the two paths that are
    expected to be the only matches.
    """
    no_expansion = datetime.timedelta(0)
    march_2014 = Month.parse('2014-03')
    patterns = [r'.*?FakeServerGroup/tracking.log-(?P<date>\d{8}).*\.gz']

    task = EventLogSelectionTask(
        source=self.SOURCE,
        interval=march_2014,
        pattern=patterns,
        expand_interval=no_expansion,
    )

    expected_matches = [
        'FakeServerGroup/tracking.log-20140318.gz',
        'FakeServerGroup/tracking.log-20140319-1395256622.gz',
    ]
    self.assert_only_matched(task, expected_matches)
def test_multiple_filtering_of_urls(self):
    """Run a task configured with three patterns through ``assert_only_matched``.

    Two of the regexes capture a ``date`` group from the file name; the
    third has no date group. The helper receives the four paths expected
    to be the only matches for March 2014 with a zero ``expand_interval``.
    """
    patterns = [
        r'.*?FakeServerGroup/tracking.log-(?P<date>\d{8}).*\.gz',
        r'.*?FakeEdgeServerGroup/tracking.log-(?P<date>\d{8}).*\.gz',
        r'.*tracking_\d{3,5}\.log\.gz$',
    ]
    expected_matches = [
        'FakeServerGroup/tracking.log-20140318.gz',
        'FakeServerGroup/tracking.log-20140319-1395256622.gz',
        'FakeEdgeServerGroup/tracking.log-20140324-1395670621.gz',
        'FakeOldServerGroup3/tracking_14602.log.gz',
    ]

    task_kwargs = {
        'source': self.SOURCE,
        'interval': Month.parse('2014-03'),
        'pattern': patterns,
        'expand_interval': datetime.timedelta(0),
    }
    task = EventLogSelectionTask(**task_kwargs)

    self.assert_only_matched(task, expected_matches)
def test_default_pattern(self):
    """Check ``task.pattern`` when no ``pattern`` argument is passed.

    The task is built with only an interval; its ``pattern`` attribute
    must equal the two-regex tuple asserted below.
    """
    task = EventLogSelectionTask(interval=Month.parse('2014-03'))
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(task.pattern, (
        r'.*tracking.log-(?P<date>\d{8}).*\.gz',
        r'.*tracking.notalog-(?P<date>\d{8}).*\.gz',
    ))
def test_default_source(self):
    """Check ``task.source`` when no ``source`` argument is passed.

    The task is built with only an interval; its ``source`` attribute must
    equal the two-URL tuple asserted below.
    """
    task = EventLogSelectionTask(interval=Month.parse('2014-03'))
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(task.source, ('s3://fake/input/', 's3://fake/input2/'))
def test_pattern_override(self):
    """Check that an explicit ``pattern`` argument is what the task exposes.

    A one-element list is passed in; the ``pattern`` attribute is expected
    to compare equal to the corresponding one-element tuple.
    """
    task = EventLogSelectionTask(interval=Month.parse('2014-03'), pattern=['baz'])
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(task.pattern, ('baz', ))
def test_pattern_from_config(self):
    """Check ``task.pattern`` when the value is expected to come from config.

    No ``pattern`` argument is passed, and the attribute must equal
    ``('foobar', )``.
    NOTE(review): presumably a configuration override supplying 'foobar'
    is applied outside this method (e.g. by a decorator) -- confirm.
    """
    task = EventLogSelectionTask(interval=Month.parse('2014-03'))
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(task.pattern, ('foobar', ))