예제 #1
0
    def test_requires(self, connect_s3_mock):
        # The connection is what the patched connect call returns; its bucket
        # listing is replaced with one FakeKey per entry in SAMPLE_KEY_PATHS.
        class FakeKey(object):
            """Minimal stand-in for the key objects boto yields when listing an S3 bucket."""

            def __init__(self, key_path):
                self.size = 10
                self.key = key_path

        fake_listing = []
        for sample_path in self.SAMPLE_KEY_PATHS:
            fake_listing.append(FakeKey(sample_path))
        connect_s3_mock.return_value.get_bucket.return_value.list.return_value = fake_listing

        # March 2014, dated-log pattern, zero-length interval expansion.
        dated_log_pattern = r'.*?FakeServerGroup/tracking.log-(?P<date>\d{8}).*\.gz'
        task = PathSelectionByDateIntervalTask(
            source=self.SOURCE,
            interval=Month.parse('2014-03'),
            pattern=[dated_log_pattern],
            expand_interval=datetime.timedelta(0),
        )

        required = task.requires()

        # Each expected relative path is combined with every entry of self.SOURCE.
        relative_paths = [
            'FakeServerGroup/tracking.log-20140318.gz',
            'FakeServerGroup/tracking.log-20140319-1395256622.gz',
        ]
        expected_requirements = []
        for relative_path in relative_paths:
            for source_prefix in self.SOURCE:
                expected_requirements.append(UncheckedExternalURL(source_prefix + relative_path))

        self.assertItemsEqual(required, expected_requirements)
예제 #2
0
    def test_requires(self, connect_s3_mock):
        # In this variant the injected mock itself is used as the S3 connection:
        # get_bucket is looked up directly on it, and the bucket's listing is
        # replaced with one FakeKey per entry in SAMPLE_KEY_PATHS.
        class FakeKey(object):
            """Minimal stand-in for the key objects boto yields when listing an S3 bucket."""

            def __init__(self, key_path):
                self.size = 10
                self.key = key_path

        fake_listing = []
        for sample_path in self.SAMPLE_KEY_PATHS:
            fake_listing.append(FakeKey(sample_path))
        connect_s3_mock.get_bucket.return_value.list.return_value = fake_listing

        # March 2014, dated-log pattern, zero-length interval expansion.
        dated_log_pattern = r'.*?FakeServerGroup/tracking.log-(?P<date>\d{8}).*\.gz'
        task = PathSelectionByDateIntervalTask(
            source=self.SOURCE,
            interval=Month.parse('2014-03'),
            pattern=[dated_log_pattern],
            expand_interval=datetime.timedelta(0),
        )

        required = task.requires()

        # Each expected relative path is combined with every entry of self.SOURCE.
        relative_paths = [
            'FakeServerGroup/tracking.log-20140318.gz',
            'FakeServerGroup/tracking.log-20140319-1395256622.gz',
        ]
        expected_requirements = []
        for relative_path in relative_paths:
            for source_prefix in self.SOURCE:
                expected_requirements.append(UncheckedExternalURL(source_prefix + relative_path))

        self.assertItemsEqual(required, expected_requirements)
예제 #3
0
 def test_timestamped_urls(self):
     # The same timestamp-capturing pattern is run over March 2014 twice:
     # first with a zero-day expansion, then with a one-day expansion, for
     # which the expected list additionally contains the 20140401 log.
     timestamp_pattern = r'.*?FakeServerGroup/tracking.log-.*-(?P<timestamp>\d{10})\.gz'
     scenarios = (
         (0, [
             'FakeServerGroup/tracking.log-20140319-1395256622.gz',
         ]),
         (1, [
             'FakeServerGroup/tracking.log-20140319-1395256622.gz',
             'FakeServerGroup/tracking.log-20140401-1396379384.gz',
         ]),
     )
     for expansion_days, expected_matches in scenarios:
         task = PathSelectionByDateIntervalTask(
             source=self.SOURCE,
             interval=Month.parse('2014-03'),
             pattern=[timestamp_pattern],
             expand_interval=datetime.timedelta(expansion_days),
         )
         self.assert_only_matched(task, expected_matches)
예제 #4
0
    def test_edge_urls(self):
        # Select with the edge-server pattern over March 2014 and a zero-day
        # interval expansion; a single edge-server log is the expected match.
        edge_pattern = r'.*?FakeEdgeServerGroup/tracking.log-(?P<date>\d{8}).*\.gz'
        selection_args = {
            'source': self.SOURCE,
            'interval': Month.parse('2014-03'),
            'pattern': [edge_pattern],
            'expand_interval': datetime.timedelta(0),
        }
        task = PathSelectionByDateIntervalTask(**selection_args)

        expected_matches = ['FakeEdgeServerGroup/tracking.log-20140324-1395670621.gz']
        self.assert_only_matched(task, expected_matches)
예제 #5
0
 def test_timestamped_urls(self):
     # Two runs of one timestamp-capturing pattern over March 2014: the
     # zero-day expansion expects one log, the one-day expansion expects
     # that log plus the one whose file name carries 20140401.
     timestamp_pattern = r'.*?FakeServerGroup/tracking.log-.*-(?P<timestamp>\d{10})\.gz'
     in_month_log = 'FakeServerGroup/tracking.log-20140319-1395256622.gz'
     next_month_log = 'FakeServerGroup/tracking.log-20140401-1396379384.gz'

     expectations_by_expansion = [
         (datetime.timedelta(0), [in_month_log]),
         (datetime.timedelta(1), [in_month_log, next_month_log]),
     ]
     for expansion, expected_matches in expectations_by_expansion:
         task = PathSelectionByDateIntervalTask(
             source=self.SOURCE,
             interval=Month.parse('2014-03'),
             pattern=[timestamp_pattern],
             expand_interval=expansion,
         )
         self.assert_only_matched(task, expected_matches)
예제 #6
0
    def test_edge_urls(self):
        # Edge-server pattern, March 2014, zero-day interval expansion.
        march_2014 = Month.parse('2014-03')
        no_expansion = datetime.timedelta(0)
        edge_pattern = r'.*?FakeEdgeServerGroup/tracking.log-(?P<date>\d{8}).*\.gz'
        task = PathSelectionByDateIntervalTask(
            source=self.SOURCE,
            interval=march_2014,
            pattern=[edge_pattern],
            expand_interval=no_expansion,
        )

        # Exactly one edge-server log is listed as the expected match.
        only_expected = 'FakeEdgeServerGroup/tracking.log-20140324-1395670621.gz'
        self.assert_only_matched(task, [only_expected])
예제 #7
0
    def test_filtering_of_urls(self):
        # The pattern is passed as a single string here, not wrapped in a list.
        dated_log_pattern = r'.*?FakeServerGroup/tracking.log-(?P<date>\d{8}).*\.gz'
        selection_args = {
            'source': self.SOURCE,
            'interval': Month.parse('2014-03'),
            'pattern': dated_log_pattern,
            'expand_interval': datetime.timedelta(0),
        }
        task = EventLogSelectionTask(**selection_args)

        # Two FakeServerGroup logs with March 2014 dates are the expected matches.
        march_logs = [
            'FakeServerGroup/tracking.log-20140318.gz',
            'FakeServerGroup/tracking.log-20140319-1395256622.gz',
        ]
        self.assert_only_matched(task, march_logs)
    def test_filtering_of_urls(self):
        # The pattern is passed as a one-element list in this variant.
        dated_log_pattern = r'.*?FakeServerGroup/tracking.log-(?P<date>\d{8}).*\.gz'
        selection_args = {
            'source': self.SOURCE,
            'interval': Month.parse('2014-03'),
            'pattern': [dated_log_pattern],
            'expand_interval': datetime.timedelta(0),
        }
        task = EventLogSelectionTask(**selection_args)

        # Two FakeServerGroup logs with March 2014 dates are the expected matches.
        march_logs = [
            'FakeServerGroup/tracking.log-20140318.gz',
            'FakeServerGroup/tracking.log-20140319-1395256622.gz',
        ]
        self.assert_only_matched(task, march_logs)
예제 #9
0
    def test_expanded_interval(self):
        # March 2014 with a one-day expansion: besides the two March logs, the
        # expected list also holds the 20140228 and 20140401 logs.
        one_day = datetime.timedelta(1)
        dated_log_pattern = r'.*?FakeServerGroup/tracking.log-(?P<date>\d{8}).*\.gz'
        task = PathSelectionByDateIntervalTask(
            source=self.SOURCE,
            interval=Month.parse('2014-03'),
            pattern=[dated_log_pattern],
            expand_interval=one_day,
        )

        expected_matches = [
            'FakeServerGroup/tracking.log-20140228.gz',
            'FakeServerGroup/tracking.log-20140318.gz',
            'FakeServerGroup/tracking.log-20140319-1395256622.gz',
            'FakeServerGroup/tracking.log-20140401-1395254574.gz',
        ]
        self.assert_only_matched(task, expected_matches)
    def test_expanded_interval(self):
        # One-day expansion around March 2014; the expected matches are the
        # two March logs plus one dated 20140228 and one dated 20140401.
        selection_args = {
            'source': self.SOURCE,
            'interval': Month.parse('2014-03'),
            'pattern': [r'.*?FakeServerGroup/tracking.log-(?P<date>\d{8}).*\.gz'],
            'expand_interval': datetime.timedelta(1),
        }
        task = PathSelectionByDateIntervalTask(**selection_args)

        just_before = 'FakeServerGroup/tracking.log-20140228.gz'
        inside_month = [
            'FakeServerGroup/tracking.log-20140318.gz',
            'FakeServerGroup/tracking.log-20140319-1395256622.gz',
        ]
        just_after = 'FakeServerGroup/tracking.log-20140401-1395254574.gz'
        self.assert_only_matched(task, [just_before] + inside_month + [just_after])
예제 #11
0
    def test_multiple_filtering_of_urls(self):
        # Three patterns are supplied at once. The first two capture a named
        # date group; the third has no date group at all.
        server_pattern = r'.*?FakeServerGroup/tracking.log-(?P<date>\d{8}).*\.gz'
        edge_pattern = r'.*?FakeEdgeServerGroup/tracking.log-(?P<date>\d{8}).*\.gz'
        undated_pattern = r'.*tracking_\d{3,5}\.log\.gz$'

        task = PathSelectionByDateIntervalTask(
            source=self.SOURCE,
            interval=Month.parse('2014-03'),
            pattern=[server_pattern, edge_pattern, undated_pattern],
            expand_interval=datetime.timedelta(0),
        )

        expected_matches = [
            'FakeServerGroup/tracking.log-20140318.gz',
            'FakeServerGroup/tracking.log-20140319-1395256622.gz',
            'FakeEdgeServerGroup/tracking.log-20140324-1395670621.gz',
            'FakeOldServerGroup3/tracking_14602.log.gz',
        ]
        self.assert_only_matched(task, expected_matches)
예제 #12
0
    def test_multiple_filtering_of_urls(self):
        # Several patterns in one task: two with a named date group and one
        # (the last) without any date group.
        patterns = [
            r'.*?FakeServerGroup/tracking.log-(?P<date>\d{8}).*\.gz',
            r'.*?FakeEdgeServerGroup/tracking.log-(?P<date>\d{8}).*\.gz',
            r'.*tracking_\d{3,5}\.log\.gz$',
        ]
        selection_args = {
            'source': self.SOURCE,
            'interval': Month.parse('2014-03'),
            'pattern': patterns,
            'expand_interval': datetime.timedelta(0),
        }
        task = PathSelectionByDateIntervalTask(**selection_args)

        # Expected matches, in the same order as the patterns above.
        server_logs = [
            'FakeServerGroup/tracking.log-20140318.gz',
            'FakeServerGroup/tracking.log-20140319-1395256622.gz',
        ]
        edge_logs = ['FakeEdgeServerGroup/tracking.log-20140324-1395670621.gz']
        undated_logs = ['FakeOldServerGroup3/tracking_14602.log.gz']
        self.assert_only_matched(task, server_logs + edge_logs + undated_logs)
예제 #13
0
 def test_pattern_from_config(self):
     # No pattern argument is passed, so task.pattern must come from
     # somewhere else; the 'foobar' value presumably originates from
     # configuration set up outside this method -- TODO confirm.
     task = PathSelectionByDateIntervalTask(
         interval=Month.parse('2014-03')
     )
     # assertEqual is used instead of the deprecated assertEquals alias.
     self.assertEqual(task.pattern, ('foobar',))
예제 #14
0
 def test_pattern_override(self):
     # An explicitly passed pattern list is expected to be exposed on the
     # task as a one-element tuple holding the same string.
     task = PathSelectionByDateIntervalTask(
         interval=Month.parse('2014-03'),
         pattern=['baz']
     )
     # assertEqual is used instead of the deprecated assertEquals alias.
     self.assertEqual(task.pattern, ('baz',))
예제 #15
0
 def test_pattern_from_config(self):
     # Only the interval is supplied; the expected ('foobar',) pattern is
     # presumably provided by configuration defined outside this method --
     # TODO confirm.
     task = PathSelectionByDateIntervalTask(
         interval=Month.parse('2014-03')
     )
     # assertEqual is used instead of the deprecated assertEquals alias.
     self.assertEqual(task.pattern, ('foobar',))
 def test_pattern_from_config(self):
     # Only the interval is supplied; the expected ('foobar', ) pattern is
     # presumably provided by configuration defined outside this method --
     # TODO confirm.
     task = EventLogSelectionTask(interval=Month.parse('2014-03'))
     # assertEqual is used instead of the deprecated assertEquals alias.
     self.assertEqual(task.pattern, ('foobar', ))
 def test_pattern_override(self):
     # An explicitly passed pattern list is expected to be exposed on the
     # task as a one-element tuple holding the same string.
     task = EventLogSelectionTask(interval=Month.parse('2014-03'),
                                  pattern=['baz'])
     # assertEqual is used instead of the deprecated assertEquals alias.
     self.assertEqual(task.pattern, ('baz', ))
예제 #18
0
 def test_default_pattern(self):
     # With no pattern argument, task.pattern is expected to be a tuple of
     # two regexes, each capturing an eight-digit named 'date' group. Where
     # these defaults are defined is not visible from this method.
     task = PathSelectionByDateIntervalTask(interval=Month.parse('2014-03'))
     expected_patterns = (
         r'.*tracking.log-(?P<date>\d{8}).*\.gz',
         r'.*tracking.notalog-(?P<date>\d{8}).*\.gz',
     )
     # assertEqual is used instead of the deprecated assertEquals alias.
     self.assertEqual(task.pattern, expected_patterns)
예제 #19
0
 def test_default_source(self):
     # With no source argument, task.source is expected to be a tuple of two
     # S3 URL prefixes; presumably these come from test configuration
     # defined outside this method -- TODO confirm.
     task = PathSelectionByDateIntervalTask(interval=Month.parse('2014-03'))
     # assertEqual is used instead of the deprecated assertEquals alias.
     self.assertEqual(task.source, ('s3://fake/input/', 's3://fake/input2/'))
예제 #20
0
 def test_pattern_from_config(self):
     # Only the interval is supplied; here the pattern is compared against a
     # plain string. The 'foobar' value presumably comes from configuration
     # set up outside this method -- TODO confirm.
     task = EventLogSelectionTask(
         interval=Month.parse('2014-03')
     )
     # assertEqual is used instead of the deprecated assertEquals alias.
     self.assertEqual(task.pattern, 'foobar')
예제 #21
0
 def test_pattern_override(self):
     # An explicitly passed pattern string is expected to be readable back
     # from the task unchanged.
     task = EventLogSelectionTask(
         interval=Month.parse('2014-03'),
         pattern='baz'
     )
     # assertEqual is used instead of the deprecated assertEquals alias.
     self.assertEqual(task.pattern, 'baz')
예제 #22
0
 def test_default_source(self):
     # With no source argument, task.source is expected to be a tuple of two
     # S3 URL prefixes; presumably these come from test configuration
     # defined outside this method -- TODO confirm.
     task = PathSelectionByDateIntervalTask(interval=Month.parse('2014-03'))
     expected_sources = ('s3://fake/input/', 's3://fake/input2/')
     # assertEqual is used instead of the deprecated assertEquals alias.
     self.assertEqual(task.source, expected_sources)
예제 #23
0
 def test_default_pattern(self):
     # With no pattern argument, task.pattern is expected to be a tuple of
     # two pattern strings. Where the defaults are defined is not visible
     # from this method.
     task = PathSelectionByDateIntervalTask(interval=Month.parse('2014-03'))
     # NOTE(review): these are raw strings with doubled backslashes, so the
     # expected values contain two literal backslash characters before 'd'
     # and '.'. Presumably this mirrors how the configured value is stored
     # -- confirm before changing.
     expected_patterns = (
         r'.*tracking.log-(?P<date>\\d{8}).*\\.gz',
         r'.*tracking.notalog-(?P<date>\\d{8}).*\\.gz',
     )
     # assertEqual is used instead of the deprecated assertEquals alias.
     self.assertEqual(task.pattern, expected_patterns)
예제 #24
0
 def test_pattern_override(self):
     # An explicitly passed pattern list is expected to be exposed on the
     # task as a one-element tuple holding the same string.
     task = PathSelectionByDateIntervalTask(
         interval=Month.parse('2014-03'),
         pattern=['baz']
     )
     # assertEqual is used instead of the deprecated assertEquals alias.
     self.assertEqual(task.pattern, ('baz',))