Esempio n. 1
0
    def test_requires(self, connect_s3_mock):
        """Check that requires() yields one external URL per source for each matching March 2014 log."""

        class FakeKey(object):
            """Stand-in for the key objects boto returns when an S3 bucket is listed."""
            def __init__(self, path):
                self.key = path
                self.size = 10

        # The bucket handed out by the mocked connection lists the sample keys.
        fake_bucket = connect_s3_mock.return_value.get_bucket.return_value
        fake_bucket.list.return_value = [FakeKey(key_path) for key_path in self.SAMPLE_KEY_PATHS]

        march_2014 = Month.parse('2014-03')
        no_expansion = datetime.timedelta(0)
        task = PathSelectionByDateIntervalTask(
            source=self.SOURCE,
            interval=march_2014,
            pattern=[r'.*?FakeServerGroup/tracking.log-(?P<date>\d{8}).*\.gz'],
            expand_interval=no_expansion,
        )

        actual_requirements = task.requires()

        # Each expected path is paired with every configured source prefix.
        expected_requirements = []
        for log_path in (
            'FakeServerGroup/tracking.log-20140318.gz',
            'FakeServerGroup/tracking.log-20140319-1395256622.gz',
        ):
            for source_url in self.SOURCE:
                expected_requirements.append(UncheckedExternalURL(source_url + log_path))

        self.assertItemsEqual(actual_requirements, expected_requirements)
    def test_requires(self, connect_s3_mock):
        """Check that requires() yields one external URL per source for each matching March 2014 log."""

        class FakeKey(object):
            """Stand-in for the key objects boto returns when an S3 bucket is listed."""
            def __init__(self, path):
                self.key = path
                self.size = 10

        # Here the injected mock itself plays the role of the S3 connection.
        fake_bucket = connect_s3_mock.get_bucket.return_value
        fake_bucket.list.return_value = [FakeKey(key_path) for key_path in self.SAMPLE_KEY_PATHS]

        march_2014 = Month.parse('2014-03')
        no_expansion = datetime.timedelta(0)
        task = PathSelectionByDateIntervalTask(
            source=self.SOURCE,
            interval=march_2014,
            pattern=[r'.*?FakeServerGroup/tracking.log-(?P<date>\d{8}).*\.gz'],
            expand_interval=no_expansion,
        )

        actual_requirements = task.requires()

        # Each expected path is paired with every configured source prefix.
        expected_requirements = []
        for log_path in (
            'FakeServerGroup/tracking.log-20140318.gz',
            'FakeServerGroup/tracking.log-20140319-1395256622.gz',
        ):
            for source_url in self.SOURCE:
                expected_requirements.append(UncheckedExternalURL(source_url + log_path))

        self.assertItemsEqual(actual_requirements, expected_requirements)
Esempio n. 3
0
 def test_timestamped_urls(self):
     """Check timestamp-based selection for March 2014, without and with a one-day expansion."""
     timestamp_pattern = r'.*?FakeServerGroup/tracking.log-.*-(?P<timestamp>\d{10})\.gz'

     # (expand_interval, paths expected to be the only matches)
     scenarios = [
         (datetime.timedelta(0), [
             'FakeServerGroup/tracking.log-20140319-1395256622.gz',
         ]),
         (datetime.timedelta(1), [
             'FakeServerGroup/tracking.log-20140319-1395256622.gz',
             'FakeServerGroup/tracking.log-20140401-1396379384.gz',
         ]),
     ]

     for expansion, expected_matches in scenarios:
         task = PathSelectionByDateIntervalTask(
             source=self.SOURCE,
             interval=Month.parse('2014-03'),
             pattern=[timestamp_pattern],
             expand_interval=expansion,
         )
         self.assert_only_matched(task, expected_matches)
    def test_edge_urls(self):
        """Expect only the 20140324 edge-server log to match the edge pattern for March 2014."""
        selection_args = dict(
            source=self.SOURCE,
            interval=Month.parse('2014-03'),
            pattern=[r'.*?FakeEdgeServerGroup/tracking.log-(?P<date>\d{8}).*\.gz'],
            expand_interval=datetime.timedelta(0),
        )
        edge_task = PathSelectionByDateIntervalTask(**selection_args)

        expected_matches = ['FakeEdgeServerGroup/tracking.log-20140324-1395670621.gz']
        self.assert_only_matched(edge_task, expected_matches)
 def test_timestamped_urls(self):
     """Check timestamp-based selection for March 2014, without and with a one-day expansion."""
     timestamp_pattern = r'.*?FakeServerGroup/tracking.log-.*-(?P<timestamp>\d{10})\.gz'

     # No expansion: a single log is expected to match.
     strict_task = PathSelectionByDateIntervalTask(
         source=self.SOURCE,
         interval=Month.parse('2014-03'),
         pattern=[timestamp_pattern],
         expand_interval=datetime.timedelta(0),
     )
     strict_matches = ['FakeServerGroup/tracking.log-20140319-1395256622.gz']
     self.assert_only_matched(strict_task, strict_matches)

     # One-day expansion: the 20140401 log is expected as well.
     expanded_task = PathSelectionByDateIntervalTask(
         source=self.SOURCE,
         interval=Month.parse('2014-03'),
         pattern=[timestamp_pattern],
         expand_interval=datetime.timedelta(1),
     )
     expanded_matches = [
         'FakeServerGroup/tracking.log-20140319-1395256622.gz',
         'FakeServerGroup/tracking.log-20140401-1396379384.gz',
     ]
     self.assert_only_matched(expanded_task, expanded_matches)
Esempio n. 6
0
    def test_edge_urls(self):
        """Expect only the 20140324 edge-server log to match the edge pattern for March 2014."""
        edge_pattern = r'.*?FakeEdgeServerGroup/tracking.log-(?P<date>\d{8}).*\.gz'
        march_2014 = Month.parse('2014-03')
        no_expansion = datetime.timedelta(0)

        edge_task = PathSelectionByDateIntervalTask(
            source=self.SOURCE,
            interval=march_2014,
            pattern=[edge_pattern],
            expand_interval=no_expansion,
        )

        expected_matches = ['FakeEdgeServerGroup/tracking.log-20140324-1395670621.gz']
        self.assert_only_matched(edge_task, expected_matches)
Esempio n. 7
0
    def test_filtering_of_urls(self):
        """Expect only the two March 2014 FakeServerGroup logs to be selected."""
        # In this version the pattern is passed as a single string, not a list.
        server_group_pattern = r'.*?FakeServerGroup/tracking.log-(?P<date>\d{8}).*\.gz'
        march_2014 = Month.parse('2014-03')
        no_expansion = datetime.timedelta(0)

        selection_task = EventLogSelectionTask(
            source=self.SOURCE,
            interval=march_2014,
            pattern=server_group_pattern,
            expand_interval=no_expansion,
        )

        expected_matches = [
            'FakeServerGroup/tracking.log-20140318.gz',
            'FakeServerGroup/tracking.log-20140319-1395256622.gz',
        ]
        self.assert_only_matched(selection_task, expected_matches)
    def test_filtering_of_urls(self):
        """Expect only the two March 2014 FakeServerGroup logs to be selected."""
        server_group_pattern = r'.*?FakeServerGroup/tracking.log-(?P<date>\d{8}).*\.gz'
        march_2014 = Month.parse('2014-03')
        no_expansion = datetime.timedelta(0)

        selection_task = EventLogSelectionTask(
            source=self.SOURCE,
            interval=march_2014,
            pattern=[server_group_pattern],
            expand_interval=no_expansion,
        )

        expected_matches = [
            'FakeServerGroup/tracking.log-20140318.gz',
            'FakeServerGroup/tracking.log-20140319-1395256622.gz',
        ]
        self.assert_only_matched(selection_task, expected_matches)
Esempio n. 9
0
    def test_expanded_interval(self):
        """Expect a one-day expansion of March 2014 to also select the 20140228 and 20140401 logs."""
        one_day = datetime.timedelta(1)
        march_2014 = Month.parse('2014-03')

        expanded_task = PathSelectionByDateIntervalTask(
            source=self.SOURCE,
            interval=march_2014,
            pattern=[r'.*?FakeServerGroup/tracking.log-(?P<date>\d{8}).*\.gz'],
            expand_interval=one_day,
        )

        expected_matches = [
            'FakeServerGroup/tracking.log-20140228.gz',
            'FakeServerGroup/tracking.log-20140318.gz',
            'FakeServerGroup/tracking.log-20140319-1395256622.gz',
            'FakeServerGroup/tracking.log-20140401-1395254574.gz',
        ]
        self.assert_only_matched(expanded_task, expected_matches)
    def test_expanded_interval(self):
        """Expect a one-day expansion of March 2014 to also select the 20140228 and 20140401 logs."""
        selection_args = dict(
            source=self.SOURCE,
            interval=Month.parse('2014-03'),
            pattern=[r'.*?FakeServerGroup/tracking.log-(?P<date>\d{8}).*\.gz'],
            expand_interval=datetime.timedelta(1),
        )
        expanded_task = PathSelectionByDateIntervalTask(**selection_args)

        expected_matches = [
            'FakeServerGroup/tracking.log-20140228.gz',
            'FakeServerGroup/tracking.log-20140318.gz',
            'FakeServerGroup/tracking.log-20140319-1395256622.gz',
            'FakeServerGroup/tracking.log-20140401-1395254574.gz',
        ]
        self.assert_only_matched(expanded_task, expected_matches)
Esempio n. 11
0
    def test_multiple_filtering_of_urls(self):
        """Expect the matches of three patterns to be selected together for March 2014."""
        # Two dated patterns plus one pattern that carries no date group.
        combined_patterns = [
            r'.*?FakeServerGroup/tracking.log-(?P<date>\d{8}).*\.gz',
            r'.*?FakeEdgeServerGroup/tracking.log-(?P<date>\d{8}).*\.gz',
            r'.*tracking_\d{3,5}\.log\.gz$',
        ]
        march_2014 = Month.parse('2014-03')
        no_expansion = datetime.timedelta(0)

        multi_task = PathSelectionByDateIntervalTask(
            source=self.SOURCE,
            interval=march_2014,
            pattern=combined_patterns,
            expand_interval=no_expansion,
        )

        expected_matches = [
            'FakeServerGroup/tracking.log-20140318.gz',
            'FakeServerGroup/tracking.log-20140319-1395256622.gz',
            'FakeEdgeServerGroup/tracking.log-20140324-1395670621.gz',
            'FakeOldServerGroup3/tracking_14602.log.gz',
        ]
        self.assert_only_matched(multi_task, expected_matches)
Esempio n. 12
0
    def test_multiple_filtering_of_urls(self):
        """Expect the matches of three patterns to be selected together for March 2014."""
        selection_args = dict(
            source=self.SOURCE,
            interval=Month.parse('2014-03'),
            # Two dated patterns plus one pattern that carries no date group.
            pattern=[
                r'.*?FakeServerGroup/tracking.log-(?P<date>\d{8}).*\.gz',
                r'.*?FakeEdgeServerGroup/tracking.log-(?P<date>\d{8}).*\.gz',
                r'.*tracking_\d{3,5}\.log\.gz$',
            ],
            expand_interval=datetime.timedelta(0),
        )
        multi_task = PathSelectionByDateIntervalTask(**selection_args)

        expected_matches = [
            'FakeServerGroup/tracking.log-20140318.gz',
            'FakeServerGroup/tracking.log-20140319-1395256622.gz',
            'FakeEdgeServerGroup/tracking.log-20140324-1395670621.gz',
            'FakeOldServerGroup3/tracking_14602.log.gz',
        ]
        self.assert_only_matched(multi_task, expected_matches)
Esempio n. 13
0
 def test_pattern_from_config(self):
     """Check that a task built without an explicit pattern exposes ``('foobar',)``.

     The value is presumably supplied by configuration set up outside this
     method (not visible here).
     """
     task = PathSelectionByDateIntervalTask(
         interval=Month.parse('2014-03')
     )
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(task.pattern, ('foobar',))
Esempio n. 14
0
 def test_pattern_override(self):
     """Check that an explicitly passed pattern list is exposed as the tuple ``('baz',)``."""
     task = PathSelectionByDateIntervalTask(
         interval=Month.parse('2014-03'),
         pattern=['baz']
     )
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(task.pattern, ('baz',))
Esempio n. 15
0
 def test_pattern_from_config(self):
     """Check that a task built without an explicit pattern exposes ``('foobar',)``.

     The value is presumably supplied by configuration set up outside this
     method (not visible here).
     """
     task = PathSelectionByDateIntervalTask(
         interval=Month.parse('2014-03')
     )
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(task.pattern, ('foobar',))
 def test_pattern_from_config(self):
     """Check that an EventLogSelectionTask built without a pattern exposes ``('foobar',)``.

     The value is presumably supplied by configuration set up outside this
     method (not visible here).
     """
     task = EventLogSelectionTask(interval=Month.parse('2014-03'))
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(task.pattern, ('foobar', ))
 def test_pattern_override(self):
     """Check that an explicitly passed pattern list is exposed as the tuple ``('baz',)``."""
     task = EventLogSelectionTask(interval=Month.parse('2014-03'),
                                  pattern=['baz'])
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(task.pattern, ('baz', ))
Esempio n. 18
0
 def test_default_pattern(self):
     """Check the two-element pattern tuple a task exposes when no pattern is passed.

     Where the default comes from is not visible in this method.
     """
     task = PathSelectionByDateIntervalTask(interval=Month.parse('2014-03'))
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(task.pattern, (
         r'.*tracking.log-(?P<date>\d{8}).*\.gz',
         r'.*tracking.notalog-(?P<date>\d{8}).*\.gz',
     ))
Esempio n. 19
0
 def test_default_source(self):
     """Check the two-element source tuple a task exposes when no source is passed.

     Where the default comes from is not visible in this method.
     """
     task = PathSelectionByDateIntervalTask(interval=Month.parse('2014-03'))
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(task.source, ('s3://fake/input/', 's3://fake/input2/'))
Esempio n. 20
0
 def test_pattern_from_config(self):
     """Check that an EventLogSelectionTask built without a pattern exposes ``'foobar'``.

     The value is presumably supplied by configuration set up outside this
     method (not visible here). In this version the pattern is a plain string.
     """
     task = EventLogSelectionTask(
         interval=Month.parse('2014-03')
     )
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(task.pattern, 'foobar')
Esempio n. 21
0
 def test_pattern_override(self):
     """Check that an explicitly passed pattern string ``'baz'`` is exposed unchanged."""
     task = EventLogSelectionTask(
         interval=Month.parse('2014-03'),
         pattern='baz'
     )
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(task.pattern, 'baz')
Esempio n. 22
0
 def test_default_source(self):
     """Check the two-element source tuple a task exposes when no source is passed.

     Where the default comes from is not visible in this method.
     """
     task = PathSelectionByDateIntervalTask(interval=Month.parse('2014-03'))
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(task.source, ('s3://fake/input/', 's3://fake/input2/'))
Esempio n. 23
0
 def test_default_pattern(self):
     """Check the two-element pattern tuple a task exposes when no pattern is passed.

     Where the default comes from is not visible in this method.
     """
     task = PathSelectionByDateIntervalTask(interval=Month.parse('2014-03'))
     # NOTE(review): these raw strings contain literal doubled backslashes;
     # confirm that matches how the default value is actually stored/parsed.
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(task.pattern, (
         r'.*tracking.log-(?P<date>\\d{8}).*\\.gz',
         r'.*tracking.notalog-(?P<date>\\d{8}).*\\.gz',
     ))
Esempio n. 24
0
 def test_pattern_override(self):
     """Check that an explicitly passed pattern list is exposed as the tuple ``('baz',)``."""
     task = PathSelectionByDateIntervalTask(
         interval=Month.parse('2014-03'),
         pattern=['baz']
     )
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(task.pattern, ('baz',))