Example #1
def _setup(self, tags=()):
    # Test helper: build an Archiver against a fresh fake DDFS.
    # Archiver and DDFS are assumed importable from the project under test.
    self.archiver = Archiver(
        ddfs=DDFS(),
        archive_prefix='processed',
        archive_mode=True,
        max_blobs=100,
        tags=tags)
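Read against the tests in the later examples, the keyword arguments appear to work as follows (inferred from the assertions, not from documentation): archive_prefix replaces the incoming prefix when the archive tag name is derived, archive_mode switches archiving on or off, max_blobs caps how many blobs one run picks up, and tags names the DDFS tags to read from.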
Example #2
def _setup(self, archive_mode=True, max_blobs=100, archive_some=False):
    ddfs = DDFS()
    if archive_some:
        # Pre-populate the fake DDFS so one incoming tag already has an
        # archived counterpart; the blobs tuple is stored as a single
        # replica set under the processed tag.
        blobs = ('/b13.1', '/b13.2', '/b13.3')
        ddfs.ddfs['processed:data:chunk:2011-11-13'] = [blobs]
    self.archiver = Archiver(ddfs=ddfs,
                             archive_prefix='processed',
                             archive_mode=archive_mode,
                             max_blobs=max_blobs,
                             tags=['incoming:data:chunk'])
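The direct write to ddfs.ddfs above shows that DDFS in these tests is a dict-backed test double rather than a live cluster client. Below is a minimal sketch of what that double appears to look like, modeling only the pieces these examples touch (the ddfs dict, list, and blobs); the project's real fake evidently also seeds incoming tags with sample blobs such as /b13.1, which this sketch omits:

class DDFS(object):
    # Dict-backed stand-in for a DDFS client (a sketch, not the real fake).

    def __init__(self):
        # tag name -> list of replica tuples, one tuple per blob
        self.ddfs = {}

    def list(self, prefix=''):
        # all stored tags starting with the prefix, like a real DDFS listing
        return sorted(tag for tag in self.ddfs if tag.startswith(prefix))

    def blobs(self, tag):
        # blob tuples stored under a tag; unknown tags yield no blobs
        return self.ddfs.get(tag, [])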
Example #3
def _determine_job_blobs(self):
    self._notify(JOB_BLOBS)
    tags = self.job_options.tags
    # Combine the job-level URLs with any extra URLs set on the instance.
    urls = self.job_options.urls
    if self.urls:
        urls = urls + self.urls
    if tags or urls:
        # Log at most the first 1000 inputs to keep the message bounded.
        log.info('Processing input: %s...', (tags + urls)[:1000])
    else:
        log.info('No input available for %s.', self.rule.name)
    archiver = Archiver(
        ddfs=self.ddfs,
        archive_prefix=self.rule.archive_tag_prefix,
        archive_mode=self.rule.archive,
        max_blobs=self.rule.max_blobs,
        tags=tags,
        urls=urls,
        newest_first=self.rule.newest_first,
    )
    return archiver
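A hypothetical caller of this method, sketched from the Archiver attributes the tests below exercise (blob_count and job_blobs); _run_rule and schedule_input are illustrative names, not part of the source:

def _run_rule(self):
    archiver = self._determine_job_blobs()
    if archiver.blob_count:
        for replica_tuple in archiver.job_blobs:
            # each entry is one blob's tuple of replica URLs
            schedule_input(replica_tuple)  # hypothetical downstream hook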
Example #4
# Assumed imports: eq_ comes from nose.tools; the module paths for Archiver
# and the fake DDFS belong to the project under test and are not shown here.
from nose.tools import eq_


class TestArchiver(object):
    def _setup(self, tags=()):
        self.archiver = Archiver(ddfs=DDFS(),
                                 archive_prefix='processed',
                                 archive_mode=True,
                                 max_blobs=100,
                                 tags=tags)

    def test_get_archive_name(self):
        self._setup()
        tag = 'incoming:data:chunk:2012-12-01'
        actual = self.archiver._get_archive_name(tag)
        eq_(actual, 'processed:data:chunk:2012-12-01')

    def test_blob_count(self):
        self._setup()
        self.archiver.tag_map = self.fake_tag_map
        eq_(self.archiver.blob_count, 5)

    def test_job_blobs(self):
        self._setup()
        self.archiver.tag_map = self.fake_tag_map
        expected = [('blob1.a', 'blob1.b', 'blob1.c'),
                    ('blob2.a', 'blob2.b', 'blob2.c'),
                    ('blob3.a', 'blob3.b', 'blob3.c'),
                    ('blob4.a', 'blob4.b', 'blob4.c'),
                    ('blob5.a', 'blob5.b', 'blob5.c')]
        eq_(self.archiver.job_blobs, expected)

    @property
    def fake_tag_map(self):
        return {
            'tag1': [('blob1.a', 'blob1.b', 'blob1.c'),
                     ('blob2.a', 'blob2.b', 'blob2.c')],
            'tag2': [('blob3.a', 'blob3.b', 'blob3.c'),
                     ('blob4.a', 'blob4.b', 'blob4.c'),
                     ('blob5.a', 'blob5.b', 'blob5.c')]
        }

    def test_archive(self):
        incoming_tag = 'incoming:data:chunk:2011-11-13'
        archived_tag = 'processed:data:chunk:2011-11-13'

        # no archived tags before the archive call
        self._setup(tags=[incoming_tag])
        eq_([incoming_tag], self.archiver.ddfs.list(incoming_tag))
        eq_([], self.archiver.ddfs.list(archived_tag))

        # one archived tag after the archive call
        self.archiver.archive()
        eq_([incoming_tag], self.archiver.ddfs.list(incoming_tag))
        eq_([archived_tag], self.archiver.ddfs.list(archived_tag))

        # incoming and archived tags point to the same blobs
        expected_blobs = [('/b13.1', '/b13.2', '/b13.3'),
                          ('/b13.1.a', '/b13.2.a', '/b13.3.a')]
        incoming_blobs = self.archiver.ddfs.blobs(incoming_tag)
        archived_blobs = self.archiver.ddfs.blobs(archived_tag)
        eq_(incoming_blobs, expected_blobs)
        eq_(archived_blobs, expected_blobs)

    def test_replica_agnostic_archive(self):
        incoming_tag = 'incoming:froody:chunk:2012-05-17'
        processed_tag = 'processed:froody:chunk:2012-05-17'

        self._setup(tags=[incoming_tag])

        self.archiver.archive()

        # one archive entry per blob, however many replicas back it
        eq_(len(self.archiver.ddfs.blobs(processed_tag)), 1)
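Judging from the fake_tag_map assertions above, job_blobs concatenates every tag's blob tuples and blob_count counts them; a standalone sketch of that inferred relationship:

tag_map = {'tag1': [('blob1.a', 'blob1.b', 'blob1.c')],
           'tag2': [('blob2.a', 'blob2.b', 'blob2.c'),
                    ('blob3.a', 'blob3.b', 'blob3.c')]}
# flatten in tag order: one replica tuple per blob
job_blobs = [blobs for tag in sorted(tag_map) for blobs in tag_map[tag]]
blob_count = len(job_blobs)  # -> 3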