def _setup(self, tags=()):
    """Attach a fresh Archiver, backed by a brand-new fake DDFS, to the test."""
    fake_ddfs = DDFS()
    self.archiver = Archiver(
        ddfs=fake_ddfs,
        archive_prefix='processed',
        archive_mode=True,
        max_blobs=100,
        tags=tags,
    )
def _setup(self, archive_mode=True, max_blobs=100, archive_some=False):
    """Build an Archiver over a fake DDFS; optionally pre-archive one day's tag.

    With archive_some=True the fake store is seeded so that
    'processed:data:chunk:2011-11-13' already holds one blob tuple.
    """
    store = DDFS()
    if archive_some:
        # Simulate a chunk that was archived on a previous run.
        store.ddfs['processed:data:chunk:2011-11-13'] = [
            ('/b13.1', '/b13.2', '/b13.3'),
        ]
    self.archiver = Archiver(
        ddfs=store,
        archive_prefix='processed',
        archive_mode=archive_mode,
        max_blobs=max_blobs,
        tags=['incoming:data:chunk'],
    )
def _determine_job_blobs(self):
    """Resolve this job's input (tags and urls) and wrap it in an Archiver.

    Notifies observers that blob determination has started, merges the
    rule-level urls (self.urls) into the job-option urls when present,
    logs what will be processed, and returns an Archiver configured from
    the rule's archive settings.

    Returns:
        Archiver: configured with this rule's ddfs, prefix, mode, limits,
        and the resolved tags/urls.
    """
    self._notify(JOB_BLOBS)
    tags = self.job_options.tags
    # Conditional expression binds loosest, so this reads:
    # (job urls + rule urls) if rule urls exist, else job urls alone.
    urls = self.job_options.urls + self.urls if self.urls else self.job_options.urls
    if tags or urls:
        # NOTE(review): [:1000] truncates the *list* to 1000 entries, not
        # the rendered string — confirm that is the intended limit.
        log.info('Processing input: %s...', (tags + urls)[:1000])
    else:
        # Fixed: lazy %-args (was eager '%' formatting), matching the call
        # above and deferring string interpolation until the record is emitted.
        log.info('No input available for %s.', self.rule.name)
    return Archiver(
        ddfs=self.ddfs,
        archive_prefix=self.rule.archive_tag_prefix,
        archive_mode=self.rule.archive,
        max_blobs=self.rule.max_blobs,
        tags=tags,
        urls=urls,
        newest_first=self.rule.newest_first,
    )
def _setup(self, tags=()):
    """Create the Archiver under test, wired to a new in-memory DDFS fake."""
    config = dict(
        ddfs=DDFS(),
        archive_prefix='processed',
        archive_mode=True,
        max_blobs=100,
        tags=tags,
    )
    self.archiver = Archiver(**config)
class TestArchiver(object):
    """Unit tests for Archiver driven against an in-memory fake DDFS."""

    def _setup(self, tags=()):
        # A fresh Archiver per test keeps state from leaking between methods.
        self.archiver = Archiver(
            ddfs=DDFS(),
            archive_prefix='processed',
            archive_mode=True,
            max_blobs=100,
            tags=tags,
        )

    @property
    def fake_tag_map(self):
        # Two tags holding five replica-tuples in total.
        return {
            'tag1': [
                ('blob1.a', 'blob1.b', 'blob1.c'),
                ('blob2.a', 'blob2.b', 'blob2.c'),
            ],
            'tag2': [
                ('blob3.a', 'blob3.b', 'blob3.c'),
                ('blob4.a', 'blob4.b', 'blob4.c'),
                ('blob5.a', 'blob5.b', 'blob5.c'),
            ],
        }

    def test_get_archive_name(self):
        # The incoming prefix is swapped for the configured archive prefix.
        self._setup()
        renamed = self.archiver._get_archive_name('incoming:data:chunk:2012-12-01')
        eq_(renamed, 'processed:data:chunk:2012-12-01')

    def test_blob_count(self):
        # blob_count sums replica-tuples across every tag in the map.
        self._setup()
        self.archiver.tag_map = self.fake_tag_map
        eq_(self.archiver.blob_count, 5)

    def test_job_blobs(self):
        # job_blobs flattens the tag map into one ordered list of tuples.
        self._setup()
        self.archiver.tag_map = self.fake_tag_map
        wanted = [
            ('blob%d.a' % i, 'blob%d.b' % i, 'blob%d.c' % i)
            for i in range(1, 6)
        ]
        eq_(self.archiver.job_blobs, wanted)

    def test_archive(self):
        incoming = 'incoming:data:chunk:2011-11-13'
        archived = 'processed:data:chunk:2011-11-13'
        self._setup(tags=[incoming])

        # Before archiving only the incoming tag exists.
        eq_([incoming], self.archiver.ddfs.list(incoming))
        eq_([], self.archiver.ddfs.list(archived))

        self.archiver.archive()

        # Afterwards both tags exist...
        eq_([incoming], self.archiver.ddfs.list(incoming))
        eq_([archived], self.archiver.ddfs.list(archived))

        # ...and they reference the same blobs.
        blobs = [
            ('/b13.1', '/b13.2', '/b13.3'),
            ('/b13.1.a', '/b13.2.a', '/b13.3.a'),
        ]
        eq_(self.archiver.ddfs.blobs(incoming), blobs)
        eq_(self.archiver.ddfs.blobs(archived), blobs)

    def test_replica_agnostic_archive(self):
        # Archiving should produce exactly one blob tuple under the
        # processed tag, regardless of replica layout.
        self._setup(tags=["incoming:froody:chunk:2012-05-17"])
        self.archiver.archive()
        eq_(len(self.archiver.ddfs.blobs("processed:froody:chunk:2012-05-17")), 1)
class TestArchiver(object):
    """Tests for Archiver behaviour over a fake DDFS backend."""

    def _setup(self, tags=()):
        # Rebuild the subject-under-test so each test starts clean.
        self.archiver = Archiver(
            ddfs=DDFS(),
            archive_prefix='processed',
            archive_mode=True,
            max_blobs=100,
            tags=tags,
        )

    def test_get_archive_name(self):
        self._setup()
        source_tag = 'incoming:data:chunk:2012-12-01'
        # Only the leading prefix segment changes.
        eq_(self.archiver._get_archive_name(source_tag),
            'processed:data:chunk:2012-12-01')

    def test_blob_count(self):
        self._setup()
        self.archiver.tag_map = self.fake_tag_map
        # 2 tuples under tag1 + 3 under tag2.
        eq_(self.archiver.blob_count, 5)

    def test_job_blobs(self):
        self._setup()
        self.archiver.tag_map = self.fake_tag_map
        flattened = [
            ('blob1.a', 'blob1.b', 'blob1.c'),
            ('blob2.a', 'blob2.b', 'blob2.c'),
            ('blob3.a', 'blob3.b', 'blob3.c'),
            ('blob4.a', 'blob4.b', 'blob4.c'),
            ('blob5.a', 'blob5.b', 'blob5.c'),
        ]
        eq_(self.archiver.job_blobs, flattened)

    @property
    def fake_tag_map(self):
        # Deterministic fixture: two tags, five replica-tuples.
        tag1_blobs = [
            ('blob1.a', 'blob1.b', 'blob1.c'),
            ('blob2.a', 'blob2.b', 'blob2.c'),
        ]
        tag2_blobs = [
            ('blob3.a', 'blob3.b', 'blob3.c'),
            ('blob4.a', 'blob4.b', 'blob4.c'),
            ('blob5.a', 'blob5.b', 'blob5.c'),
        ]
        return {'tag1': tag1_blobs, 'tag2': tag2_blobs}

    def test_archive(self):
        incoming_tag = 'incoming:data:chunk:2011-11-13'
        archived_tag = 'processed:data:chunk:2011-11-13'
        self._setup(tags=[incoming_tag])

        # Precondition: the archived tag does not exist yet.
        eq_([incoming_tag], self.archiver.ddfs.list(incoming_tag))
        eq_([], self.archiver.ddfs.list(archived_tag))

        self.archiver.archive()

        # Postcondition: the archived tag appears alongside the incoming one.
        eq_([incoming_tag], self.archiver.ddfs.list(incoming_tag))
        eq_([archived_tag], self.archiver.ddfs.list(archived_tag))

        # Both tags must resolve to the identical blob tuples.
        expected_blobs = [
            ('/b13.1', '/b13.2', '/b13.3'),
            ('/b13.1.a', '/b13.2.a', '/b13.3.a'),
        ]
        eq_(self.archiver.ddfs.blobs(incoming_tag), expected_blobs)
        eq_(self.archiver.ddfs.blobs(archived_tag), expected_blobs)

    def test_replica_agnostic_archive(self):
        incoming_tag = "incoming:froody:chunk:2012-05-17"
        processed_tag = "processed:froody:chunk:2012-05-17"
        self._setup(tags=[incoming_tag])
        self.archiver.archive()
        # Exactly one blob tuple lands under the processed tag.
        eq_(len(self.archiver.ddfs.blobs(processed_tag)), 1)