class TestPlainContent(BaseTestCase):
    """Functional tests for contents stored with replication policies.

    Each test creates a content in a dedicated container (created in
    ``setUp``) using the "SINGLE", "TWOCOPIES" or "THREECOPIES" storage
    policy, then checks creation, chunk rebuild or fetch, optionally after
    deleting some chunks from the rawx services.
    """

    def setUp(self):
        """Create the clients and a fresh container for the test."""
        super(TestPlainContent, self).setUp()

        if len(self.conf['services']['rawx']) < 4:
            self.skipTest(
                "Plain tests needs more than 3 rawx to run")

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient(self.conf)
        self.container_name = "TestPlainContent-%f" % time.time()
        self.container_client.container_create(account=self.account,
                                               reference=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()
        self.content = random_str(64)
        self.stgpol = "SINGLE"
        self.stgpol_twocopies = "TWOCOPIES"
        self.stgpol_threecopies = "THREECOPIES"

    def _test_create(self, stgpol, data_size):
        """Create a content of `data_size` bytes and verify every chunk.

        :param stgpol: name of the storage policy to create the content with
        :param data_size: number of random bytes to upload
        """
        data = random_data(data_size)
        content = self.content_factory.new(self.container_id, self.content,
                                           len(data), stgpol)

        content.create(BytesIO(data))

        meta, chunks = self.container_client.content_locate(
            cid=self.container_id, content=content.content_id)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], stgpol)
        self.assertEqual(meta['name'], self.content)

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
        if metachunk_nb == 0:
            metachunk_nb = 1  # special case for empty content

        chunks = ChunksHelper(chunks)

        # TODO(review): the number of copies is hard-coded per policy name.
        if stgpol == self.stgpol_threecopies:
            nb_copy = 3
        elif stgpol == self.stgpol_twocopies:
            nb_copy = 2
        elif stgpol == self.stgpol:
            nb_copy = 1
        else:
            # Fail with a clear message rather than a NameError below.
            self.fail("Unexpected storage policy: %s" % stgpol)

        self.assertEqual(len(chunks), metachunk_nb * nb_copy)

        for pos in range(metachunk_nb):
            chunks_at_pos = chunks.filter(pos=pos)
            self.assertEqual(len(chunks_at_pos), nb_copy)

            data_begin = pos * self.chunk_size
            data_end = pos * self.chunk_size + self.chunk_size
            chunk_hash = md5_data(data[data_begin:data_end])

            for chunk in chunks_at_pos:
                # Use a dedicated name: reusing `meta` here used to turn
                # the content_id check below into a tautology.
                chunk_meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(md5_stream(stream), chunk_hash)
                self.assertEqual(chunk_meta['content_path'], self.content)
                self.assertEqual(chunk_meta['container_id'],
                                 self.container_id)
                self.assertEqual(chunk_meta['content_id'],
                                 content.content_id)
                self.assertEqual(chunk_meta['chunk_id'], chunk.id)
                self.assertEqual(chunk_meta['chunk_pos'], str(pos))
                # Check that chunk data matches chunk hash from xattr
                self.assertEqual(chunk_meta['chunk_hash'], chunk_hash)
                # Check that chunk data matches chunk hash from database
                self.assertEqual(chunk.checksum, chunk_hash)
                full_path = encode_fullpath(
                    self.account, self.container_name, self.content,
                    chunk_meta['content_version'], chunk_meta['content_id'])
                self.assertEqual(chunk_meta['full_path'], full_path)
                self.assertEqual(chunk_meta['oio_version'], '4.2')

    def test_twocopies_create_0_byte(self):
        self._test_create(self.stgpol_twocopies, 0)

    def test_twocopies_create_1_byte(self):
        self._test_create(self.stgpol_twocopies, 1)

    def test_twocopies_create_chunksize_bytes(self):
        self._test_create(self.stgpol_twocopies, self.chunk_size)

    def test_twocopies_create_chunksize_plus_1_bytes(self):
        self._test_create(self.stgpol_twocopies, self.chunk_size + 1)

    def test_twocopies_create_6294503_bytes(self):
        self._test_create(self.stgpol_twocopies, 6294503)

    def test_single_create_0_byte(self):
        self._test_create(self.stgpol, 0)

    def test_single_create_chunksize_plus_1_bytes(self):
        self._test_create(self.stgpol, self.chunk_size + 1)

    def _new_content(self, stgpol, data, broken_pos_list=None):
        """Upload `data`, then delete the chunks listed in `broken_pos_list`.

        :param stgpol: name of the storage policy to use
        :param data: bytes to upload
        :param broken_pos_list: iterable of ``(pos, idx)`` pairs selecting
            the `idx`-th chunk at position `pos`; None means no broken chunk
        :returns: a tuple ``(content, broken_chunks_info)`` where `content`
            is freshly loaded from the factory and `broken_chunks_info` maps
            ``pos -> idx -> dict`` describing each chunk before its deletion
        """
        old_content = self.content_factory.new(
            self.container_id, self.content, len(data), stgpol)

        old_content.create(BytesIO(data))

        broken_chunks_info = {}
        for pos, idx in broken_pos_list or ():
            c = old_content.chunks.filter(pos=pos)[idx]
            # Download the chunk before deleting it, to compare it later
            # with the rebuilt one.
            meta, stream = self.blob_client.chunk_get(c.url)
            broken_chunks_info.setdefault(pos, {})[idx] = {
                "url": c.url,
                "id": c.id,
                "hash": c.checksum,
                "dl_meta": meta,
                "dl_hash": md5_stream(stream)
            }
            self.blob_client.chunk_delete(c.url)

        # get the new structure of the uploaded content
        return (self.content_factory.get(
            self.container_id, old_content.content_id), broken_chunks_info)

    def _rebuild_and_check(self, content, broken_chunks_info,
                           full_rebuild_pos, allow_frozen_container=False):
        """Rebuild one broken chunk and compare it with the original one.

        :param content: content object whose chunk must be rebuilt
        :param broken_chunks_info: mapping returned by `_new_content`
        :param full_rebuild_pos: ``(pos, idx)`` of the chunk to rebuild
        :param allow_frozen_container: forwarded to `rebuild_chunk`
        """
        rebuild_pos, rebuild_idx = full_rebuild_pos
        rebuild_chunk_info = broken_chunks_info[rebuild_pos][rebuild_idx]
        content.rebuild_chunk(rebuild_chunk_info["id"],
                              allow_frozen_container=allow_frozen_container)

        # get the new structure of the content
        rebuilt_content = self.content_factory.get(self.container_id,
                                                   content.content_id)

        # find the rebuilt chunk
        for c in rebuilt_content.chunks.filter(pos=rebuild_pos):
            if len(content.chunks.filter(id=c.id)) > 0:
                # not the rebuilt chunk
                # if this chunk is broken, it must not have been rebuilt
                for b_c_i in broken_chunks_info[rebuild_pos].values():
                    if c.id == b_c_i["id"]:
                        with ExpectedException(NotFound):
                            _, _ = self.blob_client.chunk_get(c.url)
                continue
            meta, stream = self.blob_client.chunk_get(c.url)
            self.assertEqual(meta["chunk_id"], c.id)
            self.assertEqual(md5_stream(stream),
                             rebuild_chunk_info["dl_hash"])
            self.assertEqual(c.checksum, rebuild_chunk_info["hash"])
            self.assertThat(c.url, NotEquals(rebuild_chunk_info["url"]))
            # The chunk id is the only metadata not compared for equality.
            del meta["chunk_id"]
            del rebuild_chunk_info["dl_meta"]["chunk_id"]
            self.assertEqual(meta, rebuild_chunk_info["dl_meta"])

    def _test_rebuild(self, stgpol, data_size, broken_pos_list,
                      full_rebuild_pos):
        """Create a content, break some chunks, rebuild one and check it."""
        data = random_data(data_size)
        content, broken_chunks_info = self._new_content(
            stgpol, data, broken_pos_list)

        self._rebuild_and_check(content, broken_chunks_info, full_rebuild_pos)

    def test_2copies_content_0_byte_1broken_rebuild_pos_0_idx_0(self):
        self._test_rebuild(self.stgpol_twocopies, 0, [(0, 0)], (0, 0))

    def test_2copies_content_1_byte_1broken_rebuild_pos_0_idx_1(self):
        self._test_rebuild(self.stgpol_twocopies, 1, [(0, 1)], (0, 1))

    def test_3copies_content_chunksize_bytes_2broken_rebuild_pos_0_idx_1(self):
        if len(self.conf['services']['rawx']) <= 3:
            self.skipTest("Need more than 3 rawx")
        self._test_rebuild(self.stgpol_threecopies, self.chunk_size,
                           [(0, 0), (0, 1)], (0, 1))

    def test_3copies_content_2xchksize_bytes_2broken_rebuild_pos_1_idx_2(self):
        self._test_rebuild(self.stgpol_threecopies, 2 * self.chunk_size,
                           [(1, 0), (1, 2)], (1, 2))

    def test_2copies_content_0_byte_2broken_rebuild_pos_0_idx_0(self):
        with ExpectedException(UnrecoverableContent):
            self._test_rebuild(
                self.stgpol_twocopies, 0, [(0, 0), (0, 1)], (0, 0))

    def test_rebuild_chunk_in_frozen_container(self):
        """Rebuild is refused in a frozen container unless explicitly allowed.
        """
        data = random_data(self.chunk_size)
        content, broken_chunks_info = self._new_content(
            self.stgpol_twocopies, data, [(0, 0)])
        system = dict()
        system['sys.status'] = str(OIO_DB_FROZEN)
        self.container_client.container_set_properties(
            self.account, self.container_name, None, system=system)

        try:
            full_rebuild_pos = (0, 0)
            rebuild_pos, rebuild_idx = full_rebuild_pos
            rebuild_chunk_info = broken_chunks_info[rebuild_pos][rebuild_idx]
            self.assertRaises(ServiceBusy,
                              content.rebuild_chunk, rebuild_chunk_info["id"])
        finally:
            # Always restore the container status, even on failure.
            system['sys.status'] = str(OIO_DB_ENABLED)
            self.container_client.container_set_properties(
                self.account, self.container_name, None, system=system)

        self._rebuild_and_check(content, broken_chunks_info, full_rebuild_pos,
                                allow_frozen_container=True)

    def _test_fetch(self, stgpol, data_size, broken_pos_list):
        """Fetch a content with some broken chunks and compare the data.

        :param stgpol: name of the storage policy to use
        :param data_size: number of random bytes to upload
        :param broken_pos_list: list of ``(pos, idx)`` chunks to delete
            before fetching
        """
        data = random_data(data_size)
        content, _ = self._new_content(stgpol, data, broken_pos_list)

        # The data is uploaded through BytesIO, so join as bytes.
        fetched_data = b"".join(content.fetch())

        self.assertEqual(fetched_data, data)

        for pos, idx in broken_pos_list:
            # check nothing has been rebuilt
            c = content.chunks.filter(pos=pos)[idx]
            self.assertRaises(NotFound, self.blob_client.chunk_delete, c.url)

    def test_twocopies_fetch_content_0_byte_without_broken_chunks(self):
        self._test_fetch(self.stgpol_twocopies, 0, [])

    def test_twocopies_fetch_content_0_byte_with_broken_0_0(self):
        self._test_fetch(self.stgpol_twocopies, 0, [(0, 0)])

    def test_twocopies_fetch_content_1_byte_without_broken_chunks(self):
        self._test_fetch(self.stgpol_twocopies, 1, [])

    def test_twocopies_fetch_content_1_byte_with_broken_0_0(self):
        self._test_fetch(self.stgpol_twocopies, 1, [(0, 0)])

    def test_twocopies_fetch_chunksize_bytes_without_broken_chunks(self):
        self._test_fetch(self.stgpol_twocopies, self.chunk_size, [])

    def test_twocopies_fetch_2xchuksize_bytes_with_broken_0_0_and_1_0(self):
        self._test_fetch(
            self.stgpol_twocopies, self.chunk_size * 2, [(0, 0), (1, 0)])

    def test_twocopies_fetch_content_chunksize_bytes_2_broken_chunks(self):
        data = random_data(self.chunk_size)
        content, _ = self._new_content(
            self.stgpol_twocopies, data, [(0, 0), (0, 1)])
        gen = content.fetch()
        # The builtin next() works with both Python 2 and 3 generators.
        self.assertRaises(UnrecoverableContent, next, gen)

    def test_single_fetch_content_1_byte_without_broken_chunks(self):
        self._test_fetch(self.stgpol, 1, [])

    def test_single_fetch_chunksize_bytes_plus_1_without_broken_chunk(self):
        # As the test name says: one full chunk plus one extra byte.
        self._test_fetch(self.stgpol, self.chunk_size + 1, [])
class TestECContent(BaseTestCase):
    """Functional tests for contents stored with the "EC" storage policy.

    The tests create, rebuild and fetch contents, optionally after deleting
    some chunks at random positions among the ``k + m`` chunks of the first
    metachunk.
    """

    def setUp(self):
        """Create the clients and a fresh container for the test."""
        super(TestECContent, self).setUp()

        if len(self.conf['services']['rawx']) < 12:
            self.skipTest("Not enough rawx. "
                          "EC tests needs at least 12 rawx to run")

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient(self.conf)
        self.container_name = "TestECContent%f" % time.time()
        self.container_client.container_create(account=self.account,
                                               reference=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()
        self.content = "%s-%s" % (self.__class__.__name__, random_str(4))
        self.stgpol = "EC"
        self.size = 1024 * 1024 + 320
        self.k = 6
        self.m = 3

    def tearDown(self):
        super(TestECContent, self).tearDown()

    def random_chunks(self, nb):
        """Pick `nb` distinct chunk positions of the first metachunk.

        :param nb: number of positions to pick among ``k + m``
        :returns: a list of position strings formatted as ``"0.<subpos>"``
        """
        pos = random.sample(xrange(self.k + self.m), nb)
        return ["0.%s" % i for i in pos]

    def _test_create(self, data_size):
        """Create an EC content of `data_size` bytes and verify its chunks."""
        # generate random test data
        data = random_data(data_size)
        # using factory create new EC content
        content = self.content_factory.new(self.container_id, self.content,
                                           len(data), self.stgpol)
        # verify the factory gave us an ECContent
        self.assertEqual(type(content), ECContent)

        # perform the content creation
        content.create(BytesIO(data))

        meta, chunks = self.container_client.content_locate(
            cid=self.container_id, content=content.content_id)
        # verify metadata
        chunks = ChunksHelper(chunks)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], self.stgpol)
        self.assertEqual(meta['name'], self.content)

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size)) \
            if len(data) != 0 else 1

        offset = 0
        # verify each metachunk
        for metapos in range(metachunk_nb):
            chunks_at_pos = content.chunks.filter(metapos=metapos)
            if len(chunks_at_pos) < 1:
                break
            metachunk_size = chunks_at_pos[0].size
            metachunk_hash = md5_data(data[offset:offset + metachunk_size])

            for chunk in chunks_at_pos:
                # Use a dedicated name: reusing `meta` here used to turn
                # the content_id check below into a tautology.
                chunk_meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(chunk_meta['metachunk_size'],
                                 str(chunk.size))
                self.assertEqual(chunk_meta['metachunk_hash'],
                                 chunk.checksum)
                self.assertEqual(chunk_meta['content_path'], self.content)
                self.assertEqual(chunk_meta['container_id'],
                                 self.container_id)
                self.assertEqual(chunk_meta['content_id'],
                                 content.content_id)
                self.assertEqual(chunk_meta['chunk_id'], chunk.id)
                self.assertEqual(chunk_meta['chunk_pos'], chunk.pos)
                self.assertEqual(chunk_meta['chunk_hash'],
                                 md5_stream(stream))
                full_path = encode_fullpath(self.account,
                                            self.container_name,
                                            self.content,
                                            chunk_meta['content_version'],
                                            chunk_meta['content_id'])
                self.assertEqual(chunk_meta['full_path'], full_path)
                self.assertEqual(chunk_meta['oio_version'], '4.2')
                self.assertEqual(metachunk_hash, chunk.checksum)

            offset += metachunk_size

    def test_create_0_byte(self):
        self._test_create(0)

    def test_create_1_byte(self):
        self._test_create(1)

    def test_create(self):
        self._test_create(DAT_LEGIT_SIZE)

    def test_create_6294503_bytes(self):
        self._test_create(6294503)

    def _test_rebuild(self, data_size, broken_pos_list):
        """Create a content, delete some chunks, rebuild and compare.

        :param data_size: number of random bytes to upload
        :param broken_pos_list: list of chunk positions to delete
        """
        # generate test data
        data = os.urandom(data_size)
        # create initial content
        old_content = self.content_factory.new(self.container_id,
                                               self.content, len(data),
                                               self.stgpol)
        # verify factory work as intended
        self.assertEqual(type(old_content), ECContent)

        # perform initial content creation
        old_content.create(BytesIO(data))

        uploaded_content = self.content_factory.get(self.container_id,
                                                    old_content.content_id)

        # break the content
        old_info = {}
        for pos in broken_pos_list:
            old_info[pos] = {}
            c = uploaded_content.chunks.filter(pos=pos)[0]
            old_info[pos]["url"] = c.url
            old_info[pos]["id"] = c.id
            old_info[pos]["hash"] = c.checksum
            chunk_id_to_rebuild = c.id
            meta, stream = self.blob_client.chunk_get(c.url)
            old_info[pos]["dl_meta"] = meta
            old_info[pos]["dl_hash"] = md5_stream(stream)
            # delete the chunk
            self.blob_client.chunk_delete(c.url)

        # rebuild the broken chunks
        # NOTE(review): only the id of the last broken chunk is passed to
        # rebuild_chunk, yet every broken position is verified below --
        # confirm this is the intended behavior.
        uploaded_content.rebuild_chunk(chunk_id_to_rebuild)

        rebuilt_content = self.content_factory.get(
            self.container_id, uploaded_content.content_id)
        # sanity check
        self.assertEqual(type(rebuilt_content), ECContent)

        # verify rebuild result
        for pos in broken_pos_list:
            c = rebuilt_content.chunks.filter(pos=pos)[0]
            rebuilt_meta, rebuilt_stream = self.blob_client.chunk_get(c.url)
            self.assertEqual(rebuilt_meta["chunk_id"], c.id)
            self.assertEqual(md5_stream(rebuilt_stream),
                             old_info[pos]["dl_hash"])
            self.assertEqual(c.checksum, old_info[pos]["hash"])
            self.assertNotEqual(c.url, old_info[pos]["url"])
            self.assertGreaterEqual(rebuilt_meta['chunk_mtime'],
                                    old_info[pos]['dl_meta']['chunk_mtime'])
            # mtime and chunk id are excluded from the equality check.
            del old_info[pos]["dl_meta"]["chunk_mtime"]
            del rebuilt_meta["chunk_mtime"]
            del old_info[pos]["dl_meta"]["chunk_id"]
            del rebuilt_meta["chunk_id"]
            self.assertEqual(rebuilt_meta, old_info[pos]["dl_meta"])

    def test_content_0_byte_rebuild(self):
        self._test_rebuild(0, self.random_chunks(1))

    def test_content_0_byte_rebuild_advanced(self):
        self._test_rebuild(0, self.random_chunks(3))

    def test_content_1_byte_rebuild(self):
        self._test_rebuild(1, self.random_chunks(1))

    def test_content_1_byte_rebuild_advanced(self):
        self._test_rebuild(1, self.random_chunks(3))

    def test_content_rebuild(self):
        self._test_rebuild(DAT_LEGIT_SIZE, self.random_chunks(1))

    def test_content_rebuild_advanced(self):
        self._test_rebuild(DAT_LEGIT_SIZE, self.random_chunks(3))

    def test_content_rebuild_unrecoverable(self):
        self.assertRaises(UnrecoverableContent, self._test_rebuild,
                          DAT_LEGIT_SIZE, self.random_chunks(4))

    def _new_content(self, data, broken_pos_list=None):
        """Upload `data`, then delete the chunks at the given positions.

        :param data: bytes to upload
        :param broken_pos_list: iterable of chunk positions to delete;
            None means no broken chunk
        :returns: the content freshly loaded from the factory
        """
        old_content = self.content_factory.new(self.container_id,
                                               self.content, len(data),
                                               self.stgpol)
        self.assertEqual(type(old_content), ECContent)

        old_content.create(BytesIO(data))

        # break content
        for pos in broken_pos_list or ():
            c = old_content.chunks.filter(pos=pos)[0]
            self.blob_client.chunk_delete(c.url)

        # get the new structure of the uploaded content
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def test_orphan_chunk(self):
        content = self._new_content(random_data(10))
        self.assertRaises(OrphanChunk, content.rebuild_chunk, "invalid")

    def _test_fetch(self, data_size, broken_pos_list=None):
        """Fetch a content with some broken chunks and compare the data.

        :param data_size: number of random bytes to upload
        :param broken_pos_list: list of chunk positions to delete before
            fetching (defaults to none)
        """
        broken_pos_list = broken_pos_list or []
        test_data = random_data(data_size)
        content = self._new_content(test_data, broken_pos_list)

        data = b''.join(content.fetch())

        self.assertEqual(len(data), len(test_data))
        self.assertEqual(md5_data(data), md5_data(test_data))

        # verify that chunks are broken
        for pos in broken_pos_list:
            chunk = content.chunks.filter(pos=pos)[0]
            self.assertRaises(NotFound,
                              self.blob_client.chunk_delete, chunk.url)

    def test_fetch_content_0_byte(self):
        self._test_fetch(0)

    def test_fetch_content_1_byte(self):
        self._test_fetch(1)

    def test_fetch_content(self):
        self._test_fetch(DAT_LEGIT_SIZE)

    def test_fetch_content_0_byte_broken(self):
        self._test_fetch(0, self.random_chunks(3))

    def test_fetch_content_1_byte_broken(self):
        self._test_fetch(1, self.random_chunks(3))

    def test_fetch_content_broken(self):
        self._test_fetch(DAT_LEGIT_SIZE, self.random_chunks(3))

    def test_fetch_content_unrecoverable(self):
        broken_chunks = self.random_chunks(4)
        self.assertRaises(OioException, self._test_fetch,
                          DAT_LEGIT_SIZE, broken_chunks)
class TestBlobIndexer(BaseTestCase):
    """Functional tests of BlobIndexer against one rawx volume and its rdir.

    Each test uploads (and possibly links) chunks on the rawx, clears the
    rdir, runs an indexing pass and checks what the rdir reports.
    """

    def setUp(self):
        """Locate a rawx service, build the indexer, clear rawx and rdir."""
        super(TestBlobIndexer, self).setUp()
        self.rdir_client = RdirClient(self.conf)
        self.blob_client = BlobClient(self.conf)

        _, self.rawx_path, rawx_addr, _ = \
            self.get_service_url('rawx')

        # Prefer the service id tag of the matching rawx, fall back on its
        # address when no id is declared.
        service_id = None
        for svc in self.conscience.all_services('rawx'):
            if svc['addr'] == rawx_addr:
                service_id = svc['tags'].get('tag.service_id', None)
        self.rawx_id = rawx_addr if service_id is None else service_id

        indexer_conf = self.conf.copy()
        indexer_conf['volume'] = self.rawx_path
        self.blob_indexer = BlobIndexer(indexer_conf)

        # clear rawx/rdir
        for stale_chunk in paths_gen(self.rawx_path):
            os.remove(stale_chunk)
        self.rdir_client.admin_clear(self.rawx_id, clear_all=True)

    def _random_content(self):
        """Generate a random content identity and its encoded full path."""
        account = random_str(16)
        container = random_str(16)
        cid = cid_from_name(account, container)
        content_path = random_str(16)
        content_version = 1234567890
        content_id = random_id(32)
        fullpath = encode_fullpath(account, container, content_path,
                                   content_version, content_id)
        return (account, container, cid, content_path, content_version,
                content_id, fullpath)

    def _chunk_url(self, chunk_id):
        """Build the URL of `chunk_id` on the rawx under test."""
        return 'http://' + self.rawx_id + '/' + chunk_id

    def _put_chunk(self):
        """Upload a 100-byte chunk belonging to a random content.

        :returns: a tuple (account, container, cid, content_path,
            content_version, content_id, chunk_id)
        """
        (account, container, cid, content_path, content_version,
         content_id, fullpath) = self._random_content()
        chunk_id = random_chunk_id()
        data = random_buffer(string.printable, 100)
        meta = {
            'full_path': fullpath,
            'container_id': cid,
            'content_path': content_path,
            'version': content_version,
            'id': content_id,
            'chunk_method': 'ec/algo=liberasurecode_rs_vand,k=6,m=3',
            'policy': 'TESTPOLICY',
            'chunk_hash': md5(data).hexdigest().upper(),
            'oio_version': OIO_VERSION,
            'chunk_pos': 0,
            'metachunk_hash': md5().hexdigest(),
            'metachunk_size': 1024
        }
        self.blob_client.chunk_put(self._chunk_url(chunk_id), meta, data)
        sleep(1)  # ensure chunk event have been processed
        return (account, container, cid, content_path, content_version,
                content_id, chunk_id)

    def _delete_chunk(self, chunk_id):
        """Delete `chunk_id` from the rawx under test."""
        self.blob_client.chunk_delete(self._chunk_url(chunk_id))
        sleep(1)  # ensure chunk event have been processed

    def _link_chunk(self, target_chunk_id):
        """Link `target_chunk_id` to a new chunk of a random content.

        :returns: a tuple (account, container, cid, content_path,
            content_version, content_id, chunk_id) describing the link
        """
        (account, container, cid, content_path, content_version,
         content_id, fullpath) = self._random_content()
        _, link = self.blob_client.chunk_link(
            self._chunk_url(target_chunk_id), None, fullpath)
        chunk_id = link.split('/')[-1]
        sleep(1)  # ensure chunk event have been processed
        return (account, container, cid, content_path, content_version,
                content_id, chunk_id)

    def _chunk_path(self, chunk_id):
        """Return the path of `chunk_id` inside the rawx volume."""
        return '/'.join((self.rawx_path, chunk_id[:3], chunk_id))

    def _fetch_chunks(self):
        """Return the list of entries the rdir holds for the rawx."""
        return list(self.rdir_client.chunk_fetch(self.rawx_id))

    def _check_single_chunk(self, expected_cid, expected_content_id,
                            expected_chunk_id):
        """Assert the rdir holds exactly the expected entry.

        :returns: the chunk id read from the rdir
        """
        entries = self._fetch_chunks()
        self.assertEqual(1, len(entries))
        cid, content_id, chunk_id, _ = entries[0]
        self.assertEqual(expected_cid, cid)
        self.assertEqual(expected_content_id, content_id)
        self.assertEqual(expected_chunk_id, chunk_id)
        return chunk_id

    def _reindex(self, expected_successes):
        """Clear the rdir, run an indexing pass and check its counters."""
        self.rdir_client.admin_clear(self.rawx_id, clear_all=True)
        self.blob_indexer.index_pass()
        self.assertEqual(expected_successes, self.blob_indexer.successes)
        self.assertEqual(0, self.blob_indexer.errors)

    def test_blob_indexer(self):
        created = self._put_chunk()
        expected = (created[2], created[5], created[6])

        self._check_single_chunk(*expected)

        self._reindex(1)
        self._check_single_chunk(*expected)

        self._delete_chunk(expected[2])
        self.assertEqual(0, len(self._fetch_chunks()))

    def test_blob_indexer_with_old_chunk(self):
        (expected_account, expected_container, expected_cid,
         expected_content_path, expected_content_version,
         expected_content_id, expected_chunk_id) = self._put_chunk()
        expected = (expected_cid, expected_content_id, expected_chunk_id)

        chunk_id = self._check_single_chunk(*expected)

        convert_to_old_chunk(self._chunk_path(chunk_id), expected_account,
                             expected_container, expected_content_path,
                             expected_content_version, expected_content_id)

        self._reindex(1)
        self._check_single_chunk(*expected)

        self._delete_chunk(expected_chunk_id)
        self.assertEqual(0, len(self._fetch_chunks()))

    def test_blob_indexer_with_linked_chunk(self):
        created = self._put_chunk()
        expected_cid, expected_content_id, expected_chunk_id = \
            created[2], created[5], created[6]

        self._check_single_chunk(expected_cid, expected_content_id,
                                 expected_chunk_id)

        self._reindex(1)
        self._check_single_chunk(expected_cid, expected_content_id,
                                 expected_chunk_id)

        linked = self._link_chunk(expected_chunk_id)
        linked_cid, linked_content_id, linked_chunk_id = \
            linked[2], linked[5], linked[6]

        self._reindex(2)

        entries = self._fetch_chunks()
        self.assertEqual(2, len(entries))
        self.assertNotEqual(entries[0][2], entries[1][2])
        for cid, content_id, chunk_id, _ in entries:
            if chunk_id == expected_chunk_id:
                self.assertEqual(expected_cid, cid)
                self.assertEqual(expected_content_id, content_id)
            else:
                self.assertEqual(linked_cid, cid)
                self.assertEqual(linked_content_id, content_id)
                self.assertEqual(linked_chunk_id, chunk_id)

        # Deleting the original chunk must leave the link indexed.
        self._delete_chunk(expected_chunk_id)
        self._check_single_chunk(linked_cid, linked_content_id,
                                 linked_chunk_id)

        self._delete_chunk(linked_chunk_id)
        self.assertEqual(0, len(self._fetch_chunks()))
class Content(object):
    """Base class of content objects.

    It holds the content metadata and chunks, and implements the operations
    that do not depend on how data is stored (spare chunk selection,
    metadata update, deletion, chunk move). `rebuild_chunk`, `create` and
    `fetch` must be implemented by subclasses.
    """

    def __init__(self, conf, container_id, metadata, chunks, storage_method):
        """
        :param conf: configuration passed to the logger and to the clients
        :param container_id: id of the container holding the content
        :param metadata: dict with the keys "id", "policy", "name",
            "length", "version", "hash", "mime_type" and "chunk_method"
        :param chunks: chunk descriptions, wrapped in a ChunksHelper
        :param storage_method: storage method object, stored as is
        """
        self.conf = conf
        self.container_id = container_id
        self.metadata = metadata
        self.chunks = ChunksHelper(chunks)
        self.storage_method = storage_method
        self.logger = get_logger(self.conf)
        self.cs_client = ConscienceClient(conf)
        self.blob_client = BlobClient()
        self.container_client = ContainerClient(self.conf)
        self.content_id = self.metadata["id"]
        self.stgpol = self.metadata["policy"]
        self.path = self.metadata["name"]
        self.length = int(self.metadata["length"])
        self.version = self.metadata["version"]
        self.checksum = self.metadata["hash"]
        self.mime_type = self.metadata["mime_type"]
        self.chunk_method = self.metadata["chunk_method"]

    def _get_spare_chunk(self, chunks_notin, chunks_broken):
        """Ask the container service for spare chunks.

        :param chunks_notin: chunks sent under the "notin" key of the request
        :param chunks_broken: chunks sent under the "broken" key
        :returns: the list of "id" values of the chunks of the response
        :raises exc.SpareChunkException: when the request fails with a
            ClientException
        """
        spare_data = {
            "notin": ChunksHelper(chunks_notin, False).raw(),
            "broken": ChunksHelper(chunks_broken, False).raw()
        }
        try:
            spare_resp = self.container_client.content_spare(
                cid=self.container_id, content=self.content_id,
                data=spare_data, stgpol=self.stgpol)
        except ClientException as e:
            raise exc.SpareChunkException("No spare chunk (%s)" % e.message)

        return [c["id"] for c in spare_resp["chunks"]]

    def _update_spare_chunk(self, current_chunk, new_url):
        """Replace the URL of `current_chunk` by `new_url` in the container.

        The hash, size and position of the chunk are kept; only its id
        (the URL) changes.
        """
        def describe(url):
            return {
                'type': 'chunk',
                'id': url,
                'hash': current_chunk.checksum,
                'size': current_chunk.size,
                'pos': current_chunk.pos,
                'content': self.content_id
            }

        update_data = {
            'old': [describe(current_chunk.url)],
            'new': [describe(new_url)]
        }
        self.container_client.container_raw_update(cid=self.container_id,
                                                   data=update_data)

    def _create_object(self):
        """Register the content and its chunks in the container."""
        self.container_client.content_create(cid=self.container_id,
                                             path=self.path,
                                             content_id=self.content_id,
                                             stgpol=self.stgpol,
                                             size=self.length,
                                             checksum=self.checksum,
                                             version=self.version,
                                             chunk_method=self.chunk_method,
                                             mime_type=self.mime_type,
                                             data=self.chunks.raw())

    def rebuild_chunk(self, chunk_id, allow_same_rawx=False):
        """Rebuild a chunk; must be implemented by subclasses."""
        raise NotImplementedError()

    def create(self, stream):
        """Upload the content; must be implemented by subclasses."""
        raise NotImplementedError()

    def fetch(self):
        """Download the content; must be implemented by subclasses."""
        raise NotImplementedError()

    def delete(self):
        """Delete the content from its container."""
        self.container_client.content_delete(cid=self.container_id,
                                             path=self.path)

    def move_chunk(self, chunk_id):
        """Copy a chunk to a spare location and drop the original.

        The deletion of the original chunk is best effort: a failure is
        logged and does not prevent the move from succeeding.

        :param chunk_id: id of the chunk to move
        :returns: the raw description of the chunk with its new URL
        :raises OrphanChunk: if the chunk does not belong to this content
        :raises exc.SpareChunkException: if no spare chunk is available
        """
        current_chunk = self.chunks.filter(id=chunk_id).one()
        if current_chunk is None:
            raise OrphanChunk("Chunk not found in content")

        other_chunks = self.chunks.filter(
            metapos=current_chunk.metapos).exclude(id=chunk_id).all()

        spare_urls = self._get_spare_chunk(other_chunks, [current_chunk])
        if not spare_urls:
            # An empty answer would otherwise surface as a bare IndexError.
            raise exc.SpareChunkException("No spare chunk")
        new_url = spare_urls[0]

        self.logger.debug("copy chunk from %s to %s",
                          current_chunk.url, new_url)
        self.blob_client.chunk_copy(current_chunk.url, new_url)

        self._update_spare_chunk(current_chunk, new_url)

        try:
            self.blob_client.chunk_delete(current_chunk.url)
        except Exception:
            # Best effort, but do not swallow KeyboardInterrupt/SystemExit
            # as a bare "except:" would.
            self.logger.warning("Failed to delete chunk %s",
                                current_chunk.url)

        current_chunk.url = new_url

        return current_chunk.raw()
class TestDupContent(BaseTestCase):
    """Functional tests for duplicated contents (DupContent).

    The tests upload contents with the "SINGLE", "TWOCOPIES" or
    "THREECOPIES" storage policy in a dedicated container, then check the
    uploaded chunks or download the content, optionally after deleting some
    chunks from the rawx services.
    """

    def setUp(self):
        """Create the clients and a fresh container for the test."""
        super(TestDupContent, self).setUp()

        if len(self.conf['rawx']) < 3:
            self.skipTest("Not enough rawx. "
                          "Dup tests needs more than 2 rawx to run")

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient()
        self.container_name = "TestDupContent%f" % time.time()
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()

    def tearDown(self):
        super(TestDupContent, self).tearDown()

    def _test_upload(self, stgpol, data_size):
        """Upload a content of `data_size` bytes and verify every chunk.

        :param stgpol: name of the storage policy to upload the content with
        :param data_size: number of random bytes to upload
        """
        data = random_data(data_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), stgpol)
        self.assertEqual(type(content), DupContent)

        content.upload(StringIO.StringIO(data))

        meta, chunks = self.container_client.content_show(
            cid=self.container_id, content=content.content_id)
        chunks = ChunksHelper(chunks)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], stgpol)
        self.assertEqual(meta['name'], "titi")

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
        if metachunk_nb == 0:
            metachunk_nb = 1  # special case for empty content

        if stgpol == "THREECOPIES":
            nb_copy = 3
        elif stgpol == "TWOCOPIES":
            nb_copy = 2
        elif stgpol == "SINGLE":
            nb_copy = 1
        else:
            # Fail with a clear message rather than a NameError below.
            self.fail("Unexpected storage policy: %s" % stgpol)

        self.assertEqual(len(chunks), metachunk_nb * nb_copy)

        for pos in range(metachunk_nb):
            chunks_at_pos = chunks.filter(pos=pos)
            self.assertEqual(len(chunks_at_pos), nb_copy)

            data_begin = pos * self.chunk_size
            data_end = pos * self.chunk_size + self.chunk_size
            chunk_hash = md5_data(data[data_begin:data_end])

            for chunk in chunks_at_pos:
                # Use a dedicated name: reusing `meta` here used to turn
                # the content_id check below into a tautology.
                chunk_meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(md5_stream(stream), chunk_hash)
                self.assertEqual(chunk_meta['content_size'], str(len(data)))
                self.assertEqual(chunk_meta['content_path'], "titi")
                self.assertEqual(chunk_meta['content_cid'],
                                 self.container_id)
                self.assertEqual(chunk_meta['content_id'],
                                 content.content_id)
                self.assertEqual(chunk_meta['chunk_id'], chunk.id)
                self.assertEqual(chunk_meta['chunk_pos'], str(pos))
                self.assertEqual(chunk_meta['chunk_hash'], chunk_hash)

    def test_twocopies_upload_0_byte(self):
        self._test_upload("TWOCOPIES", 0)

    def test_twocopies_upload_1_byte(self):
        self._test_upload("TWOCOPIES", 1)

    def test_twocopies_upload_chunksize_bytes(self):
        self._test_upload("TWOCOPIES", self.chunk_size)

    def test_twocopies_upload_chunksize_plus_1_bytes(self):
        self._test_upload("TWOCOPIES", self.chunk_size + 1)

    def test_single_upload_0_byte(self):
        self._test_upload("SINGLE", 0)

    def test_single_upload_chunksize_plus_1_bytes(self):
        self._test_upload("SINGLE", self.chunk_size + 1)

    def test_chunks_cleanup_when_upload_failed(self):
        """A failed upload must not leave chunks at the other positions."""
        data = random_data(2 * self.chunk_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), "TWOCOPIES")
        self.assertEqual(type(content), DupContent)

        # set bad url for position 1
        for chunk in content.chunks.filter(pos=1):
            chunk.url = "http://127.0.0.1:9/DEADBEEF"

        self.assertRaises(Exception, content.upload, StringIO.StringIO(data))
        for chunk in content.chunks.exclude(pos=1):
            self.assertRaises(NotFound,
                              self.blob_client.chunk_head, chunk.url)

    def _new_content(self, stgpol, data, broken_pos_list):
        """Upload `data`, then delete the chunks listed in `broken_pos_list`.

        :param stgpol: name of the storage policy to use
        :param data: data to upload
        :param broken_pos_list: list of ``(pos, idx)`` pairs selecting the
            `idx`-th chunk at position `pos`
        :returns: the content freshly loaded from the factory
        """
        old_content = self.content_factory.new(self.container_id, "titi",
                                               len(data), stgpol)
        self.assertEqual(type(old_content), DupContent)

        old_content.upload(StringIO.StringIO(data))

        for pos, idx in broken_pos_list:
            c = old_content.chunks.filter(pos=pos)[idx]
            self.blob_client.chunk_delete(c.url)

        # get the new structure of the uploaded content
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def _test_download(self, stgpol, data_size, broken_pos_list):
        """Download a content with some broken chunks and compare the data.

        :param stgpol: name of the storage policy to use
        :param data_size: number of random bytes to upload
        :param broken_pos_list: list of ``(pos, idx)`` chunks to delete
            before downloading
        """
        data = random_data(data_size)
        content = self._new_content(stgpol, data, broken_pos_list)

        downloaded_data = "".join(content.download())

        self.assertEqual(downloaded_data, data)

        for pos, idx in broken_pos_list:
            # check nothing has been rebuilt
            c = content.chunks.filter(pos=pos)[idx]
            self.assertRaises(NotFound, self.blob_client.chunk_delete, c.url)

    def test_twocopies_download_content_0_byte_without_broken_chunks(self):
        self._test_download("TWOCOPIES", 0, [])

    def test_twocopies_download_content_0_byte_with_broken_0_0(self):
        self._test_download("TWOCOPIES", 0, [(0, 0)])

    def test_twocopies_download_content_1_byte_without_broken_chunks(self):
        self._test_download("TWOCOPIES", 1, [])

    def test_twocopies_download_content_1_byte_with_broken_0_0(self):
        self._test_download("TWOCOPIES", 1, [(0, 0)])

    def test_twocopies_download_chunksize_bytes_without_broken_chunks(self):
        self._test_download("TWOCOPIES", self.chunk_size, [])

    def test_twocopies_download_2xchuksize_bytes_with_broken_0_0_and_1_0(self):
        self._test_download("TWOCOPIES", self.chunk_size * 2,
                            [(0, 0), (1, 0)])

    def test_twocopies_download_content_chunksize_bytes_2_broken_chunks(self):
        data = random_data(self.chunk_size)
        content = self._new_content("TWOCOPIES", data, [(0, 0), (0, 1)])
        gen = content.download()
        # The builtin next() works with both Python 2 and 3 generators.
        self.assertRaises(UnrecoverableContent, next, gen)

    def test_single_download_content_1_byte_without_broken_chunks(self):
        self._test_download("SINGLE", 1, [])

    def test_single_download_chunksize_bytes_plus_1_without_broken_chunk(self):
        # As the test name says: one full chunk plus one extra byte.
        self._test_download("SINGLE", self.chunk_size + 1, [])
class BlobMoverWorker(object):
    """Worker that moves the chunks of one volume to other locations.

    A pass (``mover_pass``) iterates over the paths yielded by
    ``paths_gen(volume)`` and moves each chunk, until the volume usage
    (in percent) is at or below ``usage_target``.
    """

    def __init__(self, conf, logger, volume):
        """Read the tunables from ``conf`` and create the service clients.

        :param conf: configuration mapping (read with ``conf.get``).
        :param logger: logger to use; when falsy, ``get_logger(conf)``
            is used instead.
        :param volume: the volume whose chunks must be moved.
        """
        self.conf = conf
        self.logger = logger or get_logger(conf)
        self.volume = volume
        self.run_time = 0
        self.passes = 0
        self.errors = 0
        self.last_reported = 0
        self.last_usage_check = 0
        self.chunks_run_time = 0
        self.bytes_running_time = 0
        self.bytes_processed = 0
        self.total_bytes_processed = 0
        self.total_chunks_processed = 0
        self.usage_target = int_value(conf.get('usage_target'), 0)
        self.usage_check_interval = int_value(
            conf.get('usage_check_interval'), 3600)
        self.report_interval = int_value(conf.get('report_interval'), 3600)
        self.max_chunks_per_second = int_value(
            conf.get('chunks_per_second'), 30)
        self.max_bytes_per_second = int_value(
            conf.get('bytes_per_second'), 10000000)
        self.blob_client = BlobClient()
        self.container_client = ContainerClient(conf)

    def mover_pass(self):
        """Run one pass over the volume and log periodic and final reports.

        The loop stops early as soon as the measured usage is at or below
        ``self.usage_target``.
        """
        self.namespace, self.address = check_volume(self.volume)

        start_time = report_time = time.time()
        total_errors = 0
        mover_time = 0

        for path in paths_gen(self.volume):
            loop_time = time.time()

            now = time.time()
            if now - self.last_usage_check >= self.usage_check_interval:
                used, total = statfs(self.volume)
                usage = (float(used) / total) * 100
                if usage <= self.usage_target:
                    self.logger.info(
                        'current usage %.2f%%: target reached (%.2f%%)',
                        usage, self.usage_target)
                    # NOTE(review): the timestamp is only refreshed when
                    # the target is reached, so the usage is measured on
                    # every iteration until then -- confirm this is wanted.
                    self.last_usage_check = now
                    break

            self.safe_chunk_move(path)
            self.chunks_run_time = ratelimit(self.chunks_run_time,
                                             self.max_chunks_per_second)
            self.total_chunks_processed += 1

            now = time.time()
            if now - self.last_reported >= self.report_interval:
                # Same zero guard as the final report below: two clock
                # reads may return the same value.
                since_report = (now - report_time) or 0.000001
                since_start = (now - start_time) or 0.000001
                self.logger.info(
                    '%(start_time)s '
                    '%(passes)d '
                    '%(errors)d '
                    '%(c_rate).2f '
                    '%(b_rate).2f '
                    '%(total).2f '
                    '%(mover_time).2f '
                    '%(mover_rate).2f' % {
                        'start_time': time.ctime(report_time),
                        'passes': self.passes,
                        'errors': self.errors,
                        'c_rate': self.passes / since_report,
                        'b_rate': self.bytes_processed / since_report,
                        'total': (now - start_time),
                        'mover_time': mover_time,
                        'mover_rate': mover_time / since_start
                    })
                report_time = now
                # Fold the interval counters into the totals and reset
                # them, otherwise the same errors are counted again at
                # each report and in the final summary.
                total_errors += self.errors
                self.errors = 0
                self.passes = 0
                self.bytes_processed = 0
                self.last_reported = now

            mover_time += (now - loop_time)

        elapsed = (time.time() - start_time) or 0.000001
        self.logger.info(
            '%(elapsed).02f '
            '%(errors)d '
            '%(chunk_rate).2f '
            '%(bytes_rate).2f '
            '%(mover_time).2f '
            '%(mover_rate).2f' % {
                'elapsed': elapsed,
                'errors': total_errors + self.errors,
                'chunk_rate': self.total_chunks_processed / elapsed,
                'bytes_rate': self.total_bytes_processed / elapsed,
                'mover_time': mover_time,
                'mover_rate': mover_time / elapsed
            })

    def safe_chunk_move(self, path):
        """Move one chunk, logging (not propagating) any failure.

        ``self.errors`` is incremented on failure; ``self.passes`` is
        incremented in every case.
        """
        try:
            self.chunk_move(path)
        except Exception as e:
            self.errors += 1
            self.logger.error('ERROR while moving chunk %s: %s', path, e)
        self.passes += 1

    def load_chunk_metadata(self, path):
        """Return ``read_chunk_metadata`` applied to the file at ``path``."""
        with open(path) as f:
            return read_chunk_metadata(f)

    def chunk_move(self, path):
        """Copy the chunk stored at ``path`` to a spare location.

        The container is asked for a spare chunk, the data is copied,
        the container is updated to reference the new chunk and the old
        chunk is finally deleted.

        :raises exc.OrphanChunk: when the content is not found, or when
            the content does not reference this chunk.
        """
        meta = self.load_chunk_metadata(path)
        content_cid = meta['content_cid']
        content_path = meta['content_path']

        chunk_url = 'http://%s/%s' % (self.address, meta['chunk_id'])

        try:
            _, data = self.container_client.content_show(cid=content_cid,
                                                         path=content_path)
        except exc.NotFound:
            raise exc.OrphanChunk('Content not found')

        # Split the chunks at our position into "the chunk being moved"
        # and "the others" in one pass, without mutating a list while
        # iterating over it.
        current_chunk = None
        notin = []
        for c in data:
            if c['pos'] != meta['chunk_pos']:
                continue
            if c['url'] == chunk_url:
                current_chunk = c
            else:
                notin.append(c)

        if not current_chunk:
            raise exc.OrphanChunk('Chunk not found in content')

        spare_data = {'notin': notin, 'broken': [current_chunk], 'size': 0}
        spare_resp = self.container_client.content_spare(cid=content_cid,
                                                         path=content_path,
                                                         data=spare_data)

        new_chunk = spare_resp['chunks'][0]
        self.blob_client.chunk_copy(current_chunk['url'], new_chunk['id'])

        old = [{
            'type': 'chunk',
            'id': current_chunk['url'],
            'hash': meta['chunk_hash'],
            'size': int(meta['chunk_size'])
        }]
        new = [{
            'type': 'chunk',
            'id': new_chunk['id'],
            'hash': meta['chunk_hash'],
            'size': int(meta['chunk_size'])
        }]
        update_data = {'old': old, 'new': new}

        self.container_client.container_raw_update(cid=content_cid,
                                                   data=update_data)

        # Only drop the old copy once the container references the new one.
        self.blob_client.chunk_delete(current_chunk['url'])

        self.logger.info('moved chunk %s to %s', current_chunk['url'],
                         new_chunk['id'])
class TestDupContent(BaseTestCase):
    """Functional tests for duplicated contents (``DupContent``).

    They cover upload, cleanup after a failed upload, chunk rebuild and
    download with some chunks deleted.
    """

    # Number of copies expected at each position, per storage policy.
    _NB_COPIES = {"THREECOPIES": 3, "TWOCOPIES": 2, "SINGLE": 1}

    def setUp(self):
        super(TestDupContent, self).setUp()

        if len(self.conf['rawx']) < 3:
            self.skipTest("Not enough rawx. "
                          "Dup tests needs more than 2 rawx to run")

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient()
        self.container_name = "TestDupContent%f" % time.time()
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()

    def tearDown(self):
        super(TestDupContent, self).tearDown()

    def _test_upload(self, stgpol, data_size):
        """Upload random data and check the content and chunk metadata."""
        data = random_data(data_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), stgpol)
        self.assertEqual(type(content), DupContent)

        content.upload(StringIO.StringIO(data))

        meta, chunks = self.container_client.content_show(
            cid=self.container_id, content=content.content_id)
        chunks = ChunksHelper(chunks)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], stgpol)
        self.assertEqual(meta['name'], "titi")

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
        if metachunk_nb == 0:
            metachunk_nb = 1  # special case for empty content

        # Fail with a clear message instead of an unbound local variable
        # when the policy is not one of the known ones.
        self.assertIn(stgpol, self._NB_COPIES)
        nb_copy = self._NB_COPIES[stgpol]

        self.assertEqual(len(chunks), metachunk_nb * nb_copy)

        for pos in range(metachunk_nb):
            chunks_at_pos = chunks.filter(pos=pos)
            self.assertEqual(len(chunks_at_pos), nb_copy)

            data_begin = pos * self.chunk_size
            data_end = pos * self.chunk_size + self.chunk_size
            chunk_hash = md5_data(data[data_begin:data_end])

            for chunk in chunks_at_pos:
                chunk_meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(md5_stream(stream), chunk_hash)
                self.assertEqual(chunk_meta['content_size'],
                                 str(len(data)))
                self.assertEqual(chunk_meta['content_path'], "titi")
                self.assertEqual(chunk_meta['content_cid'],
                                 self.container_id)
                # Compare with the uploaded content: comparing the value
                # with itself could never fail.
                self.assertEqual(chunk_meta['content_id'],
                                 content.content_id)
                self.assertEqual(chunk_meta['chunk_id'], chunk.id)
                self.assertEqual(chunk_meta['chunk_pos'], str(pos))
                self.assertEqual(chunk_meta['chunk_hash'], chunk_hash)

    def test_twocopies_upload_0_byte(self):
        self._test_upload("TWOCOPIES", 0)

    def test_twocopies_upload_1_byte(self):
        self._test_upload("TWOCOPIES", 1)

    def test_twocopies_upload_chunksize_bytes(self):
        self._test_upload("TWOCOPIES", self.chunk_size)

    def test_twocopies_upload_chunksize_plus_1_bytes(self):
        self._test_upload("TWOCOPIES", self.chunk_size + 1)

    def test_single_upload_0_byte(self):
        self._test_upload("SINGLE", 0)

    def test_single_upload_chunksize_plus_1_bytes(self):
        self._test_upload("SINGLE", self.chunk_size + 1)

    def test_chunks_cleanup_when_upload_failed(self):
        data = random_data(2 * self.chunk_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), "TWOCOPIES")
        self.assertEqual(type(content), DupContent)

        # set bad url for position 1
        for chunk in content.chunks.filter(pos=1):
            chunk.url = "http://127.0.0.1:9/DEADBEEF"

        self.assertRaises(Exception, content.upload,
                          StringIO.StringIO(data))
        for chunk in content.chunks.exclude(pos=1):
            self.assertRaises(NotFound,
                              self.blob_client.chunk_head, chunk.url)

    def _new_content(self, stgpol, data, broken_pos_list=None):
        """Upload ``data`` then delete the chunks in ``broken_pos_list``.

        :param broken_pos_list: iterable of ``(pos, idx)`` pairs naming
            the chunks to delete; nothing is deleted when omitted.
        :returns: a tuple ``(content, broken_chunks_info)`` where
            ``content`` is re-read from the content factory and
            ``broken_chunks_info[pos][idx]`` describes each deleted
            chunk as it was before deletion.
        """
        if broken_pos_list is None:
            broken_pos_list = []

        old_content = self.content_factory.new(self.container_id, "titi",
                                               len(data), stgpol)
        self.assertEqual(type(old_content), DupContent)

        old_content.upload(StringIO.StringIO(data))

        broken_chunks_info = {}
        for pos, idx in broken_pos_list:
            c = old_content.chunks.filter(pos=pos)[idx]
            meta, stream = self.blob_client.chunk_get(c.url)
            if pos not in broken_chunks_info:
                broken_chunks_info[pos] = {}
            broken_chunks_info[pos][idx] = {
                "url": c.url,
                "id": c.id,
                "hash": c.hash,
                "dl_meta": meta,
                "dl_hash": md5_stream(stream)
            }
            self.blob_client.chunk_delete(c.url)

        # get the new structure of the uploaded content
        return (self.content_factory.get(self.container_id,
                                         old_content.content_id),
                broken_chunks_info)

    def _test_rebuild(self, stgpol, data_size, broken_pos_list,
                      full_rebuild_pos):
        """Break some chunks, rebuild one of them and check the result."""
        data = random_data(data_size)
        content, broken_chunks_info = self._new_content(stgpol, data,
                                                        broken_pos_list)

        rebuild_pos, rebuild_idx = full_rebuild_pos
        rebuild_chunk_info = broken_chunks_info[rebuild_pos][rebuild_idx]
        content.rebuild_chunk(rebuild_chunk_info["id"])

        # get the new structure of the content
        rebuilt_content = self.content_factory.get(self.container_id,
                                                   content.content_id)
        self.assertEqual(type(rebuilt_content), DupContent)

        # find the rebuilt chunk
        for c in rebuilt_content.chunks.filter(pos=rebuild_pos):
            if len(content.chunks.filter(id=c.id)) > 0:
                # not the rebuilt chunk
                # if this chunk is broken, it must not have been rebuilt
                for b_c_i in broken_chunks_info[rebuild_pos].values():
                    if c.id == b_c_i["id"]:
                        with ExpectedException(NotFound):
                            _, _ = self.blob_client.chunk_get(c.url)
                continue
            meta, stream = self.blob_client.chunk_get(c.url)
            self.assertEqual(meta["chunk_id"], c.id)
            self.assertEqual(md5_stream(stream),
                             rebuild_chunk_info["dl_hash"])
            self.assertEqual(c.hash, rebuild_chunk_info["hash"])
            self.assertThat(c.url, NotEquals(rebuild_chunk_info["url"]))
            # The chunk id is expected to differ, compare the rest.
            del meta["chunk_id"]
            del rebuild_chunk_info["dl_meta"]["chunk_id"]
            self.assertEqual(meta, rebuild_chunk_info["dl_meta"])

    def test_2copies_content_0_byte_1broken_rebuild_pos_0_idx_0(self):
        self._test_rebuild("TWOCOPIES", 0, [(0, 0)], (0, 0))

    def test_2copies_content_1_byte_1broken_rebuild_pos_0_idx_1(self):
        self._test_rebuild("TWOCOPIES", 1, [(0, 1)], (0, 1))

    def test_3copies_content_chunksize_bytes_2broken_rebuild_pos_0_idx_1(self):
        if len(self.conf['rawx']) <= 3:
            self.skipTest("Need more than 3 rawx")
        self._test_rebuild("THREECOPIES", self.chunk_size,
                           [(0, 0), (0, 1)], (0, 1))

    def test_3copies_content_2xchksize_bytes_2broken_rebuild_pos_1_idx_2(self):
        if len(self.conf['rawx']) <= 3:
            self.skipTest("Need more than 3 rawx")
        self._test_rebuild("THREECOPIES", 2 * self.chunk_size,
                           [(1, 0), (1, 2)], (1, 2))

    def test_2copies_content_0_byte_2broken_rebuild_pos_0_idx_0(self):
        with ExpectedException(UnrecoverableContent):
            self._test_rebuild("TWOCOPIES", 0, [(0, 0), (0, 1)], (0, 0))

    def _test_download(self, stgpol, data_size, broken_pos_list):
        """Download a content with broken chunks and compare the data."""
        data = random_data(data_size)
        content, _ = self._new_content(stgpol, data, broken_pos_list)

        downloaded_data = "".join(content.download())

        self.assertEqual(downloaded_data, data)

        for pos, idx in broken_pos_list:
            # check nothing has been rebuilt: the chunk that was deleted
            # (the one at `idx`, not always the first) must still be gone
            c = content.chunks.filter(pos=pos)[idx]
            self.assertRaises(NotFound,
                              self.blob_client.chunk_delete, c.url)

    def test_twocopies_download_content_0_byte_without_broken_chunks(self):
        self._test_download("TWOCOPIES", 0, [])

    def test_twocopies_download_content_0_byte_with_broken_0_0(self):
        self._test_download("TWOCOPIES", 0, [(0, 0)])

    def test_twocopies_download_content_1_byte_without_broken_chunks(self):
        self._test_download("TWOCOPIES", 1, [])

    def test_twocopies_download_content_1_byte_with_broken_0_0(self):
        self._test_download("TWOCOPIES", 1, [(0, 0)])

    def test_twocopies_download_chunksize_bytes_without_broken_chunks(self):
        self._test_download("TWOCOPIES", self.chunk_size, [])

    def test_twocopies_download_2xchuksize_bytes_with_broken_0_0_and_1_0(self):
        self._test_download("TWOCOPIES", self.chunk_size * 2,
                            [(0, 0), (1, 0)])

    def test_twocopies_download_content_chunksize_bytes_2_broken_chunks(self):
        data = random_data(self.chunk_size)
        content, _ = self._new_content("TWOCOPIES", data,
                                       [(0, 0), (0, 1)])
        gen = content.download()
        # next() is a builtin on Python 2.6+ and 3, unlike gen.next
        self.assertRaises(UnrecoverableContent, next, gen)

    def test_single_download_content_1_byte_without_broken_chunks(self):
        self._test_download("SINGLE", 1, [])

    def test_single_download_chunksize_bytes_plus_1_without_broken_chunk(self):
        self._test_download("SINGLE", self.chunk_size * 2, [])
class Content(object):
    """Base class of the contents, holding metadata, chunks and clients.

    ``rebuild_chunk``, ``_upload`` and ``download`` raise
    ``NotImplementedError`` here and are meant to be overridden.
    """

    def __init__(self, conf, container_id, metadata, chunks, stgpol_args):
        """Store the arguments and unpack the fields of ``metadata``.

        ``metadata`` must provide the keys ``id``, ``policy``, ``name``,
        ``length``, ``version``, ``hash``, ``mime-type`` and
        ``chunk-method``.
        """
        self.conf = conf
        self.container_id = container_id
        self.metadata = metadata
        self.chunks = ChunksHelper(chunks)
        self.stgpol_args = stgpol_args
        self.logger = get_logger(self.conf)
        self.cs_client = ConscienceClient(conf)
        self.container_client = ContainerClient(self.conf)
        self.blob_client = BlobClient()
        self.session = requests.Session()
        self.content_id = self.metadata["id"]
        self.stgpol_name = self.metadata["policy"]
        self.path = self.metadata["name"]
        self.length = int(self.metadata["length"])
        self.version = self.metadata["version"]
        self.hash = self.metadata["hash"]
        self.mime_type = self.metadata["mime-type"]
        self.chunk_method = self.metadata["chunk-method"]

    def _meta2_get_spare_chunk(self, chunks_notin, chunks_broken):
        """Ask the container for spare chunks and return their ids.

        :raises exc.SpareChunkException: when the container client
            raises a ``ClientException``.
        """
        spare_data = {
            "notin": ChunksHelper(chunks_notin, False).raw(),
            "broken": ChunksHelper(chunks_broken, False).raw()
        }
        try:
            spare_resp = self.container_client.content_spare(
                cid=self.container_id, content=self.content_id,
                data=spare_data, stgpol=self.stgpol_name)
        except ClientException as e:
            raise exc.SpareChunkException("No spare chunk (%s)" % e.message)

        return [c["id"] for c in spare_resp["chunks"]]

    def _meta2_update_spare_chunk(self, current_chunk, new_url):
        """Replace ``current_chunk`` by ``new_url`` in the container.

        Hash, size and position are copied from ``current_chunk``.
        """
        old = [{'type': 'chunk',
                'id': current_chunk.url,
                'hash': current_chunk.hash,
                'size': current_chunk.size,
                'pos': current_chunk.pos,
                'content': self.content_id}]
        new = [{'type': 'chunk',
                'id': new_url,
                'hash': current_chunk.hash,
                'size': current_chunk.size,
                'pos': current_chunk.pos,
                'content': self.content_id}]
        update_data = {'old': old, 'new': new}

        self.container_client.container_raw_update(
            cid=self.container_id, data=update_data)

    def _meta2_create_object(self):
        """Create the content in the container with its chunks."""
        self.container_client.content_create(cid=self.container_id,
                                             path=self.path,
                                             content_id=self.content_id,
                                             stgpol=self.stgpol_name,
                                             size=self.length,
                                             checksum=self.hash,
                                             version=self.version,
                                             chunk_method=self.chunk_method,
                                             mime_type=self.mime_type,
                                             data=self.chunks.raw())

    def rebuild_chunk(self, chunk_id):
        raise NotImplementedError()

    def _delete_chunks_best_effort(self):
        """Try to delete every chunk, logging the failures."""
        for chunk in self.chunks:
            try:
                self.blob_client.chunk_delete(chunk.url)
            except Exception:
                # Best effort: a chunk that cannot be deleted must not
                # prevent the deletion of the next ones.
                self.logger.warning("Failed to delete %s", chunk.url)

    def upload(self, stream):
        """Upload ``stream``; on failure delete the chunks and re-raise.

        The exception raised by ``_upload`` is propagated unchanged,
        with its original traceback.
        """
        try:
            self._upload(stream)
        except Exception:
            # The cleanup runs in its own method so that the exceptions
            # it handles do not replace the one being re-raised here.
            self._delete_chunks_best_effort()
            raise

    def _upload(self, stream):
        raise NotImplementedError()

    def download(self):
        raise NotImplementedError()
class BlobMoverWorker(object):
    """Worker that moves the chunks of one volume to other locations.

    A pass (``mover_pass``) iterates over the paths yielded by
    ``paths_gen(volume)`` and moves each chunk, until the volume usage
    (in percent) is at or below ``usage_target``.
    """

    def __init__(self, conf, logger, volume):
        """Read the tunables from ``conf`` and create the service clients.

        :param conf: configuration mapping (read with ``conf.get``).
        :param logger: logger to use; when falsy, ``get_logger(conf)``
            is used instead.
        :param volume: the volume whose chunks must be moved.
        """
        self.conf = conf
        self.logger = logger or get_logger(conf)
        self.volume = volume
        self.run_time = 0
        self.passes = 0
        self.errors = 0
        self.last_reported = 0
        self.last_usage_check = 0
        self.chunks_run_time = 0
        self.bytes_running_time = 0
        self.bytes_processed = 0
        self.total_bytes_processed = 0
        self.total_chunks_processed = 0
        self.usage_target = int_value(
            conf.get('usage_target'), 0)
        self.usage_check_interval = int_value(
            conf.get('usage_check_interval'), 3600)
        self.report_interval = int_value(
            conf.get('report_interval'), 3600)
        self.max_chunks_per_second = int_value(
            conf.get('chunks_per_second'), 30)
        self.max_bytes_per_second = int_value(
            conf.get('bytes_per_second'), 10000000)
        self.blob_client = BlobClient()
        self.container_client = ContainerClient(conf)

    def mover_pass(self):
        """Run one pass over the volume and log periodic and final reports.

        The loop stops early as soon as the measured usage is at or below
        ``self.usage_target``.
        """
        self.namespace, self.address = check_volume(self.volume)

        start_time = report_time = time.time()
        total_errors = 0
        mover_time = 0

        for path in paths_gen(self.volume):
            loop_time = time.time()

            now = time.time()
            if now - self.last_usage_check >= self.usage_check_interval:
                used, total = statfs(self.volume)
                usage = (float(used) / total) * 100
                if usage <= self.usage_target:
                    self.logger.info(
                        'current usage %.2f%%: target reached (%.2f%%)',
                        usage, self.usage_target)
                    # NOTE(review): the timestamp is only refreshed when
                    # the target is reached, so the usage is measured on
                    # every iteration until then -- confirm this is wanted.
                    self.last_usage_check = now
                    break

            self.safe_chunk_move(path)
            self.chunks_run_time = ratelimit(
                self.chunks_run_time,
                self.max_chunks_per_second
            )
            self.total_chunks_processed += 1

            now = time.time()
            if now - self.last_reported >= self.report_interval:
                # Same zero guard as the final report below: two clock
                # reads may return the same value.
                since_report = (now - report_time) or 0.000001
                since_start = (now - start_time) or 0.000001
                self.logger.info(
                    '%(start_time)s '
                    '%(passes)d '
                    '%(errors)d '
                    '%(c_rate).2f '
                    '%(b_rate).2f '
                    '%(total).2f '
                    '%(mover_time).2f '
                    '%(mover_rate).2f' % {
                        'start_time': time.ctime(report_time),
                        'passes': self.passes,
                        'errors': self.errors,
                        'c_rate': self.passes / since_report,
                        'b_rate': self.bytes_processed / since_report,
                        'total': (now - start_time),
                        'mover_time': mover_time,
                        'mover_rate': mover_time / since_start
                    }
                )
                report_time = now
                # Fold the interval counters into the totals and reset
                # them, otherwise the same errors are counted again at
                # each report and in the final summary.
                total_errors += self.errors
                self.errors = 0
                self.passes = 0
                self.bytes_processed = 0
                self.last_reported = now

            mover_time += (now - loop_time)

        elapsed = (time.time() - start_time) or 0.000001
        self.logger.info(
            '%(elapsed).02f '
            '%(errors)d '
            '%(chunk_rate).2f '
            '%(bytes_rate).2f '
            '%(mover_time).2f '
            '%(mover_rate).2f' % {
                'elapsed': elapsed,
                'errors': total_errors + self.errors,
                'chunk_rate': self.total_chunks_processed / elapsed,
                'bytes_rate': self.total_bytes_processed / elapsed,
                'mover_time': mover_time,
                'mover_rate': mover_time / elapsed
            }
        )

    def safe_chunk_move(self, path):
        """Move one chunk, logging (not propagating) any failure.

        ``self.errors`` is incremented on failure; ``self.passes`` is
        incremented in every case.
        """
        try:
            self.chunk_move(path)
        except Exception as e:
            self.errors += 1
            self.logger.error('ERROR while moving chunk %s: %s', path, e)
        self.passes += 1

    def load_chunk_metadata(self, path):
        """Return ``read_chunk_metadata`` applied to the file at ``path``."""
        with open(path) as f:
            return read_chunk_metadata(f)

    def chunk_move(self, path):
        """Copy the chunk stored at ``path`` to a spare location.

        The container is asked for a spare chunk, the data is copied,
        the container is updated to reference the new chunk and the old
        chunk is finally deleted.

        :raises exc.OrphanChunk: when the content is not found, or when
            the content does not reference this chunk.
        """
        meta = self.load_chunk_metadata(path)
        content_cid = meta['content_cid']
        content_path = meta['content_path']

        chunk_url = 'http://%s/%s' % (self.address, meta['chunk_id'])

        try:
            data = self.container_client.content_show(
                cid=content_cid, path=content_path)
        except exc.NotFound:
            raise exc.OrphanChunk('Content not found')

        # Split the chunks at our position into "the chunk being moved"
        # and "the others" in one pass, without mutating a list while
        # iterating over it.
        current_chunk = None
        notin = []
        for c in data:
            if c['pos'] != meta['chunk_pos']:
                continue
            if c['url'] == chunk_url:
                current_chunk = c
            else:
                notin.append(c)

        if not current_chunk:
            raise exc.OrphanChunk('Chunk not found in content')

        spare_data = {'notin': notin, 'broken': [current_chunk], 'size': 0}
        spare_resp = self.container_client.content_spare(
            cid=content_cid, path=content_path, data=spare_data)

        new_chunk = spare_resp['chunks'][0]
        self.blob_client.chunk_copy(
            current_chunk['url'], new_chunk['id'])

        old = [{'type': 'chunk',
                'id': current_chunk['url'],
                'hash': meta['chunk_hash'],
                'size': int(meta['chunk_size'])}]
        new = [{'type': 'chunk',
                'id': new_chunk['id'],
                'hash': meta['chunk_hash'],
                'size': int(meta['chunk_size'])}]
        update_data = {'old': old, 'new': new}

        self.container_client.container_raw_update(
            cid=content_cid, data=update_data)

        # Only drop the old copy once the container references the new one.
        self.blob_client.chunk_delete(current_chunk['url'])

        self.logger.info(
            'moved chunk %s to %s', current_chunk['url'], new_chunk['id'])
class TestDupContent(BaseTestCase):
    """Functional tests for duplicated contents (``DupContent``).

    They cover upload, cleanup after a failed upload, chunk rebuild and
    download with some chunks deleted.
    """

    # Number of copies expected at each position, per storage policy.
    _NB_COPIES = {"THREECOPIES": 3, "TWOCOPIES": 2, "SINGLE": 1}

    def setUp(self):
        super(TestDupContent, self).setUp()

        if len(self.conf['rawx']) < 3:
            self.skipTest("Not enough rawx. "
                          "Dup tests needs more than 2 rawx to run")

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient()
        self.container_name = "TestDupContent%f" % time.time()
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()

    def tearDown(self):
        super(TestDupContent, self).tearDown()

    def _test_upload(self, stgpol, data_size):
        """Upload random data and check the content and chunk metadata."""
        data = random_data(data_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), stgpol)
        self.assertEqual(type(content), DupContent)

        content.upload(StringIO.StringIO(data))

        meta, chunks = self.container_client.content_show(
            cid=self.container_id, content=content.content_id)
        chunks = ChunksHelper(chunks)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], stgpol)
        self.assertEqual(meta['name'], "titi")

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
        if metachunk_nb == 0:
            metachunk_nb = 1  # special case for empty content

        # Fail with a clear message instead of an unbound local variable
        # when the policy is not one of the known ones.
        self.assertIn(stgpol, self._NB_COPIES)
        nb_copy = self._NB_COPIES[stgpol]

        self.assertEqual(len(chunks), metachunk_nb * nb_copy)

        for pos in range(metachunk_nb):
            chunks_at_pos = chunks.filter(pos=pos)
            self.assertEqual(len(chunks_at_pos), nb_copy)

            data_begin = pos * self.chunk_size
            data_end = pos * self.chunk_size + self.chunk_size
            chunk_hash = md5_data(data[data_begin:data_end])

            for chunk in chunks_at_pos:
                chunk_meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(md5_stream(stream), chunk_hash)
                self.assertEqual(chunk_meta['content_size'],
                                 str(len(data)))
                self.assertEqual(chunk_meta['content_path'], "titi")
                self.assertEqual(chunk_meta['content_cid'],
                                 self.container_id)
                # Compare with the uploaded content: comparing the value
                # with itself could never fail.
                self.assertEqual(chunk_meta['content_id'],
                                 content.content_id)
                self.assertEqual(chunk_meta['chunk_id'], chunk.id)
                self.assertEqual(chunk_meta['chunk_pos'], str(pos))
                self.assertEqual(chunk_meta['chunk_hash'], chunk_hash)

    def test_twocopies_upload_0_byte(self):
        self._test_upload("TWOCOPIES", 0)

    def test_twocopies_upload_1_byte(self):
        self._test_upload("TWOCOPIES", 1)

    def test_twocopies_upload_chunksize_bytes(self):
        self._test_upload("TWOCOPIES", self.chunk_size)

    def test_twocopies_upload_chunksize_plus_1_bytes(self):
        self._test_upload("TWOCOPIES", self.chunk_size + 1)

    def test_single_upload_0_byte(self):
        self._test_upload("SINGLE", 0)

    def test_single_upload_chunksize_plus_1_bytes(self):
        self._test_upload("SINGLE", self.chunk_size + 1)

    def test_chunks_cleanup_when_upload_failed(self):
        data = random_data(2 * self.chunk_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), "TWOCOPIES")
        self.assertEqual(type(content), DupContent)

        # set bad url for position 1
        for chunk in content.chunks.filter(pos=1):
            chunk.url = "http://127.0.0.1:9/DEADBEEF"

        self.assertRaises(Exception, content.upload,
                          StringIO.StringIO(data))
        for chunk in content.chunks.exclude(pos=1):
            self.assertRaises(NotFound,
                              self.blob_client.chunk_head, chunk.url)

    def _new_content(self, stgpol, data, broken_pos_list=None):
        """Upload ``data`` then delete the chunks in ``broken_pos_list``.

        :param broken_pos_list: iterable of ``(pos, idx)`` pairs naming
            the chunks to delete; nothing is deleted when omitted.
        :returns: a tuple ``(content, broken_chunks_info)`` where
            ``content`` is re-read from the content factory and
            ``broken_chunks_info[pos][idx]`` describes each deleted
            chunk as it was before deletion.
        """
        if broken_pos_list is None:
            broken_pos_list = []

        old_content = self.content_factory.new(self.container_id, "titi",
                                               len(data), stgpol)
        self.assertEqual(type(old_content), DupContent)

        old_content.upload(StringIO.StringIO(data))

        broken_chunks_info = {}
        for pos, idx in broken_pos_list:
            c = old_content.chunks.filter(pos=pos)[idx]
            meta, stream = self.blob_client.chunk_get(c.url)
            if pos not in broken_chunks_info:
                broken_chunks_info[pos] = {}
            broken_chunks_info[pos][idx] = {
                "url": c.url,
                "id": c.id,
                "hash": c.hash,
                "dl_meta": meta,
                "dl_hash": md5_stream(stream)
            }
            self.blob_client.chunk_delete(c.url)

        # get the new structure of the uploaded content
        return (self.content_factory.get(self.container_id,
                                         old_content.content_id),
                broken_chunks_info)

    def _test_rebuild(self, stgpol, data_size, broken_pos_list,
                      full_rebuild_pos):
        """Break some chunks, rebuild one of them and check the result."""
        data = random_data(data_size)
        content, broken_chunks_info = self._new_content(
            stgpol, data, broken_pos_list)

        rebuild_pos, rebuild_idx = full_rebuild_pos
        rebuild_chunk_info = broken_chunks_info[rebuild_pos][rebuild_idx]
        content.rebuild_chunk(rebuild_chunk_info["id"])

        # get the new structure of the content
        rebuilt_content = self.content_factory.get(self.container_id,
                                                   content.content_id)
        self.assertEqual(type(rebuilt_content), DupContent)

        # find the rebuilt chunk
        for c in rebuilt_content.chunks.filter(pos=rebuild_pos):
            if len(content.chunks.filter(id=c.id)) > 0:
                # not the rebuilt chunk
                # if this chunk is broken, it must not have been rebuilt
                for b_c_i in broken_chunks_info[rebuild_pos].values():
                    if c.id == b_c_i["id"]:
                        with ExpectedException(NotFound):
                            _, _ = self.blob_client.chunk_get(c.url)
                continue
            meta, stream = self.blob_client.chunk_get(c.url)
            self.assertEqual(meta["chunk_id"], c.id)
            self.assertEqual(md5_stream(stream),
                             rebuild_chunk_info["dl_hash"])
            self.assertEqual(c.hash, rebuild_chunk_info["hash"])
            self.assertThat(c.url, NotEquals(rebuild_chunk_info["url"]))
            # The chunk id is expected to differ, compare the rest.
            del meta["chunk_id"]
            del rebuild_chunk_info["dl_meta"]["chunk_id"]
            self.assertEqual(meta, rebuild_chunk_info["dl_meta"])

    def test_2copies_content_0_byte_1broken_rebuild_pos_0_idx_0(self):
        self._test_rebuild("TWOCOPIES", 0, [(0, 0)], (0, 0))

    def test_2copies_content_1_byte_1broken_rebuild_pos_0_idx_1(self):
        self._test_rebuild("TWOCOPIES", 1, [(0, 1)], (0, 1))

    def test_3copies_content_chunksize_bytes_2broken_rebuild_pos_0_idx_1(self):
        if len(self.conf['rawx']) <= 3:
            self.skipTest("Need more than 3 rawx")
        self._test_rebuild("THREECOPIES", self.chunk_size,
                           [(0, 0), (0, 1)], (0, 1))

    def test_3copies_content_2xchksize_bytes_2broken_rebuild_pos_1_idx_2(self):
        if len(self.conf['rawx']) <= 3:
            self.skipTest("Need more than 3 rawx")
        self._test_rebuild("THREECOPIES", 2 * self.chunk_size,
                           [(1, 0), (1, 2)], (1, 2))

    def test_2copies_content_0_byte_2broken_rebuild_pos_0_idx_0(self):
        with ExpectedException(UnrecoverableContent):
            self._test_rebuild("TWOCOPIES", 0, [(0, 0), (0, 1)], (0, 0))

    def _test_download(self, stgpol, data_size, broken_pos_list):
        """Download a content with broken chunks and compare the data."""
        data = random_data(data_size)
        content, _ = self._new_content(stgpol, data, broken_pos_list)

        downloaded_data = "".join(content.download())

        self.assertEqual(downloaded_data, data)

        for pos, idx in broken_pos_list:
            # check nothing has been rebuilt: the chunk that was deleted
            # (the one at `idx`, not always the first) must still be gone
            c = content.chunks.filter(pos=pos)[idx]
            self.assertRaises(NotFound,
                              self.blob_client.chunk_delete, c.url)

    def test_twocopies_download_content_0_byte_without_broken_chunks(self):
        self._test_download("TWOCOPIES", 0, [])

    def test_twocopies_download_content_0_byte_with_broken_0_0(self):
        self._test_download("TWOCOPIES", 0, [(0, 0)])

    def test_twocopies_download_content_1_byte_without_broken_chunks(self):
        self._test_download("TWOCOPIES", 1, [])

    def test_twocopies_download_content_1_byte_with_broken_0_0(self):
        self._test_download("TWOCOPIES", 1, [(0, 0)])

    def test_twocopies_download_chunksize_bytes_without_broken_chunks(self):
        self._test_download("TWOCOPIES", self.chunk_size, [])

    def test_twocopies_download_2xchuksize_bytes_with_broken_0_0_and_1_0(self):
        self._test_download("TWOCOPIES", self.chunk_size * 2,
                            [(0, 0), (1, 0)])

    def test_twocopies_download_content_chunksize_bytes_2_broken_chunks(self):
        data = random_data(self.chunk_size)
        content, _ = self._new_content("TWOCOPIES", data,
                                       [(0, 0), (0, 1)])
        gen = content.download()
        # next() is a builtin on Python 2.6+ and 3, unlike gen.next
        self.assertRaises(UnrecoverableContent, next, gen)

    def test_single_download_content_1_byte_without_broken_chunks(self):
        self._test_download("SINGLE", 1, [])

    def test_single_download_chunksize_bytes_plus_1_without_broken_chunk(self):
        self._test_download("SINGLE", self.chunk_size * 2, [])
class Content(object):
    """Base class of the contents, holding metadata, chunks and clients.

    ``rebuild_chunk``, ``_upload`` and ``download`` raise
    ``NotImplementedError`` here and are meant to be overridden.
    """

    def __init__(self, conf, container_id, metadata, chunks, stgpol_args):
        """Store the arguments and unpack the fields of ``metadata``.

        ``metadata`` must provide the keys ``id``, ``policy``, ``name``,
        ``length``, ``version``, ``hash``, ``mime-type`` and
        ``chunk-method``.
        """
        self.conf = conf
        self.container_id = container_id
        self.metadata = metadata
        self.chunks = ChunksHelper(chunks)
        self.stgpol_args = stgpol_args
        self.logger = get_logger(self.conf)
        self.cs_client = ConscienceClient(conf)
        self.container_client = ContainerClient(self.conf)
        self.blob_client = BlobClient()
        self.session = requests.Session()
        self.content_id = self.metadata["id"]
        self.stgpol_name = self.metadata["policy"]
        self.path = self.metadata["name"]
        self.length = int(self.metadata["length"])
        self.version = self.metadata["version"]
        self.hash = self.metadata["hash"]
        self.mime_type = self.metadata["mime-type"]
        self.chunk_method = self.metadata["chunk-method"]

    def _meta2_get_spare_chunk(self, chunks_notin, chunks_broken):
        """Ask the container for spare chunks and return their ids.

        :raises exc.SpareChunkException: when the container client
            raises a ``ClientException``.
        """
        spare_data = {
            "notin": ChunksHelper(chunks_notin, False).raw(),
            "broken": ChunksHelper(chunks_broken, False).raw()
        }
        try:
            spare_resp = self.container_client.content_spare(
                cid=self.container_id, content=self.content_id,
                data=spare_data, stgpol=self.stgpol_name)
        except ClientException as e:
            raise exc.SpareChunkException("No spare chunk (%s)" % e.message)

        return [c["id"] for c in spare_resp["chunks"]]

    def _meta2_update_spare_chunk(self, current_chunk, new_url):
        """Replace ``current_chunk`` by ``new_url`` in the container.

        Hash, size and position are copied from ``current_chunk``.
        """
        old = [{'type': 'chunk',
                'id': current_chunk.url,
                'hash': current_chunk.hash,
                'size': current_chunk.size,
                'pos': current_chunk.pos,
                'content': self.content_id}]
        new = [{'type': 'chunk',
                'id': new_url,
                'hash': current_chunk.hash,
                'size': current_chunk.size,
                'pos': current_chunk.pos,
                'content': self.content_id}]
        update_data = {'old': old, 'new': new}

        self.container_client.container_raw_update(
            cid=self.container_id, data=update_data)

    def _meta2_create_object(self):
        """Create the content in the container with its chunks."""
        self.container_client.content_create(cid=self.container_id,
                                             path=self.path,
                                             content_id=self.content_id,
                                             stgpol=self.stgpol_name,
                                             size=self.length,
                                             checksum=self.hash,
                                             version=self.version,
                                             chunk_method=self.chunk_method,
                                             mime_type=self.mime_type,
                                             data=self.chunks.raw())

    def rebuild_chunk(self, chunk_id):
        raise NotImplementedError()

    def _delete_chunks_best_effort(self):
        """Try to delete every chunk, logging the failures."""
        for chunk in self.chunks:
            try:
                self.blob_client.chunk_delete(chunk.url)
            except Exception:
                # Best effort: a chunk that cannot be deleted must not
                # prevent the deletion of the next ones.
                self.logger.warning("Failed to delete %s", chunk.url)

    def upload(self, stream):
        """Upload ``stream``; on failure delete the chunks and re-raise.

        The exception raised by ``_upload`` is propagated unchanged,
        with its original traceback.
        """
        try:
            self._upload(stream)
        except BaseException:
            # The cleanup runs in its own method so that the exceptions
            # it handles do not replace the one being re-raised here;
            # a bare raise is valid on both Python 2 and Python 3.
            self._delete_chunks_best_effort()
            raise

    def _upload(self, stream):
        raise NotImplementedError()

    def download(self):
        raise NotImplementedError()

    def delete(self):
        """Delete the content from its container."""
        self.container_client.content_delete(cid=self.container_id,
                                             path=self.path)

    def move_chunk(self, chunk_id):
        """Copy a chunk to a spare location and reference the copy.

        :param chunk_id: id of the chunk to move.
        :returns: the raw form of the chunk, with its new URL.
        :raises OrphanChunk: when the content has no such chunk.
        """
        current_chunk = self.chunks.filter(id=chunk_id).one()
        if current_chunk is None:
            raise OrphanChunk("Chunk not found in content")

        other_chunks = self.chunks.filter(
            metapos=current_chunk.metapos).exclude(id=chunk_id).all()

        spare_urls = self._meta2_get_spare_chunk(other_chunks,
                                                 [current_chunk])

        self.logger.debug("copy chunk from %s to %s",
                          current_chunk.url, spare_urls[0])
        self.blob_client.chunk_copy(current_chunk.url, spare_urls[0])

        self._meta2_update_spare_chunk(current_chunk, spare_urls[0])

        try:
            self.blob_client.chunk_delete(current_chunk.url)
        except Exception:
            # The container already references the new copy, so a
            # failed deletion of the old one is only logged.
            self.logger.warning("Failed to delete chunk %s",
                                current_chunk.url)

        current_chunk.url = spare_urls[0]

        return current_chunk.raw()
class Content(object):
    """Base class of the contents, holding metadata, chunks and clients.

    ``rebuild_chunk``, ``_upload`` and ``download`` raise
    ``NotImplementedError`` here and are meant to be overridden.
    """

    def __init__(self, conf, container_id, metadata, chunks, stgpol_args):
        """Store the arguments and unpack the fields of ``metadata``.

        ``metadata`` must provide the keys ``id``, ``policy``, ``name``,
        ``length``, ``version``, ``hash``, ``mime-type`` and
        ``chunk-method``.
        """
        self.conf = conf
        self.container_id = container_id
        self.metadata = metadata
        self.chunks = ChunksHelper(chunks)
        self.stgpol_args = stgpol_args
        self.logger = get_logger(self.conf)
        self.cs_client = ConscienceClient(conf)
        self.container_client = ContainerClient(self.conf)
        self.blob_client = BlobClient()
        self.session = requests.Session()
        self.content_id = self.metadata["id"]
        self.stgpol_name = self.metadata["policy"]
        self.path = self.metadata["name"]
        self.length = int(self.metadata["length"])
        self.version = self.metadata["version"]
        self.hash = self.metadata["hash"]
        self.mime_type = self.metadata["mime-type"]
        self.chunk_method = self.metadata["chunk-method"]

    def _meta2_get_spare_chunk(self, chunks_notin, chunks_broken):
        """Ask the container for spare chunks and return their ids.

        :raises exc.SpareChunkException: when the container client
            raises a ``ClientException``.
        """
        spare_data = {
            "notin": ChunksHelper(chunks_notin, False).raw(),
            "broken": ChunksHelper(chunks_broken, False).raw()
        }
        try:
            spare_resp = self.container_client.content_spare(
                cid=self.container_id, content=self.content_id,
                data=spare_data, stgpol=self.stgpol_name)
        except ClientException as e:
            raise exc.SpareChunkException("No spare chunk (%s)" % e.message)

        return [c["id"] for c in spare_resp["chunks"]]

    def _meta2_update_spare_chunk(self, current_chunk, new_url):
        """Replace ``current_chunk`` by ``new_url`` in the container.

        Hash, size and position are copied from ``current_chunk``.
        """
        old = [{
            'type': 'chunk',
            'id': current_chunk.url,
            'hash': current_chunk.hash,
            'size': current_chunk.size,
            'pos': current_chunk.pos,
            'content': self.content_id
        }]
        new = [{
            'type': 'chunk',
            'id': new_url,
            'hash': current_chunk.hash,
            'size': current_chunk.size,
            'pos': current_chunk.pos,
            'content': self.content_id
        }]
        update_data = {'old': old, 'new': new}

        self.container_client.container_raw_update(cid=self.container_id,
                                                   data=update_data)

    def _meta2_create_object(self):
        """Create the content in the container with its chunks."""
        self.container_client.content_create(cid=self.container_id,
                                             path=self.path,
                                             content_id=self.content_id,
                                             stgpol=self.stgpol_name,
                                             size=self.length,
                                             checksum=self.hash,
                                             version=self.version,
                                             chunk_method=self.chunk_method,
                                             mime_type=self.mime_type,
                                             data=self.chunks.raw())

    def rebuild_chunk(self, chunk_id):
        raise NotImplementedError()

    def _delete_chunks_best_effort(self):
        """Try to delete every chunk, logging the failures."""
        for chunk in self.chunks:
            try:
                self.blob_client.chunk_delete(chunk.url)
            except Exception:
                # Best effort: a chunk that cannot be deleted must not
                # prevent the deletion of the next ones.
                self.logger.warning("Failed to delete %s", chunk.url)

    def upload(self, stream):
        """Upload ``stream``; on failure delete the chunks and re-raise.

        The exception raised by ``_upload`` is propagated unchanged,
        with its original traceback.
        """
        try:
            self._upload(stream)
        except Exception:
            # The cleanup runs in its own method so that the exceptions
            # it handles do not replace the one being re-raised here.
            self._delete_chunks_best_effort()
            raise

    def _upload(self, stream):
        raise NotImplementedError()

    def download(self):
        raise NotImplementedError()
class Content(object):
    """Base class describing one content and the chunks that store it.

    Keeps the content metadata, a ``ChunksHelper`` over its chunks and the
    clients used to reach the container and blob services.  ``create``,
    ``fetch`` and ``rebuild_chunk`` raise ``NotImplementedError`` here and
    are meant to be provided by subclasses.
    """

    def __init__(self, conf, container_id, metadata, chunks,
                 storage_method, account, container_name,
                 container_client=None):
        """Store the content description and build the service clients.

        :param conf: configuration handed to the logger and the clients
        :param container_id: identifier of the container holding the content
        :param metadata: dict providing at least the keys "id", "name",
            "length", "version", "hash" and "chunk_method"; "full_path"
            is used when present
        :param chunks: chunk descriptions, wrapped in a ``ChunksHelper``
        :param storage_method: storage method object, stored untouched
        :param account: name of the account owning the container
        :param container_name: name of the container
        :param container_client: optional client to reuse; a new
            ``ContainerClient`` is created when it is falsy
        """
        self.conf = conf
        self.container_id = container_id
        self.metadata = metadata
        self.chunks = ChunksHelper(chunks)
        self.storage_method = storage_method
        self.logger = get_logger(self.conf)
        self.blob_client = BlobClient()
        self.container_client = (container_client
                                 or ContainerClient(self.conf,
                                                    logger=self.logger))

        # FIXME: all these may be properties
        self.content_id = self.metadata["id"]
        self.path = self.metadata["name"]
        self.length = int(self.metadata["length"])
        self.version = self.metadata["version"]
        self.checksum = self.metadata["hash"]
        self.chunk_method = self.metadata["chunk_method"]
        self.account = account
        self.container_name = container_name
        if 'full_path' in self.metadata:
            self.full_path = metadata['full_path']
        else:
            # Build "account/container/path/version"; every part but the
            # version is URL-quoted.  The result is a one-element list.
            self.full_path = [
                '{0}/{1}/{2}/{3}'.format(quote_plus(self.account),
                                         quote_plus(self.container_name),
                                         quote_plus(self.path),
                                         self.version)
            ]

    @property
    def mime_type(self):
        """The "mime_type" entry of the metadata."""
        return self.metadata["mime_type"]

    @mime_type.setter
    def mime_type(self, value):
        self.metadata["mime_type"] = value

    @property
    def policy(self):
        """The "policy" entry of the metadata."""
        return self.metadata["policy"]

    @policy.setter
    def policy(self, value):
        self.metadata["policy"] = value

    @property
    def properties(self):
        """The "properties" entry of the metadata, or None if missing."""
        return self.metadata.get('properties')

    @properties.setter
    def properties(self, value):
        if not isinstance(value, dict):
            raise ValueError("'value' must be a dict")
        self.metadata['properties'] = value

    def _get_spare_chunk(self, chunks_notin, chunks_broken):
        """Request spare chunks from the container service.

        :param chunks_notin: chunks sent under the "notin" key
        :param chunks_broken: chunks sent under the "broken" key
        :returns: list with the "id" of every chunk of the response
        :raises exc.SpareChunkException: if the request raises
            ``ClientException``
        """
        spare_data = {
            "notin": ChunksHelper(chunks_notin, False).raw(),
            "broken": ChunksHelper(chunks_broken, False).raw()
        }
        try:
            spare_resp = self.container_client.content_spare(
                cid=self.container_id, path=self.content_id,
                data=spare_data, stgpol=self.policy)
        except ClientException as e:
            raise exc.SpareChunkException("No spare chunk (%s)" % e.message)

        return [c["id"] for c in spare_resp["chunks"]]

    def _add_raw_chunk(self, current_chunk, url):
        """Insert a chunk entry located at ``url`` in the container.

        The entry reuses the checksum, size and position of
        ``current_chunk``.
        """
        data = {
            'type': 'chunk',
            'id': url,
            'hash': current_chunk.checksum,
            'size': current_chunk.size,
            'pos': current_chunk.pos,
            'content': self.content_id
        }
        self.container_client.container_raw_insert(
            data, cid=self.container_id)

    def _update_spare_chunk(self, current_chunk, new_url):
        """Replace the URL of a chunk in the container service.

        :param current_chunk: chunk object exposing ``url``, ``checksum``,
            ``size`` and ``pos``
        :param new_url: URL to register in place of ``current_chunk.url``
        """
        old = {
            'type': 'chunk',
            'id': current_chunk.url,
            'hash': current_chunk.checksum,
            'size': current_chunk.size,
            'pos': current_chunk.pos,
            'content': self.content_id
        }
        # The new entry only differs from the old one by its URL.
        new = dict(old, id=new_url)
        self.container_client.container_raw_update(
            old, new, cid=self.container_id)

    def _generate_sysmeta(self):
        """Return a dict of system metadata describing this content."""
        return {
            'id': self.content_id,
            'version': self.version,
            'policy': self.policy,
            'mime_type': self.mime_type,
            'chunk_method': self.chunk_method,
            'chunk_size': self.metadata['chunk_size'],
            'oio_version': OIO_VERSION,
            'full_path': self.full_path,
            'content_path': self.path,
            'container_id': self.container_id,
        }

    def _create_object(self, **kwargs):
        """Register the content, its chunks and properties in the container.

        Extra keyword arguments are forwarded to ``content_create``.
        """
        data = {'chunks': self.chunks.raw(),
                'properties': self.properties}
        self.container_client.content_create(
            cid=self.container_id,
            path=self.path,
            content_id=self.content_id,
            stgpol=self.policy,
            size=self.length,
            checksum=self.checksum,
            version=self.version,
            chunk_method=self.chunk_method,
            mime_type=self.mime_type,
            data=data,
            **kwargs)

    def rebuild_chunk(self, chunk_id, allow_same_rawx=False, chunk_pos=None):
        """Rebuild one chunk; must be implemented by subclasses."""
        raise NotImplementedError()

    def create(self, stream, **kwargs):
        """Upload the content; must be implemented by subclasses."""
        raise NotImplementedError()

    def fetch(self):
        """Download the content; must be implemented by subclasses."""
        raise NotImplementedError()

    def delete(self, **kwargs):
        """Delete the content from the container service."""
        self.container_client.content_delete(
            cid=self.container_id, path=self.path, **kwargs)

    def move_chunk(self, chunk_id):
        """Copy a chunk to a spare location and drop the original.

        The chunk is copied to the first spare URL, the container entry is
        updated, then the original is deleted.  A failed deletion is only
        logged, since the container already references the new location.

        :param chunk_id: identifier of the chunk to move
        :returns: the raw description of the chunk, with its new URL
        :raises OrphanChunk: if no chunk of this content has this id
        """
        current_chunk = self.chunks.filter(id=chunk_id).one()
        if current_chunk is None:
            raise OrphanChunk("Chunk not found in content")

        other_chunks = self.chunks.filter(
            metapos=current_chunk.metapos).exclude(id=chunk_id).all()

        spare_urls = self._get_spare_chunk(other_chunks, [current_chunk])
        new_url = spare_urls[0]

        self.logger.debug("copy chunk from %s to %s",
                          current_chunk.url, new_url)
        self.blob_client.chunk_copy(current_chunk.url, new_url)

        self._update_spare_chunk(current_chunk, new_url)

        try:
            self.blob_client.chunk_delete(current_chunk.url)
        except Exception:
            self.logger.warning("Failed to delete chunk %s",
                                current_chunk.url)

        current_chunk.url = new_url

        return current_chunk.raw()
class TestRainContent(BaseTestCase):
    """Functional tests of upload, rebuild and download of RAIN contents.

    The tests use the "RAIN" storage policy and expect k=6 data chunks and
    m=2 parity chunks per metachunk.  They are skipped when the
    configuration lists fewer than 12 rawx.
    """

    def setUp(self):
        super(TestRainContent, self).setUp()

        if len(self.conf['rawx']) < 12:
            self.skipTest("Not enough rawx. "
                          "Rain tests needs more than 12 rawx to run")

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient()
        # The timestamp gives each test its own container.
        self.container_name = "TestRainContent%f" % time.time()
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()

    def tearDown(self):
        super(TestRainContent, self).tearDown()

    def _test_upload(self, data_size):
        """Upload ``data_size`` random bytes and check the stored chunks.

        Checks the content metadata, the number of data and parity chunks
        of each metachunk, the metadata stored with each chunk and the
        hash of the concatenated data chunks of each metachunk.
        """
        data = random_data(data_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), "RAIN")
        k = 6
        m = 2
        self.assertEqual(type(content), RainContent)

        content.upload(StringIO.StringIO(data))

        meta, chunks = self.container_client.content_show(
            cid=self.container_id, content=content.content_id)
        chunks = ChunksHelper(chunks)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], "RAIN")
        self.assertEqual(meta['name'], "titi")

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
        if metachunk_nb == 0:
            metachunk_nb = 1  # special case for empty content

        # Only the last metachunk may hold fewer than k data chunks.
        nb_chunks_min = metachunk_nb * (k + m) - (k - 1)
        nb_chunks_max = metachunk_nb * (k + m)
        self.assertGreaterEqual(len(chunks), nb_chunks_min)
        self.assertLessEqual(len(chunks), nb_chunks_max)

        for metapos in range(metachunk_nb):
            chunks_at_pos = content.chunks.filter(metapos=metapos)
            data_chunks_at_pos = chunks_at_pos.filter(is_parity=False)
            parity_chunks_at_pos = chunks_at_pos.filter(is_parity=True)

            if metapos < metachunk_nb - 1:
                self.assertEqual(len(data_chunks_at_pos), k)
            else:
                self.assertGreaterEqual(len(data_chunks_at_pos), 1)
                self.assertLessEqual(len(data_chunks_at_pos), k)
            self.assertEqual(len(parity_chunks_at_pos), m)

            for chunk in chunks_at_pos:
                meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(md5_stream(stream), chunk.hash)
                self.assertEqual(meta['content_size'], str(len(data)))
                self.assertEqual(meta['content_path'], "titi")
                self.assertEqual(meta['content_cid'], self.container_id)
                # NOTE(review): this compares the value with itself and so
                # always passes; content.content_id was probably intended.
                # TODO confirm both use the same format before changing.
                self.assertEqual(meta['content_id'], meta['content_id'])
                self.assertEqual(meta['chunk_id'], chunk.id)
                self.assertEqual(meta['chunk_pos'], chunk.pos)
                self.assertEqual(meta['chunk_hash'], chunk.hash)

            data_begin = metapos * self.chunk_size
            data_end = metapos * self.chunk_size + self.chunk_size
            target_metachunk_hash = md5_data(data[data_begin:data_end])

            metachunk_hash = hashlib.md5()
            for chunk in data_chunks_at_pos:
                meta, stream = self.blob_client.chunk_get(chunk.url)
                for d in stream:
                    metachunk_hash.update(d)
            self.assertEqual(metachunk_hash.hexdigest().upper(),
                             target_metachunk_hash)

    def test_upload_0_byte(self):
        self._test_upload(0)

    def test_upload_1_byte(self):
        self._test_upload(1)

    def test_upload_chunksize_bytes(self):
        self._test_upload(self.chunk_size)

    def test_upload_chunksize_plus_1_bytes(self):
        self._test_upload(self.chunk_size + 1)

    def test_chunks_cleanup_when_upload_failed(self):
        data = random_data(2 * self.chunk_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), "RAIN")
        self.assertEqual(type(content), RainContent)

        # set bad url for position 1
        for chunk in content.chunks.filter(pos="1.p0"):
            chunk.url = "http://127.0.0.1:9/DEADBEEF"

        self.assertRaises(Exception, content.upload, StringIO.StringIO(data))
        # Every other chunk must have been removed by the failed upload.
        for chunk in content.chunks.exclude(pos="1.p0"):
            self.assertRaises(NotFound,
                              self.blob_client.chunk_head, chunk.url)

    def _test_rebuild(self, data_size, broken_pos_list):
        """Delete the chunks at ``broken_pos_list`` and rebuild them.

        Uploads ``data_size`` random bytes, deletes the listed chunks,
        calls ``rebuild_chunk`` once with the id of the last deleted chunk
        and checks that every deleted position comes back with the same
        data and metadata at a different URL.
        """
        data = os.urandom(data_size)
        old_content = self.content_factory.new(self.container_id, "titi",
                                               len(data), "RAIN")
        self.assertEqual(type(old_content), RainContent)

        old_content.upload(StringIO.StringIO(data))

        # get the new structure of the uploaded content
        uploaded_content = self.content_factory.get(self.container_id,
                                                    old_content.content_id)

        old_info = {}
        for pos in broken_pos_list:
            old_info[pos] = {}
            c = uploaded_content.chunks.filter(pos=pos)[0]
            old_info[pos]["url"] = c.url
            old_info[pos]["id"] = c.id
            old_info[pos]["hash"] = c.hash
            chunk_id_to_rebuild = c.id
            meta, stream = self.blob_client.chunk_get(c.url)
            old_info[pos]["dl_meta"] = meta
            old_info[pos]["dl_hash"] = md5_stream(stream)
            # delete the chunk
            self.blob_client.chunk_delete(c.url)

        # rebuild the broken chunks
        uploaded_content.rebuild_chunk(chunk_id_to_rebuild)

        # get the new structure of the content
        rebuilt_content = self.content_factory.get(
            self.container_id, uploaded_content.content_id)
        self.assertEqual(type(rebuilt_content), RainContent)

        for pos in broken_pos_list:
            c = rebuilt_content.chunks.filter(pos=pos)[0]
            rebuilt_meta, rebuilt_stream = self.blob_client.chunk_get(c.url)
            self.assertEqual(rebuilt_meta["chunk_id"], c.id)
            self.assertEqual(md5_stream(rebuilt_stream),
                             old_info[pos]["dl_hash"])
            self.assertEqual(c.hash, old_info[pos]["hash"])
            self.assertThat(c.url, NotEquals(old_info[pos]["url"]))
            # The chunk id is dropped from both sides before comparing
            # the remaining chunk metadata.
            del old_info[pos]["dl_meta"]["chunk_id"]
            del rebuilt_meta["chunk_id"]
            self.assertEqual(rebuilt_meta, old_info[pos]["dl_meta"])

    def test_content_0_byte_rebuild_pos_0_0(self):
        self._test_rebuild(0, ["0.0"])

    def test_content_0_byte_rebuild_pos_0_0_and_0_p0(self):
        self._test_rebuild(0, ["0.0", "0.p0"])

    def test_content_1_byte_rebuild_pos_0_0(self):
        self._test_rebuild(1, ["0.0"])

    def test_content_1_byte_rebuild_pos_0_p0(self):
        self._test_rebuild(1, ["0.p0"])

    def test_content_1_byte_rebuild_pos_0_0_and_0_p0(self):
        self._test_rebuild(1, ["0.0", "0.p0"])

    def test_content_chunksize_bytes_rebuild_pos_0_0(self):
        self._test_rebuild(self.conf["chunk_size"], ["0.0"])

    def test_content_chunksize_bytes_rebuild_pos_0_0_and_0_1(self):
        self._test_rebuild(self.conf["chunk_size"], ["0.0", "0.1"])

    def test_content_chunksize_bytes_rebuild_pos_0_0_and_0_p0(self):
        self._test_rebuild(self.conf["chunk_size"], ["0.0", "0.p0"])

    def test_content_chunksize_bytes_rebuild_pos_0_p0_and_0_p1(self):
        self._test_rebuild(self.conf["chunk_size"], ["0.p0", "0.p1"])

    def test_content_chunksize_bytes_rebuild_more_than_k_chunk(self):
        self.assertRaises(UnrecoverableContent, self._test_rebuild,
                          self.conf["chunk_size"], ["0.0", "0.1", "0.2"])

    def _new_content(self, data, broken_pos_list=None):
        """Upload ``data`` then delete the chunks at ``broken_pos_list``.

        :returns: the content re-read through the content factory
        """
        old_content = self.content_factory.new(self.container_id, "titi",
                                               len(data), "RAIN")
        self.assertEqual(type(old_content), RainContent)

        old_content.upload(StringIO.StringIO(data))

        for pos in broken_pos_list or []:
            c = old_content.chunks.filter(pos=pos)[0]
            self.blob_client.chunk_delete(c.url)

        # get the new structure of the uploaded content
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def test_orphan_chunk(self):
        content = self._new_content(random_data(10))
        self.assertRaises(OrphanChunk, content.rebuild_chunk, "uNkNoWnId")

    def test_rebuild_on_the_fly(self):
        data = random_data(self.conf["chunk_size"])
        content = self._new_content(data, ["0.0", "0.p0"])

        stream = content.rebuild_metachunk("0", on_the_fly=True)

        dl_data = "".join(stream)

        self.assertEqual(dl_data, data)

        # The deleted chunks must still be missing afterwards.
        del_chunk_0_0 = content.chunks.filter(pos="0.0")[0]
        del_chunk_0_p0 = content.chunks.filter(pos="0.p0")[0]

        self.assertRaises(NotFound,
                          self.blob_client.chunk_get, del_chunk_0_0.url)
        self.assertRaises(NotFound,
                          self.blob_client.chunk_get, del_chunk_0_p0.url)

    def _test_download(self, data_size, broken_pos_list):
        """Download a content whose chunks at ``broken_pos_list`` are gone.

        Checks that the downloaded data equals the uploaded data and that
        the deleted chunks are still missing afterwards.
        """
        data = random_data(data_size)
        content = self._new_content(data, broken_pos_list)

        downloaded_data = "".join(content.download())

        self.assertEqual(downloaded_data, data)

        for pos in broken_pos_list:
            c = content.chunks.filter(pos=pos)[0]
            self.assertRaises(NotFound, self.blob_client.chunk_delete, c.url)

    def test_download_content_0_byte_without_broken_chunks(self):
        self._test_download(0, [])

    def test_download_content_1_byte_without_broken_chunks(self):
        self._test_download(1, [])

    def test_download_content_chunksize_bytes_without_broken_chunks(self):
        self._test_download(self.conf["chunk_size"], [])

    def test_download_content_chunksize_plus_1_without_broken_chunks(self):
        self._test_download(self.conf["chunk_size"] + 1, [])

    def test_download_content_0_byte_with_broken_0_0_and_0_p0(self):
        self._test_download(0, ["0.0", "0.p0"])

    def test_download_content_1_byte_with_broken_0_0_and_0_p0(self):
        self._test_download(1, ["0.0", "0.p0"])

    def test_download_content_2xchunksize_with_broken_0_2_and_1_0(self):
        self._test_download(2 * self.conf["chunk_size"], ["0.2", "1.0"])

    def test_download_content_chunksize_bytes_with_3_broken_chunks(self):
        data = random_data(self.conf["chunk_size"])
        content = self._new_content(data, ["0.0", "0.1", "0.2"])
        gen = content.download()
        # The error is only raised when the first item is requested.
        self.assertRaises(UnrecoverableContent, next, gen)

    def test_download_interrupt_close(self):
        data = random_data(self.conf["chunk_size"])
        content = self._new_content(data, ["0.p0"])
        download_iter = content.download()

        self.assertEqual(next(download_iter), data[0:READ_CHUNK_SIZE - 1])
        download_iter.close()
class TestRainContent(BaseTestCase):
    """Functional tests of upload, rebuild and download of RAIN contents.

    The tests use the "RAIN" storage policy and expect at most k=6 data
    chunks and exactly m=2 parity chunks per metachunk.  They are skipped
    when the configuration lists fewer than 12 rawx.
    """

    def setUp(self):
        super(TestRainContent, self).setUp()

        if len(self.conf['rawx']) < 12:
            self.skipTest("Not enough rawx. "
                          "Rain tests needs more than 12 rawx to run")

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient()
        # The timestamp gives each test its own container.
        self.container_name = "TestRainContent%f" % time.time()
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()

    def tearDown(self):
        super(TestRainContent, self).tearDown()

    def _test_upload(self, data_size):
        """Upload ``data_size`` random bytes and check the stored chunks.

        Checks the content metadata, the number of data and parity chunks
        of each metachunk, the metadata stored with each chunk and the
        hash of the concatenated data chunks of each metachunk.
        """
        data = random_data(data_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), "RAIN")
        k = 6
        m = 2
        self.assertEqual(type(content), RainContent)

        content.upload(StringIO.StringIO(data))

        meta, chunks = self.container_client.content_show(
            cid=self.container_id, content=content.content_id)
        chunks = ChunksHelper(chunks)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], "RAIN")
        self.assertEqual(meta['name'], "titi")

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
        if metachunk_nb == 0:
            metachunk_nb = 1  # special case for empty content

        # Each metachunk holds between 1 and k data chunks, plus m parity.
        nb_chunks_min = metachunk_nb * (1 + m)
        nb_chunks_max = metachunk_nb * (k + m)
        self.assertGreaterEqual(len(chunks), nb_chunks_min)
        self.assertLessEqual(len(chunks), nb_chunks_max)

        for metapos in range(metachunk_nb):
            chunks_at_pos = content.chunks.filter(metapos=metapos)
            data_chunks_at_pos = chunks_at_pos.filter(is_parity=False)
            parity_chunks_at_pos = chunks_at_pos.filter(is_parity=True)

            self.assertGreaterEqual(len(data_chunks_at_pos), 1)
            self.assertLessEqual(len(data_chunks_at_pos), k)
            self.assertEqual(len(parity_chunks_at_pos), m)

            for chunk in chunks_at_pos:
                meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(md5_stream(stream), chunk.hash)
                self.assertEqual(meta['content_size'], str(len(data)))
                self.assertEqual(meta['content_path'], "titi")
                self.assertEqual(meta['content_cid'], self.container_id)
                # NOTE(review): this compares the value with itself and so
                # always passes; content.content_id was probably intended.
                # TODO confirm both use the same format before changing.
                self.assertEqual(meta['content_id'], meta['content_id'])
                self.assertEqual(meta['chunk_id'], chunk.id)
                self.assertEqual(meta['chunk_pos'], chunk.pos)
                self.assertEqual(meta['chunk_hash'], chunk.hash)

            data_begin = metapos * self.chunk_size
            data_end = metapos * self.chunk_size + self.chunk_size
            target_metachunk_hash = md5_data(data[data_begin:data_end])

            metachunk_hash = hashlib.md5()
            for chunk in data_chunks_at_pos:
                meta, stream = self.blob_client.chunk_get(chunk.url)
                for d in stream:
                    metachunk_hash.update(d)
            self.assertEqual(metachunk_hash.hexdigest().upper(),
                             target_metachunk_hash)

    def test_upload_0_byte(self):
        self._test_upload(0)

    def test_upload_1_byte(self):
        self._test_upload(1)

    def test_upload_chunksize_bytes(self):
        self._test_upload(self.chunk_size)

    def test_upload_chunksize_plus_1_bytes(self):
        self._test_upload(self.chunk_size + 1)

    def test_chunks_cleanup_when_upload_failed(self):
        data = random_data(2 * self.chunk_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), "RAIN")
        self.assertEqual(type(content), RainContent)

        # set bad url for position 1
        for chunk in content.chunks.filter(pos="1.p0"):
            chunk.url = "http://127.0.0.1:9/DEADBEEF"

        self.assertRaises(Exception, content.upload, StringIO.StringIO(data))
        # Every other chunk must have been removed by the failed upload.
        for chunk in content.chunks.exclude(pos="1.p0"):
            self.assertRaises(NotFound,
                              self.blob_client.chunk_head, chunk.url)

    def _test_rebuild(self, data_size, broken_pos_list):
        """Delete the chunks at ``broken_pos_list`` and rebuild them.

        Uploads ``data_size`` random bytes, deletes the listed chunks,
        calls ``rebuild_chunk`` once with the id of the last deleted chunk
        and checks that every deleted position comes back with the same
        data and metadata at a different URL.
        """
        data = os.urandom(data_size)
        old_content = self.content_factory.new(self.container_id, "titi",
                                               len(data), "RAIN")
        self.assertEqual(type(old_content), RainContent)

        old_content.upload(StringIO.StringIO(data))

        # get the new structure of the uploaded content
        uploaded_content = self.content_factory.get(self.container_id,
                                                    old_content.content_id)

        old_info = {}
        for pos in broken_pos_list:
            old_info[pos] = {}
            c = uploaded_content.chunks.filter(pos=pos)[0]
            old_info[pos]["url"] = c.url
            old_info[pos]["id"] = c.id
            old_info[pos]["hash"] = c.hash
            chunk_id_to_rebuild = c.id
            meta, stream = self.blob_client.chunk_get(c.url)
            old_info[pos]["dl_meta"] = meta
            old_info[pos]["dl_hash"] = md5_stream(stream)
            # delete the chunk
            self.blob_client.chunk_delete(c.url)

        # rebuild the broken chunks
        uploaded_content.rebuild_chunk(chunk_id_to_rebuild)

        # get the new structure of the content
        rebuilt_content = self.content_factory.get(
            self.container_id, uploaded_content.content_id)
        self.assertEqual(type(rebuilt_content), RainContent)

        for pos in broken_pos_list:
            c = rebuilt_content.chunks.filter(pos=pos)[0]
            rebuilt_meta, rebuilt_stream = self.blob_client.chunk_get(c.url)
            self.assertEqual(rebuilt_meta["chunk_id"], c.id)
            self.assertEqual(md5_stream(rebuilt_stream),
                             old_info[pos]["dl_hash"])
            self.assertEqual(c.hash, old_info[pos]["hash"])
            self.assertThat(c.url, NotEquals(old_info[pos]["url"]))
            # The chunk id is dropped from both sides before comparing
            # the remaining chunk metadata.
            del old_info[pos]["dl_meta"]["chunk_id"]
            del rebuilt_meta["chunk_id"]
            self.assertEqual(rebuilt_meta, old_info[pos]["dl_meta"])

    def test_content_0_byte_rebuild_pos_0_0(self):
        self._test_rebuild(0, ["0.0"])

    def test_content_0_byte_rebuild_pos_0_0_and_0_p0(self):
        self._test_rebuild(0, ["0.0", "0.p0"])

    def test_content_1_byte_rebuild_pos_0_0(self):
        self._test_rebuild(1, ["0.0"])

    def test_content_1_byte_rebuild_pos_0_p0(self):
        self._test_rebuild(1, ["0.p0"])

    def test_content_1_byte_rebuild_pos_0_0_and_0_p0(self):
        self._test_rebuild(1, ["0.0", "0.p0"])

    def test_content_chunksize_bytes_rebuild_pos_0_0(self):
        self._test_rebuild(self.conf["chunk_size"], ["0.0"])

    def test_content_chunksize_bytes_rebuild_pos_0_0_and_0_1(self):
        self._test_rebuild(self.conf["chunk_size"], ["0.0", "0.1"])

    def test_content_chunksize_bytes_rebuild_pos_0_0_and_0_p0(self):
        self._test_rebuild(self.conf["chunk_size"], ["0.0", "0.p0"])

    def test_content_chunksize_bytes_rebuild_pos_0_p0_and_0_p1(self):
        self._test_rebuild(self.conf["chunk_size"], ["0.p0", "0.p1"])

    def test_content_chunksize_bytes_rebuild_more_than_k_chunk(self):
        self.assertRaises(UnrecoverableContent, self._test_rebuild,
                          self.conf["chunk_size"], ["0.0", "0.1", "0.2"])

    def _new_content(self, data, broken_pos_list=None):
        """Upload ``data`` then delete the chunks at ``broken_pos_list``.

        :returns: the content re-read through the content factory
        """
        old_content = self.content_factory.new(self.container_id, "titi",
                                               len(data), "RAIN")
        self.assertEqual(type(old_content), RainContent)

        old_content.upload(StringIO.StringIO(data))

        for pos in broken_pos_list or []:
            c = old_content.chunks.filter(pos=pos)[0]
            self.blob_client.chunk_delete(c.url)

        # get the new structure of the uploaded content
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def test_orphan_chunk(self):
        content = self._new_content(random_data(10))
        self.assertRaises(OrphanChunk, content.rebuild_chunk, "uNkNoWnId")

    def test_rebuild_on_the_fly(self):
        data = random_data(self.conf["chunk_size"])
        content = self._new_content(data, ["0.0", "0.p0"])

        stream = content.rebuild_metachunk("0", on_the_fly=True)

        dl_data = "".join(stream)

        self.assertEqual(dl_data, data)

        # The deleted chunks must still be missing afterwards.
        del_chunk_0_0 = content.chunks.filter(pos="0.0")[0]
        del_chunk_0_p0 = content.chunks.filter(pos="0.p0")[0]

        self.assertRaises(NotFound,
                          self.blob_client.chunk_get, del_chunk_0_0.url)
        self.assertRaises(NotFound,
                          self.blob_client.chunk_get, del_chunk_0_p0.url)

    def _test_download(self, data_size, broken_pos_list):
        """Download a content whose chunks at ``broken_pos_list`` are gone.

        Checks that the downloaded data equals the uploaded data and that
        the deleted chunks are still missing afterwards.
        """
        data = random_data(data_size)
        content = self._new_content(data, broken_pos_list)

        downloaded_data = "".join(content.download())

        self.assertEqual(downloaded_data, data)

        for pos in broken_pos_list:
            c = content.chunks.filter(pos=pos)[0]
            self.assertRaises(NotFound, self.blob_client.chunk_delete, c.url)

    def test_download_content_0_byte_without_broken_chunks(self):
        self._test_download(0, [])

    def test_download_content_1_byte_without_broken_chunks(self):
        self._test_download(1, [])

    def test_download_content_chunksize_bytes_without_broken_chunks(self):
        self._test_download(self.conf["chunk_size"], [])

    def test_download_content_chunksize_plus_1_without_broken_chunks(self):
        self._test_download(self.conf["chunk_size"] + 1, [])

    def test_download_content_0_byte_with_broken_0_0_and_0_p0(self):
        self._test_download(0, ["0.0", "0.p0"])

    def test_download_content_1_byte_with_broken_0_0_and_0_p0(self):
        self._test_download(1, ["0.0", "0.p0"])

    def test_download_content_2xchunksize_with_broken_0_2_and_1_0(self):
        self._test_download(2 * self.conf["chunk_size"], ["0.2", "1.0"])

    def test_download_content_chunksize_bytes_with_3_broken_chunks(self):
        data = random_data(self.conf["chunk_size"])
        content = self._new_content(data, ["0.0", "0.1", "0.2"])
        gen = content.download()
        # The error is only raised when the first item is requested.
        self.assertRaises(UnrecoverableContent, next, gen)

    def test_download_interrupt_close(self):
        data = random_data(self.conf["chunk_size"])
        content = self._new_content(data, ["0.p0"])
        download_iter = content.download()

        # Consume the whole stream before closing the iterator.
        dl_data = "".join(download_iter)

        self.assertEqual(len(dl_data), len(data))
        self.assertEqual(dl_data, data)
        download_iter.close()
class TestECContent(BaseTestCase):
    """Functional tests of creation, rebuild and fetch of EC contents.

    The tests use the "EC" storage policy with k=6 and m=3, and are
    skipped when the configuration lists fewer than 12 rawx services.
    """

    def setUp(self):
        super(TestECContent, self).setUp()

        if len(self.conf['services']['rawx']) < 12:
            self.skipTest("Not enough rawx. "
                          "EC tests needs at least 12 rawx to run")

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient()
        # The timestamp gives each test its own container.
        self.container_name = "TestECContent%f" % time.time()
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()
        self.content = random_str(64)
        self.stgpol = "EC"
        self.size = 1024*1024 + 320
        self.k = 6
        self.m = 3

    def tearDown(self):
        super(TestECContent, self).tearDown()

    def random_chunks(self, nb):
        """Return ``nb`` distinct random positions of the first metachunk.

        Positions are formatted "0.<i>" with i in [0, k + m).
        """
        indexes = random.sample(range(self.k + self.m), nb)
        return ["0.%s" % i for i in indexes]

    def _test_create(self, data_size):
        """Create a content of ``data_size`` bytes and check its chunks."""
        # generate random test data
        data = random_data(data_size)
        # using factory create new EC content
        content = self.content_factory.new(
            self.container_id, self.content, len(data), self.stgpol)
        # verify the factory gave us an ECContent
        self.assertEqual(type(content), ECContent)

        # perform the content creation
        content.create(StringIO(data))

        meta, chunks = self.container_client.content_show(
            cid=self.container_id, content=content.content_id)
        # verify metadata
        chunks = ChunksHelper(chunks)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], self.stgpol)
        self.assertEqual(meta['name'], self.content)

        # An empty content still counts as one metachunk.
        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size)) \
            if len(data) != 0 else 1

        # verify each metachunk
        for metapos in range(metachunk_nb):
            chunks_at_pos = content.chunks.filter(metapos=metapos)

            for chunk in chunks_at_pos:
                meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(meta['metachunk_size'], str(chunk.size))
                self.assertEqual(meta['metachunk_hash'], chunk.checksum)
                self.assertEqual(meta['content_path'], self.content)
                self.assertEqual(meta['container_id'], self.container_id)
                # NOTE(review): this compares the value with itself and so
                # always passes; content.content_id was probably intended.
                # TODO confirm both use the same format before changing.
                self.assertEqual(meta['content_id'], meta['content_id'])
                self.assertEqual(meta['chunk_id'], chunk.id)
                self.assertEqual(meta['chunk_pos'], chunk.pos)
                self.assertEqual(meta['chunk_hash'], md5_stream(stream))

    def test_create_0_byte(self):
        self._test_create(0)

    def test_create_1_byte(self):
        self._test_create(1)

    def test_create(self):
        self._test_create(DAT_LEGIT_SIZE)

    def _test_rebuild(self, data_size, broken_pos_list):
        """Delete the chunks at ``broken_pos_list`` and rebuild them.

        Creates a content of ``data_size`` random bytes, deletes the
        listed chunks, calls ``rebuild_chunk`` once with the id of the
        last deleted chunk and checks that every deleted position comes
        back with the same data and metadata at a different URL.
        """
        # generate test data
        data = os.urandom(data_size)
        # create initial content
        old_content = self.content_factory.new(
            self.container_id, self.content, len(data), self.stgpol)
        # verify factory work as intended
        self.assertEqual(type(old_content), ECContent)

        # perform initial content creation
        old_content.create(StringIO(data))

        uploaded_content = self.content_factory.get(self.container_id,
                                                    old_content.content_id)

        # break the content
        old_info = {}
        for pos in broken_pos_list:
            old_info[pos] = {}
            c = uploaded_content.chunks.filter(pos=pos)[0]
            old_info[pos]["url"] = c.url
            old_info[pos]["id"] = c.id
            old_info[pos]["hash"] = c.checksum
            chunk_id_to_rebuild = c.id
            meta, stream = self.blob_client.chunk_get(c.url)
            old_info[pos]["dl_meta"] = meta
            old_info[pos]["dl_hash"] = md5_stream(stream)
            # delete the chunk
            self.blob_client.chunk_delete(c.url)

        # rebuild the broken chunks
        uploaded_content.rebuild_chunk(chunk_id_to_rebuild)

        rebuilt_content = self.content_factory.get(
            self.container_id, uploaded_content.content_id)
        # sanity check
        self.assertEqual(type(rebuilt_content), ECContent)

        # verify rebuild result
        for pos in broken_pos_list:
            c = rebuilt_content.chunks.filter(pos=pos)[0]
            rebuilt_meta, rebuilt_stream = self.blob_client.chunk_get(c.url)
            self.assertEqual(rebuilt_meta["chunk_id"], c.id)
            self.assertEqual(md5_stream(rebuilt_stream),
                             old_info[pos]["dl_hash"])
            self.assertEqual(c.checksum, old_info[pos]["hash"])
            self.assertNotEqual(c.url, old_info[pos]["url"])
            # The chunk id is dropped from both sides before comparing
            # the remaining chunk metadata.
            del old_info[pos]["dl_meta"]["chunk_id"]
            del rebuilt_meta["chunk_id"]
            self.assertEqual(rebuilt_meta, old_info[pos]["dl_meta"])

    def test_content_0_byte_rebuild(self):
        self._test_rebuild(0, self.random_chunks(1))

    def test_content_0_byte_rebuild_advanced(self):
        self._test_rebuild(0, self.random_chunks(3))

    def test_content_1_byte_rebuild(self):
        self._test_rebuild(1, self.random_chunks(1))

    def test_content_1_byte_rebuild_advanced(self):
        self._test_rebuild(1, self.random_chunks(3))

    def test_content_rebuild(self):
        self._test_rebuild(DAT_LEGIT_SIZE, self.random_chunks(1))

    def test_content_rebuild_advanced(self):
        self._test_rebuild(DAT_LEGIT_SIZE, self.random_chunks(3))

    def test_content_rebuild_unrecoverable(self):
        self.assertRaises(
            UnrecoverableContent, self._test_rebuild, DAT_LEGIT_SIZE,
            self.random_chunks(4))

    def _new_content(self, data, broken_pos_list=None):
        """Create a content then delete the chunks at ``broken_pos_list``.

        :returns: the content re-read through the content factory
        """
        old_content = self.content_factory.new(
            self.container_id, self.content, len(data), self.stgpol)
        self.assertEqual(type(old_content), ECContent)

        old_content.create(StringIO(data))

        # break content
        for pos in broken_pos_list or []:
            c = old_content.chunks.filter(pos=pos)[0]
            self.blob_client.chunk_delete(c.url)

        # get the new structure of the uploaded content
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def test_orphan_chunk(self):
        content = self._new_content(random_data(10))
        self.assertRaises(OrphanChunk, content.rebuild_chunk, "invalid")

    def _test_fetch(self, data_size, broken_pos_list=None):
        """Fetch a content whose chunks at ``broken_pos_list`` are gone.

        Checks the length and MD5 of the fetched data against the
        uploaded data, and that the deleted chunks are still missing.
        """
        broken_pos_list = broken_pos_list or []
        test_data = random_data(data_size)
        content = self._new_content(test_data, broken_pos_list)

        data = "".join(content.fetch())

        self.assertEqual(len(data), len(test_data))
        self.assertEqual(md5_data(data), md5_data(test_data))

        # verify that chunks are broken
        for pos in broken_pos_list:
            chunk = content.chunks.filter(pos=pos)[0]
            self.assertRaises(
                NotFound, self.blob_client.chunk_delete, chunk.url)

    def test_fetch_content_0_byte(self):
        self._test_fetch(0)

    def test_fetch_content_1_byte(self):
        self._test_fetch(1)

    def test_fetch_content(self):
        self._test_fetch(DAT_LEGIT_SIZE)

    def test_fetch_content_0_byte_broken(self):
        self._test_fetch(0, self.random_chunks(3))

    def test_fetch_content_1_byte_broken(self):
        self._test_fetch(1, self.random_chunks(3))

    def test_fetch_content_broken(self):
        self._test_fetch(DAT_LEGIT_SIZE, self.random_chunks(3))

    def test_fetch_content_unrecoverable(self):
        broken_chunks = self.random_chunks(4)
        self.assertRaises(
            OioException, self._test_fetch, DAT_LEGIT_SIZE, broken_chunks)