class TestBlobMover(BaseTestCase):

    def setUp(self):
        super(TestBlobMover, self).setUp()
        self.container = random_str(16)
        self.cid = cid_from_name(self.account, self.container)
        self.path = random_str(16)
        self.api = ObjectStorageApi(self.ns)
        self.blob_client = BlobClient(self.conf)

        self.api.container_create(self.account, self.container)
        _, chunks = self.api.container.content_prepare(
            self.account, self.container, self.path, size=1)
        services = self.conscience.all_services('rawx')
        if len(chunks) >= len([s for s in services if s['score'] > 0]):
            self.skipTest("need at least %d rawx to run" % (len(chunks) + 1))

        self.rawx_volumes = dict()
        for rawx in services:
            tags = rawx['tags']
            service_id = tags.get('tag.service_id', None)
            if service_id is None:
                service_id = rawx['addr']
            volume = tags.get('tag.vol', None)
            self.rawx_volumes[service_id] = volume

        self.api.object_create(self.account, self.container,
                               obj_name=self.path, data="chunk")
        meta, self.chunks = self.api.object_locate(
            self.account, self.container, self.path)
        self.version = meta['version']
        self.content_id = meta['id']
        self.chunk_method = meta['chunk_method']

    def _chunk_path(self, chunk):
        url = chunk['url']
        volume_id = url.split('/', 3)[2]
        chunk_id = url.split('/', 3)[3]
        volume = self.rawx_volumes[volume_id]
        return volume + '/' + chunk_id[:3] + '/' + chunk_id

    def test_move_old_chunk(self):
        for chunk in self.chunks:
            convert_to_old_chunk(
                self._chunk_path(chunk), self.account, self.container,
                self.path, self.version, self.content_id)

        orig_chunk = random.choice(self.chunks)
        chunk_volume = orig_chunk['url'].split('/')[2]
        chunk_id = orig_chunk['url'].split('/')[3]
        chunk_headers, chunk_stream = self.blob_client.chunk_get(
            orig_chunk['url'], check_headers=False)

        chunks_kept = list(self.chunks)
        chunks_kept.remove(orig_chunk)

        mover = BlobMoverWorker(self.conf, None,
                                self.rawx_volumes[chunk_volume])
        mover.chunk_move(self._chunk_path(orig_chunk), chunk_id)

        _, new_chunks = self.api.object_locate(
            self.account, self.container, self.path)
        self.assertEqual(len(new_chunks), len(chunks_kept) + 1)

        url_kept = [c['url'] for c in chunks_kept]
        new_chunk = None
        for chunk in new_chunks:
            if chunk['url'] not in url_kept:
                self.assertIsNone(new_chunk)
                new_chunk = chunk

        self.assertNotEqual(orig_chunk['real_url'], new_chunk['real_url'])
        self.assertNotEqual(orig_chunk['url'], new_chunk['url'])
        self.assertEqual(orig_chunk['pos'], new_chunk['pos'])
        self.assertEqual(orig_chunk['size'], new_chunk['size'])
        self.assertEqual(orig_chunk['hash'], new_chunk['hash'])

        new_chunk_headers, new_chunk_stream = self.blob_client.chunk_get(
            new_chunk['url'])
        chunk_data = b''.join(chunk_stream)
        new_chunk_data = b''.join(new_chunk_stream)
        self.assertEqual(chunk_data, new_chunk_data)

        fullpath = encode_fullpath(self.account, self.container, self.path,
                                   self.version, self.content_id)
        self.assertEqual(fullpath, new_chunk_headers['full_path'])
        del new_chunk_headers['full_path']

        self.assertNotEqual(chunk_headers['chunk_id'],
                            new_chunk_headers['chunk_id'])
        new_chunk_id = new_chunk['url'].split('/')[3]
        self.assertEqual(new_chunk_id, new_chunk_headers['chunk_id'])
        del chunk_headers['chunk_id']
        del new_chunk_headers['chunk_id']

        self.assertEqual(OIO_VERSION, new_chunk_headers['oio_version'])
        del chunk_headers['oio_version']
        del new_chunk_headers['oio_version']

        self.assertEqual(chunk_headers, new_chunk_headers)

    def test_move_with_wrong_size(self):
        if not self.chunk_method.startswith('ec'):
            self.skipTest('Only works with EC')

        orig_chunk = random.choice(self.chunks)
        chunk_volume = orig_chunk['url'].split('/')[2]
        chunk_id = orig_chunk['url'].split('/')[3]

        mover = BlobMoverWorker(self.conf, None,
                                self.rawx_volumes[chunk_volume])
        meta, stream = mover.blob_client.chunk_get(orig_chunk['url'])
        data = stream.read()
        stream.close()
        # Truncate the data so its size no longer matches the metadata
        data = data[:-1]
        del meta['chunk_hash']
        wrong_stream = GeneratorIO(data)
        mover.blob_client.chunk_get = Mock(return_value=(meta, wrong_stream))

        self.assertRaises(ChunkException, mover.chunk_move,
                          self._chunk_path(orig_chunk), chunk_id)
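
# A minimal, self-contained sketch (not part of the test suite) of the
# URL-to-path convention that _chunk_path() above relies on: a chunk URL
# looks like "http://<service_id>/<chunk_id>", and the chunk file lives
# under the rawx volume in a subdirectory named after the first three
# characters of the chunk ID. The volume mapping below is hypothetical.
def chunk_path_sketch(chunk_url, rawx_volumes):
    volume_id = chunk_url.split('/', 3)[2]
    chunk_id = chunk_url.split('/', 3)[3]
    return '%s/%s/%s' % (rawx_volumes[volume_id], chunk_id[:3], chunk_id)

assert chunk_path_sketch(
    'http://127.0.0.1:6010/A0A0B1C2',
    {'127.0.0.1:6010': '/var/lib/rawx-1'}) == '/var/lib/rawx-1/A0A/A0A0B1C2'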
class TestDupContent(BaseTestCase):

    def setUp(self):
        super(TestDupContent, self).setUp()
        if len(self.conf['rawx']) < 3:
            self.skipTest("Not enough rawx. "
                          "Dup tests need more than 2 rawx to run")
        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient()
        self.container_name = "TestDupContent%f" % time.time()
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()

    def tearDown(self):
        super(TestDupContent, self).tearDown()

    def _test_upload(self, stgpol, data_size):
        data = random_data(data_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), stgpol)
        self.assertEqual(type(content), DupContent)

        content.upload(StringIO.StringIO(data))

        meta, chunks = self.container_client.content_show(
            cid=self.container_id, content=content.content_id)
        chunks = ChunksHelper(chunks)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], stgpol)
        self.assertEqual(meta['name'], "titi")

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
        if metachunk_nb == 0:
            metachunk_nb = 1  # special case for empty content

        if stgpol == "THREECOPIES":
            nb_copy = 3
        elif stgpol == "TWOCOPIES":
            nb_copy = 2
        elif stgpol == "SINGLE":
            nb_copy = 1

        self.assertEqual(len(chunks), metachunk_nb * nb_copy)

        for pos in range(metachunk_nb):
            chunks_at_pos = chunks.filter(pos=pos)
            self.assertEqual(len(chunks_at_pos), nb_copy)

            data_begin = pos * self.chunk_size
            data_end = pos * self.chunk_size + self.chunk_size
            chunk_hash = md5_data(data[data_begin:data_end])

            for chunk in chunks_at_pos:
                meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(md5_stream(stream), chunk_hash)
                self.assertEqual(meta['content_size'], str(len(data)))
                self.assertEqual(meta['content_path'], "titi")
                self.assertEqual(meta['content_cid'], self.container_id)
                self.assertEqual(meta['content_id'], content.content_id)
                self.assertEqual(meta['chunk_id'], chunk.id)
                self.assertEqual(meta['chunk_pos'], str(pos))
                self.assertEqual(meta['chunk_hash'], chunk_hash)

    def test_twocopies_upload_0_byte(self):
        self._test_upload("TWOCOPIES", 0)

    def test_twocopies_upload_1_byte(self):
        self._test_upload("TWOCOPIES", 1)

    def test_twocopies_upload_chunksize_bytes(self):
        self._test_upload("TWOCOPIES", self.chunk_size)

    def test_twocopies_upload_chunksize_plus_1_bytes(self):
        self._test_upload("TWOCOPIES", self.chunk_size + 1)

    def test_single_upload_0_byte(self):
        self._test_upload("SINGLE", 0)

    def test_single_upload_chunksize_plus_1_bytes(self):
        self._test_upload("SINGLE", self.chunk_size + 1)

    def test_chunks_cleanup_when_upload_failed(self):
        data = random_data(2 * self.chunk_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), "TWOCOPIES")
        self.assertEqual(type(content), DupContent)

        # set bad url for position 1
        for chunk in content.chunks.filter(pos=1):
            chunk.url = "http://127.0.0.1:9/DEADBEEF"

        self.assertRaises(Exception, content.upload, StringIO.StringIO(data))
        for chunk in content.chunks.exclude(pos=1):
            self.assertRaises(NotFound,
                              self.blob_client.chunk_head, chunk.url)

    def _new_content(self, stgpol, data, broken_pos_list):
        old_content = self.content_factory.new(self.container_id, "titi",
                                               len(data), stgpol)
        self.assertEqual(type(old_content), DupContent)

        old_content.upload(StringIO.StringIO(data))

        for pos, idx in broken_pos_list:
            c = old_content.chunks.filter(pos=pos)[idx]
            self.blob_client.chunk_delete(c.url)

        # get the new structure of the uploaded content
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def _test_download(self, stgpol, data_size, broken_pos_list):
        data = random_data(data_size)
        content = self._new_content(stgpol, data, broken_pos_list)

        downloaded_data = "".join(content.download())

        self.assertEqual(downloaded_data, data)

        for pos, idx in broken_pos_list:
            # check nothing has been rebuilt
            c = content.chunks.filter(pos=pos)[0]
            self.assertRaises(NotFound, self.blob_client.chunk_delete, c.url)

    def test_twocopies_download_content_0_byte_without_broken_chunks(self):
        self._test_download("TWOCOPIES", 0, [])

    def test_twocopies_download_content_0_byte_with_broken_0_0(self):
        self._test_download("TWOCOPIES", 0, [(0, 0)])

    def test_twocopies_download_content_1_byte_without_broken_chunks(self):
        self._test_download("TWOCOPIES", 1, [])

    def test_twocopies_download_content_1_byte_with_broken_0_0(self):
        self._test_download("TWOCOPIES", 1, [(0, 0)])

    def test_twocopies_download_chunksize_bytes_without_broken_chunks(self):
        self._test_download("TWOCOPIES", self.chunk_size, [])

    def test_twocopies_download_2xchuksize_bytes_with_broken_0_0_and_1_0(self):
        self._test_download("TWOCOPIES", self.chunk_size * 2,
                            [(0, 0), (1, 0)])

    def test_twocopies_download_content_chunksize_bytes_2_broken_chunks(self):
        data = random_data(self.chunk_size)
        content = self._new_content("TWOCOPIES", data, [(0, 0), (0, 1)])
        gen = content.download()
        self.assertRaises(UnrecoverableContent, gen.next)

    def test_single_download_content_1_byte_without_broken_chunks(self):
        self._test_download("SINGLE", 1, [])

    def test_single_download_chunksize_bytes_plus_1_without_broken_chunk(self):
        self._test_download("SINGLE", self.chunk_size * 2, [])
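
# A standalone sketch (assumed values, not taken from the suite) of the
# chunk-count arithmetic checked by _test_upload() above: a duplicated
# content is split into ceil(size / chunk_size) metachunks (at least 1,
# even for empty content), and each metachunk is stored nb_copy times.
import math

def expected_chunk_count(data_size, chunk_size, nb_copy):
    metachunk_nb = int(math.ceil(float(data_size) / chunk_size)) or 1
    return metachunk_nb * nb_copy

assert expected_chunk_count(0, 1048576, 2) == 2        # empty content
assert expected_chunk_count(1048576, 1048576, 2) == 2  # exactly one metachunk
assert expected_chunk_count(1048577, 1048576, 3) == 6  # chunk_size + 1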
class TestContentFactory(BaseTestCase):

    def setUp(self):
        super(TestContentFactory, self).setUp()
        self.namespace = self.conf['namespace']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_name = "TestContentFactory%f" % time.time()
        self.blob_client = BlobClient()
        self.container_client = ContainerClient(self.gridconf)
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()

    def tearDown(self):
        super(TestContentFactory, self).tearDown()

    def test_extract_datasec(self):
        self.content_factory.ns_info = {
            "data_security": {
                "DUPONETWO": "DUP:distance=1|nb_copy=2",
                "RAIN": "RAIN:k=6|m=2|algo=liber8tion"
            },
            "storage_policy": {
                "RAIN": "NONE:RAIN:NONE",
                "SINGLE": "NONE:NONE:NONE",
                "TWOCOPIES": "NONE:DUPONETWO:NONE"
            }
        }

        ds_type, ds_args = self.content_factory._extract_datasec("RAIN")
        self.assertEqual(ds_type, "RAIN")
        self.assertEqual(ds_args, {
            "k": "6",
            "m": "2",
            "algo": "liber8tion"
        })

        ds_type, ds_args = self.content_factory._extract_datasec("SINGLE")
        self.assertEqual(ds_type, "DUP")
        self.assertEqual(ds_args, {
            "nb_copy": "1",
            "distance": "0"
        })

        ds_type, ds_args = self.content_factory._extract_datasec("TWOCOPIES")
        self.assertEqual(ds_type, "DUP")
        self.assertEqual(ds_args, {
            "nb_copy": "2",
            "distance": "1"
        })

        self.assertRaises(InconsistentContent,
                          self.content_factory._extract_datasec,
                          "UnKnOwN")

    def test_get_rain(self):
        meta = {
            "chunk-method": "plain/rain?algo=liber8tion&k=6&m=2",
            "ctime": "1450176946",
            "deleted": "False",
            "hash": "E952A419957A6E405BFC53EC65483F73",
            "hash-method": "md5",
            "id": "3FA2C4A1ED2605005335A276890EC458",
            "length": "658",
            "mime-type": "application/octet-stream",
            "name": "tox.ini",
            "policy": "RAIN",
            "version": "1450176946676289"
        }
        chunks = [
            {"url": "http://127.0.0.1:6012/A0A0", "pos": "0.p0",
             "size": 512, "hash": "E7D4E4AD460971CA2E3141F2102308D4"},
            {"url": "http://127.0.0.1:6010/A01", "pos": "0.1",
             "size": 146, "hash": "760AB5DA7C51A3654F1CA622687CD6C3"},
            {"url": "http://127.0.0.1:6011/A00", "pos": "0.0",
             "size": 512, "hash": "B1D08B86B8CAA90A2092CCA0DF9201DB"},
            {"url": "http://127.0.0.1:6013/A0A1", "pos": "0.p1",
             "size": 512, "hash": "DA9D7F72AEEA5791565724424CE45C16"}
        ]
        self.content_factory.container_client.content_show = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.get("xxx_container_id", "xxx_content_id")
        self.assertEqual(type(c), RainContent)
        self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458")
        self.assertEqual(c.length, 658)
        self.assertEqual(c.path, "tox.ini")
        self.assertEqual(c.version, "1450176946676289")
        self.assertEqual(c.algo, "liber8tion")
        self.assertEqual(c.k, 6)
        self.assertEqual(c.m, 2)
        self.assertEqual(len(c.chunks), 4)
        self.assertEqual(c.chunks[0].raw(), chunks[2])
        self.assertEqual(c.chunks[1].raw(), chunks[1])
        self.assertEqual(c.chunks[2].raw(), chunks[0])
        self.assertEqual(c.chunks[3].raw(), chunks[3])

    def test_get_dup(self):
        meta = {
            "chunk-method": "plain/bytes",
            "ctime": "1450176946",
            "deleted": "False",
            "hash": "E952A419957A6E405BFC53EC65483F73",
            "hash-method": "md5",
            "id": "3FA2C4A1ED2605005335A276890EC458",
            "length": "658",
            "mime-type": "application/octet-stream",
            "name": "tox.ini",
            "policy": "TWOCOPIES",
            "version": "1450176946676289"
        }
        chunks = [
            {"url": "http://127.0.0.1:6010/A0", "pos": "0",
             "size": 658, "hash": "E952A419957A6E405BFC53EC65483F73"},
            {"url": "http://127.0.0.1:6011/A1", "pos": "0",
             "size": 658, "hash": "E952A419957A6E405BFC53EC65483F73"}
        ]
        self.content_factory.container_client.content_show = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.get("xxx_container_id", "xxx_content_id")
        self.assertEqual(type(c), DupContent)
        self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458")
        self.assertEqual(c.length, 658)
        self.assertEqual(c.path, "tox.ini")
        self.assertEqual(c.version, "1450176946676289")
        self.assertEqual(c.nb_copy, 2)
        self.assertEqual(c.distance, 1)
        self.assertEqual(len(c.chunks), 2)
        self.assertEqual(c.chunks[0].raw(), chunks[0])
        self.assertEqual(c.chunks[1].raw(), chunks[1])

    def test_get_unknown_content(self):
        self.assertRaises(ContentNotFound, self.content_factory.get,
                          self.container_id, "1234")

    def test_new_rain(self):
        meta = {
            "chunk-method": "plain/rain?algo=liber8tion&k=6&m=2",
            "ctime": "1450341162",
            "deleted": "False",
            "hash": "",
            "hash-method": "md5",
            "id": "F4B1C8DD132705007DE8B43D0709DAA2",
            "length": "1000",
            "mime-type": "application/octet-stream",
            "name": "titi",
            "policy": "RAIN",
            "version": "1450341162332663"
        }
        chunks = [
            {"url": "http://127.0.0.1:6010/0_p1", "pos": "0.p1",
             "size": 1048576, "hash": "00000000000000000000000000000000"},
            {"url": "http://127.0.0.1:6011/0_p0", "pos": "0.p0",
             "size": 1048576, "hash": "00000000000000000000000000000000"},
            {"url": "http://127.0.0.1:6016/0_1", "pos": "0.1",
             "size": 1048576, "hash": "00000000000000000000000000000000"},
            {"url": "http://127.0.0.1:6017/0_0", "pos": "0.0",
             "size": 1048576, "hash": "00000000000000000000000000000000"}
        ]
        self.content_factory.container_client.content_prepare = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.new("xxx_container_id", "titi",
                                     1000, "RAIN")
        self.assertEqual(type(c), RainContent)
        self.assertEqual(c.content_id, "F4B1C8DD132705007DE8B43D0709DAA2")
        self.assertEqual(c.length, 1000)
        self.assertEqual(c.path, "titi")
        self.assertEqual(c.version, "1450341162332663")
        self.assertEqual(c.algo, "liber8tion")
        self.assertEqual(c.k, 6)
        self.assertEqual(c.m, 2)
        self.assertEqual(len(c.chunks), 4)
        self.assertEqual(c.chunks[0].raw(), chunks[3])
        self.assertEqual(c.chunks[1].raw(), chunks[2])
        self.assertEqual(c.chunks[2].raw(), chunks[1])
        self.assertEqual(c.chunks[3].raw(), chunks[0])

    def _new_content(self, stgpol, data, path="titi"):
        old_content = self.content_factory.new(self.container_id, path,
                                               len(data), stgpol)
        old_content.upload(StringIO.StringIO(data))
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def _test_change_policy(self, data_size, old_policy, new_policy):
        if (old_policy == "RAIN" or new_policy == "RAIN") \
                and len(self.conf['rawx']) < 8:
            self.skipTest("RAIN: need at least 8 rawx to run")

        data = random_data(data_size)
        obj_type = {
            "SINGLE": DupContent,
            "TWOCOPIES": DupContent,
            "THREECOPIES": DupContent,
            "RAIN": RainContent
        }
        old_content = self._new_content(old_policy, data)
        self.assertEqual(type(old_content), obj_type[old_policy])

        changed_content = self.content_factory.change_policy(
            old_content.container_id, old_content.content_id, new_policy)

        self.assertRaises(NotFound, self.container_client.content_show,
                          self.account,
                          cid=old_content.container_id,
                          content=old_content.content_id)

        new_content = self.content_factory.get(self.container_id,
                                               changed_content.content_id)
        self.assertEqual(type(new_content), obj_type[new_policy])

        downloaded_data = "".join(new_content.download())
        self.assertEqual(downloaded_data, data)

    def test_change_content_0_byte_policy_single_to_rain(self):
        self._test_change_policy(0, "SINGLE", "RAIN")

    def test_change_content_0_byte_policy_rain_to_twocopies(self):
        self._test_change_policy(0, "RAIN", "TWOCOPIES")

    def test_change_content_1_byte_policy_single_to_rain(self):
        self._test_change_policy(1, "SINGLE", "RAIN")

    def test_change_content_chunksize_bytes_policy_twocopies_to_rain(self):
        self._test_change_policy(self.chunk_size, "TWOCOPIES", "RAIN")

    def test_change_content_2xchunksize_bytes_policy_threecopies_to_rain(self):
        self._test_change_policy(self.chunk_size * 2, "THREECOPIES", "RAIN")

    def test_change_content_1_byte_policy_rain_to_threecopies(self):
        self._test_change_policy(1, "RAIN", "THREECOPIES")

    def test_change_content_chunksize_bytes_policy_rain_to_twocopies(self):
        self._test_change_policy(self.chunk_size, "RAIN", "TWOCOPIES")

    def test_change_content_2xchunksize_bytes_policy_rain_to_single(self):
        self._test_change_policy(self.chunk_size * 2, "RAIN", "SINGLE")

    def test_change_content_0_byte_policy_twocopies_to_threecopies(self):
        self._test_change_policy(0, "TWOCOPIES", "THREECOPIES")

    def test_change_content_chunksize_bytes_policy_single_to_twocopies(self):
        self._test_change_policy(self.chunk_size, "SINGLE", "TWOCOPIES")

    def test_change_content_2xchunksize_bytes_policy_3copies_to_single(self):
        self._test_change_policy(self.chunk_size * 2, "THREECOPIES", "SINGLE")

    def test_change_content_with_same_policy(self):
        data = random_data(10)
        old_content = self._new_content("TWOCOPIES", data)
        changed_content = self.content_factory.change_policy(
            old_content.container_id, old_content.content_id, "TWOCOPIES")
        self.assertEqual(old_content.content_id, changed_content.content_id)

    def test_change_policy_unknown_content(self):
        self.assertRaises(ContentNotFound,
                          self.content_factory.change_policy,
                          self.container_id, "1234", "SINGLE")

    def test_change_policy_unknown_storage_policy(self):
        data = random_data(10)
        old_content = self._new_content("TWOCOPIES", data)
        self.assertRaises(ClientException,
                          self.content_factory.change_policy,
                          self.container_id, old_content.content_id,
                          "UnKnOwN")

    def _test_move_chunk(self, policy):
        data = random_data(self.chunk_size)
        content = self._new_content(policy, data)
        chunk_id = content.chunks.filter(metapos=0)[0].id
        chunk_url = content.chunks.filter(metapos=0)[0].url
        chunk_meta, chunk_stream = self.blob_client.chunk_get(chunk_url)
        chunk_hash = md5_stream(chunk_stream)
        new_chunk = content.move_chunk(chunk_id)

        content_updated = self.content_factory.get(self.container_id,
                                                   content.content_id)
        hosts = []
        for c in content_updated.chunks.filter(metapos=0):
            self.assertThat(hosts, Not(Contains(c.host)))
            self.assertNotEqual(c.id, chunk_id)
            hosts.append(c.host)

        new_chunk_meta, new_chunk_stream = self.blob_client.chunk_get(
            new_chunk["url"])
        new_chunk_hash = md5_stream(new_chunk_stream)
        self.assertEqual(new_chunk_hash, chunk_hash)

        del chunk_meta["chunk_id"]
        del new_chunk_meta["chunk_id"]
        self.assertEqual(new_chunk_meta, chunk_meta)

    def test_single_move_chunk(self):
        self._test_move_chunk("SINGLE")

    def test_twocopies_move_chunk(self):
        self._test_move_chunk("TWOCOPIES")

    def test_rain_move_chunk(self):
        if len(self.conf['rawx']) < 9:
            self.skipTest("Need more than 8 rawx")
        self._test_move_chunk("RAIN")

    def test_move_chunk_not_in_content(self):
        data = random_data(self.chunk_size)
        content = self._new_content("TWOCOPIES", data)
        with ExpectedException(OrphanChunk):
            content.move_chunk("1234")

    def test_strange_paths(self):
        strange_paths = [
            "Annual report.txt",
            "foo+bar=foobar.txt",
            "100%_bug_free.c",
            "forward/slash/allowed",
            "I\\put\\backslashes\\and$dollar$signs$in$file$names",
            "Je suis tombé sur la tête, mais ça va bien.",
            "%s%f%u%d%%",
            "carriage\rreturn",
            "line\nfeed",
            "ta\tbu\tla\ttion",
            "controlchars",
        ]
        answers = dict()
        for cname in strange_paths:
            content = self._new_content("SINGLE", "nobody cares", cname)
            answers[cname] = content

        listing = self.container_client.container_list(self.account,
                                                       self.container_name)
        obj_set = {k["name"].encode("utf8", "ignore")
                   for k in listing["objects"]}
        try:
            # Ensure the saved path is the one we gave the object
            for cname in answers:
                self.assertEqual(cname, answers[cname].path)
            # Ensure all objects appear in listing
            for cname in strange_paths:
                self.assertIn(cname, obj_set)
        finally:
            # Cleanup
            for cname in answers:
                try:
                    answers[cname].delete()
                except Exception:
                    pass
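
# A minimal sketch (an assumption derived from test_extract_datasec above,
# not a copy of ContentFactory._extract_datasec) of how a data-security
# definition string such as "DUP:distance=1|nb_copy=2" splits into a type
# and a parameter dict.
def parse_datasec_sketch(definition):
    ds_type, _, raw_args = definition.partition(':')
    ds_args = dict(kv.split('=', 1) for kv in raw_args.split('|') if kv)
    return ds_type, ds_args

assert parse_datasec_sketch("DUP:distance=1|nb_copy=2") == (
    "DUP", {"distance": "1", "nb_copy": "2"})
assert parse_datasec_sketch("RAIN:k=6|m=2|algo=liber8tion") == (
    "RAIN", {"k": "6", "m": "2", "algo": "liber8tion"})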
class TestContentFactory(BaseTestCase):

    def setUp(self):
        super(TestContentFactory, self).setUp()
        self.wait_for_score(('meta2', ))
        self.namespace = self.conf['namespace']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_name = "TestContentFactory%f" % time.time()
        self.blob_client = BlobClient(conf=self.conf)
        self.container_client = ContainerClient(self.gridconf)
        self.container_client.container_create(account=self.account,
                                               reference=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()
        self.stgpol = "SINGLE"
        self.stgpol_twocopies = "TWOCOPIES"
        self.stgpol_threecopies = "THREECOPIES"
        self.stgpol_ec = "EC"

    def tearDown(self):
        super(TestContentFactory, self).tearDown()

    def test_get_ec(self):
        meta = {
            "chunk_method": "ec/algo=liberasurecode_rs_vand,k=6,m=2",
            "ctime": "1450176946",
            "deleted": "False",
            "hash": "E952A419957A6E405BFC53EC65483F73",
            "hash_method": "md5",
            "id": "3FA2C4A1ED2605005335A276890EC458",
            "length": "658",
            "mime_type": "application/octet-stream",
            "name": "tox.ini",
            "policy": self.stgpol_ec,
            "version": "1450176946676289",
            "oio_version": "4.2",
        }
        chunks = [
            {"url": "http://127.0.0.1:6012/A0A0", "pos": "0.0",
             "size": 512, "hash": "E7D4E4AD460971CA2E3141F2102308D4"},
            {"url": "http://127.0.0.1:6010/A01", "pos": "0.1",
             "size": 146, "hash": "760AB5DA7C51A3654F1CA622687CD6C3"},
            {"url": "http://127.0.0.1:6011/A00", "pos": "0.2",
             "size": 512, "hash": "B1D08B86B8CAA90A2092CCA0DF9201DB"},
            {"url": "http://127.0.0.1:6013/A0A1", "pos": "0.3",
             "size": 512, "hash": "DA9D7F72AEEA5791565724424CE45C16"}
        ]
        self.content_factory.container_client.content_locate = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.get(
            "xxx_container_id", "xxx_content_id",
            account=self.account, container_name=self.container_name)
        self.assertEqual(type(c), ECContent)
        self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458")
        self.assertEqual(c.length, 658)
        self.assertEqual(c.path, "tox.ini")
        self.assertEqual(
            c.full_path,
            encode_fullpath(self.account, self.container_name, "tox.ini",
                            meta['version'], meta['id']))
        self.assertEqual(c.version, "1450176946676289")
        # TODO test storage method
        self.assertEqual(len(c.chunks), 4)
        self.assertEqual(c.chunks[0].raw(), chunks[0])
        self.assertEqual(c.chunks[1].raw(), chunks[1])
        self.assertEqual(c.chunks[2].raw(), chunks[2])
        self.assertEqual(c.chunks[3].raw(), chunks[3])

    def test_get_plain(self):
        meta = {
            "chunk_method": "plain/nb_copy=2",
            "ctime": "1450176946",
            "deleted": "False",
            "hash": "E952A419957A6E405BFC53EC65483F73",
            "hash_method": "md5",
            "id": "3FA2C4A1ED2605005335A276890EC458",
            "length": "658",
            "mime_type": "application/octet-stream",
            "name": "tox.ini",
            "policy": self.stgpol_twocopies,
            "version": "1450176946676289",
            "oio_version": "4.2",
        }
        chunks = [
            {"url": "http://127.0.0.1:6010/A0", "pos": "0",
             "size": 658, "hash": "E952A419957A6E405BFC53EC65483F73"},
            {"url": "http://127.0.0.1:6011/A1", "pos": "0",
             "size": 658, "hash": "E952A419957A6E405BFC53EC65483F73"}
        ]
        self.content_factory.container_client.content_locate = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.get(
            "xxx_container_id", "xxx_content_id",
            account=self.account, container_name=self.container_name)
        self.assertEqual(type(c), PlainContent)
        self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458")
        self.assertEqual(c.length, 658)
        self.assertEqual(c.path, "tox.ini")
        self.assertEqual(c.version, "1450176946676289")
        self.assertEqual(
            c.full_path,
            encode_fullpath(self.account, self.container_name, "tox.ini",
                            meta['version'], meta['id']))
        # TODO test storage_method
        self.assertEqual(len(c.chunks), 2)
        self.assertEqual(c.chunks[0].raw(), chunks[0])
        self.assertEqual(c.chunks[1].raw(), chunks[1])

    def test_get_unknown_content(self):
        self.assertRaises(ContentNotFound, self.content_factory.get,
                          self.container_id, "1234")

    def test_new_ec(self):
        meta = {
            "chunk_method": "ec/algo=liberasurecode_rs_vand,k=6,m=2",
            "ctime": "1450341162",
            "deleted": "False",
            "hash": "",
            "hash_method": "md5",
            "id": "F4B1C8DD132705007DE8B43D0709DAA2",
            "length": "1000",
            "mime_type": "application/octet-stream",
            "name": "titi",
            "policy": self.stgpol_ec,
            "version": "1450341162332663",
            "oio_version": "4.2",
        }
        chunks = [
            {"url": "http://127.0.0.1:6010/0_p1", "pos": "0.3",
             "size": 1048576, "hash": "00000000000000000000000000000000"},
            {"url": "http://127.0.0.1:6011/0_p0", "pos": "0.2",
             "size": 1048576, "hash": "00000000000000000000000000000000"},
            {"url": "http://127.0.0.1:6016/0_1", "pos": "0.1",
             "size": 1048576, "hash": "00000000000000000000000000000000"},
            {"url": "http://127.0.0.1:6017/0_0", "pos": "0.0",
             "size": 1048576, "hash": "00000000000000000000000000000000"}
        ]
        self.content_factory.container_client.content_prepare = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.new(
            "xxx_container_id", "titi", 1000, self.stgpol_ec,
            account=self.account, container_name=self.container_name)
        self.assertEqual(type(c), ECContent)
        self.assertEqual(c.content_id, "F4B1C8DD132705007DE8B43D0709DAA2")
        self.assertEqual(c.length, 1000)
        self.assertEqual(c.path, "titi")
        self.assertEqual(c.version, "1450341162332663")
        # TODO test storage_method
        self.assertEqual(len(c.chunks), 4)
        self.assertEqual(c.chunks[0].raw(), chunks[3])
        self.assertEqual(c.chunks[1].raw(), chunks[2])
        self.assertEqual(c.chunks[2].raw(), chunks[1])
        self.assertEqual(c.chunks[3].raw(), chunks[0])

    def _new_content(self, stgpol, data, path="titi", account=None,
                     container_name=None, mime_type=None, properties=None):
        old_content = self.content_factory.new(
            self.container_id, path, len(data), stgpol,
            account=account, container_name=container_name)
        if properties:
            old_content.properties = properties
        if mime_type:
            old_content.mime_type = mime_type
        old_content.create(BytesIO(data))
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def _test_move_chunk(self, policy):
        data = random_data(self.chunk_size)
        content = self._new_content(policy, data)
        mc = content.chunks.filter(metapos=0)
        chunk_id = mc[0].id
        chunk_url = mc[0].url
        chunk_host = mc[0].host
        chunk_meta, chunk_stream = self.blob_client.chunk_get(chunk_url)
        chunk_hash = md5_stream(chunk_stream)
        new_chunk = content.move_chunk(chunk_id, service_id=chunk_host)

        content_updated = self.content_factory.get(self.container_id,
                                                   content.content_id)
        hosts = []
        for c in content_updated.chunks.filter(metapos=0):
            self.assertThat(hosts, Not(Contains(c.host)))
            self.assertNotEqual(c.url, chunk_url)
            hosts.append(c.host)

        new_chunk_meta, new_chunk_stream = self.blob_client.chunk_get(
            new_chunk["url"])
        new_chunk_hash = md5_stream(new_chunk_stream)
        self.assertEqual(new_chunk_hash, chunk_hash)
        self.assertGreaterEqual(new_chunk_meta['chunk_mtime'],
                                chunk_meta['chunk_mtime'])

        del chunk_meta["chunk_id"]
        del new_chunk_meta["chunk_id"]
        del chunk_meta["chunk_mtime"]
        del new_chunk_meta["chunk_mtime"]
        self.assertEqual(new_chunk_meta, chunk_meta)

    def test_single_move_chunk(self):
        self._test_move_chunk(self.stgpol)

    def test_twocopies_move_chunk(self):
        self._test_move_chunk(self.stgpol_twocopies)

    @ec
    def test_ec_move_chunk(self):
        self._test_move_chunk(self.stgpol_ec)

    def test_move_chunk_not_in_content(self):
        data = random_data(self.chunk_size)
        content = self._new_content(self.stgpol_twocopies, data)
        with ExpectedException(OrphanChunk):
            content.move_chunk("1234")

    def test_strange_paths(self):
        answers = dict()
        for cname in strange_paths:
            content = self._new_content(self.stgpol, b"nobody cares", cname)
            answers[cname] = content

        _, listing = self.container_client.content_list(
            self.account, self.container_name)
        if PY2:
            obj_set = {k["name"].encode('utf-8')
                       for k in listing["objects"]}
        else:
            obj_set = {k["name"] for k in listing["objects"]}
        try:
            # Ensure the saved path is the one we gave the object
            for cname in answers:
                self.assertEqual(cname, answers[cname].path)
                fullpath = encode_fullpath(
                    self.account, self.container_name, cname,
                    answers[cname].version, answers[cname].content_id)
                self.assertEqual(answers[cname].full_path, fullpath)
            # Ensure all objects appear in listing
            for cname in strange_paths:
                self.assertIn(cname, obj_set)
        finally:
            # Cleanup
            for cname in answers:
                try:
                    answers[cname].delete()
                except Exception:
                    pass
class TestBlobRebuilder(BaseTestCase):

    def setUp(self):
        super(TestBlobRebuilder, self).setUp()
        self.container = random_str(16)
        self.cid = cid_from_name(self.account, self.container)
        self.path = random_str(16)
        self.api = ObjectStorageApi(self.ns)
        self.blob_client = BlobClient(self.conf)

        self.api.container_create(self.account, self.container)
        _, chunks = self.api.container.content_prepare(
            self.account, self.container, self.path, size=1)
        if len(chunks) < 2:
            self.skipTest("need at least 2 chunks to run")

        services = self.conscience.all_services('rawx')
        self.rawx_volumes = dict()
        for rawx in services:
            tags = rawx['tags']
            service_id = tags.get('tag.service_id', None)
            if service_id is None:
                service_id = rawx['addr']
            volume = tags.get('tag.vol', None)
            self.rawx_volumes[service_id] = volume

        self.api.object_create(self.account, self.container,
                               obj_name=self.path, data="chunk")
        meta, self.chunks = self.api.object_locate(
            self.account, self.container, self.path)
        self.version = meta['version']
        self.content_id = meta['id']

    def _chunk_path(self, chunk):
        url = chunk['url']
        volume_id = url.split('/', 3)[2]
        chunk_id = url.split('/', 3)[3]
        volume = self.rawx_volumes[volume_id]
        return volume + '/' + chunk_id[:3] + '/' + chunk_id

    def test_rebuild_old_chunk(self):
        for c in self.chunks:
            convert_to_old_chunk(
                self._chunk_path(c), self.account, self.container,
                self.path, self.version, self.content_id)

        chunk = random.choice(self.chunks)
        chunk_volume = chunk['url'].split('/')[2]
        chunk_id = chunk['url'].split('/')[3]
        chunk_headers, chunk_stream = self.blob_client.chunk_get(
            chunk['url'], check_headers=False)
        os.remove(self._chunk_path(chunk))
        chunks_kept = list(self.chunks)
        chunks_kept.remove(chunk)

        conf = self.conf.copy()
        conf['allow_same_rawx'] = True
        rebuilder = BlobRebuilder(conf, service_id=chunk_volume)
        rebuilder_worker = rebuilder.create_worker(None, None)
        rebuilder_worker._process_item(
            (self.ns, self.cid, self.content_id, chunk_id))

        _, new_chunks = self.api.object_locate(
            self.account, self.container, self.path)
        self.assertEqual(len(new_chunks), len(chunks_kept) + 1)

        url_kept = [c['url'] for c in chunks_kept]
        new_chunk = None
        for c in new_chunks:
            if c['url'] not in url_kept:
                self.assertIsNone(new_chunk)
                new_chunk = c

        # Cannot check if the URL is different: it may be the same since we
        # generate predictable chunk IDs.
        # self.assertNotEqual(chunk['real_url'], new_chunk['real_url'])
        # self.assertNotEqual(chunk['url'], new_chunk['url'])
        self.assertEqual(chunk['pos'], new_chunk['pos'])
        self.assertEqual(chunk['size'], new_chunk['size'])
        self.assertEqual(chunk['hash'], new_chunk['hash'])

        new_chunk_headers, new_chunk_stream = self.blob_client.chunk_get(
            new_chunk['url'])
        chunk_data = b''.join(chunk_stream)
        new_chunk_data = b''.join(new_chunk_stream)
        self.assertEqual(chunk_data, new_chunk_data)

        fullpath = encode_fullpath(self.account, self.container, self.path,
                                   self.version, self.content_id)
        self.assertEqual(fullpath, new_chunk_headers['full_path'])
        del new_chunk_headers['full_path']

        # Since we generate predictable chunk IDs, they can be equal
        # self.assertNotEqual(chunk_headers['chunk_id'],
        #                     new_chunk_headers['chunk_id'])
        # We could compare the modification time of the chunks,
        # but unfortunately they have a 1s resolution...
        # self.assertNotEqual(chunk_headers['chunk_mtime'],
        #                     new_chunk_headers['chunk_mtime'])
        new_chunk_id = new_chunk['url'].split('/')[3]
        self.assertEqual(new_chunk_id, new_chunk_headers['chunk_id'])
        del chunk_headers['chunk_id']
        del new_chunk_headers['chunk_id']

        self.assertEqual(OIO_VERSION, new_chunk_headers['oio_version'])
        del chunk_headers['oio_version']
        del new_chunk_headers['oio_version']
        del chunk_headers['chunk_mtime']
        del new_chunk_headers['chunk_mtime']

        self.assertEqual(chunk_headers, new_chunk_headers)
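
# A sketch of the fullpath value these tests compare against
# encode_fullpath(). Assumption (inferred from the assertions above, not
# from the implementation): each of account/container/path/version/
# content_id is percent-encoded and the five parts are joined with '/'.
try:
    from urllib import quote          # Python 2
except ImportError:
    from urllib.parse import quote    # Python 3

def encode_fullpath_sketch(account, container, path, version, content_id):
    parts = (account, container, path, str(version), str(content_id))
    return '/'.join(quote(p, safe='') for p in parts)

assert encode_fullpath_sketch(
    'myaccount', 'mycontainer', 'my object', 1450176946676289,
    '3FA2C4A1ED2605005335A276890EC458') == (
        'myaccount/mycontainer/my%20object/1450176946676289/'
        '3FA2C4A1ED2605005335A276890EC458')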
class TestDupContent(BaseTestCase):

    def setUp(self):
        super(TestDupContent, self).setUp()
        if len(self.conf['rawx']) < 3:
            self.skipTest("Not enough rawx. "
                          "Dup tests need more than 2 rawx to run")
        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient()
        self.container_name = "TestDupContent%f" % time.time()
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()

    def tearDown(self):
        super(TestDupContent, self).tearDown()

    def _test_upload(self, stgpol, data_size):
        data = random_data(data_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), stgpol)
        self.assertEqual(type(content), DupContent)

        content.upload(StringIO.StringIO(data))

        meta, chunks = self.container_client.content_show(
            cid=self.container_id, content=content.content_id)
        chunks = ChunksHelper(chunks)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], stgpol)
        self.assertEqual(meta['name'], "titi")

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
        if metachunk_nb == 0:
            metachunk_nb = 1  # special case for empty content

        if stgpol == "THREECOPIES":
            nb_copy = 3
        elif stgpol == "TWOCOPIES":
            nb_copy = 2
        elif stgpol == "SINGLE":
            nb_copy = 1

        self.assertEqual(len(chunks), metachunk_nb * nb_copy)

        for pos in range(metachunk_nb):
            chunks_at_pos = chunks.filter(pos=pos)
            self.assertEqual(len(chunks_at_pos), nb_copy)

            data_begin = pos * self.chunk_size
            data_end = pos * self.chunk_size + self.chunk_size
            chunk_hash = md5_data(data[data_begin:data_end])

            for chunk in chunks_at_pos:
                meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(md5_stream(stream), chunk_hash)
                self.assertEqual(meta['content_size'], str(len(data)))
                self.assertEqual(meta['content_path'], "titi")
                self.assertEqual(meta['content_cid'], self.container_id)
                self.assertEqual(meta['content_id'], content.content_id)
                self.assertEqual(meta['chunk_id'], chunk.id)
                self.assertEqual(meta['chunk_pos'], str(pos))
                self.assertEqual(meta['chunk_hash'], chunk_hash)

    def test_twocopies_upload_0_byte(self):
        self._test_upload("TWOCOPIES", 0)

    def test_twocopies_upload_1_byte(self):
        self._test_upload("TWOCOPIES", 1)

    def test_twocopies_upload_chunksize_bytes(self):
        self._test_upload("TWOCOPIES", self.chunk_size)

    def test_twocopies_upload_chunksize_plus_1_bytes(self):
        self._test_upload("TWOCOPIES", self.chunk_size + 1)

    def test_single_upload_0_byte(self):
        self._test_upload("SINGLE", 0)

    def test_single_upload_chunksize_plus_1_bytes(self):
        self._test_upload("SINGLE", self.chunk_size + 1)

    def test_chunks_cleanup_when_upload_failed(self):
        data = random_data(2 * self.chunk_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), "TWOCOPIES")
        self.assertEqual(type(content), DupContent)

        # set bad url for position 1
        for chunk in content.chunks.filter(pos=1):
            chunk.url = "http://127.0.0.1:9/DEADBEEF"

        self.assertRaises(Exception, content.upload, StringIO.StringIO(data))
        for chunk in content.chunks.exclude(pos=1):
            self.assertRaises(NotFound,
                              self.blob_client.chunk_head, chunk.url)

    def _new_content(self, stgpol, data, broken_pos_list=[]):
        old_content = self.content_factory.new(self.container_id, "titi",
                                               len(data), stgpol)
        self.assertEqual(type(old_content), DupContent)

        old_content.upload(StringIO.StringIO(data))

        # Record everything we know about the chunks we are about to break,
        # then delete them from their rawx.
        broken_chunks_info = {}
        for pos, idx in broken_pos_list:
            c = old_content.chunks.filter(pos=pos)[idx]
            meta, stream = self.blob_client.chunk_get(c.url)
            if pos not in broken_chunks_info:
                broken_chunks_info[pos] = {}
            broken_chunks_info[pos][idx] = {
                "url": c.url,
                "id": c.id,
                "hash": c.hash,
                "dl_meta": meta,
                "dl_hash": md5_stream(stream)
            }
            self.blob_client.chunk_delete(c.url)

        # get the new structure of the uploaded content
        return (self.content_factory.get(
            self.container_id, old_content.content_id), broken_chunks_info)

    def _test_rebuild(self, stgpol, data_size, broken_pos_list,
                      full_rebuild_pos):
        data = random_data(data_size)
        content, broken_chunks_info = self._new_content(
            stgpol, data, broken_pos_list)

        rebuild_pos, rebuild_idx = full_rebuild_pos
        rebuild_chunk_info = broken_chunks_info[rebuild_pos][rebuild_idx]
        content.rebuild_chunk(rebuild_chunk_info["id"])

        # get the new structure of the content
        rebuilt_content = self.content_factory.get(self.container_id,
                                                   content.content_id)
        self.assertEqual(type(rebuilt_content), DupContent)

        # find the rebuilt chunk
        for c in rebuilt_content.chunks.filter(pos=rebuild_pos):
            if len(content.chunks.filter(id=c.id)) > 0:
                # not the rebuilt chunk
                # if this chunk is broken, it must not have been rebuilt
                for b_c_i in broken_chunks_info[rebuild_pos].values():
                    if c.id == b_c_i["id"]:
                        with ExpectedException(NotFound):
                            _, _ = self.blob_client.chunk_get(c.url)
                continue
            meta, stream = self.blob_client.chunk_get(c.url)
            self.assertEqual(meta["chunk_id"], c.id)
            self.assertEqual(md5_stream(stream),
                             rebuild_chunk_info["dl_hash"])
            self.assertEqual(c.hash, rebuild_chunk_info["hash"])
            self.assertThat(c.url, NotEquals(rebuild_chunk_info["url"]))
            del meta["chunk_id"]
            del rebuild_chunk_info["dl_meta"]["chunk_id"]
            self.assertEqual(meta, rebuild_chunk_info["dl_meta"])

    def test_2copies_content_0_byte_1broken_rebuild_pos_0_idx_0(self):
        self._test_rebuild("TWOCOPIES", 0, [(0, 0)], (0, 0))

    def test_2copies_content_1_byte_1broken_rebuild_pos_0_idx_1(self):
        self._test_rebuild("TWOCOPIES", 1, [(0, 1)], (0, 1))

    def test_3copies_content_chunksize_bytes_2broken_rebuild_pos_0_idx_1(self):
        if len(self.conf['rawx']) <= 3:
            self.skipTest("Need more than 3 rawx")
        self._test_rebuild("THREECOPIES", self.chunk_size,
                           [(0, 0), (0, 1)], (0, 1))

    def test_3copies_content_2xchksize_bytes_2broken_rebuild_pos_1_idx_2(self):
        if len(self.conf['rawx']) <= 3:
            self.skipTest("Need more than 3 rawx")
        self._test_rebuild("THREECOPIES", 2 * self.chunk_size,
                           [(1, 0), (1, 2)], (1, 2))

    def test_2copies_content_0_byte_2broken_rebuild_pos_0_idx_0(self):
        with ExpectedException(UnrecoverableContent):
            self._test_rebuild("TWOCOPIES", 0, [(0, 0), (0, 1)], (0, 0))

    def _test_download(self, stgpol, data_size, broken_pos_list):
        data = random_data(data_size)
        content, _ = self._new_content(stgpol, data, broken_pos_list)

        downloaded_data = "".join(content.download())

        self.assertEqual(downloaded_data, data)

        for pos, idx in broken_pos_list:
            # check nothing has been rebuilt
            c = content.chunks.filter(pos=pos)[0]
            self.assertRaises(NotFound, self.blob_client.chunk_delete, c.url)

    def test_twocopies_download_content_0_byte_without_broken_chunks(self):
        self._test_download("TWOCOPIES", 0, [])

    def test_twocopies_download_content_0_byte_with_broken_0_0(self):
        self._test_download("TWOCOPIES", 0, [(0, 0)])

    def test_twocopies_download_content_1_byte_without_broken_chunks(self):
        self._test_download("TWOCOPIES", 1, [])

    def test_twocopies_download_content_1_byte_with_broken_0_0(self):
        self._test_download("TWOCOPIES", 1, [(0, 0)])

    def test_twocopies_download_chunksize_bytes_without_broken_chunks(self):
        self._test_download("TWOCOPIES", self.chunk_size, [])

    def test_twocopies_download_2xchuksize_bytes_with_broken_0_0_and_1_0(self):
        self._test_download("TWOCOPIES", self.chunk_size * 2,
                            [(0, 0), (1, 0)])

    def test_twocopies_download_content_chunksize_bytes_2_broken_chunks(self):
        data = random_data(self.chunk_size)
        content, _ = self._new_content("TWOCOPIES", data, [(0, 0), (0, 1)])
        gen = content.download()
        self.assertRaises(UnrecoverableContent, gen.next)

    def test_single_download_content_1_byte_without_broken_chunks(self):
        self._test_download("SINGLE", 1, [])

    def test_single_download_chunksize_bytes_plus_1_without_broken_chunk(self):
        self._test_download("SINGLE", self.chunk_size * 2, [])
class TestContentFactory(BaseTestCase):

    def setUp(self):
        super(TestContentFactory, self).setUp()
        self.namespace = self.conf['namespace']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_name = "TestContentFactory%f" % time.time()
        self.blob_client = BlobClient()
        self.container_client = ContainerClient(self.gridconf)
        self.container_client.container_create(account=self.account,
                                               reference=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()
        self.stgpol = "SINGLE"
        self.stgpol_twocopies = "TWOCOPIES"
        self.stgpol_threecopies = "THREECOPIES"
        self.stgpol_ec = "EC"

    def tearDown(self):
        super(TestContentFactory, self).tearDown()

    def test_get_ec(self):
        meta = {
            "chunk_method": "ec/algo=liberasurecode_rs_vand,k=6,m=2",
            "ctime": "1450176946",
            "deleted": "False",
            "hash": "E952A419957A6E405BFC53EC65483F73",
            "hash_method": "md5",
            "id": "3FA2C4A1ED2605005335A276890EC458",
            "length": "658",
            "mime_type": "application/octet-stream",
            "name": "tox.ini",
            "policy": self.stgpol_ec,
            "version": "1450176946676289"
        }
        chunks = [
            {"url": "http://127.0.0.1:6012/A0A0", "pos": "0.0",
             "size": 512, "hash": "E7D4E4AD460971CA2E3141F2102308D4"},
            {"url": "http://127.0.0.1:6010/A01", "pos": "0.1",
             "size": 146, "hash": "760AB5DA7C51A3654F1CA622687CD6C3"},
            {"url": "http://127.0.0.1:6011/A00", "pos": "0.2",
             "size": 512, "hash": "B1D08B86B8CAA90A2092CCA0DF9201DB"},
            {"url": "http://127.0.0.1:6013/A0A1", "pos": "0.3",
             "size": 512, "hash": "DA9D7F72AEEA5791565724424CE45C16"}
        ]
        self.content_factory.container_client.content_locate = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.get("xxx_container_id", "xxx_content_id")
        self.assertEqual(type(c), ECContent)
        self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458")
        self.assertEqual(c.length, 658)
        self.assertEqual(c.path, "tox.ini")
        self.assertEqual(c.version, "1450176946676289")
        # TODO test storage method
        self.assertEqual(len(c.chunks), 4)
        self.assertEqual(c.chunks[0].raw(), chunks[0])
        self.assertEqual(c.chunks[1].raw(), chunks[1])
        self.assertEqual(c.chunks[2].raw(), chunks[2])
        self.assertEqual(c.chunks[3].raw(), chunks[3])

    def test_get_plain(self):
        meta = {
            "chunk_method": "plain/nb_copy=2",
            "ctime": "1450176946",
            "deleted": "False",
            "hash": "E952A419957A6E405BFC53EC65483F73",
            "hash_method": "md5",
            "id": "3FA2C4A1ED2605005335A276890EC458",
            "length": "658",
            "mime_type": "application/octet-stream",
            "name": "tox.ini",
            "policy": self.stgpol_twocopies,
            "version": "1450176946676289"
        }
        chunks = [
            {"url": "http://127.0.0.1:6010/A0", "pos": "0",
             "size": 658, "hash": "E952A419957A6E405BFC53EC65483F73"},
            {"url": "http://127.0.0.1:6011/A1", "pos": "0",
             "size": 658, "hash": "E952A419957A6E405BFC53EC65483F73"}
        ]
        self.content_factory.container_client.content_locate = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.get("xxx_container_id", "xxx_content_id")
        self.assertEqual(type(c), PlainContent)
        self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458")
        self.assertEqual(c.length, 658)
        self.assertEqual(c.path, "tox.ini")
        self.assertEqual(c.version, "1450176946676289")
        # TODO test storage_method
        self.assertEqual(len(c.chunks), 2)
        self.assertEqual(c.chunks[0].raw(), chunks[0])
        self.assertEqual(c.chunks[1].raw(), chunks[1])

    def test_get_unknown_content(self):
        self.assertRaises(ContentNotFound, self.content_factory.get,
                          self.container_id, "1234")

    def test_new_ec(self):
        meta = {
            "chunk_method": "ec/algo=liberasurecode_rs_vand,k=6,m=2",
            "ctime": "1450341162",
            "deleted": "False",
            "hash": "",
            "hash_method": "md5",
            "id": "F4B1C8DD132705007DE8B43D0709DAA2",
            "length": "1000",
            "mime_type": "application/octet-stream",
            "name": "titi",
            "policy": self.stgpol_ec,
            "version": "1450341162332663"
        }
        chunks = [
            {"url": "http://127.0.0.1:6010/0_p1", "pos": "0.3",
             "size": 1048576, "hash": "00000000000000000000000000000000"},
            {"url": "http://127.0.0.1:6011/0_p0", "pos": "0.2",
             "size": 1048576, "hash": "00000000000000000000000000000000"},
            {"url": "http://127.0.0.1:6016/0_1", "pos": "0.1",
             "size": 1048576, "hash": "00000000000000000000000000000000"},
            {"url": "http://127.0.0.1:6017/0_0", "pos": "0.0",
             "size": 1048576, "hash": "00000000000000000000000000000000"}
        ]
        self.content_factory.container_client.content_prepare = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.new("xxx_container_id", "titi", 1000,
                                     self.stgpol_ec)
        self.assertEqual(type(c), ECContent)
        self.assertEqual(c.content_id, "F4B1C8DD132705007DE8B43D0709DAA2")
        self.assertEqual(c.length, 1000)
        self.assertEqual(c.path, "titi")
        self.assertEqual(c.version, "1450341162332663")
        # TODO test storage_method
        self.assertEqual(len(c.chunks), 4)
        self.assertEqual(c.chunks[0].raw(), chunks[3])
        self.assertEqual(c.chunks[1].raw(), chunks[2])
        self.assertEqual(c.chunks[2].raw(), chunks[1])
        self.assertEqual(c.chunks[3].raw(), chunks[0])

    def _new_content(self, stgpol, data, path="titi"):
        old_content = self.content_factory.new(self.container_id, path,
                                               len(data), stgpol)
        old_content.create(BytesIO(data))
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def _test_change_policy(self, data_size, old_policy, new_policy):
        data = random_data(data_size)
        obj_type = {
            self.stgpol: PlainContent,
            self.stgpol_twocopies: PlainContent,
            self.stgpol_threecopies: PlainContent,
            self.stgpol_ec: ECContent
        }
        old_content = self._new_content(old_policy, data)
        self.assertEqual(type(old_content), obj_type[old_policy])

        changed_content = self.content_factory.change_policy(
            old_content.container_id, old_content.content_id, new_policy)

        self.assertRaises(NotFound, self.container_client.content_show,
                          self.account,
                          cid=old_content.container_id,
                          content=old_content.content_id)

        new_content = self.content_factory.get(self.container_id,
                                               changed_content.content_id)
        self.assertEqual(type(new_content), obj_type[new_policy])

        downloaded_data = "".join(new_content.fetch())
        self.assertEqual(downloaded_data, data)

    @ec
    def test_change_content_0_byte_policy_single_to_ec(self):
        self._test_change_policy(0, self.stgpol, self.stgpol_ec)

    @ec
    def test_change_content_0_byte_policy_ec_to_twocopies(self):
        self._test_change_policy(0, self.stgpol_ec, self.stgpol_twocopies)

    @ec
    def test_change_content_1_byte_policy_single_to_ec(self):
        self._test_change_policy(1, self.stgpol, self.stgpol_ec)

    @ec
    def test_change_content_chunksize_bytes_policy_twocopies_to_ec(self):
        self._test_change_policy(self.chunk_size, self.stgpol_twocopies,
                                 self.stgpol_ec)

    @ec
    def test_change_content_2xchunksize_bytes_policy_threecopies_to_ec(self):
        self._test_change_policy(self.chunk_size * 2,
                                 self.stgpol_threecopies, self.stgpol_ec)

    @ec
    def test_change_content_1_byte_policy_ec_to_threecopies(self):
        self._test_change_policy(1, self.stgpol_ec,
                                 self.stgpol_threecopies)

    @ec
    def test_change_content_chunksize_bytes_policy_ec_to_twocopies(self):
        self._test_change_policy(self.chunk_size, self.stgpol_ec,
                                 self.stgpol_twocopies)

    @ec
    def test_change_content_2xchunksize_bytes_policy_ec_to_single(self):
        self._test_change_policy(self.chunk_size * 2, self.stgpol_ec,
                                 self.stgpol)

    def test_change_content_0_byte_policy_twocopies_to_threecopies(self):
        self._test_change_policy(0, self.stgpol_twocopies,
                                 self.stgpol_threecopies)

    def test_change_content_chunksize_bytes_policy_single_to_twocopies(self):
        self._test_change_policy(self.chunk_size, self.stgpol,
                                 self.stgpol_twocopies)

    def test_change_content_2xchunksize_bytes_policy_3copies_to_single(self):
        self._test_change_policy(self.chunk_size * 2,
                                 self.stgpol_threecopies, self.stgpol)

    def test_change_content_with_same_policy(self):
        data = random_data(10)
        old_content = self._new_content(self.stgpol_twocopies, data)
        changed_content = self.content_factory.change_policy(
            old_content.container_id, old_content.content_id,
            self.stgpol_twocopies)
        self.assertEqual(old_content.content_id, changed_content.content_id)

    def test_change_policy_unknown_content(self):
        self.assertRaises(ContentNotFound,
                          self.content_factory.change_policy,
                          self.container_id, "1234", self.stgpol)

    def test_change_policy_unknown_storage_policy(self):
        data = random_data(10)
        old_content = self._new_content(self.stgpol_twocopies, data)
        self.assertRaises(ClientException,
                          self.content_factory.change_policy,
                          self.container_id, old_content.content_id,
                          "UnKnOwN")

    def _test_move_chunk(self, policy):
        data = random_data(self.chunk_size)
        content = self._new_content(policy, data)
        chunk_id = content.chunks.filter(metapos=0)[0].id
        chunk_url = content.chunks.filter(metapos=0)[0].url
        chunk_meta, chunk_stream = self.blob_client.chunk_get(chunk_url)
        chunk_hash = md5_stream(chunk_stream)
        new_chunk = content.move_chunk(chunk_id)

        content_updated = self.content_factory.get(self.container_id,
                                                   content.content_id)
        hosts = []
        for c in content_updated.chunks.filter(metapos=0):
            self.assertThat(hosts, Not(Contains(c.host)))
            self.assertNotEqual(c.id, chunk_id)
            hosts.append(c.host)

        new_chunk_meta, new_chunk_stream = self.blob_client.chunk_get(
            new_chunk["url"])
        new_chunk_hash = md5_stream(new_chunk_stream)
        self.assertEqual(new_chunk_hash, chunk_hash)

        del chunk_meta["chunk_id"]
        del new_chunk_meta["chunk_id"]
        self.assertEqual(new_chunk_meta, chunk_meta)

    def test_single_move_chunk(self):
        self._test_move_chunk(self.stgpol)

    def test_twocopies_move_chunk(self):
        self._test_move_chunk(self.stgpol_twocopies)

    @ec
    def test_ec_move_chunk(self):
        self._test_move_chunk(self.stgpol_ec)

    def test_move_chunk_not_in_content(self):
        data = random_data(self.chunk_size)
        content = self._new_content(self.stgpol_twocopies, data)
        with ExpectedException(OrphanChunk):
            content.move_chunk("1234")

    def test_strange_paths(self):
        strange_paths = [
            "Annual report.txt",
            "foo+bar=foobar.txt",
            "100%_bug_free.c",
            "forward/slash/allowed",
            "I\\put\\backslashes\\and$dollar$signs$in$file$names",
            "Je suis tombé sur la tête, mais ça va bien.",
            "%s%f%u%d%%",
            "carriage\rreturn",
            "line\nfeed",
            "ta\tbu\tla\ttion",
            "controlchars",
        ]
        answers = dict()
        for cname in strange_paths:
            content = self._new_content(self.stgpol, "nobody cares", cname)
            answers[cname] = content

        _, listing = self.container_client.content_list(
            self.account, self.container_name)
        obj_set = {k["name"].encode("utf8", "ignore")
                   for k in listing["objects"]}
        try:
            # Ensure the saved path is the one we gave the object
            for cname in answers:
                self.assertEqual(cname, answers[cname].path)
            # Ensure all objects appear in listing
            for cname in strange_paths:
                self.assertIn(cname, obj_set)
        finally:
            # Cleanup
            for cname in answers:
                try:
                    answers[cname].delete()
                except Exception:
                    pass
class TestRainContent(BaseTestCase): def setUp(self): super(TestRainContent, self).setUp() if len(self.conf['rawx']) < 12: self.skipTest("Not enough rawx. " "Rain tests needs more than 12 rawx to run") self.namespace = self.conf['namespace'] self.account = self.conf['account'] self.chunk_size = self.conf['chunk_size'] self.gridconf = {"namespace": self.namespace} self.content_factory = ContentFactory(self.gridconf) self.container_client = ContainerClient(self.gridconf) self.blob_client = BlobClient() self.container_name = "TestRainContent%f" % time.time() self.container_client.container_create(acct=self.account, ref=self.container_name) self.container_id = cid_from_name(self.account, self.container_name).upper() def tearDown(self): super(TestRainContent, self).tearDown() def _test_upload(self, data_size): data = random_data(data_size) content = self.content_factory.new(self.container_id, "titi", len(data), "RAIN") k = 6 m = 2 self.assertEqual(type(content), RainContent) content.upload(StringIO.StringIO(data)) meta, chunks = self.container_client.content_show( cid=self.container_id, content=content.content_id) chunks = ChunksHelper(chunks) self.assertEqual(meta['hash'], md5_data(data)) self.assertEqual(meta['length'], str(len(data))) self.assertEqual(meta['policy'], "RAIN") self.assertEqual(meta['name'], "titi") metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size)) if metachunk_nb == 0: metachunk_nb = 1 # special case for empty content nb_chunks_min = metachunk_nb * (1 + m) nb_chunks_max = metachunk_nb * (k + m) self.assertGreaterEqual(len(chunks), nb_chunks_min) self.assertLessEqual(len(chunks), nb_chunks_max) for metapos in range(metachunk_nb): chunks_at_pos = content.chunks.filter(metapos=metapos) data_chunks_at_pos = chunks_at_pos.filter(is_parity=False) parity_chunks_at_pos = chunks_at_pos.filter(is_parity=True) self.assertEquals(len(data_chunks_at_pos) >= 1, True) self.assertEquals(len(data_chunks_at_pos) <= k, True) self.assertEqual(len(parity_chunks_at_pos), m) for chunk in chunks_at_pos: meta, stream = self.blob_client.chunk_get(chunk.url) self.assertEqual(md5_stream(stream), chunk.hash) self.assertEqual(meta['content_size'], str(len(data))) self.assertEqual(meta['content_path'], "titi") self.assertEqual(meta['content_cid'], self.container_id) self.assertEqual(meta['content_id'], meta['content_id']) self.assertEqual(meta['chunk_id'], chunk.id) self.assertEqual(meta['chunk_pos'], chunk.pos) self.assertEqual(meta['chunk_hash'], chunk.hash) data_begin = metapos * self.chunk_size data_end = metapos * self.chunk_size + self.chunk_size target_metachunk_hash = md5_data(data[data_begin:data_end]) metachunk_hash = hashlib.md5() for chunk in data_chunks_at_pos: meta, stream = self.blob_client.chunk_get(chunk.url) for d in stream: metachunk_hash.update(d) self.assertEqual(metachunk_hash.hexdigest().upper(), target_metachunk_hash) def test_upload_0_byte(self): self._test_upload(0) def test_upload_1_byte(self): self._test_upload(1) def test_upload_chunksize_bytes(self): self._test_upload(self.chunk_size) def test_upload_chunksize_plus_1_bytes(self): self._test_upload(self.chunk_size + 1) def test_chunks_cleanup_when_upload_failed(self): data = random_data(2 * self.chunk_size) content = self.content_factory.new(self.container_id, "titi", len(data), "RAIN") self.assertEqual(type(content), RainContent) # set bad url for position 1 for chunk in content.chunks.filter(pos="1.p0"): chunk.url = "http://127.0.0.1:9/DEADBEEF" self.assertRaises(Exception, content.upload, 
StringIO.StringIO(data)) for chunk in content.chunks.exclude(pos="1.p0"): self.assertRaises(NotFound, self.blob_client.chunk_head, chunk.url) def _test_rebuild(self, data_size, broken_pos_list): data = os.urandom(data_size) old_content = self.content_factory.new(self.container_id, "titi", len(data), "RAIN") self.assertEqual(type(old_content), RainContent) old_content.upload(StringIO.StringIO(data)) # get the new structure of the uploaded content uploaded_content = self.content_factory.get(self.container_id, old_content.content_id) old_info = {} for pos in broken_pos_list: old_info[pos] = {} c = uploaded_content.chunks.filter(pos=pos)[0] old_info[pos]["url"] = c.url old_info[pos]["id"] = c.id old_info[pos]["hash"] = c.hash chunk_id_to_rebuild = c.id meta, stream = self.blob_client.chunk_get(c.url) old_info[pos]["dl_meta"] = meta old_info[pos]["dl_hash"] = md5_stream(stream) # delete the chunk self.blob_client.chunk_delete(c.url) # rebuild the broken chunks uploaded_content.rebuild_chunk(chunk_id_to_rebuild) # get the new structure of the content rebuilt_content = self.content_factory.get(self.container_id, uploaded_content.content_id) self.assertEqual(type(rebuilt_content), RainContent) for pos in broken_pos_list: c = rebuilt_content.chunks.filter(pos=pos)[0] rebuilt_meta, rebuilt_stream = self.blob_client.chunk_get(c.url) self.assertEqual(rebuilt_meta["chunk_id"], c.id) self.assertEqual(md5_stream(rebuilt_stream), old_info[pos]["dl_hash"]) self.assertEqual(c.hash, old_info[pos]["hash"]) self.assertThat(c.url, NotEquals(old_info[pos]["url"])) del old_info[pos]["dl_meta"]["chunk_id"] del rebuilt_meta["chunk_id"] self.assertEqual(rebuilt_meta, old_info[pos]["dl_meta"]) def test_content_0_byte_rebuild_pos_0_0(self): self._test_rebuild(0, ["0.0"]) def test_content_0_byte_rebuild_pos_0_0_and_0_p0(self): self._test_rebuild(0, ["0.0", "0.p0"]) def test_content_1_byte_rebuild_pos_0_0(self): self._test_rebuild(1, ["0.0"]) def test_content_1_byte_rebuild_pos_0_p0(self): self._test_rebuild(1, ["0.p0"]) def test_content_1_byte_rebuild_pos_0_0_and_0_p0(self): self._test_rebuild(1, ["0.0", "0.p0"]) def test_content_chunksize_bytes_rebuild_pos_0_0(self): self._test_rebuild(self.conf["chunk_size"], ["0.0"]) def test_content_chunksize_bytes_rebuild_pos_0_0_and_0_1(self): self._test_rebuild(self.conf["chunk_size"], ["0.0", "0.1"]) def test_content_chunksize_bytes_rebuild_pos_0_0_and_0_p0(self): self._test_rebuild(self.conf["chunk_size"], ["0.0", "0.p0"]) def test_content_chunksize_bytes_rebuild_pos_0_p0_and_0_p1(self): self._test_rebuild(self.conf["chunk_size"], ["0.p0", "0.p1"]) def test_content_chunksize_bytes_rebuild_more_than_k_chunk(self): self.assertRaises(UnrecoverableContent, self._test_rebuild, self.conf["chunk_size"], ["0.0", "0.1", "0.2"]) def _new_content(self, data, broken_pos_list=[]): old_content = self.content_factory.new(self.container_id, "titi", len(data), "RAIN") self.assertEqual(type(old_content), RainContent) old_content.upload(StringIO.StringIO(data)) for pos in broken_pos_list: c = old_content.chunks.filter(pos=pos)[0] self.blob_client.chunk_delete(c.url) # get the new structure of the uploaded content return self.content_factory.get(self.container_id, old_content.content_id) def test_orphan_chunk(self): content = self._new_content(random_data(10)) self.assertRaises(OrphanChunk, content.rebuild_chunk, "uNkNoWnId") def test_rebuild_on_the_fly(self): data = random_data(self.conf["chunk_size"]) content = self._new_content(data, ["0.0", "0.p0"]) stream = content.rebuild_metachunk("0", 
on_the_fly=True) dl_data = "".join(stream) self.assertEqual(dl_data, data) del_chunk_0_0 = content.chunks.filter(pos="0.0")[0] del_chunk_0_p0 = content.chunks.filter(pos="0.p0")[0] self.assertRaises(NotFound, self.blob_client.chunk_get, del_chunk_0_0.url) self.assertRaises(NotFound, self.blob_client.chunk_get, del_chunk_0_p0.url) def _test_download(self, data_size, broken_pos_list): data = random_data(data_size) content = self._new_content(data, broken_pos_list) downloaded_data = "".join(content.download()) self.assertEqual(downloaded_data, data) for pos in broken_pos_list: c = content.chunks.filter(pos=pos)[0] self.assertRaises(NotFound, self.blob_client.chunk_delete, c.url) def test_download_content_0_byte_without_broken_chunks(self): self._test_download(0, []) def test_download_content_1_byte_without_broken_chunks(self): self._test_download(1, []) def test_download_content_chunksize_bytes_without_broken_chunks(self): self._test_download(self.conf["chunk_size"], []) def test_download_content_chunksize_plus_1_without_broken_chunks(self): self._test_download(self.conf["chunk_size"] + 1, []) def test_download_content_0_byte_with_broken_0_0_and_0_p0(self): self._test_download(0, ["0.0", "0.p0"]) def test_download_content_1_byte_with_broken_0_0_and_0_p0(self): self._test_download(1, ["0.0", "0.p0"]) def test_download_content_2xchunksize_with_broken_0_2_and_1_0(self): self._test_download(2 * self.conf["chunk_size"], ["0.2", "1.0"]) def test_download_content_chunksize_bytes_with_3_broken_chunks(self): data = random_data(self.conf["chunk_size"]) content = self._new_content(data, ["0.0", "0.1", "0.2"]) gen = content.download() self.assertRaises(UnrecoverableContent, gen.next) def test_download_interrupt_close(self): data = random_data(self.conf["chunk_size"]) content = self._new_content(data, ["0.p0"]) download_iter = content.download() dl_data = "" for buf in download_iter: dl_data += buf self.assertEqual(len(dl_data), len(data)) self.assertEqual(dl_data, data) download_iter.close()
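# A minimal standalone sketch (not part of the test suite) of the chunk-count
# bounds asserted by _test_upload above: every RAIN metachunk carries m parity
# chunks, a full metachunk carries k data chunks, and a short metachunk may
# carry as little as one data chunk. `rain_chunk_bounds` is a hypothetical
# helper that only restates this arithmetic for illustration.
import math

def rain_chunk_bounds(data_size, chunk_size, k=6, m=2):
    """Return (min, max) number of chunks a RAIN content may use."""
    metachunk_nb = max(1, int(math.ceil(float(data_size) / chunk_size)))
    return metachunk_nb * (1 + m), metachunk_nb * (k + m)

assert rain_chunk_bounds(0, 1024) == (3, 8)      # empty content: 1 metachunk
assert rain_chunk_bounds(2048, 1024) == (6, 16)  # two full metachunks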
class TestContentFactory(BaseTestCase): def setUp(self): super(TestContentFactory, self).setUp() self.namespace = self.conf['namespace'] self.chunk_size = self.conf['chunk_size'] self.gridconf = {"namespace": self.namespace} self.content_factory = ContentFactory(self.gridconf) self.container_name = "TestContentFactory%f" % time.time() self.blob_client = BlobClient() self.container_client = ContainerClient(self.gridconf) self.container_client.container_create(acct=self.account, ref=self.container_name) self.container_id = cid_from_name(self.account, self.container_name).upper() def tearDown(self): super(TestContentFactory, self).tearDown() def test_extract_datasec(self): self.content_factory.ns_info = { "data_security": { "DUPONETWO": "DUP:distance=1|nb_copy=2", "RAIN": "RAIN:k=6|m=2|algo=liber8tion" }, "storage_policy": { "RAIN": "NONE:RAIN:NONE", "SINGLE": "NONE:NONE:NONE", "TWOCOPIES": "NONE:DUPONETWO:NONE" } } ds_type, ds_args = self.content_factory._extract_datasec("RAIN") self.assertEqual(ds_type, "RAIN") self.assertEqual(ds_args, {"k": "6", "m": "2", "algo": "liber8tion"}) ds_type, ds_args = self.content_factory._extract_datasec("SINGLE") self.assertEqual(ds_type, "DUP") self.assertEqual(ds_args, {"nb_copy": "1", "distance": "0"}) ds_type, ds_args = self.content_factory._extract_datasec("TWOCOPIES") self.assertEqual(ds_type, "DUP") self.assertEqual(ds_args, {"nb_copy": "2", "distance": "1"}) self.assertRaises(InconsistentContent, self.content_factory._extract_datasec, "UnKnOwN") def test_get_rain(self): meta = { "chunk-method": "plain/rain?algo=liber8tion&k=6&m=2", "ctime": "1450176946", "deleted": "False", "hash": "E952A419957A6E405BFC53EC65483F73", "hash-method": "md5", "id": "3FA2C4A1ED2605005335A276890EC458", "length": "658", "mime-type": "application/octet-stream", "name": "tox.ini", "policy": "RAIN", "version": "1450176946676289" } chunks = [{ "url": "http://127.0.0.1:6012/A0A0", "pos": "0.p0", "size": 512, "hash": "E7D4E4AD460971CA2E3141F2102308D4" }, { "url": "http://127.0.0.1:6010/A01", "pos": "0.1", "size": 146, "hash": "760AB5DA7C51A3654F1CA622687CD6C3" }, { "url": "http://127.0.0.1:6011/A00", "pos": "0.0", "size": 512, "hash": "B1D08B86B8CAA90A2092CCA0DF9201DB" }, { "url": "http://127.0.0.1:6013/A0A1", "pos": "0.p1", "size": 512, "hash": "DA9D7F72AEEA5791565724424CE45C16" }] self.content_factory.container_client.content_show = Mock( return_value=(meta, chunks)) c = self.content_factory.get("xxx_container_id", "xxx_content_id") self.assertEqual(type(c), RainContent) self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458") self.assertEqual(c.length, 658) self.assertEqual(c.path, "tox.ini") self.assertEqual(c.version, "1450176946676289") self.assertEqual(c.algo, "liber8tion") self.assertEqual(c.k, 6) self.assertEqual(c.m, 2) self.assertEqual(len(c.chunks), 4) self.assertEqual(c.chunks[0].raw(), chunks[2]) self.assertEqual(c.chunks[1].raw(), chunks[1]) self.assertEqual(c.chunks[2].raw(), chunks[0]) self.assertEqual(c.chunks[3].raw(), chunks[3]) def test_get_dup(self): meta = { "chunk-method": "plain/bytes", "ctime": "1450176946", "deleted": "False", "hash": "E952A419957A6E405BFC53EC65483F73", "hash-method": "md5", "id": "3FA2C4A1ED2605005335A276890EC458", "length": "658", "mime-type": "application/octet-stream", "name": "tox.ini", "policy": "TWOCOPIES", "version": "1450176946676289" } chunks = [{ "url": "http://127.0.0.1:6010/A0", "pos": "0", "size": 658, "hash": "E952A419957A6E405BFC53EC65483F73" }, { "url": "http://127.0.0.1:6011/A1", "pos": "0", "size": 658, 
"hash": "E952A419957A6E405BFC53EC65483F73" }] self.content_factory.container_client.content_show = Mock( return_value=(meta, chunks)) c = self.content_factory.get("xxx_container_id", "xxx_content_id") self.assertEqual(type(c), DupContent) self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458") self.assertEqual(c.length, 658) self.assertEqual(c.path, "tox.ini") self.assertEqual(c.version, "1450176946676289") self.assertEqual(c.nb_copy, 2) self.assertEqual(c.distance, 1) self.assertEqual(len(c.chunks), 2) self.assertEqual(c.chunks[0].raw(), chunks[0]) self.assertEqual(c.chunks[1].raw(), chunks[1]) def test_get_unknown_content(self): self.assertRaises(ContentNotFound, self.content_factory.get, self.container_id, "1234") def test_new_rain(self): meta = { "chunk-method": "plain/rain?algo=liber8tion&k=6&m=2", "ctime": "1450341162", "deleted": "False", "hash": "", "hash-method": "md5", "id": "F4B1C8DD132705007DE8B43D0709DAA2", "length": "1000", "mime-type": "application/octet-stream", "name": "titi", "policy": "RAIN", "version": "1450341162332663" } chunks = [{ "url": "http://127.0.0.1:6010/0_p1", "pos": "0.p1", "size": 1048576, "hash": "00000000000000000000000000000000" }, { "url": "http://127.0.0.1:6011/0_p0", "pos": "0.p0", "size": 1048576, "hash": "00000000000000000000000000000000" }, { "url": "http://127.0.0.1:6016/0_1", "pos": "0.1", "size": 1048576, "hash": "00000000000000000000000000000000" }, { "url": "http://127.0.0.1:6017/0_0", "pos": "0.0", "size": 1048576, "hash": "00000000000000000000000000000000" }] self.content_factory.container_client.content_prepare = Mock( return_value=(meta, chunks)) c = self.content_factory.new("xxx_container_id", "titi", 1000, "RAIN") self.assertEqual(type(c), RainContent) self.assertEqual(c.content_id, "F4B1C8DD132705007DE8B43D0709DAA2") self.assertEqual(c.length, 1000) self.assertEqual(c.path, "titi") self.assertEqual(c.version, "1450341162332663") self.assertEqual(c.algo, "liber8tion") self.assertEqual(c.k, 6) self.assertEqual(c.m, 2) self.assertEqual(len(c.chunks), 4) self.assertEqual(c.chunks[0].raw(), chunks[3]) self.assertEqual(c.chunks[1].raw(), chunks[2]) self.assertEqual(c.chunks[2].raw(), chunks[1]) self.assertEqual(c.chunks[3].raw(), chunks[0]) def _new_content(self, stgpol, data, path="titi"): old_content = self.content_factory.new(self.container_id, path, len(data), stgpol) old_content.upload(StringIO.StringIO(data)) return self.content_factory.get(self.container_id, old_content.content_id) def _test_change_policy(self, data_size, old_policy, new_policy): if (old_policy == "RAIN" or new_policy == "RAIN") \ and len(self.conf['rawx']) < 8: self.skipTest("RAIN: Need more than 8 rawx to run") data = random_data(data_size) obj_type = { "SINGLE": DupContent, "TWOCOPIES": DupContent, "THREECOPIES": DupContent, "RAIN": RainContent } old_content = self._new_content(old_policy, data) self.assertEqual(type(old_content), obj_type[old_policy]) changed_content = self.content_factory.change_policy( old_content.container_id, old_content.content_id, new_policy) self.assertRaises(NotFound, self.container_client.content_show, self.account, cid=old_content.container_id, content=old_content.content_id) new_content = self.content_factory.get(self.container_id, changed_content.content_id) self.assertEqual(type(new_content), obj_type[new_policy]) downloaded_data = "".join(new_content.download()) self.assertEqual(downloaded_data, data) def test_change_content_0_byte_policy_single_to_rain(self): self._test_change_policy(0, "SINGLE", "RAIN") def 
test_change_content_0_byte_policy_rain_to_twocopies(self): self._test_change_policy(0, "RAIN", "TWOCOPIES") def test_change_content_1_byte_policy_single_to_rain(self): self._test_change_policy(1, "SINGLE", "RAIN") def test_change_content_chunksize_bytes_policy_twocopies_to_rain(self): self._test_change_policy(self.chunk_size, "TWOCOPIES", "RAIN") def test_change_content_2xchunksize_bytes_policy_threecopies_to_rain(self): self._test_change_policy(self.chunk_size * 2, "THREECOPIES", "RAIN") def test_change_content_1_byte_policy_rain_to_threecopies(self): self._test_change_policy(1, "RAIN", "THREECOPIES") def test_change_content_chunksize_bytes_policy_rain_to_twocopies(self): self._test_change_policy(self.chunk_size, "RAIN", "TWOCOPIES") def test_change_content_2xchunksize_bytes_policy_rain_to_single(self): self._test_change_policy(self.chunk_size * 2, "RAIN", "SINGLE") def test_change_content_0_byte_policy_twocopies_to_threecopies(self): self._test_change_policy(0, "TWOCOPIES", "THREECOPIES") def test_change_content_chunksize_bytes_policy_single_to_twocopies(self): self._test_change_policy(self.chunk_size, "SINGLE", "TWOCOPIES") def test_change_content_2xchunksize_bytes_policy_3copies_to_single(self): self._test_change_policy(self.chunk_size * 2, "THREECOPIES", "SINGLE") def test_change_content_with_same_policy(self): data = random_data(10) old_content = self._new_content("TWOCOPIES", data) changed_content = self.content_factory.change_policy( old_content.container_id, old_content.content_id, "TWOCOPIES") self.assertEqual(old_content.content_id, changed_content.content_id) def test_change_policy_unknown_content(self): self.assertRaises(ContentNotFound, self.content_factory.change_policy, self.container_id, "1234", "SINGLE") def test_change_policy_unknown_storage_policy(self): data = random_data(10) old_content = self._new_content("TWOCOPIES", data) self.assertRaises(ClientException, self.content_factory.change_policy, self.container_id, old_content.content_id, "UnKnOwN") def _test_move_chunk(self, policy): data = random_data(self.chunk_size) content = self._new_content(policy, data) chunk_id = content.chunks.filter(metapos=0)[0].id chunk_url = content.chunks.filter(metapos=0)[0].url chunk_meta, chunk_stream = self.blob_client.chunk_get(chunk_url) chunk_hash = md5_stream(chunk_stream) new_chunk = content.move_chunk(chunk_id) content_updated = self.content_factory.get(self.container_id, content.content_id) hosts = [] for c in content_updated.chunks.filter(metapos=0): self.assertThat(hosts, Not(Contains(c.host))) self.assertNotEquals(c.id, chunk_id) hosts.append(c.host) new_chunk_meta, new_chunk_stream = self.blob_client.chunk_get( new_chunk["url"]) new_chunk_hash = md5_stream(new_chunk_stream) self.assertEqual(new_chunk_hash, chunk_hash) del chunk_meta["chunk_id"] del new_chunk_meta["chunk_id"] self.assertEqual(new_chunk_meta, chunk_meta) def test_single_move_chunk(self): self._test_move_chunk("SINGLE") def test_twocopies_move_chunk(self): self._test_move_chunk("TWOCOPIES") def test_rain_move_chunk(self): if len(self.conf['rawx']) < 9: self.skipTest("Need more than 8 rawx") self._test_move_chunk("RAIN") def test_move_chunk_not_in_content(self): data = random_data(self.chunk_size) content = self._new_content("TWOCOPIES", data) with ExpectedException(OrphanChunk): content.move_chunk("1234") def test_strange_paths(self): for cname in ( "Annual report.txt", "foo+bar=foobar.txt", "100%_bug_free.c", "forward/slash/allowed", "I\\put\\backslashes\\and$dollar$signs$in$file$names", "Je suis tombé sur la 
tête, mais ça va bien.", "%s%f%u%d%%", "carriage\rreturn", "line\nfeed", "ta\tbu\tla\ttion", "controlchars", ): content = self._new_content("SINGLE", "nobody cares", cname) try: self.assertEqual(cname, content.path) finally: pass # TODO: delete the content
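# The assertions in test_extract_datasec above imply a simple grammar for
# data-security strings: "TYPE:key=val|key=val". The function below is a
# hypothetical re-implementation of that parsing, for illustration only;
# the real ContentFactory._extract_datasec also resolves the storage-policy
# table and raises InconsistentContent on unknown names.
def parse_datasec(ds):
    """Parse 'RAIN:k=6|m=2|algo=liber8tion' into ('RAIN', {...})."""
    ds_type, _, raw_args = ds.partition(':')
    ds_args = dict(kv.split('=', 1) for kv in raw_args.split('|')) \
        if raw_args else {}
    return ds_type, ds_args

assert parse_datasec("RAIN:k=6|m=2|algo=liber8tion") == \
    ("RAIN", {"k": "6", "m": "2", "algo": "liber8tion"})
assert parse_datasec("DUP:distance=1|nb_copy=2") == \
    ("DUP", {"distance": "1", "nb_copy": "2"})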
class TestRebuilderCrawler(BaseTestCase): def setUp(self): super(TestRebuilderCrawler, self).setUp() self.namespace = self.conf['namespace'] self.account = self.conf['account'] self.gridconf = {"namespace": self.namespace} self.container_client = ContainerClient(self.gridconf) self.blob_client = BlobClient() self.container_name = "TestRebuilderCrawler%d" % int(time.time()) self.container_client.container_create(acct=self.account, ref=self.container_name) def _push_content(self, content): for c in content.chunks: self.blob_client.chunk_put(c.url, c.get_create_xattr(), c.data) self.container_client.content_create(acct=content.account, ref=content.container_name, path=content.content_name, size=content.size, checksum=content.hash, content_id=content.content_id, stgpol=content.stgpol, data=content.get_create_meta2()) def tearDown(self): super(TestRebuilderCrawler, self).tearDown() def test_rebuild_chunk(self): # push a new content content = TestContent(self.conf, self.account, self.container_name, "mycontent", "TWOCOPIES") data = "azerty" content.add_chunk(data, pos='0', rawx=0) content.add_chunk(data, pos='0', rawx=1) self._push_content(content) # rebuild the first rawx rebuilder = BlobRebuilderWorker(self.gridconf, None, self.conf['rawx'][0]['addr']) rebuilder.chunk_rebuild(content.container_id, content.content_id, content.chunks[0].id) # check meta2 information _, res = self.container_client.content_show(acct=content.account, ref=content.container_name, content=content.content_id) new_chunk_info = None for c in res: if (c['url'] != content.chunks[0].url and c['url'] != content.chunks[1].url): new_chunk_info = c new_chunk_id = new_chunk_info['url'].split('/')[-1] self.assertEqual(new_chunk_info['hash'], content.chunks[0].hash) self.assertEqual(new_chunk_info['pos'], content.chunks[0].pos) self.assertEqual(new_chunk_info['size'], content.chunks[0].size) # check chunk information meta, stream = self.blob_client.chunk_get(new_chunk_info['url']) self.assertEqual(meta['content_size'], str(content.chunks[0].size)) self.assertEqual(meta['content_path'], content.content_name) self.assertEqual(meta['content_cid'], content.container_id) self.assertEqual(meta['content_id'], content.content_id) self.assertEqual(meta['chunk_id'], new_chunk_id) self.assertEqual(meta['chunk_pos'], content.chunks[0].pos) self.assertEqual(meta['content_version'], content.version) self.assertEqual(meta['chunk_hash'], content.chunks[0].hash) self.assertEqual(stream.next(), content.chunks[0].data) # check rtime flag in rdir rdir_client = RdirClient(self.gridconf) res = rdir_client.chunk_fetch(self.conf['rawx'][0]['addr']) key = (content.container_id, content.content_id, content.chunks[0].id) for i_container, i_content, i_chunk, i_value in res: if (i_container, i_content, i_chunk) == key: check_value = i_value self.assertIsNotNone(check_value.get('rtime')) @unittest.skipIf( len(get_config()['rawx']) != 3, "The number of rawx must be 3") def test_rebuild_no_spare(self): # push a new content content = TestContent(self.conf, self.account, self.container_name, "mycontent", "THREECOPIES") data = "azerty" content.add_chunk(data, pos='0', rawx=0) content.add_chunk(data, pos='0', rawx=1) content.add_chunk(data, pos='0', rawx=2) self._push_content(content) # rebuild the first rawx rebuilder = BlobRebuilderWorker(self.gridconf, None, self.conf['rawx'][0]['addr']) self.assertRaises(SpareChunkException, rebuilder.chunk_rebuild, content.container_id, content.content_id, content.chunks[0].id) def test_rebuild_upload_failed(self): # push a new 
content content = TestContent(self.conf, self.account, self.container_name, "mycontent", "TWOCOPIES") data = "azerty" content.add_chunk(data, pos='0', rawx=0) content.add_chunk(data, pos='0', rawx=1) self._push_content(content) # rebuild the first rawx rebuilder = BlobRebuilderWorker(self.gridconf, None, self.conf['rawx'][0]['addr']) # Force upload to raise an exception with patch('oio.content.content.BlobClient') as MockClass: instance = MockClass.return_value instance.chunk_copy.side_effect = Exception("xx") self.assertRaises(UnrecoverableContent, rebuilder.chunk_rebuild, content.container_id, content.content_id, content.chunks[0].id) def test_rebuild_nonexistent_chunk(self): rebuilder = BlobRebuilderWorker(self.gridconf, None, self.conf['rawx'][0]['addr']) # try to rebuild a nonexistent chunk self.assertRaises(OrphanChunk, rebuilder.chunk_rebuild, 64 * '0', 32 * '0', 64 * '0') def test_rebuild_orphan_chunk(self): # push a new content content = TestContent(self.conf, self.account, self.container_name, "mycontent", "TWOCOPIES") data = "azerty" content.add_chunk(data, pos='0', rawx=0) content.add_chunk(data, pos='0', rawx=1) self._push_content(content) # rebuild the first rawx rebuilder = BlobRebuilderWorker(self.gridconf, None, self.conf['rawx'][0]['addr']) # try to rebuild a chunk that is not part of the content self.assertRaises(OrphanChunk, rebuilder.chunk_rebuild, content.container_id, content.content_id, 64 * '0') def test_rebuild_with_no_copy(self): # push a new content content = TestContent(self.conf, self.account, self.container_name, "mycontent", "SINGLE") data = "azerty" content.add_chunk(data, pos='0', rawx=0) self._push_content(content) # rebuild the first rawx rebuilder = BlobRebuilderWorker(self.gridconf, None, self.conf['rawx'][0]['addr']) # try to rebuild a chunk that has no other copy self.assertRaises(UnrecoverableContent, rebuilder.chunk_rebuild, content.container_id, content.content_id, content.chunks[0].id)
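# test_rebuild_chunk above ends by scanning the rdir dump with an inline loop
# to find the rebuilt chunk's 'rtime' flag. The helper below is a sketch of
# that lookup, assuming (as the test does) that chunk_fetch yields
# (container, content, chunk, value) tuples; it is illustrative, not an
# actual RdirClient API.
def find_rtime(entries, container_id, content_id, chunk_id):
    """Return the 'rtime' of the matching rdir entry, or None if absent."""
    for i_container, i_content, i_chunk, i_value in entries:
        if (i_container, i_content, i_chunk) == \
                (container_id, content_id, chunk_id):
            return i_value.get('rtime')
    return None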
class TestBlobRebuilder(BaseTestCase): def setUp(self): super(TestBlobRebuilder, self).setUp() self.container = random_str(16) self.cid = cid_from_name(self.account, self.container) self.path = random_str(16) self.api = ObjectStorageApi(self.ns) self.blob_client = BlobClient(self.conf) self.api.container_create(self.account, self.container) _, chunks = self.api.container.content_prepare(self.account, self.container, self.path, 1) if len(chunks) < 2: self.skipTest("need at least 2 chunks to run") services = self.conscience.all_services('rawx') self.rawx_volumes = dict() for rawx in services: tags = rawx['tags'] service_id = tags.get('tag.service_id', None) if service_id is None: service_id = rawx['addr'] volume = tags.get('tag.vol', None) self.rawx_volumes[service_id] = volume self.api.object_create(self.account, self.container, obj_name=self.path, data="chunk") meta, self.chunks = self.api.object_locate(self.account, self.container, self.path) self.version = meta['version'] self.content_id = meta['id'] def _chunk_path(self, chunk): url = chunk['url'] volume_id = url.split('/', 3)[2] chunk_id = url.split('/', 3)[3] volume = self.rawx_volumes[volume_id] return volume + '/' + chunk_id[:3] + '/' + chunk_id def test_rebuild_old_chunk(self): if self._cls_conf['go_rawx']: self.skipTest('Rawx V2 reads only the new fullpath') for c in self.chunks: convert_to_old_chunk(self._chunk_path(c), self.account, self.container, self.path, self.version, self.content_id) chunk = random.choice(self.chunks) chunk_volume = chunk['url'].split('/')[2] chunk_id = chunk['url'].split('/')[3] chunk_headers, chunk_stream = self.blob_client.chunk_get(chunk['url']) os.remove(self._chunk_path(chunk)) chunks_kept = list(self.chunks) chunks_kept.remove(chunk) conf = self.conf.copy() conf['allow_same_rawx'] = True rebuilder = BlobRebuilder(conf, None, chunk_volume) rebuilder_worker = rebuilder._create_worker() rebuilder_worker.chunk_rebuild(self.cid, self.content_id, chunk_id) _, new_chunks = self.api.object_locate(self.account, self.container, self.path) self.assertEqual(len(new_chunks), len(chunks_kept) + 1) url_kept = [c['url'] for c in chunks_kept] new_chunk = None for c in new_chunks: if c['url'] not in url_kept: self.assertIsNone(new_chunk) new_chunk = c self.assertNotEqual(chunk['real_url'], new_chunk['real_url']) self.assertNotEqual(chunk['url'], new_chunk['url']) self.assertEqual(chunk['pos'], new_chunk['pos']) self.assertEqual(chunk['size'], new_chunk['size']) self.assertEqual(chunk['hash'], new_chunk['hash']) new_chunk_headers, new_chunk_stream = self.blob_client.chunk_get( new_chunk['url']) self.assertEqual(chunk_stream.read(), new_chunk_stream.read()) fullpath = encode_fullpath(self.account, self.container, self.path, self.version, self.content_id) self.assertEqual(fullpath, new_chunk_headers['full_path']) del new_chunk_headers['full_path'] self.assertNotEqual(chunk_headers['chunk_id'], new_chunk_headers['chunk_id']) new_chunk_id = new_chunk['url'].split('/')[3] self.assertEqual(new_chunk_id, new_chunk_headers['chunk_id']) del chunk_headers['chunk_id'] del new_chunk_headers['chunk_id'] self.assertEqual(OIO_VERSION, new_chunk_headers['oio_version']) del chunk_headers['oio_version'] del new_chunk_headers['oio_version'] self.assertEqual(chunk_headers, new_chunk_headers)
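# _chunk_path above encodes the on-disk rawx layout: a chunk file lives in
# its volume, under a subdirectory named after the first three characters of
# the chunk id. A standalone restatement of that mapping (illustration only;
# the volume path in the assert is made up):
def chunk_disk_path(volume, chunk_id):
    """Map a rawx volume and a chunk id to the chunk's file path."""
    return volume + '/' + chunk_id[:3] + '/' + chunk_id

assert chunk_disk_path('/var/lib/rawx-1', 'ABCDEF0123') == \
    '/var/lib/rawx-1/ABC/ABCDEF0123'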
class TestDupContent(BaseTestCase): def setUp(self): super(TestDupContent, self).setUp() if len(self.conf['rawx']) < 3: self.skipTest("Not enough rawx. " "Dup tests need at least 3 rawx to run") self.namespace = self.conf['namespace'] self.account = self.conf['account'] self.chunk_size = self.conf['chunk_size'] self.gridconf = {"namespace": self.namespace} self.content_factory = ContentFactory(self.gridconf) self.container_client = ContainerClient(self.gridconf) self.blob_client = BlobClient() self.container_name = "TestDupContent%f" % time.time() self.container_client.container_create(acct=self.account, ref=self.container_name) self.container_id = cid_from_name(self.account, self.container_name).upper() def tearDown(self): super(TestDupContent, self).tearDown() def _test_upload(self, stgpol, data_size): data = random_data(data_size) content = self.content_factory.new(self.container_id, "titi", len(data), stgpol) self.assertEqual(type(content), DupContent) content.upload(StringIO.StringIO(data)) meta, chunks = self.container_client.content_show( cid=self.container_id, content=content.content_id) chunks = ChunksHelper(chunks) self.assertEqual(meta['hash'], md5_data(data)) self.assertEqual(meta['length'], str(len(data))) self.assertEqual(meta['policy'], stgpol) self.assertEqual(meta['name'], "titi") metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size)) if metachunk_nb == 0: metachunk_nb = 1 # special case for empty content if stgpol == "THREECOPIES": nb_copy = 3 elif stgpol == "TWOCOPIES": nb_copy = 2 elif stgpol == "SINGLE": nb_copy = 1 self.assertEqual(len(chunks), metachunk_nb * nb_copy) for pos in range(metachunk_nb): chunks_at_pos = chunks.filter(pos=pos) self.assertEqual(len(chunks_at_pos), nb_copy) data_begin = pos * self.chunk_size data_end = pos * self.chunk_size + self.chunk_size chunk_hash = md5_data(data[data_begin:data_end]) for chunk in chunks_at_pos: meta, stream = self.blob_client.chunk_get(chunk.url) self.assertEqual(md5_stream(stream), chunk_hash) self.assertEqual(meta['content_size'], str(len(data))) self.assertEqual(meta['content_path'], "titi") self.assertEqual(meta['content_cid'], self.container_id) self.assertEqual(meta['content_id'], content.content_id) self.assertEqual(meta['chunk_id'], chunk.id) self.assertEqual(meta['chunk_pos'], str(pos)) self.assertEqual(meta['chunk_hash'], chunk_hash) def test_twocopies_upload_0_byte(self): self._test_upload("TWOCOPIES", 0) def test_twocopies_upload_1_byte(self): self._test_upload("TWOCOPIES", 1) def test_twocopies_upload_chunksize_bytes(self): self._test_upload("TWOCOPIES", self.chunk_size) def test_twocopies_upload_chunksize_plus_1_bytes(self): self._test_upload("TWOCOPIES", self.chunk_size + 1) def test_single_upload_0_byte(self): self._test_upload("SINGLE", 0) def test_single_upload_chunksize_plus_1_bytes(self): self._test_upload("SINGLE", self.chunk_size + 1) def test_chunks_cleanup_when_upload_failed(self): data = random_data(2 * self.chunk_size) content = self.content_factory.new(self.container_id, "titi", len(data), "TWOCOPIES") self.assertEqual(type(content), DupContent) # set bad url for position 1 for chunk in content.chunks.filter(pos=1): chunk.url = "http://127.0.0.1:9/DEADBEEF" self.assertRaises(Exception, content.upload, StringIO.StringIO(data)) for chunk in content.chunks.exclude(pos=1): self.assertRaises(NotFound, self.blob_client.chunk_head, chunk.url) def _new_content(self, stgpol, data, broken_pos_list=[]): old_content = self.content_factory.new(self.container_id, "titi", len(data), stgpol)
self.assertEqual(type(old_content), DupContent) old_content.upload(StringIO.StringIO(data)) broken_chunks_info = {} for pos, idx in broken_pos_list: c = old_content.chunks.filter(pos=pos)[idx] meta, stream = self.blob_client.chunk_get(c.url) if pos not in broken_chunks_info: broken_chunks_info[pos] = {} broken_chunks_info[pos][idx] = { "url": c.url, "id": c.id, "hash": c.hash, "dl_meta": meta, "dl_hash": md5_stream(stream) } self.blob_client.chunk_delete(c.url) # get the new structure of the uploaded content return (self.content_factory.get(self.container_id, old_content.content_id), broken_chunks_info) def _test_rebuild(self, stgpol, data_size, broken_pos_list, full_rebuild_pos): data = random_data(data_size) content, broken_chunks_info = self._new_content( stgpol, data, broken_pos_list) rebuild_pos, rebuild_idx = full_rebuild_pos rebuild_chunk_info = broken_chunks_info[rebuild_pos][rebuild_idx] content.rebuild_chunk(rebuild_chunk_info["id"]) # get the new structure of the content rebuilt_content = self.content_factory.get(self.container_id, content.content_id) self.assertEqual(type(rebuilt_content), DupContent) # find the rebuilt chunk for c in rebuilt_content.chunks.filter(pos=rebuild_pos): if len(content.chunks.filter(id=c.id)) > 0: # not the rebuilt chunk # if this chunk is broken, it must not have been rebuilt for b_c_i in broken_chunks_info[rebuild_pos].values(): if c.id == b_c_i["id"]: with ExpectedException(NotFound): _, _ = self.blob_client.chunk_get(c.url) continue meta, stream = self.blob_client.chunk_get(c.url) self.assertEqual(meta["chunk_id"], c.id) self.assertEqual(md5_stream(stream), rebuild_chunk_info["dl_hash"]) self.assertEqual(c.hash, rebuild_chunk_info["hash"]) self.assertThat(c.url, NotEquals(rebuild_chunk_info["url"])) del meta["chunk_id"] del rebuild_chunk_info["dl_meta"]["chunk_id"] self.assertEqual(meta, rebuild_chunk_info["dl_meta"]) def test_2copies_content_0_byte_1broken_rebuild_pos_0_idx_0(self): self._test_rebuild("TWOCOPIES", 0, [(0, 0)], (0, 0)) def test_2copies_content_1_byte_1broken_rebuild_pos_0_idx_1(self): self._test_rebuild("TWOCOPIES", 1, [(0, 1)], (0, 1)) def test_3copies_content_chunksize_bytes_2broken_rebuild_pos_0_idx_1(self): if len(self.conf['rawx']) <= 3: self.skipTest("Need more than 3 rawx") self._test_rebuild("THREECOPIES", self.chunk_size, [(0, 0), (0, 1)], (0, 1)) def test_3copies_content_2xchksize_bytes_2broken_rebuild_pos_1_idx_2(self): if len(self.conf['rawx']) <= 3: self.skipTest("Need more than 3 rawx") self._test_rebuild("THREECOPIES", 2 * self.chunk_size, [(1, 0), (1, 2)], (1, 2)) def test_2copies_content_0_byte_2broken_rebuild_pos_0_idx_0(self): with ExpectedException(UnrecoverableContent): self._test_rebuild("TWOCOPIES", 0, [(0, 0), (0, 1)], (0, 0)) def _test_download(self, stgpol, data_size, broken_pos_list): data = random_data(data_size) content, _ = self._new_content(stgpol, data, broken_pos_list) downloaded_data = "".join(content.download()) self.assertEqual(downloaded_data, data) for pos, idx in broken_pos_list: # check nothing has been rebuilt c = content.chunks.filter(pos=pos)[0] self.assertRaises(NotFound, self.blob_client.chunk_delete, c.url) def test_twocopies_download_content_0_byte_without_broken_chunks(self): self._test_download("TWOCOPIES", 0, []) def test_twocopies_download_content_0_byte_with_broken_0_0(self): self._test_download("TWOCOPIES", 0, [(0, 0)]) def test_twocopies_download_content_1_byte_without_broken_chunks(self): self._test_download("TWOCOPIES", 1, []) def 
test_twocopies_download_content_1_byte_with_broken_0_0(self): self._test_download("TWOCOPIES", 1, [(0, 0)]) def test_twocopies_download_chunksize_bytes_without_broken_chunks(self): self._test_download("TWOCOPIES", self.chunk_size, []) def test_twocopies_download_2xchuksize_bytes_with_broken_0_0_and_1_0(self): self._test_download("TWOCOPIES", self.chunk_size * 2, [(0, 0), (1, 0)]) def test_twocopies_download_content_chunksize_bytes_2_broken_chunks(self): data = random_data(self.chunk_size) content, _ = self._new_content("TWOCOPIES", data, [(0, 0), (0, 1)]) gen = content.download() self.assertRaises(UnrecoverableContent, gen.next) def test_single_download_content_1_byte_without_broken_chunks(self): self._test_download("SINGLE", 1, []) def test_single_download_chunksize_bytes_plus_1_without_broken_chunk(self): self._test_download("SINGLE", self.chunk_size * 2, [])
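# _test_upload above maps the storage-policy name to its copy count with an
# if/elif chain. The same mapping as a lookup table, plus the chunk-count
# rule the test asserts -- a sketch for illustration (policies other than
# these three would need their own entries):
import math

NB_COPY_BY_POLICY = {"SINGLE": 1, "TWOCOPIES": 2, "THREECOPIES": 3}

def expected_chunk_count(data_size, chunk_size, stgpol):
    """Total number of chunks a duplicated content should produce."""
    metachunk_nb = max(1, int(math.ceil(float(data_size) / chunk_size)))
    return metachunk_nb * NB_COPY_BY_POLICY[stgpol]

assert expected_chunk_count(0, 1024, "TWOCOPIES") == 2
assert expected_chunk_count(1025, 1024, "THREECOPIES") == 6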
class TestRainContent(BaseTestCase): def setUp(self): super(TestRainContent, self).setUp() if len(self.conf['rawx']) < 12: self.skipTest("Not enough rawx. " "RAIN tests need at least 12 rawx to run") self.namespace = self.conf['namespace'] self.account = self.conf['account'] self.chunk_size = self.conf['chunk_size'] self.gridconf = {"namespace": self.namespace} self.content_factory = ContentFactory(self.gridconf) self.container_client = ContainerClient(self.gridconf) self.blob_client = BlobClient() self.container_name = "TestRainContent%f" % time.time() self.container_client.container_create(acct=self.account, ref=self.container_name) self.container_id = cid_from_name(self.account, self.container_name).upper() def tearDown(self): super(TestRainContent, self).tearDown() def _test_upload(self, data_size): data = random_data(data_size) content = self.content_factory.new(self.container_id, "titi", len(data), "RAIN") k = 6 m = 2 self.assertEqual(type(content), RainContent) content.upload(StringIO.StringIO(data)) meta, chunks = self.container_client.content_show( cid=self.container_id, content=content.content_id) chunks = ChunksHelper(chunks) self.assertEqual(meta['hash'], md5_data(data)) self.assertEqual(meta['length'], str(len(data))) self.assertEqual(meta['policy'], "RAIN") self.assertEqual(meta['name'], "titi") metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size)) if metachunk_nb == 0: metachunk_nb = 1 # special case for empty content nb_chunks_min = metachunk_nb * (k + m) - (k - 1) nb_chunks_max = metachunk_nb * (k + m) self.assertGreaterEqual(len(chunks), nb_chunks_min) self.assertLessEqual(len(chunks), nb_chunks_max) for metapos in range(metachunk_nb): chunks_at_pos = content.chunks.filter(metapos=metapos) data_chunks_at_pos = chunks_at_pos.filter(is_parity=False) parity_chunks_at_pos = chunks_at_pos.filter(is_parity=True) if metapos < metachunk_nb - 1: self.assertEqual(len(data_chunks_at_pos), k) else: self.assertGreaterEqual(len(data_chunks_at_pos), 1) self.assertLessEqual(len(data_chunks_at_pos), k) self.assertEqual(len(parity_chunks_at_pos), m) for chunk in chunks_at_pos: meta, stream = self.blob_client.chunk_get(chunk.url) self.assertEqual(md5_stream(stream), chunk.hash) self.assertEqual(meta['content_size'], str(len(data))) self.assertEqual(meta['content_path'], "titi") self.assertEqual(meta['content_cid'], self.container_id) self.assertEqual(meta['content_id'], content.content_id) self.assertEqual(meta['chunk_id'], chunk.id) self.assertEqual(meta['chunk_pos'], chunk.pos) self.assertEqual(meta['chunk_hash'], chunk.hash) data_begin = metapos * self.chunk_size data_end = metapos * self.chunk_size + self.chunk_size target_metachunk_hash = md5_data(data[data_begin:data_end]) metachunk_hash = hashlib.md5() for chunk in data_chunks_at_pos: meta, stream = self.blob_client.chunk_get(chunk.url) for d in stream: metachunk_hash.update(d) self.assertEqual(metachunk_hash.hexdigest().upper(), target_metachunk_hash) def test_upload_0_byte(self): self._test_upload(0) def test_upload_1_byte(self): self._test_upload(1) def test_upload_chunksize_bytes(self): self._test_upload(self.chunk_size) def test_upload_chunksize_plus_1_bytes(self): self._test_upload(self.chunk_size + 1) def test_chunks_cleanup_when_upload_failed(self): data = random_data(2 * self.chunk_size) content = self.content_factory.new(self.container_id, "titi", len(data), "RAIN") self.assertEqual(type(content), RainContent) # set bad url for position 1 for chunk in content.chunks.filter(pos="1.p0"): chunk.url
= "http://127.0.0.1:9/DEADBEEF" self.assertRaises(Exception, content.upload, StringIO.StringIO(data)) for chunk in content.chunks.exclude(pos="1.p0"): self.assertRaises(NotFound, self.blob_client.chunk_head, chunk.url) def _test_rebuild(self, data_size, broken_pos_list): data = os.urandom(data_size) old_content = self.content_factory.new(self.container_id, "titi", len(data), "RAIN") self.assertEqual(type(old_content), RainContent) old_content.upload(StringIO.StringIO(data)) # get the new structure of the uploaded content uploaded_content = self.content_factory.get(self.container_id, old_content.content_id) old_info = {} for pos in broken_pos_list: old_info[pos] = {} c = uploaded_content.chunks.filter(pos=pos)[0] old_info[pos]["url"] = c.url old_info[pos]["id"] = c.id old_info[pos]["hash"] = c.hash chunk_id_to_rebuild = c.id meta, stream = self.blob_client.chunk_get(c.url) old_info[pos]["dl_meta"] = meta old_info[pos]["dl_hash"] = md5_stream(stream) # delete the chunk self.blob_client.chunk_delete(c.url) # rebuild the broken chunks uploaded_content.rebuild_chunk(chunk_id_to_rebuild) # get the new structure of the content rebuilt_content = self.content_factory.get(self.container_id, uploaded_content.content_id) self.assertEqual(type(rebuilt_content), RainContent) for pos in broken_pos_list: c = rebuilt_content.chunks.filter(pos=pos)[0] rebuilt_meta, rebuilt_stream = self.blob_client.chunk_get(c.url) self.assertEqual(rebuilt_meta["chunk_id"], c.id) self.assertEqual(md5_stream(rebuilt_stream), old_info[pos]["dl_hash"]) self.assertEqual(c.hash, old_info[pos]["hash"]) self.assertThat(c.url, NotEquals(old_info[pos]["url"])) del old_info[pos]["dl_meta"]["chunk_id"] del rebuilt_meta["chunk_id"] self.assertEqual(rebuilt_meta, old_info[pos]["dl_meta"]) def test_content_0_byte_rebuild_pos_0_0(self): self._test_rebuild(0, ["0.0"]) def test_content_0_byte_rebuild_pos_0_0_and_0_p0(self): self._test_rebuild(0, ["0.0", "0.p0"]) def test_content_1_byte_rebuild_pos_0_0(self): self._test_rebuild(1, ["0.0"]) def test_content_1_byte_rebuild_pos_0_p0(self): self._test_rebuild(1, ["0.p0"]) def test_content_1_byte_rebuild_pos_0_0_and_0_p0(self): self._test_rebuild(1, ["0.0", "0.p0"]) def test_content_chunksize_bytes_rebuild_pos_0_0(self): self._test_rebuild(self.conf["chunk_size"], ["0.0"]) def test_content_chunksize_bytes_rebuild_pos_0_0_and_0_1(self): self._test_rebuild(self.conf["chunk_size"], ["0.0", "0.1"]) def test_content_chunksize_bytes_rebuild_pos_0_0_and_0_p0(self): self._test_rebuild(self.conf["chunk_size"], ["0.0", "0.p0"]) def test_content_chunksize_bytes_rebuild_pos_0_p0_and_0_p1(self): self._test_rebuild(self.conf["chunk_size"], ["0.p0", "0.p1"]) def test_content_chunksize_bytes_rebuild_more_than_k_chunk(self): self.assertRaises(UnrecoverableContent, self._test_rebuild, self.conf["chunk_size"], ["0.0", "0.1", "0.2"]) def _new_content(self, data, broken_pos_list=[]): old_content = self.content_factory.new(self.container_id, "titi", len(data), "RAIN") self.assertEqual(type(old_content), RainContent) old_content.upload(StringIO.StringIO(data)) for pos in broken_pos_list: c = old_content.chunks.filter(pos=pos)[0] self.blob_client.chunk_delete(c.url) # get the new structure of the uploaded content return self.content_factory.get(self.container_id, old_content.content_id) def test_orphan_chunk(self): content = self._new_content(random_data(10)) self.assertRaises(OrphanChunk, content.rebuild_chunk, "uNkNoWnId") def test_rebuild_on_the_fly(self): data = random_data(self.conf["chunk_size"]) content = 
self._new_content(data, ["0.0", "0.p0"]) stream = content.rebuild_metachunk("0", on_the_fly=True) dl_data = "".join(stream) self.assertEqual(dl_data, data) del_chunk_0_0 = content.chunks.filter(pos="0.0")[0] del_chunk_0_p0 = content.chunks.filter(pos="0.p0")[0] self.assertRaises(NotFound, self.blob_client.chunk_get, del_chunk_0_0.url) self.assertRaises(NotFound, self.blob_client.chunk_get, del_chunk_0_p0.url) def _test_download(self, data_size, broken_pos_list): data = random_data(data_size) content = self._new_content(data, broken_pos_list) downloaded_data = "".join(content.download()) self.assertEqual(downloaded_data, data) for pos in broken_pos_list: c = content.chunks.filter(pos=pos)[0] self.assertRaises(NotFound, self.blob_client.chunk_delete, c.url) def test_download_content_0_byte_without_broken_chunks(self): self._test_download(0, []) def test_download_content_1_byte_without_broken_chunks(self): self._test_download(1, []) def test_download_content_chunksize_bytes_without_broken_chunks(self): self._test_download(self.conf["chunk_size"], []) def test_download_content_chunksize_plus_1_without_broken_chunks(self): self._test_download(self.conf["chunk_size"] + 1, []) def test_download_content_0_byte_with_broken_0_0_and_0_p0(self): self._test_download(0, ["0.0", "0.p0"]) def test_download_content_1_byte_with_broken_0_0_and_0_p0(self): self._test_download(1, ["0.0", "0.p0"]) def test_download_content_2xchunksize_with_broken_0_2_and_1_0(self): self._test_download(2 * self.conf["chunk_size"], ["0.2", "1.0"]) def test_download_content_chunksize_bytes_with_3_broken_chunks(self): data = random_data(self.conf["chunk_size"]) content = self._new_content(data, ["0.0", "0.1", "0.2"]) gen = content.download() self.assertRaises(UnrecoverableContent, gen.next) def test_download_interrupt_close(self): data = random_data(self.conf["chunk_size"]) content = self._new_content(data, ["0.p0"]) download_iter = content.download() self.assertEqual(download_iter.next(), data[0:READ_CHUNK_SIZE - 1]) download_iter.close()
class TestECContent(BaseTestCase): def setUp(self): super(TestECContent, self).setUp() if len(self.conf['services']['rawx']) < 12: self.skipTest("Not enough rawx. " "EC tests need at least 12 rawx to run") self.namespace = self.conf['namespace'] self.account = self.conf['account'] self.chunk_size = self.conf['chunk_size'] self.gridconf = {"namespace": self.namespace} self.content_factory = ContentFactory(self.gridconf) self.container_client = ContainerClient(self.gridconf) self.blob_client = BlobClient(self.conf) self.container_name = "TestECContent%f" % time.time() self.container_client.container_create(account=self.account, reference=self.container_name) self.container_id = cid_from_name(self.account, self.container_name).upper() self.content = "%s-%s" % (self.__class__.__name__, random_str(4)) self.stgpol = "EC" self.size = 1024 * 1024 + 320 self.k = 6 self.m = 3 def tearDown(self): super(TestECContent, self).tearDown() def random_chunks(self, nb): pos = random.sample(xrange(self.k + self.m), nb) return ["0.%s" % i for i in pos] def _test_create(self, data_size): # generate random test data data = random_data(data_size) # using factory create new EC content content = self.content_factory.new(self.container_id, self.content, len(data), self.stgpol) # verify the factory gave us an ECContent self.assertEqual(type(content), ECContent) # perform the content creation content.create(BytesIO(data)) meta, chunks = self.container_client.content_locate( cid=self.container_id, content=content.content_id) # verify metadata chunks = ChunksHelper(chunks) self.assertEqual(meta['hash'], md5_data(data)) self.assertEqual(meta['length'], str(len(data))) self.assertEqual(meta['policy'], self.stgpol) self.assertEqual(meta['name'], self.content) metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size)) \ if len(data) != 0 else 1 offset = 0 # verify each metachunk for metapos in range(metachunk_nb): chunks_at_pos = content.chunks.filter(metapos=metapos) if len(chunks_at_pos) < 1: break metachunk_size = chunks_at_pos[0].size metachunk_hash = md5_data(data[offset:offset + metachunk_size]) for chunk in chunks_at_pos: meta, stream = self.blob_client.chunk_get(chunk.url) self.assertEqual(meta['metachunk_size'], str(chunk.size)) self.assertEqual(meta['metachunk_hash'], chunk.checksum) self.assertEqual(meta['content_path'], self.content) self.assertEqual(meta['container_id'], self.container_id) self.assertEqual(meta['content_id'], content.content_id) self.assertEqual(meta['chunk_id'], chunk.id) self.assertEqual(meta['chunk_pos'], chunk.pos) self.assertEqual(meta['chunk_hash'], md5_stream(stream)) full_path = encode_fullpath(self.account, self.container_name, self.content, meta['content_version'], meta['content_id']) self.assertEqual(meta['full_path'], full_path) self.assertEqual(meta['oio_version'], '4.2') self.assertEqual(metachunk_hash, chunk.checksum) offset += metachunk_size def test_create_0_byte(self): self._test_create(0) def test_create_1_byte(self): self._test_create(1) def test_create(self): self._test_create(DAT_LEGIT_SIZE) def test_create_6294503_bytes(self): self._test_create(6294503) def _test_rebuild(self, data_size, broken_pos_list): # generate test data data = os.urandom(data_size) # create initial content old_content = self.content_factory.new(self.container_id, self.content, len(data), self.stgpol) # verify the factory works as intended self.assertEqual(type(old_content), ECContent) # perform initial content creation old_content.create(BytesIO(data)) uploaded_content =
self.content_factory.get(self.container_id, old_content.content_id) # break the content old_info = {} for pos in broken_pos_list: old_info[pos] = {} c = uploaded_content.chunks.filter(pos=pos)[0] old_info[pos]["url"] = c.url old_info[pos]["id"] = c.id old_info[pos]["hash"] = c.checksum chunk_id_to_rebuild = c.id meta, stream = self.blob_client.chunk_get(c.url) old_info[pos]["dl_meta"] = meta old_info[pos]["dl_hash"] = md5_stream(stream) # delete the chunk self.blob_client.chunk_delete(c.url) # rebuild the broken chunks uploaded_content.rebuild_chunk(chunk_id_to_rebuild) rebuilt_content = self.content_factory.get(self.container_id, uploaded_content.content_id) # sanity check self.assertEqual(type(rebuilt_content), ECContent) # verify rebuild result for pos in broken_pos_list: c = rebuilt_content.chunks.filter(pos=pos)[0] rebuilt_meta, rebuilt_stream = self.blob_client.chunk_get(c.url) self.assertEqual(rebuilt_meta["chunk_id"], c.id) self.assertEqual(md5_stream(rebuilt_stream), old_info[pos]["dl_hash"]) self.assertEqual(c.checksum, old_info[pos]["hash"]) self.assertNotEqual(c.url, old_info[pos]["url"]) self.assertGreaterEqual(rebuilt_meta['chunk_mtime'], old_info[pos]['dl_meta']['chunk_mtime']) del old_info[pos]["dl_meta"]["chunk_mtime"] del rebuilt_meta["chunk_mtime"] del old_info[pos]["dl_meta"]["chunk_id"] del rebuilt_meta["chunk_id"] self.assertEqual(rebuilt_meta, old_info[pos]["dl_meta"]) def test_content_0_byte_rebuild(self): self._test_rebuild(0, self.random_chunks(1)) def test_content_0_byte_rebuild_advanced(self): self._test_rebuild(0, self.random_chunks(3)) def test_content_1_byte_rebuild(self): self._test_rebuild(1, self.random_chunks(1)) def test_content_1_byte_rebuild_advanced(self): self._test_rebuild(1, self.random_chunks(3)) def test_content_rebuild(self): self._test_rebuild(DAT_LEGIT_SIZE, self.random_chunks(1)) def test_content_rebuild_advanced(self): self._test_rebuild(DAT_LEGIT_SIZE, self.random_chunks(3)) def test_content_rebuild_unrecoverable(self): self.assertRaises(UnrecoverableContent, self._test_rebuild, DAT_LEGIT_SIZE, self.random_chunks(4)) def _new_content(self, data, broken_pos_list=[]): old_content = self.content_factory.new(self.container_id, self.content, len(data), self.stgpol) self.assertEqual(type(old_content), ECContent) old_content.create(BytesIO(data)) # break content for pos in broken_pos_list: c = old_content.chunks.filter(pos=pos)[0] self.blob_client.chunk_delete(c.url) # get the new structure of the uploaded content return self.content_factory.get(self.container_id, old_content.content_id) def test_orphan_chunk(self): content = self._new_content(random_data(10)) self.assertRaises(OrphanChunk, content.rebuild_chunk, "invalid") def _test_fetch(self, data_size, broken_pos_list=None): broken_pos_list = broken_pos_list or [] test_data = random_data(data_size) content = self._new_content(test_data, broken_pos_list) data = b''.join(content.fetch()) self.assertEqual(len(data), len(test_data)) self.assertEqual(md5_data(data), md5_data(test_data)) # verify that chunks are broken for pos in broken_pos_list: chunk = content.chunks.filter(pos=pos)[0] self.assertRaises(NotFound, self.blob_client.chunk_delete, chunk.url) def test_fetch_content_0_byte(self): self._test_fetch(0) def test_fetch_content_1_byte(self): self._test_fetch(1) def test_fetch_content(self): self._test_fetch(DAT_LEGIT_SIZE) def test_fetch_content_0_byte_broken(self): self._test_fetch(0, self.random_chunks(3)) def test_fetch_content_1_byte_broken(self): self._test_fetch(1, 
self.random_chunks(3)) def test_fetch_content_broken(self): self._test_fetch(DAT_LEGIT_SIZE, self.random_chunks(3)) def test_fetch_content_unrecoverable(self): broken_chunks = self.random_chunks(4) self.assertRaises(OioException, self._test_fetch, DAT_LEGIT_SIZE, broken_chunks)
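# The rebuild and fetch tests above encode the EC loss-tolerance rule for
# this class (k=6, m=3): up to m missing fragments per metachunk can be
# reconstructed, while m + 1 losses raise UnrecoverableContent. A one-line
# restatement of that rule, for illustration:
def is_recoverable(lost_fragments, k=6, m=3):
    """True if an EC(k, m) metachunk survives `lost_fragments` losses."""
    return lost_fragments <= m

assert is_recoverable(3) and not is_recoverable(4)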
class TestPlainContent(BaseTestCase): def setUp(self): super(TestPlainContent, self).setUp() if len(self.conf['services']['rawx']) < 4: self.skipTest( "Plain tests need at least 4 rawx to run") self.namespace = self.conf['namespace'] self.account = self.conf['account'] self.chunk_size = self.conf['chunk_size'] self.gridconf = {"namespace": self.namespace} self.content_factory = ContentFactory(self.gridconf) self.container_client = ContainerClient(self.gridconf) self.blob_client = BlobClient(self.conf) self.container_name = "TestPlainContent-%f" % time.time() self.container_client.container_create(account=self.account, reference=self.container_name) self.container_id = cid_from_name(self.account, self.container_name).upper() self.content = random_str(64) self.stgpol = "SINGLE" self.stgpol_twocopies = "TWOCOPIES" self.stgpol_threecopies = "THREECOPIES" def _test_create(self, stgpol, data_size): data = random_data(data_size) content = self.content_factory.new(self.container_id, self.content, len(data), stgpol) content.create(BytesIO(data)) meta, chunks = self.container_client.content_locate( cid=self.container_id, content=content.content_id) self.assertEqual(meta['hash'], md5_data(data)) self.assertEqual(meta['length'], str(len(data))) self.assertEqual(meta['policy'], stgpol) self.assertEqual(meta['name'], self.content) metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size)) if metachunk_nb == 0: metachunk_nb = 1 # special case for empty content chunks = ChunksHelper(chunks) if stgpol == self.stgpol_threecopies: nb_copy = 3 elif stgpol == self.stgpol_twocopies: nb_copy = 2 elif stgpol == self.stgpol: nb_copy = 1 self.assertEqual(len(chunks), metachunk_nb * nb_copy) for pos in range(metachunk_nb): chunks_at_pos = chunks.filter(pos=pos) self.assertEqual(len(chunks_at_pos), nb_copy) data_begin = pos * self.chunk_size data_end = pos * self.chunk_size + self.chunk_size chunk_hash = md5_data(data[data_begin:data_end]) for chunk in chunks_at_pos: meta, stream = self.blob_client.chunk_get(chunk.url) self.assertEqual(md5_stream(stream), chunk_hash) self.assertEqual(meta['content_path'], self.content) self.assertEqual(meta['container_id'], self.container_id) self.assertEqual(meta['content_id'], content.content_id) self.assertEqual(meta['chunk_id'], chunk.id) self.assertEqual(meta['chunk_pos'], str(pos)) # Check that chunk data matches chunk hash from xattr self.assertEqual(meta['chunk_hash'], chunk_hash) # Check that chunk data matches chunk hash from database self.assertEqual(chunk.checksum, chunk_hash) full_path = encode_fullpath( self.account, self.container_name, self.content, meta['content_version'], meta['content_id']) self.assertEqual(meta['full_path'], full_path) self.assertEqual(meta['oio_version'], '4.2') def test_twocopies_create_0_byte(self): self._test_create(self.stgpol_twocopies, 0) def test_twocopies_create_1_byte(self): self._test_create(self.stgpol_twocopies, 1) def test_twocopies_create_chunksize_bytes(self): self._test_create(self.stgpol_twocopies, self.chunk_size) def test_twocopies_create_chunksize_plus_1_bytes(self): self._test_create(self.stgpol_twocopies, self.chunk_size + 1) def test_twocopies_create_6294503_bytes(self): self._test_create(self.stgpol_twocopies, 6294503) def test_single_create_0_byte(self): self._test_create(self.stgpol, 0) def test_single_create_chunksize_plus_1_bytes(self): self._test_create(self.stgpol, self.chunk_size + 1) def _new_content(self, stgpol, data, broken_pos_list=[]): old_content = self.content_factory.new(
self.container_id, self.content, len(data), stgpol) old_content.create(BytesIO(data)) broken_chunks_info = {} for pos, idx in broken_pos_list: c = old_content.chunks.filter(pos=pos)[idx] meta, stream = self.blob_client.chunk_get(c.url) if pos not in broken_chunks_info: broken_chunks_info[pos] = {} broken_chunks_info[pos][idx] = { "url": c.url, "id": c.id, "hash": c.checksum, "dl_meta": meta, "dl_hash": md5_stream(stream) } self.blob_client.chunk_delete(c.url) # get the new structure of the uploaded content return (self.content_factory.get( self.container_id, old_content.content_id), broken_chunks_info) def _rebuild_and_check(self, content, broken_chunks_info, full_rebuild_pos, allow_frozen_container=False): rebuild_pos, rebuild_idx = full_rebuild_pos rebuild_chunk_info = broken_chunks_info[rebuild_pos][rebuild_idx] content.rebuild_chunk(rebuild_chunk_info["id"], allow_frozen_container=allow_frozen_container) # get the new structure of the content rebuilt_content = self.content_factory.get(self.container_id, content.content_id) # find the rebuilt chunk for c in rebuilt_content.chunks.filter(pos=rebuild_pos): if len(content.chunks.filter(id=c.id)) > 0: # not the rebuilt chunk # if this chunk is broken, it must not have been rebuilt for b_c_i in broken_chunks_info[rebuild_pos].values(): if c.id == b_c_i["id"]: with ExpectedException(NotFound): _, _ = self.blob_client.chunk_get(c.url) continue meta, stream = self.blob_client.chunk_get(c.url) self.assertEqual(meta["chunk_id"], c.id) self.assertEqual(md5_stream(stream), rebuild_chunk_info["dl_hash"]) self.assertEqual(c.checksum, rebuild_chunk_info["hash"]) self.assertThat(c.url, NotEquals(rebuild_chunk_info["url"])) del meta["chunk_id"] del rebuild_chunk_info["dl_meta"]["chunk_id"] self.assertEqual(meta, rebuild_chunk_info["dl_meta"]) def _test_rebuild(self, stgpol, data_size, broken_pos_list, full_rebuild_pos): data = random_data(data_size) content, broken_chunks_info = self._new_content( stgpol, data, broken_pos_list) self._rebuild_and_check(content, broken_chunks_info, full_rebuild_pos) def test_2copies_content_0_byte_1broken_rebuild_pos_0_idx_0(self): self._test_rebuild(self.stgpol_twocopies, 0, [(0, 0)], (0, 0)) def test_2copies_content_1_byte_1broken_rebuild_pos_0_idx_1(self): self._test_rebuild(self.stgpol_twocopies, 1, [(0, 1)], (0, 1)) def test_3copies_content_chunksize_bytes_2broken_rebuild_pos_0_idx_1(self): if len(self.conf['services']['rawx']) <= 3: self.skipTest("Need more than 3 rawx") self._test_rebuild(self.stgpol_threecopies, self.chunk_size, [(0, 0), (0, 1)], (0, 1)) def test_3copies_content_2xchksize_bytes_2broken_rebuild_pos_1_idx_2(self): self._test_rebuild(self.stgpol_threecopies, 2 * self.chunk_size, [(1, 0), (1, 2)], (1, 2)) def test_2copies_content_0_byte_2broken_rebuild_pos_0_idx_0(self): with ExpectedException(UnrecoverableContent): self._test_rebuild( self.stgpol_twocopies, 0, [(0, 0), (0, 1)], (0, 0)) def test_rebuild_chunk_in_frozen_container(self): data = random_data(self.chunk_size) content, broken_chunks_info = self._new_content( self.stgpol_twocopies, data, [(0, 0)]) system = dict() system['sys.status'] = str(OIO_DB_FROZEN) self.container_client.container_set_properties( self.account, self.container_name, None, system=system) try: full_rebuild_pos = (0, 0) rebuild_pos, rebuild_idx = full_rebuild_pos rebuild_chunk_info = broken_chunks_info[rebuild_pos][rebuild_idx] self.assertRaises(ServiceBusy, content.rebuild_chunk, rebuild_chunk_info["id"]) finally: system['sys.status'] = str(OIO_DB_ENABLED) 
self.container_client.container_set_properties( self.account, self.container_name, None, system=system) self._rebuild_and_check(content, broken_chunks_info, full_rebuild_pos, allow_frozen_container=True) def _test_fetch(self, stgpol, data_size, broken_pos_list): data = random_data(data_size) content, _ = self._new_content(stgpol, data, broken_pos_list) fetched_data = "".join(content.fetch()) self.assertEqual(fetched_data, data) for pos, idx in broken_pos_list: # check nothing has been rebuilt c = content.chunks.filter(pos=pos)[0] self.assertRaises(NotFound, self.blob_client.chunk_delete, c.url) def test_twocopies_fetch_content_0_byte_without_broken_chunks(self): self._test_fetch(self.stgpol_twocopies, 0, []) def test_twocopies_fetch_content_0_byte_with_broken_0_0(self): self._test_fetch(self.stgpol_twocopies, 0, [(0, 0)]) def test_twocopies_fetch_content_1_byte_without_broken_chunks(self): self._test_fetch(self.stgpol_twocopies, 1, []) def test_twocopies_fetch_content_1_byte_with_broken_0_0(self): self._test_fetch(self.stgpol_twocopies, 1, [(0, 0)]) def test_twocopies_fetch_chunksize_bytes_without_broken_chunks(self): self._test_fetch(self.stgpol_twocopies, self.chunk_size, []) def test_twocopies_fetch_2xchuksize_bytes_with_broken_0_0_and_1_0(self): self._test_fetch( self.stgpol_twocopies, self.chunk_size * 2, [(0, 0), (1, 0)]) def test_twocopies_fetch_content_chunksize_bytes_2_broken_chunks(self): data = random_data(self.chunk_size) content, _ = self._new_content( self.stgpol_twocopies, data, [(0, 0), (0, 1)]) gen = content.fetch() self.assertRaises(UnrecoverableContent, gen.next) def test_single_fetch_content_1_byte_without_broken_chunks(self): self._test_fetch(self.stgpol, 1, []) def test_single_fetch_chunksize_bytes_plus_1_without_broken_chunk(self): self._test_fetch(self.stgpol, self.chunk_size * 2, [])
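# A minimal sketch (not part of the original suite) of the table-driven
# alternative hinted at by the TODO in _test_create above: map each storage
# policy name to its replica count instead of chaining if/elif. The mapping
# and the helper below are hypothetical; they assume only the three policy
# names used by TestPlainContent and the module-level `math` import already
# used above.
_NB_COPIES_BY_POLICY = {
    "SINGLE": 1,
    "TWOCOPIES": 2,
    "THREECOPIES": 3,
}


def _expected_chunk_count(stgpol, data_size, chunk_size):
    """Expected number of chunks for a plain (replicated) content."""
    # An empty content still gets one (empty) metachunk.
    metachunk_nb = int(math.ceil(float(data_size) / chunk_size)) or 1
    return metachunk_nb * _NB_COPIES_BY_POLICY[stgpol]


# Example: _expected_chunk_count("TWOCOPIES", 0, chunk_size) == 2, matching
# the `metachunk_nb * nb_copy` assertion in _test_create.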
class TestECContent(BaseTestCase):

    def setUp(self):
        super(TestECContent, self).setUp()

        if len(self.conf['services']['rawx']) < 12:
            self.skipTest("Not enough rawx. "
                          "EC tests need at least 12 rawx to run")

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient(self.conf)
        self.container_name = "TestECContent%f" % time.time()
        self.container_client.container_create(account=self.account,
                                               reference=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()
        self.content = random_str(64)
        self.stgpol = "EC"
        self.size = 1024 * 1024 + 320
        self.k = 6
        self.m = 3

    def tearDown(self):
        super(TestECContent, self).tearDown()

    def random_chunks(self, nb):
        pos_list = random.sample(range(self.k + self.m), nb)
        return ["0.%s" % i for i in pos_list]

    def _test_create(self, data_size):
        # generate random test data
        data = random_data(data_size)
        # using the factory, create a new EC content
        content = self.content_factory.new(
            self.container_id, self.content, len(data), self.stgpol)
        # verify the factory gave us an ECContent
        self.assertEqual(type(content), ECContent)

        # perform the content creation
        content.create(StringIO(data))

        meta, chunks = self.container_client.content_show(
            cid=self.container_id, content=content.content_id)

        # verify metadata
        chunks = ChunksHelper(chunks)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], self.stgpol)
        self.assertEqual(meta['name'], self.content)

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size)) \
            if len(data) != 0 else 1

        # verify each metachunk
        for metapos in range(metachunk_nb):
            chunks_at_pos = content.chunks.filter(metapos=metapos)

            for chunk in chunks_at_pos:
                meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(meta['metachunk_size'], str(chunk.size))
                self.assertEqual(meta['metachunk_hash'], chunk.checksum)
                self.assertEqual(meta['content_path'], self.content)
                self.assertEqual(meta['container_id'], self.container_id)
                self.assertEqual(meta['content_id'], content.content_id)
                self.assertEqual(meta['chunk_id'], chunk.id)
                self.assertEqual(meta['chunk_pos'], chunk.pos)
                self.assertEqual(meta['chunk_hash'], md5_stream(stream))

    def test_create_0_byte(self):
        self._test_create(0)

    def test_create_1_byte(self):
        self._test_create(1)

    def test_create(self):
        self._test_create(DAT_LEGIT_SIZE)

    def _test_rebuild(self, data_size, broken_pos_list):
        # generate test data
        data = os.urandom(data_size)
        # create the initial content
        old_content = self.content_factory.new(
            self.container_id, self.content, len(data), self.stgpol)
        # verify the factory works as intended
        self.assertEqual(type(old_content), ECContent)

        # perform the initial content creation
        old_content.create(StringIO(data))

        uploaded_content = self.content_factory.get(self.container_id,
                                                    old_content.content_id)

        # break the content
        old_info = {}
        for pos in broken_pos_list:
            old_info[pos] = {}
            c = uploaded_content.chunks.filter(pos=pos)[0]
            old_info[pos]["url"] = c.url
            old_info[pos]["id"] = c.id
            old_info[pos]["hash"] = c.checksum
            chunk_id_to_rebuild = c.id
            meta, stream = self.blob_client.chunk_get(c.url)
            old_info[pos]["dl_meta"] = meta
            old_info[pos]["dl_hash"] = md5_stream(stream)
            # delete the chunk
            self.blob_client.chunk_delete(c.url)

        # rebuild the broken chunks
        uploaded_content.rebuild_chunk(chunk_id_to_rebuild)

        rebuilt_content = self.content_factory.get(
            self.container_id, uploaded_content.content_id)
        # sanity check
        self.assertEqual(type(rebuilt_content), ECContent)

        # verify the rebuild result
        for pos in broken_pos_list:
            c = rebuilt_content.chunks.filter(pos=pos)[0]
            rebuilt_meta, rebuilt_stream = self.blob_client.chunk_get(c.url)
            self.assertEqual(rebuilt_meta["chunk_id"], c.id)
            self.assertEqual(md5_stream(rebuilt_stream),
                             old_info[pos]["dl_hash"])
            self.assertEqual(c.checksum, old_info[pos]["hash"])
            self.assertNotEqual(c.url, old_info[pos]["url"])
            del old_info[pos]["dl_meta"]["chunk_id"]
            del rebuilt_meta["chunk_id"]
            self.assertEqual(rebuilt_meta, old_info[pos]["dl_meta"])

    def test_content_0_byte_rebuild(self):
        self._test_rebuild(0, self.random_chunks(1))

    def test_content_0_byte_rebuild_advanced(self):
        self._test_rebuild(0, self.random_chunks(3))

    def test_content_1_byte_rebuild(self):
        self._test_rebuild(1, self.random_chunks(1))

    def test_content_1_byte_rebuild_advanced(self):
        self._test_rebuild(1, self.random_chunks(3))

    def test_content_rebuild(self):
        self._test_rebuild(DAT_LEGIT_SIZE, self.random_chunks(1))

    def test_content_rebuild_advanced(self):
        self._test_rebuild(DAT_LEGIT_SIZE, self.random_chunks(3))

    def test_content_rebuild_unrecoverable(self):
        self.assertRaises(
            UnrecoverableContent, self._test_rebuild, DAT_LEGIT_SIZE,
            self.random_chunks(4))

    def _new_content(self, data, broken_pos_list=None):
        # Avoid a mutable default argument
        broken_pos_list = broken_pos_list or []
        old_content = self.content_factory.new(
            self.container_id, self.content, len(data), self.stgpol)
        self.assertEqual(type(old_content), ECContent)

        old_content.create(StringIO(data))

        # break the content
        for pos in broken_pos_list:
            c = old_content.chunks.filter(pos=pos)[0]
            self.blob_client.chunk_delete(c.url)

        # get the new structure of the uploaded content
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def test_orphan_chunk(self):
        content = self._new_content(random_data(10))

        self.assertRaises(OrphanChunk, content.rebuild_chunk, "invalid")

    def _test_fetch(self, data_size, broken_pos_list=None):
        broken_pos_list = broken_pos_list or []
        test_data = random_data(data_size)
        content = self._new_content(test_data, broken_pos_list)

        data = "".join(content.fetch())

        self.assertEqual(len(data), len(test_data))
        self.assertEqual(md5_data(data), md5_data(test_data))

        # verify that the broken chunks are still missing
        # (fetching must not have triggered a rebuild)
        for pos in broken_pos_list:
            chunk = content.chunks.filter(pos=pos)[0]
            self.assertRaises(
                NotFound, self.blob_client.chunk_delete, chunk.url)

    def test_fetch_content_0_byte(self):
        self._test_fetch(0)

    def test_fetch_content_1_byte(self):
        self._test_fetch(1)

    def test_fetch_content(self):
        self._test_fetch(DAT_LEGIT_SIZE)

    def test_fetch_content_0_byte_broken(self):
        self._test_fetch(0, self.random_chunks(3))

    def test_fetch_content_1_byte_broken(self):
        self._test_fetch(1, self.random_chunks(3))

    def test_fetch_content_broken(self):
        self._test_fetch(DAT_LEGIT_SIZE, self.random_chunks(3))

    def test_fetch_content_unrecoverable(self):
        broken_chunks = self.random_chunks(4)
        self.assertRaises(
            OioException, self._test_fetch, DAT_LEGIT_SIZE, broken_chunks)
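# For reference: with EC, a chunk position such as "0.3" encodes both the
# metachunk position (0) and the EC fragment index (3), which is why
# random_chunks() above formats positions as "0.%s" over range(k + m).
# A minimal parsing sketch; this helper is hypothetical and not part of the
# original suite.
def _parse_ec_position(pos):
    """Split an EC chunk position 'metapos.fragidx' into two ints."""
    metapos, fragidx = pos.split('.', 1)
    return int(metapos), int(fragidx)


# Example: _parse_ec_position("0.3") == (0, 3). With k=6 and m=3 as in
# TestECContent, valid fragment indexes per metachunk run from 0 to 8
# (k + m - 1), and up to m=3 fragments may be lost before a metachunk
# becomes unrecoverable, matching the 3-broken/4-broken tests above.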