class ChunkOperator(object):
    """
    Execute maintenance operations on chunks.
    """

    def __init__(self, conf, logger=None):
        self.conf = conf
        self.logger = logger or get_logger(conf)
        self.rdir_client = RdirClient(conf, logger=self.logger)
        self.content_factory = ContentFactory(conf, logger=self.logger)

    def rebuild(self, container_id, content_id, chunk_id_or_pos,
                rawx_id=None, try_chunk_delete=False, allow_same_rawx=True):
        """
        Try to find the chunk in the metadata of the specified object,
        then rebuild it.
        """
        try:
            content = self.content_factory.get(container_id, content_id)
        except ContentNotFound:
            raise OrphanChunk('Content not found: possible orphan chunk')

        chunk_size = 0
        chunk_pos = None
        chunk = None
        if len(chunk_id_or_pos) < 32:
            # The parameter is a chunk position
            chunk_pos = chunk_id_or_pos
            chunk_id = None
            metapos = int(chunk_pos.split('.', 1)[0])
            chunk_size = content.chunks.filter(metapos=metapos).all()[0].size
        else:
            # The parameter is a chunk ID, or a chunk URL
            if '/' in chunk_id_or_pos:
                chunk_id = chunk_id_or_pos.rsplit('/', 1)[-1]
            else:
                chunk_id = chunk_id_or_pos

            chunk = content.chunks.filter(id=chunk_id).one()
            if chunk is None:
                raise OrphanChunk(
                    'Chunk not found in content: possible orphan chunk')
            elif rawx_id and chunk.host != rawx_id:
                raise ValueError('Chunk does not belong to this rawx')
            chunk_size = chunk.size

        content.rebuild_chunk(
            chunk_id, allow_same_rawx=allow_same_rawx, chunk_pos=chunk_pos)

        # 'chunk' is only resolved when a chunk ID was provided
        if try_chunk_delete and chunk is not None:
            try:
                content.blob_client.chunk_delete(chunk.url)
                self.logger.info("Chunk %s deleted", chunk.url)
            except NotFound as exc:
                self.logger.debug("Chunk %s: %s", chunk.url, exc)

        # This call does not raise an exception if the chunk
        # is not referenced
        if chunk_id is not None:
            self.rdir_client.chunk_delete(
                chunk.host, container_id, content_id, chunk_id)

        return chunk_size
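
# Hedged usage sketch (illustrative, not from the original source): shows how
# the operator above is typically driven. The conf keys and the placeholder
# IDs are assumptions; ChunkOperator is the class defined just above.
def _demo_chunk_operator_rebuild():
    conf = {'namespace': 'OPENIO'}
    operator = ChunkOperator(conf)
    container_id = 'B6A9...'   # placeholder: 64-char hex container ID
    content_id = '3FA2...'     # placeholder: hex content ID
    # 'chunk_id_or_pos' accepts either a chunk ID (32 hex chars or more,
    # possibly a full chunk URL) or a metachunk position such as '0' or
    # '0.p1'. The return value is the size, in bytes, of the rebuilt chunk.
    return operator.rebuild(container_id, content_id, '0',
                            try_chunk_delete=False)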
class TestFilters(BaseTestCase):

    def setUp(self):
        with mock.patch('oio.container.client.gen_headers',
                        gen_headers_mock):
            super(TestFilters, self).setUp()
            self.account = self.conf['account']
            self.namespace = self.conf['namespace']
            self.chunk_size = self.conf['chunk_size']
            self.gridconf = {'namespace': self.namespace}
            self.content_factory = ContentFactory(self.gridconf)
            self.container_name = 'TestFilter%f' % time.time()
            self.blob_client = BlobClient()
            self.container_client = ContainerClient(self.gridconf)
            self.container_client.container_create(acct=self.account,
                                                   ref=self.container_name)
            self.container_id = cid_from_name(self.account,
                                              self.container_name).upper()
            self.stgpol = "SINGLE"

    def _new_content(self, data, path):
        old_content = self.content_factory.new(self.container_id, path,
                                               len(data), self.stgpol)
        old_content.create(StringIO.StringIO(data))
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def test_slave_and_admin(self):
        if not os.getenv("SLAVE"):
            self.skipTest("must be in slave mode")
        data = random_data(10)
        path = 'test_slave'
        try:
            self._new_content(data, path)
            self.fail('create should have raised ClientException')
        except ClientException as e:
            self.assertIn('NS slave!', str(e))
        with mock.patch('oio.container.client.gen_headers',
                        gen_headers_mock):
            content = self._new_content(data, path)
            content.delete()

    def test_worm_and_admin(self):
        if not os.getenv("WORM"):
            self.skipTest("must be in worm mode")
        data = random_data(10)
        path = 'test_worm'
        content = self._new_content(data, path)
        try:
            content.delete()
            self.fail('delete should have raised ClientException')
        except ClientException as e:
            self.assertIn('NS wormed!', str(e))
        downloaded_data = ''.join(content.fetch())
        self.assertEqual(downloaded_data, data)
        with mock.patch('oio.container.client.gen_headers',
                        gen_headers_mock):
            content.delete()
class TestContentFactory(BaseTestCase):

    def setUp(self):
        super(TestContentFactory, self).setUp()
        self.namespace = self.conf['namespace']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_name = "TestContentFactory%f" % time.time()
        self.container_client = ContainerClient(self.gridconf)
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()

    def tearDown(self):
        super(TestContentFactory, self).tearDown()

    def test_extract_datasec(self):
        self.content_factory.ns_info = {
            "data_security": {
                "DUPONETWO": "DUP:distance=1|nb_copy=2",
                "RAIN": "RAIN:k=6|m=2|algo=liber8tion"
            },
            "storage_policy": {
                "RAIN": "NONE:RAIN:NONE",
                "SINGLE": "NONE:NONE:NONE",
                "TWOCOPIES": "NONE:DUPONETWO:NONE"
            }
        }

        ds_type, ds_args = self.content_factory._extract_datasec("RAIN")
        self.assertEqual(ds_type, "RAIN")
        self.assertEqual(ds_args, {"k": "6", "m": "2", "algo": "liber8tion"})

        ds_type, ds_args = self.content_factory._extract_datasec("SINGLE")
        self.assertEqual(ds_type, "DUP")
        self.assertEqual(ds_args, {"nb_copy": "1", "distance": "0"})

        ds_type, ds_args = self.content_factory._extract_datasec("TWOCOPIES")
        self.assertEqual(ds_type, "DUP")
        self.assertEqual(ds_args, {"nb_copy": "2", "distance": "1"})

        self.assertRaises(InconsistentContent,
                          self.content_factory._extract_datasec,
                          "UnKnOwN")

    def test_get_rain(self):
        meta = {
            "chunk-method": "plain/rain?algo=liber8tion&k=6&m=2",
            "ctime": "1450176946",
            "deleted": "False",
            "hash": "E952A419957A6E405BFC53EC65483F73",
            "hash-method": "md5",
            "id": "3FA2C4A1ED2605005335A276890EC458",
            "length": "658",
            "mime-type": "application/octet-stream",
            "name": "tox.ini",
            "policy": "RAIN",
            "version": "1450176946676289"
        }
        chunks = [
            {
                "url": "http://127.0.0.1:6012/A0A0",
                "pos": "0.p0", "size": 512,
                "hash": "E7D4E4AD460971CA2E3141F2102308D4"
            },
            {
                "url": "http://127.0.0.1:6010/A01",
                "pos": "0.1", "size": 146,
                "hash": "760AB5DA7C51A3654F1CA622687CD6C3"
            },
            {
                "url": "http://127.0.0.1:6011/A00",
                "pos": "0.0", "size": 512,
                "hash": "B1D08B86B8CAA90A2092CCA0DF9201DB"
            },
            {
                "url": "http://127.0.0.1:6013/A0A1",
                "pos": "0.p1", "size": 512,
                "hash": "DA9D7F72AEEA5791565724424CE45C16"
            }
        ]
        self.content_factory.container_client.content_show = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.get("xxx_container_id", "xxx_content_id")
        self.assertEqual(type(c), RainContent)
        self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458")
        self.assertEqual(c.length, 658)
        self.assertEqual(c.path, "tox.ini")
        self.assertEqual(c.version, "1450176946676289")
        self.assertEqual(c.algo, "liber8tion")
        self.assertEqual(c.k, 6)
        self.assertEqual(c.m, 2)
        self.assertEqual(len(c.chunks), 4)
        self.assertEqual(c.chunks[0].raw(), chunks[2])
        self.assertEqual(c.chunks[1].raw(), chunks[1])
        self.assertEqual(c.chunks[2].raw(), chunks[0])
        self.assertEqual(c.chunks[3].raw(), chunks[3])

    def test_get_dup(self):
        meta = {
            "chunk-method": "plain/bytes",
            "ctime": "1450176946",
            "deleted": "False",
            "hash": "E952A419957A6E405BFC53EC65483F73",
            "hash-method": "md5",
            "id": "3FA2C4A1ED2605005335A276890EC458",
            "length": "658",
            "mime-type": "application/octet-stream",
            "name": "tox.ini",
            "policy": "TWOCOPIES",
            "version": "1450176946676289"
        }
        chunks = [
            {
                "url": "http://127.0.0.1:6010/A0",
                "pos": "0", "size": 658,
                "hash": "E952A419957A6E405BFC53EC65483F73"
            },
            {
                "url": "http://127.0.0.1:6011/A1",
                "pos": "0", "size": 658,
                "hash": "E952A419957A6E405BFC53EC65483F73"
            }
        ]
        self.content_factory.container_client.content_show = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.get("xxx_container_id", "xxx_content_id")
        self.assertEqual(type(c), DupContent)
        self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458")
        self.assertEqual(c.length, 658)
        self.assertEqual(c.path, "tox.ini")
        self.assertEqual(c.version, "1450176946676289")
        self.assertEqual(c.nb_copy, 2)
        self.assertEqual(c.distance, 1)
        self.assertEqual(len(c.chunks), 2)
        self.assertEqual(c.chunks[0].raw(), chunks[0])
        self.assertEqual(c.chunks[1].raw(), chunks[1])

    def test_new_rain(self):
        meta = {
            "chunk-method": "plain/rain?algo=liber8tion&k=6&m=2",
            "ctime": "1450341162",
            "deleted": "False",
            "hash": "",
            "hash-method": "md5",
            "id": "F4B1C8DD132705007DE8B43D0709DAA2",
            "length": "1000",
            "mime-type": "application/octet-stream",
            "name": "titi",
            "policy": "RAIN",
            "version": "1450341162332663"
        }
        chunks = [
            {
                "url": "http://127.0.0.1:6010/0_p1",
                "pos": "0.p1", "size": 1048576,
                "hash": "00000000000000000000000000000000"
            },
            {
                "url": "http://127.0.0.1:6011/0_p0",
                "pos": "0.p0", "size": 1048576,
                "hash": "00000000000000000000000000000000"
            },
            {
                "url": "http://127.0.0.1:6016/0_1",
                "pos": "0.1", "size": 1048576,
                "hash": "00000000000000000000000000000000"
            },
            {
                "url": "http://127.0.0.1:6017/0_0",
                "pos": "0.0", "size": 1048576,
                "hash": "00000000000000000000000000000000"
            }
        ]
        self.content_factory.container_client.content_prepare = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.new("xxx_container_id", "titi",
                                     1000, "RAIN")
        self.assertEqual(type(c), RainContent)
        self.assertEqual(c.content_id, "F4B1C8DD132705007DE8B43D0709DAA2")
        self.assertEqual(c.length, 1000)
        self.assertEqual(c.path, "titi")
        self.assertEqual(c.version, "1450341162332663")
        self.assertEqual(c.algo, "liber8tion")
        self.assertEqual(c.k, 6)
        self.assertEqual(c.m, 2)
        self.assertEqual(len(c.chunks), 4)
        self.assertEqual(c.chunks[0].raw(), chunks[3])
        self.assertEqual(c.chunks[1].raw(), chunks[2])
        self.assertEqual(c.chunks[2].raw(), chunks[1])
        self.assertEqual(c.chunks[3].raw(), chunks[0])

    def _new_content(self, stgpol, data):
        old_content = self.content_factory.new(self.container_id, "titi",
                                               len(data), stgpol)
        old_content.upload(StringIO.StringIO(data))
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def _test_change_policy(self, data_size, old_policy, new_policy):
        if (old_policy == "RAIN" or new_policy == "RAIN") \
                and len(self.conf['rawx']) < 8:
            self.skipTest("RAIN: need at least 8 rawx to run")

        data = random_data(data_size)
        obj_type = {
            "SINGLE": DupContent,
            "TWOCOPIES": DupContent,
            "THREECOPIES": DupContent,
            "RAIN": RainContent
        }
        old_content = self._new_content(old_policy, data)
        self.assertEqual(type(old_content), obj_type[old_policy])

        changed_content = self.content_factory.change_policy(
            old_content.container_id, old_content.content_id, new_policy)

        self.assertRaises(NotFound, self.container_client.content_show,
                          self.account,
                          cid=old_content.container_id,
                          content=old_content.content_id)

        new_content = self.content_factory.get(self.container_id,
                                               changed_content.content_id)
        self.assertEqual(type(new_content), obj_type[new_policy])

        downloaded_data = "".join(new_content.download())
        self.assertEqual(downloaded_data, data)

    # TODO: add tests with empty RAIN contents when supported

    def test_change_content_1_byte_policy_single_to_rain(self):
        self._test_change_policy(1, "SINGLE", "RAIN")

    def test_change_content_chunksize_bytes_policy_twocopies_to_rain(self):
        self._test_change_policy(self.chunk_size, "TWOCOPIES", "RAIN")

    def test_change_content_2xchunksize_bytes_policy_threecopies_to_rain(self):
        self._test_change_policy(self.chunk_size * 2, "THREECOPIES", "RAIN")

    def test_change_content_1_byte_policy_rain_to_threecopies(self):
        self._test_change_policy(1, "RAIN", "THREECOPIES")

    def test_change_content_chunksize_bytes_policy_rain_to_twocopies(self):
        self._test_change_policy(self.chunk_size, "RAIN", "TWOCOPIES")

    def test_change_content_2xchunksize_bytes_policy_rain_to_single(self):
        self._test_change_policy(self.chunk_size * 2, "RAIN", "SINGLE")

    def test_change_content_0_byte_policy_twocopies_to_threecopies(self):
        self._test_change_policy(0, "TWOCOPIES", "THREECOPIES")

    def test_change_content_chunksize_bytes_policy_single_to_twocopies(self):
        self._test_change_policy(self.chunk_size, "SINGLE", "TWOCOPIES")

    def test_change_content_2xchunksize_bytes_policy_3copies_to_single(self):
        self._test_change_policy(self.chunk_size * 2, "THREECOPIES",
                                 "SINGLE")

    def test_change_content_with_same_policy(self):
        data = random_data(10)
        old_content = self._new_content("TWOCOPIES", data)
        changed_content = self.content_factory.change_policy(
            old_content.container_id, old_content.content_id, "TWOCOPIES")
        self.assertEqual(old_content.content_id, changed_content.content_id)

    def test_change_policy_unknown_content(self):
        self.assertRaises(ContentNotFound,
                          self.content_factory.change_policy,
                          self.container_id, "1234", "SINGLE")

    def test_change_policy_unknown_storage_policy(self):
        data = random_data(10)
        old_content = self._new_content("TWOCOPIES", data)
        self.assertRaises(ClientException,
                          self.content_factory.change_policy,
                          self.container_id, old_content.content_id,
                          "UnKnOwN")
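
# Hedged sketch (illustrative, not from the original source) of the
# change_policy flow exercised by the tests above: the factory destroys the
# old content and registers a new one, so the content ID changes and the
# caller must use the returned object. The arguments are placeholders.
def _demo_change_policy(factory, container_id, content_id):
    # 'factory' is a ContentFactory, e.g. ContentFactory({'namespace': ...})
    new_content = factory.change_policy(container_id, content_id, 'RAIN')
    # the old content ID is gone; download through the new object
    data = ''.join(new_content.download())
    return new_content.content_id, data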
class TestDupContent(BaseTestCase):

    def setUp(self):
        super(TestDupContent, self).setUp()

        if len(self.conf['rawx']) < 3:
            self.skipTest("Not enough rawx: "
                          "dup tests need at least 3 rawx to run")

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient()
        self.container_name = "TestDupContent%f" % time.time()
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()

    def tearDown(self):
        super(TestDupContent, self).tearDown()

    def _test_upload(self, stgpol, data_size):
        data = random_data(data_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), stgpol)
        self.assertEqual(type(content), DupContent)

        content.upload(StringIO.StringIO(data))

        meta, chunks = self.container_client.content_show(
            cid=self.container_id, content=content.content_id)
        chunks = ChunksHelper(chunks)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], stgpol)
        self.assertEqual(meta['name'], "titi")

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
        if metachunk_nb == 0:
            metachunk_nb = 1  # special case for empty content

        if stgpol == "THREECOPIES":
            nb_copy = 3
        elif stgpol == "TWOCOPIES":
            nb_copy = 2
        elif stgpol == "SINGLE":
            nb_copy = 1

        self.assertEqual(len(chunks), metachunk_nb * nb_copy)

        for pos in range(metachunk_nb):
            chunks_at_pos = chunks.filter(pos=pos)
            self.assertEqual(len(chunks_at_pos), nb_copy)

            data_begin = pos * self.chunk_size
            data_end = pos * self.chunk_size + self.chunk_size
            chunk_hash = md5_data(data[data_begin:data_end])

            for chunk in chunks_at_pos:
                meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(md5_stream(stream), chunk_hash)
                self.assertEqual(meta['content_size'], str(len(data)))
                self.assertEqual(meta['content_path'], "titi")
                self.assertEqual(meta['content_cid'], self.container_id)
                self.assertEqual(meta['content_id'], content.content_id)
                self.assertEqual(meta['chunk_id'], chunk.id)
                self.assertEqual(meta['chunk_pos'], str(pos))
                self.assertEqual(meta['chunk_hash'], chunk_hash)

    def test_twocopies_upload_0_byte(self):
        self._test_upload("TWOCOPIES", 0)

    def test_twocopies_upload_1_byte(self):
        self._test_upload("TWOCOPIES", 1)

    def test_twocopies_upload_chunksize_bytes(self):
        self._test_upload("TWOCOPIES", self.chunk_size)

    def test_twocopies_upload_chunksize_plus_1_bytes(self):
        self._test_upload("TWOCOPIES", self.chunk_size + 1)

    def test_single_upload_0_byte(self):
        self._test_upload("SINGLE", 0)

    def test_single_upload_chunksize_plus_1_bytes(self):
        self._test_upload("SINGLE", self.chunk_size + 1)

    def test_chunks_cleanup_when_upload_failed(self):
        data = random_data(2 * self.chunk_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), "TWOCOPIES")
        self.assertEqual(type(content), DupContent)

        # set a bad url for position 1
        for chunk in content.chunks.filter(pos=1):
            chunk.url = "http://127.0.0.1:9/DEADBEEF"

        self.assertRaises(Exception, content.upload,
                          StringIO.StringIO(data))
        for chunk in content.chunks.exclude(pos=1):
            self.assertRaises(NotFound,
                              self.blob_client.chunk_head, chunk.url)

    def _new_content(self, stgpol, data, broken_pos_list):
        old_content = self.content_factory.new(self.container_id, "titi",
                                               len(data), stgpol)
        self.assertEqual(type(old_content), DupContent)

        old_content.upload(StringIO.StringIO(data))

        for pos, idx in broken_pos_list:
            c = old_content.chunks.filter(pos=pos)[idx]
            self.blob_client.chunk_delete(c.url)

        # get the new structure of the uploaded content
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def _test_download(self, stgpol, data_size, broken_pos_list):
        data = random_data(data_size)
        content = self._new_content(stgpol, data, broken_pos_list)

        downloaded_data = "".join(content.download())

        self.assertEqual(downloaded_data, data)

        for pos, idx in broken_pos_list:
            # check that nothing has been rebuilt
            c = content.chunks.filter(pos=pos)[0]
            self.assertRaises(NotFound, self.blob_client.chunk_delete, c.url)

    def test_twocopies_download_content_0_byte_without_broken_chunks(self):
        self._test_download("TWOCOPIES", 0, [])

    def test_twocopies_download_content_0_byte_with_broken_0_0(self):
        self._test_download("TWOCOPIES", 0, [(0, 0)])

    def test_twocopies_download_content_1_byte_without_broken_chunks(self):
        self._test_download("TWOCOPIES", 1, [])

    def test_twocopies_download_content_1_byte_with_broken_0_0(self):
        self._test_download("TWOCOPIES", 1, [(0, 0)])

    def test_twocopies_download_chunksize_bytes_without_broken_chunks(self):
        self._test_download("TWOCOPIES", self.chunk_size, [])

    def test_twocopies_download_2xchuksize_bytes_with_broken_0_0_and_1_0(self):
        self._test_download("TWOCOPIES", self.chunk_size * 2,
                            [(0, 0), (1, 0)])

    def test_twocopies_download_content_chunksize_bytes_2_broken_chunks(self):
        data = random_data(self.chunk_size)
        content = self._new_content("TWOCOPIES", data, [(0, 0), (0, 1)])
        gen = content.download()
        self.assertRaises(UnrecoverableContent, gen.next)

    def test_single_download_content_1_byte_without_broken_chunks(self):
        self._test_download("SINGLE", 1, [])

    def test_single_download_chunksize_bytes_plus_1_without_broken_chunk(self):
        self._test_download("SINGLE", self.chunk_size * 2, [])
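
# Hedged helper sketch (illustrative, not from the original source): the
# metachunk/copy accounting that _test_upload above verifies. An empty
# content still occupies one metachunk, hence the 'or 1'.
def _expected_chunk_count(data_len, chunk_size, nb_copy):
    import math
    metachunk_nb = int(math.ceil(float(data_len) / chunk_size)) or 1
    return metachunk_nb * nb_copy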
class TestDupContent(BaseTestCase):

    def setUp(self):
        super(TestDupContent, self).setUp()

        if len(self.conf['rawx']) < 3:
            self.skipTest("Not enough rawx: "
                          "dup tests need at least 3 rawx to run")

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient()
        self.container_name = "TestDupContent%f" % time.time()
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()

    def tearDown(self):
        super(TestDupContent, self).tearDown()

    def _test_upload(self, stgpol, data_size):
        data = random_data(data_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), stgpol)
        self.assertEqual(type(content), DupContent)

        content.upload(StringIO.StringIO(data))

        meta, chunks = self.container_client.content_show(
            cid=self.container_id, content=content.content_id)
        chunks = ChunksHelper(chunks)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], stgpol)
        self.assertEqual(meta['name'], "titi")

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
        if metachunk_nb == 0:
            metachunk_nb = 1  # special case for empty content

        if stgpol == "THREECOPIES":
            nb_copy = 3
        elif stgpol == "TWOCOPIES":
            nb_copy = 2
        elif stgpol == "SINGLE":
            nb_copy = 1

        self.assertEqual(len(chunks), metachunk_nb * nb_copy)

        for pos in range(metachunk_nb):
            chunks_at_pos = chunks.filter(pos=pos)
            self.assertEqual(len(chunks_at_pos), nb_copy)

            data_begin = pos * self.chunk_size
            data_end = pos * self.chunk_size + self.chunk_size
            chunk_hash = md5_data(data[data_begin:data_end])

            for chunk in chunks_at_pos:
                meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(md5_stream(stream), chunk_hash)
                self.assertEqual(meta['content_size'], str(len(data)))
                self.assertEqual(meta['content_path'], "titi")
                self.assertEqual(meta['content_cid'], self.container_id)
                self.assertEqual(meta['content_id'], content.content_id)
                self.assertEqual(meta['chunk_id'], chunk.id)
                self.assertEqual(meta['chunk_pos'], str(pos))
                self.assertEqual(meta['chunk_hash'], chunk_hash)

    def test_twocopies_upload_0_byte(self):
        self._test_upload("TWOCOPIES", 0)

    def test_twocopies_upload_1_byte(self):
        self._test_upload("TWOCOPIES", 1)

    def test_twocopies_upload_chunksize_bytes(self):
        self._test_upload("TWOCOPIES", self.chunk_size)

    def test_twocopies_upload_chunksize_plus_1_bytes(self):
        self._test_upload("TWOCOPIES", self.chunk_size + 1)

    def test_single_upload_0_byte(self):
        self._test_upload("SINGLE", 0)

    def test_single_upload_chunksize_plus_1_bytes(self):
        self._test_upload("SINGLE", self.chunk_size + 1)

    def test_chunks_cleanup_when_upload_failed(self):
        data = random_data(2 * self.chunk_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), "TWOCOPIES")
        self.assertEqual(type(content), DupContent)

        # set a bad url for position 1
        for chunk in content.chunks.filter(pos=1):
            chunk.url = "http://127.0.0.1:9/DEADBEEF"

        self.assertRaises(Exception, content.upload,
                          StringIO.StringIO(data))
        for chunk in content.chunks.exclude(pos=1):
            self.assertRaises(NotFound,
                              self.blob_client.chunk_head, chunk.url)

    def _new_content(self, stgpol, data, broken_pos_list=None):
        # avoid a mutable default argument
        if broken_pos_list is None:
            broken_pos_list = []
        old_content = self.content_factory.new(self.container_id, "titi",
                                               len(data), stgpol)
        self.assertEqual(type(old_content), DupContent)

        old_content.upload(StringIO.StringIO(data))

        broken_chunks_info = {}
        for pos, idx in broken_pos_list:
            c = old_content.chunks.filter(pos=pos)[idx]
            meta, stream = self.blob_client.chunk_get(c.url)
            if pos not in broken_chunks_info:
                broken_chunks_info[pos] = {}
            broken_chunks_info[pos][idx] = {
                "url": c.url,
                "id": c.id,
                "hash": c.hash,
                "dl_meta": meta,
                "dl_hash": md5_stream(stream)
            }
            self.blob_client.chunk_delete(c.url)

        # get the new structure of the uploaded content
        return (self.content_factory.get(self.container_id,
                                         old_content.content_id),
                broken_chunks_info)

    def _test_rebuild(self, stgpol, data_size, broken_pos_list,
                      full_rebuild_pos):
        data = random_data(data_size)
        content, broken_chunks_info = self._new_content(stgpol, data,
                                                        broken_pos_list)

        rebuild_pos, rebuild_idx = full_rebuild_pos
        rebuild_chunk_info = broken_chunks_info[rebuild_pos][rebuild_idx]
        content.rebuild_chunk(rebuild_chunk_info["id"])

        # get the new structure of the content
        rebuilt_content = self.content_factory.get(self.container_id,
                                                   content.content_id)
        self.assertEqual(type(rebuilt_content), DupContent)

        # find the rebuilt chunk
        for c in rebuilt_content.chunks.filter(pos=rebuild_pos):
            if len(content.chunks.filter(id=c.id)) > 0:
                # not the rebuilt chunk:
                # if this chunk is broken, it must not have been rebuilt
                for b_c_i in broken_chunks_info[rebuild_pos].values():
                    if c.id == b_c_i["id"]:
                        with ExpectedException(NotFound):
                            _, _ = self.blob_client.chunk_get(c.url)
                continue
            meta, stream = self.blob_client.chunk_get(c.url)
            self.assertEqual(meta["chunk_id"], c.id)
            self.assertEqual(md5_stream(stream),
                             rebuild_chunk_info["dl_hash"])
            self.assertEqual(c.hash, rebuild_chunk_info["hash"])
            self.assertThat(c.url, NotEquals(rebuild_chunk_info["url"]))
            del meta["chunk_id"]
            del rebuild_chunk_info["dl_meta"]["chunk_id"]
            self.assertEqual(meta, rebuild_chunk_info["dl_meta"])

    def test_2copies_content_0_byte_1broken_rebuild_pos_0_idx_0(self):
        self._test_rebuild("TWOCOPIES", 0, [(0, 0)], (0, 0))

    def test_2copies_content_1_byte_1broken_rebuild_pos_0_idx_1(self):
        self._test_rebuild("TWOCOPIES", 1, [(0, 1)], (0, 1))

    def test_3copies_content_chunksize_bytes_2broken_rebuild_pos_0_idx_1(self):
        if len(self.conf['rawx']) <= 3:
            self.skipTest("Need more than 3 rawx")
        self._test_rebuild("THREECOPIES", self.chunk_size,
                           [(0, 0), (0, 1)], (0, 1))

    def test_3copies_content_2xchksize_bytes_2broken_rebuild_pos_1_idx_2(self):
        if len(self.conf['rawx']) <= 3:
            self.skipTest("Need more than 3 rawx")
        self._test_rebuild("THREECOPIES", 2 * self.chunk_size,
                           [(1, 0), (1, 2)], (1, 2))

    def test_2copies_content_0_byte_2broken_rebuild_pos_0_idx_0(self):
        with ExpectedException(UnrecoverableContent):
            self._test_rebuild("TWOCOPIES", 0, [(0, 0), (0, 1)], (0, 0))

    def _test_download(self, stgpol, data_size, broken_pos_list):
        data = random_data(data_size)
        content, _ = self._new_content(stgpol, data, broken_pos_list)

        downloaded_data = "".join(content.download())

        self.assertEqual(downloaded_data, data)

        for pos, idx in broken_pos_list:
            # check that nothing has been rebuilt
            c = content.chunks.filter(pos=pos)[0]
            self.assertRaises(NotFound, self.blob_client.chunk_delete, c.url)

    def test_twocopies_download_content_0_byte_without_broken_chunks(self):
        self._test_download("TWOCOPIES", 0, [])

    def test_twocopies_download_content_0_byte_with_broken_0_0(self):
        self._test_download("TWOCOPIES", 0, [(0, 0)])

    def test_twocopies_download_content_1_byte_without_broken_chunks(self):
        self._test_download("TWOCOPIES", 1, [])

    def test_twocopies_download_content_1_byte_with_broken_0_0(self):
        self._test_download("TWOCOPIES", 1, [(0, 0)])

    def test_twocopies_download_chunksize_bytes_without_broken_chunks(self):
        self._test_download("TWOCOPIES", self.chunk_size, [])

    def test_twocopies_download_2xchuksize_bytes_with_broken_0_0_and_1_0(self):
        self._test_download("TWOCOPIES", self.chunk_size * 2,
                            [(0, 0), (1, 0)])

    def test_twocopies_download_content_chunksize_bytes_2_broken_chunks(self):
        data = random_data(self.chunk_size)
        content, _ = self._new_content("TWOCOPIES", data, [(0, 0), (0, 1)])
        gen = content.download()
        self.assertRaises(UnrecoverableContent, gen.next)

    def test_single_download_content_1_byte_without_broken_chunks(self):
        self._test_download("SINGLE", 1, [])

    def test_single_download_chunksize_bytes_plus_1_without_broken_chunk(self):
        self._test_download("SINGLE", self.chunk_size * 2, [])
class BlobConverter(object):

    def __init__(self, conf, logger=None, **kwargs):
        self.conf = conf
        self.logger = logger or get_logger(conf)
        volume = conf.get('volume')
        if not volume:
            raise ConfigurationException('No volume specified for converter')
        self.volume = volume
        self.namespace, self.volume_id = check_volume(self.volume)
        # cache
        self.name_by_cid = CacheDict()
        self.content_id_by_name = CacheDict()
        # client
        self.container_client = ContainerClient(conf, **kwargs)
        self.content_factory = ContentFactory(conf, self.container_client,
                                              logger=self.logger)
        # stats/logs
        self.errors = 0
        self.passes = 0
        self.total_chunks_processed = 0
        self.start_time = 0
        self.last_reported = 0
        self.report_interval = int_value(conf.get('report_interval'), 3600)
        # speed
        self.chunks_run_time = 0
        self.max_chunks_per_second = int_value(
            conf.get('chunks_per_second'), 30)
        # backup
        self.no_backup = true_value(conf.get('no_backup', False))
        self.backup_dir = conf.get('backup_dir') or tempfile.gettempdir()
        self.backup_name = 'backup_%s_%f' % (self.volume_id, time.time())
        # dry run
        self.dry_run = true_value(conf.get('dry_run', False))

    def save_xattr(self, chunk_id, xattr):
        if self.no_backup:
            return
        dirname = self.backup_dir + '/' + self.backup_name + '/' \
            + chunk_id[:3]
        try:
            os.makedirs(dirname)
        except OSError:
            if not os.path.isdir(dirname):
                raise
        with open(dirname + '/' + chunk_id, 'w') as backup_fd:
            # same format as getfattr
            backup_fd.write('# file: ' + self._get_path(chunk_id) + '\n')
            for k, v in xattr.iteritems():
                backup_fd.write('user.' + k + '="' + v + '"\n')

    def _save_container(self, cid, account, container):
        cid = cid.upper()
        self.name_by_cid[cid] = (account, container)
        return cid, account, container

    def _save_content(self, cid, path, version, content_id):
        cid = cid.upper()
        content_id = content_id.upper()
        self.content_id_by_name[(cid, path, version)] = content_id
        return cid, path, version, content_id

    def _get_path(self, chunk_id):
        return self.volume + '/' + chunk_id[:3] + '/' + chunk_id

    def cid_from_name(self, account, container):
        cid = cid_from_name(account, container)
        cid, account, container = self._save_container(
            cid, account, container)
        return cid

    def name_from_cid(self, cid):
        name = self.name_by_cid.get(cid)
        if name:
            return name

        properties = self.container_client.container_get_properties(cid=cid)
        account = properties['system']['sys.account']
        container = properties['system']['sys.user.name']
        cid, account, container = self._save_container(
            cid, account, container)
        return account, container

    def content_id_from_name(self, cid, path, version, search=False):
        content_id = self.content_id_by_name.get((cid, path, version))
        if content_id or not search:
            return content_id

        properties = self.container_client.content_get_properties(
            cid=cid, path=path, version=version)
        content_id = properties['id']
        cid, path, version, content_id = self._save_content(
            cid, path, version, content_id)
        return content_id

    def decode_fullpath(self, fullpath):
        account, container, path, version, content_id = decode_fullpath(
            fullpath)
        cid = self.cid_from_name(account, container)
        cid, path, version, content_id = self._save_content(
            cid, path, version, content_id)
        return account, container, cid, path, version, content_id

    def decode_old_fullpath(self, old_fullpath):
        account, container, path, version = decode_old_fullpath(old_fullpath)
        cid = self.cid_from_name(account, container)
        content_id = self.content_id_from_name(cid, path, version)
        return account, container, cid, path, version, content_id

    def encode_fullpath(self, chunk_inode, chunk_id, account, container,
                        path, version, content_id):
        # check that the chunk exists and has the same inode
        if not is_hexa(chunk_id) or len(chunk_id) != STRLEN_CHUNKID:
            raise ValueError('chunk ID must be hexadecimal (%s)'
                             % STRLEN_CHUNKID)
        try:
            chunk_inode2 = os.stat(self._get_path(chunk_id)).st_ino
        except OSError:
            raise OrphanChunk('No such chunk: possible orphan chunk')
        if chunk_inode2 != chunk_inode:
            raise OrphanChunk('Not the same inode: possible orphan chunk')

        # check fullpath and chunk ID
        if isinstance(version, basestring):
            try:
                version = int(version)
            except ValueError:
                raise ValueError('version must be a number')
        if version <= 0:
            raise ValueError('version must be positive')
        if not is_hexa(content_id):
            raise ValueError('content ID must be hexadecimal')

        fullpath = encode_fullpath(account, container, path, version,
                                   content_id.upper())

        return chunk_id.upper(), fullpath

    def _get_chunk_id_and_fullpath(self, chunk_inode, chunk_pos, content,
                                   chunk_id=None):
        content.container_id, content.account, content.container_name = \
            self._save_container(content.container_id, content.account,
                                 content.container_name)
        content.container_id, content.path, content.version, \
            content.content_id = self._save_content(
                content.container_id, content.path, content.version,
                content.content_id)

        chunks = content.chunks.filter(host=self.volume_id)
        if chunk_id:
            chunks = chunks.filter(id=chunk_id)
        chunk = chunks.filter(pos=chunk_pos).one()
        if chunk is None:
            raise OrphanChunk('Chunk not found in content: '
                              'possible orphan chunk')

        chunk_id, new_fullpath = self.encode_fullpath(
            chunk_inode, chunk.id, content.account, content.container_name,
            content.path, content.version, content.content_id)
        return chunk_id, new_fullpath

    def get_chunk_id_and_fullpath(self, chunk_inode, chunk_pos, container_id,
                                  path, version, chunk_id=None, account=None,
                                  container=None, content_id=None):
        if account is None or container is None:
            account, container = self.name_from_cid(container_id)

        if content_id:
            try:
                content = self.content_factory.get(
                    container_id, content_id,
                    account=account, container_name=container)
                return self._get_chunk_id_and_fullpath(
                    chunk_inode, chunk_pos, content, chunk_id=chunk_id)
            except Exception as exc:
                self.logger.warn(
                    'chunk_id=%s chunk_pos=%s object=%s/%s/%s/%s/%s/%s: %s',
                    chunk_id, chunk_pos, str(account), str(container),
                    container_id, path, str(version), str(content_id), exc)

        # version must be an integer
        try:
            version = str(int(version))
        except Exception:
            version = None

        try:
            content = self.content_factory.get_by_path_and_version(
                container_id, path, version,
                account=account, container_name=container)
        except ContentNotFound:
            raise OrphanChunk('Content not found: possible orphan chunk')
        return self._get_chunk_id_and_fullpath(
            chunk_inode, chunk_pos, content, chunk_id=chunk_id)

    def convert_chunk(self, fd, chunk_id):
        meta, raw_meta = read_chunk_metadata(fd, chunk_id,
                                             check_chunk_id=False)

        links = meta.get('links', dict())
        for chunk_id2, fullpath2 in links.iteritems():
            self.decode_fullpath(fullpath2)

        fullpath = meta.get('full_path')
        if fullpath is not None:
            self.decode_fullpath(fullpath)
            if meta.get('oio_version') == OIO_VERSION:
                return True, meta

        chunk_inode = os.fstat(fd.fileno()).st_ino
        raw_chunk_id = None
        chunk_id = chunk_id.upper()
        chunk_pos = meta['chunk_pos']
        container_id = meta['container_id'].upper()
        path = meta['content_path']
        version = meta['content_version']
        content_id = meta['content_id'].upper()

        new_fullpaths = dict()
        xattr_to_remove = list()
        success = True

        for k, v in raw_meta.iteritems():
            # fetch the raw chunk ID
            if k == XATTR_CHUNK_ID:
                raw_chunk_id = v.upper()

            # search for old fullpaths
            if not k.startswith(XATTR_OLD_FULLPATH) \
                    or not is_hexa(k[4:], size=64):
                continue

            try:
                account2, container2, container_id2, path2, version2, \
                    content_id2 = self.decode_old_fullpath(v)

                if container_id == container_id2 and path == path2 \
                        and version == version2:
                    if content_id2 is None:
                        content_id2 = self.content_id_from_name(
                            container_id2, path2, version2, search=True)
                    chunk_id, new_fullpath = self.encode_fullpath(
                        chunk_inode, chunk_id, account2, container2, path2,
                        version2, content_id2)
                    new_fullpaths[chunk_id] = new_fullpath
                else:
                    chunk_id2, new_fullpath = \
                        self.get_chunk_id_and_fullpath(
                            chunk_inode, chunk_pos, container_id2, path2,
                            version2, account=account2, container=container2,
                            content_id=content_id2)
                    new_fullpaths[chunk_id2] = new_fullpath

                xattr_to_remove.append(k)
            except Exception as exc:
                success = False
                self.logger.warn('chunk_id=%s old_fullpath=%s: %s',
                                 chunk_id, k, exc)

        # old xattr
        if raw_chunk_id is not None:
            try:
                if raw_chunk_id != chunk_id and raw_chunk_id not in links:
                    if raw_chunk_id not in new_fullpaths:
                        meta2, _ = read_chunk_metadata(fd, raw_chunk_id)
                        container_id2 = meta2['container_id'].upper()
                        path2 = meta2['content_path']
                        version2 = meta2['content_version']
                        content_id2 = meta2['content_id'].upper()
                        raw_chunk_id, new_fullpath = \
                            self.get_chunk_id_and_fullpath(
                                chunk_inode, chunk_pos, container_id2,
                                path2, version2, chunk_id=raw_chunk_id,
                                content_id=content_id2)
                        new_fullpaths[raw_chunk_id] = new_fullpath
                elif raw_chunk_id == chunk_id and fullpath is None:
                    if raw_chunk_id not in new_fullpaths:
                        raw_chunk_id, new_fullpath = \
                            self.get_chunk_id_and_fullpath(
                                chunk_inode, chunk_pos, container_id,
                                path, version, chunk_id=raw_chunk_id,
                                content_id=content_id)
                        new_fullpaths[raw_chunk_id] = new_fullpath
            except Exception as exc:
                success = False
                self.logger.warn('chunk_id=%s (old xattr): %s',
                                 raw_chunk_id, exc)

        self.save_xattr(chunk_id, raw_meta)

        if self.dry_run:
            self.logger.info(
                "[dryrun] Converting chunk %s: success=%s new_fullpaths=%s "
                "xattr_to_remove=%s",
                chunk_id, str(success), str(new_fullpaths),
                str(xattr_to_remove))
        else:
            # for safety, if there was an error, don't delete old xattr
            modify_xattr(fd, new_fullpaths, success, xattr_to_remove)

        return success, None

    def safe_convert_chunk(self, path, fd=None, chunk_id=None):
        if chunk_id is None:
            chunk_id = path.rsplit('/', 1)[-1]
        if len(chunk_id) != STRLEN_CHUNKID:
            self.logger.warn('Not a chunk %s' % path)
            return
        for c in chunk_id:
            if c not in hexdigits:
                self.logger.warn('Not a chunk %s' % path)
                return

        success = False
        self.total_chunks_processed += 1
        try:
            if fd is None:
                with open(path) as fd:
                    success, _ = self.convert_chunk(fd, chunk_id)
            else:
                success, _ = self.convert_chunk(fd, chunk_id)
        except Exception:
            self.logger.exception('ERROR while converting %s', path)

        if not success:
            self.errors += 1
        else:
            self.logger.debug('Converted %s', path)
            self.passes += 1

    def _fetch_chunks_from_file(self, input_file):
        with open(input_file, 'r') as ifile:
            for line in ifile:
                chunk_id = line.strip()
                if chunk_id and not chunk_id.startswith('#'):
                    yield self._get_path(chunk_id)

    def paths_gen(self, input_file=None):
        if input_file:
            return self._fetch_chunks_from_file(input_file)
        else:
            return paths_gen(self.volume)

    def converter_pass(self, input_file=None):

        def report(tag, now=None):
            if now is None:
                now = time.time()
            total_time = now - self.start_time
            self.logger.info(
                '%(tag)s %(volume)s '
                'started=%(start_time)s '
                'passes=%(passes)d '
                'errors=%(errors)d '
                'chunks=%(nb_chunks)d %(c_rate).2f/s '
                'total_time=%(total_time).2f '
                '(converter: %(success_rate).2f%%)' % {
                    'tag': tag,
                    'volume': self.volume_id,
                    'start_time': datetime.fromtimestamp(
                        int(self.start_time)).isoformat(),
                    'passes': self.passes,
                    'errors': self.errors,
                    'nb_chunks': self.total_chunks_processed,
                    'c_rate': self.total_chunks_processed / total_time,
                    'total_time': total_time,
                    'success_rate': 100 * (
                        (self.total_chunks_processed - self.errors)
                        / (float(self.total_chunks_processed) or 1.0))
                })
            self.passes = 0
            self.last_reported = now

        self.start_time = time.time()
        self.errors = 0
        self.passes = 0
        self.backup_name = 'backup_%s_%f' % (self.volume_id, self.start_time)

        paths = self.paths_gen(input_file=input_file)
        for path in paths:
            self.safe_convert_chunk(path)

            now = time.time()
            if now - self.last_reported >= self.report_interval:
                report('RUN', now=now)

            self.chunks_run_time = ratelimit(self.chunks_run_time,
                                             self.max_chunks_per_second)
        report('DONE')

        return self.errors == 0
class BlobConverter(object):

    def __init__(self, conf, logger=None, **kwargs):
        self.conf = conf
        self.logger = logger or get_logger(conf)
        volume = conf.get('volume')
        if not volume:
            raise ConfigurationException('No volume specified for converter')
        self.volume = volume
        self.namespace, self.volume_id = check_volume(self.volume)
        # cache
        self.name_by_cid = CacheDict()
        self.content_id_by_name = CacheDict()
        # client
        self.container_client = ContainerClient(conf, **kwargs)
        self.content_factory = ContentFactory(conf, self.container_client,
                                              logger=self.logger)
        self._rdir = None  # we may never need it
        # stats/logs
        self.errors = 0
        self.passes = 0
        self.total_chunks_processed = 0
        self.start_time = 0
        self.last_reported = 0
        self.report_interval = int_value(conf.get('report_interval'), 3600)
        # speed
        self.chunks_run_time = 0
        self.max_chunks_per_second = int_value(
            conf.get('chunks_per_second'), 30)
        # backup
        self.no_backup = true_value(conf.get('no_backup', False))
        self.backup_dir = conf.get('backup_dir') or tempfile.gettempdir()
        self.backup_name = 'backup_%s_%f' % (self.volume_id, time.time())
        # dry run
        self.dry_run = true_value(conf.get('dry_run', False))

    @property
    def rdir(self):
        """Get an instance of `RdirClient`."""
        if self._rdir is None:
            self._rdir = RdirClient(
                self.conf, pool_manager=self.container_client.pool_manager)
        return self._rdir

    def save_xattr(self, fd, chunk_id, xattr):
        if self.no_backup:
            return
        dirname = self.backup_dir + '/' + self.backup_name + '/' \
            + chunk_id[:3]
        try:
            os.makedirs(dirname)
        except OSError:
            if not os.path.isdir(dirname):
                raise
        with open(dirname + '/' + chunk_id, 'w') as backup_fd:
            # same format as getfattr
            backup_fd.write('# file: ' + self._get_path(fd, chunk_id) + '\n')
            for k, v in xattr.items():
                backup_fd.write('user.' + k + '="' + v + '"\n')

    def _save_container(self, cid, account, container):
        cid = cid.upper()
        self.name_by_cid[cid] = (account, container)
        return cid, account, container

    def _save_content(self, cid, path, version, content_id):
        cid = cid.upper()
        content_id = content_id.upper()
        self.content_id_by_name[(cid, path, version)] = content_id
        return cid, path, version, content_id

    def _get_path(self, fd, chunk_id):
        # rebuild the hashed directory layout from the open file's name
        chunk_path = self.volume
        chunk_path_split = fd.name[len(self.volume):].split('/')
        start = 0
        for chunk_part in chunk_path_split[:-1]:
            end = start + len(chunk_part)
            chunk_path += '/' + chunk_id[start:end]
            start = end
        chunk_path += '/' + chunk_path_split[-1]
        return chunk_path

    def cid_from_name(self, account, container):
        cid = cid_from_name(account, container)
        cid, account, container = self._save_container(
            cid, account, container)
        return cid

    def name_from_cid(self, cid):
        name = self.name_by_cid.get(cid)
        if name:
            return name

        properties = self.container_client.container_get_properties(cid=cid)
        account = properties['system']['sys.account']
        container = properties['system']['sys.user.name']
        cid, account, container = self._save_container(
            cid, account, container)
        return account, container

    def content_id_from_name(self, cid, path, version, search=False):
        content_id = self.content_id_by_name.get((cid, path, version))
        if content_id or not search:
            return content_id

        properties = self.container_client.content_get_properties(
            cid=cid, path=path, version=version)
        content_id = properties['id']
        cid, path, version, content_id = self._save_content(
            cid, path, version, content_id)
        return content_id

    def decode_fullpath(self, fullpath):
        # pylint: disable=unbalanced-tuple-unpacking
        account, container, path, version, content_id = decode_fullpath(
            fullpath)
        cid = self.cid_from_name(account, container)
        cid, path, version, content_id = self._save_content(
            cid, path, version, content_id)
        return account, container, cid, path, version, content_id

    def decode_old_fullpath(self, old_fullpath):
        # pylint: disable=unbalanced-tuple-unpacking
        try:
            account, container, path, version = decode_old_fullpath(
                old_fullpath)
            cid = self.cid_from_name(account, container)
            content_id = self.content_id_from_name(cid, path, version)
        except ValueError:
            # We never know; try to decode the fullpath as if it were new
            account, container, path, version, content_id = decode_fullpath(
                old_fullpath)
            cid = self.cid_from_name(account, container)
        return account, container, cid, path, version, content_id

    def encode_fullpath(self, fd, chunk_id, account, container, path,
                        version, content_id):
        # check that the chunk exists and has the same inode
        if not is_hexa(chunk_id) or len(chunk_id) != STRLEN_CHUNKID:
            raise ValueError('chunk ID must be hexadecimal (%s)'
                             % STRLEN_CHUNKID)
        try:
            chunk_inode = os.fstat(fd.fileno()).st_ino
            chunk_inode2 = os.stat(self._get_path(fd, chunk_id)).st_ino
            if chunk_inode2 != chunk_inode:
                raise OrphanChunk(
                    'Not the same inode: possible orphan chunk')
        except OSError:
            raise OrphanChunk('No such chunk: possible orphan chunk')

        # check fullpath and chunk ID
        if isinstance(version, string_types):
            try:
                version = int(version)
            except ValueError:
                raise ValueError('version must be a number')
        if version <= 0:
            raise ValueError('version must be positive')
        if not is_hexa(content_id):
            raise ValueError('content ID must be hexadecimal')

        fullpath = encode_fullpath(account, container, path, version,
                                   content_id.upper())

        return chunk_id.upper(), fullpath

    def _get_chunk_id_and_fullpath(self, fd, chunk_pos, content,
                                   chunk_id=None):
        content.container_id, content.account, content.container_name = \
            self._save_container(content.container_id, content.account,
                                 content.container_name)
        content.container_id, content.path, content.version, \
            content.content_id = self._save_content(
                content.container_id, content.path, content.version,
                content.content_id)

        chunks = content.chunks.filter(host=self.volume_id)
        if chunk_id:
            chunks = chunks.filter(id=chunk_id)
        chunk = chunks.filter(pos=chunk_pos).one()
        if chunk is None:
            raise OrphanChunk('Chunk not found in content: '
                              'possible orphan chunk')

        chunk_id, new_fullpath = self.encode_fullpath(
            fd, chunk.id, content.account, content.container_name,
            content.path, content.version, content.content_id)
        return chunk_id, new_fullpath

    def get_chunk_id_and_fullpath(self, fd, chunk_pos, container_id, path,
                                  version, chunk_id=None, account=None,
                                  container=None, content_id=None):
        if account is None or container is None:
            account, container = self.name_from_cid(container_id)

        if content_id:
            try:
                content = self.content_factory.get(
                    container_id, content_id,
                    account=account, container_name=container)
                return self._get_chunk_id_and_fullpath(
                    fd, chunk_pos, content, chunk_id=chunk_id)
            except Exception as exc:
                self.logger.warn(
                    'chunk_id=%s chunk_pos=%s object=%s/%s/%s/%s/%s/%s: %s',
                    chunk_id, chunk_pos, str(account), str(container),
                    container_id, path, str(version), str(content_id), exc)

        # version must be an integer
        try:
            version = str(int(version))
        except Exception:
            version = None

        try:
            content = self.content_factory.get_by_path_and_version(
                container_id, path, version,
                account=account, container_name=container)
        except ContentNotFound:
            raise OrphanChunk('Content not found: possible orphan chunk')
        return self._get_chunk_id_and_fullpath(
            fd, chunk_pos, content, chunk_id=chunk_id)

    def convert_chunk(self, fd, chunk_id):
        meta, raw_meta = read_chunk_metadata(fd, chunk_id,
                                             for_conversion=True)

        links = meta.get('links', dict())
        for chunk_id2, fullpath2 in links.items():
            self.decode_fullpath(fullpath2)

        fullpath = meta.get('full_path')
        if fullpath is not None:
            self.decode_fullpath(fullpath)
            if meta.get('oio_version') == OIO_VERSION:
                return True, meta

        raw_chunk_id = None
        chunk_id = chunk_id.upper()
        chunk_pos = meta['chunk_pos']
        container_id = meta['container_id'].upper()
        path = meta['content_path']
        version = meta['content_version']
        content_id = meta['content_id'].upper()

        new_fullpaths = dict()
        xattr_to_remove = list()
        success = True

        for k, v in raw_meta.items():
            # fetch the raw chunk ID
            if k == XATTR_CHUNK_ID:
                raw_chunk_id = v.upper()

            # search for old fullpaths
            if not k.startswith(XATTR_OLD_FULLPATH) \
                    or not is_hexa(k[4:], size=64):
                continue

            try:
                account2, container2, container_id2, path2, version2, \
                    content_id2 = self.decode_old_fullpath(v)

                if meta['chunk_id'] == chunk_id \
                        and container_id == container_id2 \
                        and path == path2 \
                        and version == version2:
                    if content_id2 is None:
                        content_id2 = self.content_id_from_name(
                            container_id2, path2, version2, search=True)
                    chunk_id2, new_fullpath = self.encode_fullpath(
                        fd, chunk_id, account2, container2, path2,
                        version2, content_id2)
                    new_fullpaths[chunk_id2] = new_fullpath
                else:
                    chunk_id2, new_fullpath = \
                        self.get_chunk_id_and_fullpath(
                            fd, chunk_pos, container_id2, path2, version2,
                            account=account2, container=container2,
                            content_id=content_id2)
                    new_fullpaths[chunk_id2] = new_fullpath

                xattr_to_remove.append(k)
            except Exception as exc:
                success = False
                self.logger.warn('chunk_id=%s old_fullpath=%s: %s',
                                 chunk_id, k, exc)

        # old xattr
        if raw_chunk_id is not None:
            try:
                if raw_chunk_id != chunk_id and raw_chunk_id not in links:
                    if raw_chunk_id not in new_fullpaths:
                        meta2, _ = read_chunk_metadata(fd, raw_chunk_id)
                        container_id2 = meta2['container_id'].upper()
                        path2 = meta2['content_path']
                        version2 = meta2['content_version']
                        content_id2 = meta2['content_id'].upper()
                        raw_chunk_id2, new_fullpath = \
                            self.get_chunk_id_and_fullpath(
                                fd, chunk_pos, container_id2, path2,
                                version2, chunk_id=raw_chunk_id,
                                content_id=content_id2)
                        new_fullpaths[raw_chunk_id2] = new_fullpath
                elif raw_chunk_id == chunk_id and fullpath is None:
                    if raw_chunk_id not in new_fullpaths:
                        raw_chunk_id2, new_fullpath = \
                            self.get_chunk_id_and_fullpath(
                                fd, chunk_pos, container_id, path,
                                version, chunk_id=raw_chunk_id,
                                content_id=content_id)
                        new_fullpaths[raw_chunk_id2] = new_fullpath
            except Exception as exc:
                success = False
                self.logger.warn('chunk_id=%s (old xattr): %s',
                                 raw_chunk_id, exc)

        self.save_xattr(fd, chunk_id, raw_meta)

        if self.dry_run:
            self.logger.info(
                "[dryrun] Converting chunk %s: success=%s new_fullpaths=%s "
                "xattr_to_remove=%s",
                chunk_id, str(success), str(new_fullpaths),
                str(xattr_to_remove))
        else:
            # for safety, if there was an error, don't delete old xattr
            set_fullpath_xattr(fd, new_fullpaths, success, xattr_to_remove)

        return success, None

    def is_fullpath_error(self, err):
        if (isinstance(err, MissingAttribute)
                and (err.attribute.startswith(
                        CHUNK_XATTR_CONTENT_FULLPATH_PREFIX)
                     or err.attribute == CHUNK_XATTR_KEYS['content_path']
                     or err.attribute.startswith(XATTR_OLD_FULLPATH))):
            return True
        elif isinstance(err, FaultyChunk):
            return any(self.is_fullpath_error(x) for x in err.args)
        return False

    def safe_convert_chunk(self, path, chunk_id=None):
        if chunk_id is None:
            chunk_id = path.rsplit('/', 1)[-1]
        if len(chunk_id) != STRLEN_CHUNKID:
            self.logger.warn('Not a chunk %s' % path)
            return
        for char in chunk_id:
            if char not in hexdigits:
                self.logger.warn('Not a chunk %s' % path)
                return

        success = False
        self.total_chunks_processed += 1
        try:
            with open(path) as fd:
                success, _ = self.convert_chunk(fd, chunk_id)
        except (FaultyChunk, MissingAttribute) as err:
            if self.is_fullpath_error(err):
                self.logger.warn(
                    "Cannot convert %s: %s, will try to recover 'fullpath'",
                    path, err)
                try:
                    success = self.recover_chunk_fullpath(path, chunk_id)
                except Exception as err2:
                    self.logger.error('Could not recover fullpath: %s', err2)
            else:
                self.logger.exception('ERROR while converting %s', path)
        except Exception:
            self.logger.exception('ERROR while converting %s', path)

        if not success:
            self.errors += 1
        else:
            self.logger.debug('Converted %s', path)
            self.passes += 1

    def recover_chunk_fullpath(self, path, chunk_id=None):
        if not chunk_id:
            chunk_id = path.rsplit('/', 1)[-1]
        # 1. Fetch the chunk list from rdir (could be cached).
        #    Unfortunately we cannot seek for a chunk ID.
        entries = [x for x in self.rdir.chunk_fetch(self.volume_id, limit=-1)
                   if x[2] == chunk_id]
        if not entries:
            raise KeyError('Chunk %s not found in rdir' % chunk_id)
        elif len(entries) > 1:
            self.logger.info('Chunk %s appears in %d objects',
                             chunk_id, len(entries))
        # 2. Find the content and container IDs
        cid, content_id = entries[0][0:2]
        # 3a. Call ContainerClient.content_locate()
        #     with the container ID and content ID
        try:
            meta, chunks = self.container_client.content_locate(
                cid=cid, content=content_id)
        except NotFound as err:
            raise OrphanChunk('Cannot check %s is valid: %s' % (path, err))
        # 3b. Resolve the container ID into account and container names.
        # FIXME(FVE): get account and container names from meta1
        cmeta = self.container_client.container_get_properties(cid=cid)
        aname = cmeta['system']['sys.account']
        cname = cmeta['system']['sys.user.name']
        fullpath = encode_fullpath(aname, cname, meta['name'],
                                   meta['version'], content_id)
        # 4. Check that the chunk actually belongs to the object
        chunk_url = 'http://%s/%s' % (self.volume_id, chunk_id)
        if chunk_url not in [x['url'] for x in chunks]:
            raise OrphanChunk('Chunk %s not found in object %s'
                              % (chunk_url, fullpath))
        # 5. Regenerate the fullpath.
        #    Open read-write without truncating: 'w' would wipe the chunk.
        with open(path, 'r+') as fd:
            set_fullpath_xattr(fd, {chunk_id: fullpath})
        return True

    def _fetch_chunks_from_file(self, input_file):
        with open(input_file, 'r') as ifile:
            for line in ifile:
                chunk_path = line.strip()
                if chunk_path and not chunk_path.startswith('#'):
                    yield self.volume + '/' + chunk_path

    def paths_gen(self, input_file=None):
        if input_file:
            return self._fetch_chunks_from_file(input_file)
        else:
            return paths_gen(self.volume)

    def converter_pass(self, input_file=None):

        def report(tag, now=None):
            if now is None:
                now = time.time()
            total_time = now - self.start_time
            self.logger.info(
                '%(tag)s %(volume)s '
                'started=%(start_time)s '
                'passes=%(passes)d '
                'errors=%(errors)d '
                'chunks=%(nb_chunks)d %(c_rate).2f/s '
                'total_time=%(total_time).2f '
                '(converter: %(success_rate).2f%%)' % {
                    'tag': tag,
                    'volume': self.volume_id,
                    'start_time': datetime.fromtimestamp(
                        int(self.start_time)).isoformat(),
                    'passes': self.passes,
                    'errors': self.errors,
                    'nb_chunks': self.total_chunks_processed,
                    'c_rate': self.total_chunks_processed / total_time,
                    'total_time': total_time,
                    'success_rate': 100 * (
                        (self.total_chunks_processed - self.errors)
                        / (float(self.total_chunks_processed) or 1.0))
                })
            self.passes = 0
            self.last_reported = now

        self.start_time = time.time()
        self.errors = 0
        self.passes = 0
        self.backup_name = 'backup_%s_%f' % (self.volume_id, self.start_time)

        paths = self.paths_gen(input_file=input_file)
        for path in paths:
            self.safe_convert_chunk(path)

            now = time.time()
            if now - self.last_reported >= self.report_interval:
                report('RUN', now=now)

            self.chunks_run_time = ratelimit(self.chunks_run_time,
                                             self.max_chunks_per_second)
        report('DONE')

        return self.errors == 0
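
# Hedged driver sketch (illustrative, not from the original source) for the
# converter above. The conf keys mirror the attributes read in __init__;
# the volume path is a placeholder.
def _demo_converter_pass():
    conf = {
        'volume': '/var/lib/oio/sds/OPENIO/rawx-1',  # placeholder path
        'dry_run': True,           # log what would change, leave xattr alone
        'no_backup': False,        # keep a getfattr-style backup of old xattr
        'report_interval': 60,     # seconds between RUN report lines
        'chunks_per_second': 30,   # rate limit applied by ratelimit()
    }
    converter = BlobConverter(conf)
    # Returns True when the pass finished without conversion errors.
    return converter.converter_pass()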
class BlobRebuilderWorker(object): def __init__(self, conf, logger, volume, input_file=None, try_chunk_delete=False, beanstalkd_addr=None): self.conf = conf self.logger = logger or get_logger(conf) self.volume = volume self.run_time = 0 self.passes = 0 self.errors = 0 self.last_reported = 0 self.chunks_run_time = 0 self.bytes_running_time = 0 self.bytes_processed = 0 self.total_bytes_processed = 0 self.total_chunks_processed = 0 self.dry_run = true_value(conf.get('dry_run', False)) self.report_interval = int_value(conf.get('report_interval'), 3600) self.max_chunks_per_second = int_value(conf.get('chunks_per_second'), 30) self.max_bytes_per_second = int_value(conf.get('bytes_per_second'), 10000000) self.rdir_fetch_limit = int_value(conf.get('rdir_fetch_limit'), 100) self.allow_same_rawx = true_value(conf.get('allow_same_rawx')) self.input_file = input_file self.rdir_client = RdirClient(conf, logger=self.logger) self.content_factory = ContentFactory(conf) self.try_chunk_delete = try_chunk_delete self.beanstalkd_addr = beanstalkd_addr self.beanstalkd_tube = conf.get('beanstalkd_tube', 'rebuild') self.beanstalk = None def _fetch_chunks_from_event(self, job_id, data): env = json.loads(data) for chunk_pos in env['data']['missing_chunks']: yield [ env['url']['id'], env['url']['content'], str(chunk_pos), None ] def _connect_to_beanstalk(self): self.beanstalk = Beanstalk.from_url(self.beanstalkd_addr) self.beanstalk.use(self.beanstalkd_tube) self.beanstalk.watch(self.beanstalkd_tube) def _handle_beanstalk_event(self, conn_error): try: job_id, data = self.beanstalk.reserve() if conn_error: self.logger.warn("beanstalk reconnected") except ConnectionError: if not conn_error: self.logger.warn("beanstalk connection error") raise try: for chunk in self._fetch_chunks_from_event(job_id, data): yield chunk self.beanstalk.delete(job_id) except Exception: self.logger.exception("handling event %s (bury)", job_id) self.beanstalk.bury(job_id) def _fetch_chunks_from_beanstalk(self): conn_error = False while 1: try: self._connect_to_beanstalk() for chunk in self._handle_beanstalk_event(conn_error): conn_error = False yield chunk except ConnectionError: conn_error = True time.sleep(1.0) def _fetch_chunks_from_file(self): with open(self.input_file, 'r') as ifile: for line in ifile: stripped = line.strip() if stripped and not stripped.startswith('#'): yield stripped.split('|', 3)[:3] + [None] def _fetch_chunks(self): if self.input_file: return self._fetch_chunks_from_file() elif self.beanstalkd_addr: return self._fetch_chunks_from_beanstalk() else: return self.rdir_client.chunk_fetch(self.volume, limit=self.rdir_fetch_limit, rebuild=True) def rebuilder_pass_with_lock(self): self.rdir_client.admin_lock(self.volume, "rebuilder on %s" % gethostname()) try: self.rebuilder_pass() finally: self.rdir_client.admin_unlock(self.volume) def rebuilder_pass(self): start_time = report_time = time.time() rebuilder_time = 0 chunks = self._fetch_chunks() for cid, content_id, chunk_id_or_pos, _ in chunks: loop_time = time.time() if self.dry_run: self.dryrun_chunk_rebuild(cid, content_id, chunk_id_or_pos) else: self.safe_chunk_rebuild(cid, content_id, chunk_id_or_pos) self.chunks_run_time = ratelimit(self.chunks_run_time, self.max_chunks_per_second) self.total_chunks_processed += 1 now = time.time() if now - self.last_reported >= self.report_interval: self.logger.info( 'RUN %(volume)s ' 'started=%(start_time)s ' 'passes=%(passes)d ' 'errors=%(errors)d ' 'chunks=%(nb_chunks)d %(c_rate).2f/s ' 'bytes=%(nb_bytes)d %(b_rate).2fB/s ' 
'elapsed=%(total).2f ' '(rebuilder: %(success_rate).2f%%)' % { 'volume': self.volume, 'start_time': datetime.fromtimestamp(int(report_time)).isoformat(), 'passes': self.passes, 'errors': self.errors, 'nb_chunks': self.total_chunks_processed, 'nb_bytes': self.total_bytes_processed, 'c_rate': self.passes / (now - report_time), 'b_rate': self.bytes_processed / (now - report_time), 'total': (now - start_time), 'rebuilder_time': rebuilder_time, 'success_rate': 100 * ((self.total_chunks_processed - self.errors) / float(self.total_chunks_processed or 1)) }) report_time = now self.passes = 0 self.bytes_processed = 0 self.last_reported = now rebuilder_time += (now - loop_time) end_time = time.time() elapsed = (end_time - start_time) or 0.000001 self.logger.info( 'DONE %(volume)s ' 'started=%(start_time)s ' 'ended=%(end_time)s ' 'passes=%(passes)d ' 'elapsed=%(elapsed).02f ' 'errors=%(errors)d ' 'chunks=%(nb_chunks)d %(c_rate).2f/s ' 'bytes=%(nb_bytes)d %(b_rate).2fB/s ' 'rebuilder_time=%(rebuilder_time).2f ' '(rebuilder: %(success_rate).2f%%)' % { 'volume': self.volume, 'start_time': datetime.fromtimestamp(int(start_time)).isoformat(), 'end_time': datetime.fromtimestamp(int(end_time)).isoformat(), 'passes': self.passes, 'elapsed': elapsed, 'errors': self.errors, 'nb_chunks': self.total_chunks_processed, 'nb_bytes': self.total_bytes_processed, 'c_rate': self.total_chunks_processed / elapsed, 'b_rate': self.total_bytes_processed / elapsed, 'rebuilder_time': rebuilder_time, 'success_rate': 100 * ((self.total_chunks_processed - self.errors) / float(self.total_chunks_processed or 1)) }) def dryrun_chunk_rebuild(self, container_id, content_id, chunk_id_or_pos): self.logger.info( "[dryrun] Rebuilding " "container %s, content %s, chunk %s", container_id, content_id, chunk_id_or_pos) self.passes += 1 def safe_chunk_rebuild(self, container_id, content_id, chunk_id_or_pos): try: self.chunk_rebuild(container_id, content_id, chunk_id_or_pos) except Exception as e: self.errors += 1 self.logger.error('ERROR while rebuilding chunk %s|%s|%s: %s', container_id, content_id, chunk_id_or_pos, e) self.passes += 1 def chunk_rebuild(self, container_id, content_id, chunk_id_or_pos): self.logger.info('Rebuilding (container %s, content %s, chunk %s)', container_id, content_id, chunk_id_or_pos) try: content = self.content_factory.get(container_id, content_id) except ContentNotFound: raise OrphanChunk('Content not found: possible orphan chunk') chunk_size = 0 chunk_pos = None if len(chunk_id_or_pos) < 32: chunk_pos = chunk_id_or_pos chunk_id = None metapos = int(chunk_pos.split('.', 1)[0]) chunk_size = content.chunks.filter(metapos=metapos).all()[0].size else: if '/' in chunk_id_or_pos: chunk_id = chunk_id_or_pos.rsplit('/', 1)[-1] else: chunk_id = chunk_id_or_pos chunk = content.chunks.filter(id=chunk_id).one() if chunk is None: raise OrphanChunk('Chunk not found in content: possible orphan chunk') elif self.volume and chunk.host != self.volume: raise ValueError("Chunk does not belong to this volume") chunk_size = chunk.size content.rebuild_chunk(chunk_id, allow_same_rawx=self.allow_same_rawx, chunk_pos=chunk_pos) if self.try_chunk_delete: try: content.blob_client.chunk_delete(chunk.url) self.logger.info("Chunk %s deleted", chunk.url) except NotFound as exc: self.logger.debug("Chunk %s: %s", chunk.url, exc) # This call does not raise exception if chunk is not referenced if chunk_id is not None: self.rdir_client.chunk_delete(chunk.host, container_id, content_id, chunk_id) self.bytes_processed += chunk_size self.total_bytes_processed += chunk_size
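The beanstalkd feed above (_fetch_chunks_from_event) implies a specific event shape. Below is a producer-side sketch of a payload the worker could reserve on its tube; the shape is inferred from the code above, the ids are placeholders, and the client calls are left commented because only the payload format is being illustrated:

import json

# Event shape expected by _fetch_chunks_from_event() above;
# the ids below are placeholders.
event = {
    'url': {
        'id': '0123456789ABCDEF0123456789ABCDEF',      # container id
        'content': 'FEDCBA9876543210FEDCBA9876543210',  # content id
    },
    'data': {
        'missing_chunks': ['0.0', '0.p0'],  # positions to rebuild
    },
}
payload = json.dumps(event)
# beanstalk = Beanstalk.from_url(beanstalkd_addr)
# beanstalk.use('rebuild')  # default tube, see beanstalkd_tube above
# beanstalk.put(payload)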
class TestFilters(BaseTestCase): def setUp(self): super(TestFilters, self).setUp() self.account = self.conf['account'] self.namespace = self.conf['namespace'] self.chunk_size = self.conf['chunk_size'] self.gridconf = {'namespace': self.namespace} self.content_factory = ContentFactory(self.gridconf) self.container_name = 'TestFilter%f' % time.time() self.container_client = ContainerClient(self.gridconf) self.container_client.container_create(account=self.account, reference=self.container_name, admin_mode=True) self.container_id = cid_from_name(self.account, self.container_name).upper() self.stgpol = "SINGLE" def _prepare_content(self, path, content_id, admin_mode): self.container_client.content_prepare(account=self.account, reference=self.container_name, path=path, content_id=content_id, size=1, stgpol=self.stgpol, autocreate=True, admin_mode=admin_mode) def _new_content(self, data, path, admin_mode): old_content = self.content_factory.new(self.container_id, path, len(data), self.stgpol, admin_mode=admin_mode) old_content.create(BytesIO(data), admin_mode=admin_mode) return self.content_factory.get(self.container_id, old_content.content_id) def test_slave_and_admin(self): if not os.getenv("SLAVE"): self.skipTest("must be in slave mode") data = random_data(10) path = 'test_slave' try: self._new_content(data, path, False) except ClientException as exc: self.assertIn('NS slave!', text_type(exc)) else: self.fail("New content: no exception") content = self._new_content(data, path, True) content.delete(admin_mode=True) def test_worm_and_admin(self): if not os.getenv("WORM"): self.skipTest("must be in worm mode") data = random_data(10) path = 'test_worm' content = self._new_content(data, path, True) # Prepare without admin mode: # Since the 'prepare' step is done in the proxy, there is no check # on the pre-existence of the content. The subsequent prepare MUST # now work despite the presence of the content. self._prepare_content(path, None, False) self._prepare_content(path, content.content_id, False) self._prepare_content('test_worm_prepare', content.content_id, False) self._prepare_content(path, random_id(32), False) # Overwrite without admin mode data2 = random_data(11) self.assertRaises(Conflict, self._new_content, data2, path, False) # Prepare with admin mode self._prepare_content(path, None, True) self._prepare_content(path, content.content_id, True) self._prepare_content('test_worm_prepare', content.content_id, True) self._prepare_content(path, random_id(32), True) # Overwrite with admin mode content = self._new_content(data2, path, True) # Delete without admin mode try: content.delete() except ClientException as exc: self.assertIn('worm', str(exc)) else: self.fail("Delete without admin mode: no exception") downloaded_data = ''.join(content.fetch()) self.assertEqual(downloaded_data, data2) # Delete with admin mode content.delete(admin_mode=True)
class RawxDecommissionTask(XcuteTask): def __init__(self, conf, job_params, logger=None): super(RawxDecommissionTask, self).__init__(conf, job_params, logger=logger) self.service_id = job_params['service_id'] self.rawx_timeout = job_params['rawx_timeout'] self.min_chunk_size = job_params['min_chunk_size'] self.max_chunk_size = job_params['max_chunk_size'] self.excluded_rawx = job_params['excluded_rawx'] self.blob_client = BlobClient(self.conf, logger=self.logger) self.content_factory = ContentFactory(self.conf) self.conscience_client = ConscienceClient(self.conf, logger=self.logger) self.fake_excluded_chunks = self._generate_fake_excluded_chunks( self.excluded_rawx) def _generate_fake_excluded_chunks(self, excluded_rawx): fake_excluded_chunks = list() fake_chunk_id = '0' * 64 for service_id in excluded_rawx: service_addr = self.conscience_client.resolve_service_id( 'rawx', service_id) chunk = dict() chunk['hash'] = '0' * 32 chunk['pos'] = '0' chunk['size'] = 1 chunk['score'] = 1 chunk['url'] = 'http://{}/{}'.format(service_id, fake_chunk_id) chunk['real_url'] = 'http://{}/{}'.format(service_addr, fake_chunk_id) fake_excluded_chunks.append(chunk) return fake_excluded_chunks def process(self, task_id, task_payload, reqid=None): container_id = task_payload['container_id'] content_id = task_payload['content_id'] chunk_id = task_payload['chunk_id'] chunk_url = 'http://{}/{}'.format(self.service_id, chunk_id) try: meta = self.blob_client.chunk_head(chunk_url, timeout=self.rawx_timeout, reqid=reqid) except NotFound: # The chunk is still present in the rdir, # but the chunk no longer exists in the rawx. # We ignore it because there is nothing to move. return {'skipped_chunks_no_longer_exist': 1} if container_id != meta['container_id']: raise ValueError('Mismatch container ID: %s != %s' % (container_id, meta['container_id'])) if content_id != meta['content_id']: raise ValueError('Mismatch content ID: %s != %s' % (content_id, meta['content_id'])) chunk_size = int(meta['chunk_size']) # Maybe skip the chunk because it doesn't match the size constraint if chunk_size < self.min_chunk_size: self.logger.debug('[reqid=%s] SKIP %s too small', reqid, chunk_url) return {'skipped_chunks_too_small': 1} if self.max_chunk_size > 0 and chunk_size > self.max_chunk_size: self.logger.debug('[reqid=%s] SKIP %s too big', reqid, chunk_url) return {'skipped_chunks_too_big': 1} # Start moving the chunk try: content = self.content_factory.get(container_id, content_id, reqid=reqid) content.move_chunk(chunk_id, fake_excluded_chunks=self.fake_excluded_chunks, reqid=reqid) except (ContentNotFound, OrphanChunk): return {'orphan_chunks': 1} return {'moved_chunks': 1, 'moved_bytes': chunk_size}
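process() reports each outcome as a small counter dict. A sketch of how a scheduler could fold those results into running totals; the sample dicts are the ones returned above, but the aggregation itself is illustrative and not the xcute API:

from collections import Counter

# Fold per-task result dicts (as returned by process() above)
# into running totals.
totals = Counter()
for result in ({'moved_chunks': 1, 'moved_bytes': 1048576},
               {'skipped_chunks_too_small': 1},
               {'orphan_chunks': 1}):
    totals.update(result)
# Counter({'moved_bytes': 1048576, 'moved_chunks': 1,
#          'skipped_chunks_too_small': 1, 'orphan_chunks': 1})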
class TestFilters(BaseTestCase): def setUp(self): super(TestFilters, self).setUp() self.account = self.conf['account'] self.namespace = self.conf['namespace'] self.chunk_size = self.conf['chunk_size'] self.gridconf = {'namespace': self.namespace} self.content_factory = ContentFactory(self.gridconf) self.container_name = 'TestFilter%f' % time.time() self.container_client = ContainerClient(self.gridconf, admin_mode=True) self.container_client.container_create(account=self.account, reference=self.container_name) self.container_id = cid_from_name(self.account, self.container_name).upper() self.stgpol = "SINGLE" def _new_content(self, data, path, admin_mode=True): old_content = self.content_factory.new(self.container_id, path, len(data), self.stgpol, admin_mode=admin_mode) old_content.create(BytesIO(data), admin_mode=admin_mode) return self.content_factory.get(self.container_id, old_content.content_id) def test_slave_and_admin(self): if not os.getenv("SLAVE"): self.skipTest("must be in slave mode") data = random_data(10) path = 'test_slave' try: self._new_content(data, path, admin_mode=False) except ClientException as exc: self.assertIn('NS slave!', str(exc)) else: self.fail("New content without admin mode: no exception") content = self._new_content(data, path) content.delete(admin_mode=True) def test_worm_and_admin(self): if not os.getenv("WORM"): self.skipTest("must be in worm mode") data = random_data(10) path = 'test_worm' content = self._new_content(data, path) # Overwrite without admin mode data2 = random_data(11) try: content = self._new_content(data2, path, admin_mode=False) except ClientException as exc: self.assertIsInstance(exc, Conflict) else: self.fail("Overwrite without admin mode: no exception") # Overwrite with admin mode content = self._new_content(data2, path) # Delete without admin mode try: content.delete() except ClientException as exc: self.assertIn('worm', str(exc).lower()) else: self.fail("Delete without admin mode: no exception") downloaded_data = ''.join(content.fetch()) self.assertEqual(downloaded_data, data2) # Delete with admin mode content.delete(admin_mode=True)
class TestRainContent(BaseTestCase): def setUp(self): super(TestRainContent, self).setUp() if len(self.conf['rawx']) < 12: self.skipTest("Not enough rawx. " "Rain tests need at least 12 rawx to run") self.namespace = self.conf['namespace'] self.account = self.conf['account'] self.chunk_size = self.conf['chunk_size'] self.gridconf = {"namespace": self.namespace} self.content_factory = ContentFactory(self.gridconf) self.container_client = ContainerClient(self.gridconf) self.blob_client = BlobClient() self.container_name = "TestRainContent%f" % time.time() self.container_client.container_create(acct=self.account, ref=self.container_name) self.container_id = cid_from_name(self.account, self.container_name).upper() def tearDown(self): super(TestRainContent, self).tearDown() def _test_upload(self, data_size): data = random_data(data_size) content = self.content_factory.new(self.container_id, "titi", len(data), "RAIN") k = 6 m = 2 self.assertEqual(type(content), RainContent) content.upload(StringIO.StringIO(data)) meta, chunks = self.container_client.content_show( cid=self.container_id, content=content.content_id) chunks = ChunksHelper(chunks) self.assertEqual(meta['hash'], md5_data(data)) self.assertEqual(meta['length'], str(len(data))) self.assertEqual(meta['policy'], "RAIN") self.assertEqual(meta['name'], "titi") metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size)) if metachunk_nb == 0: metachunk_nb = 1 # special case for empty content nb_chunks_min = metachunk_nb * (k + m) - (k - 1) nb_chunks_max = metachunk_nb * (k + m) self.assertGreaterEqual(len(chunks), nb_chunks_min) self.assertLessEqual(len(chunks), nb_chunks_max) for metapos in range(metachunk_nb): chunks_at_pos = content.chunks.filter(metapos=metapos) data_chunks_at_pos = chunks_at_pos.filter(is_parity=False) parity_chunks_at_pos = chunks_at_pos.filter(is_parity=True) if metapos < metachunk_nb - 1: self.assertEqual(len(data_chunks_at_pos), k) else: self.assertGreaterEqual(len(data_chunks_at_pos), 1) self.assertLessEqual(len(data_chunks_at_pos), k) self.assertEqual(len(parity_chunks_at_pos), m) for chunk in chunks_at_pos: meta, stream = self.blob_client.chunk_get(chunk.url) self.assertEqual(md5_stream(stream), chunk.hash) self.assertEqual(meta['content_size'], str(len(data))) self.assertEqual(meta['content_path'], "titi") self.assertEqual(meta['content_cid'], self.container_id) self.assertEqual(meta['content_id'], content.content_id) self.assertEqual(meta['chunk_id'], chunk.id) self.assertEqual(meta['chunk_pos'], chunk.pos) self.assertEqual(meta['chunk_hash'], chunk.hash) data_begin = metapos * self.chunk_size data_end = metapos * self.chunk_size + self.chunk_size target_metachunk_hash = md5_data(data[data_begin:data_end]) metachunk_hash = hashlib.md5() for chunk in data_chunks_at_pos: meta, stream = self.blob_client.chunk_get(chunk.url) for d in stream: metachunk_hash.update(d) self.assertEqual(metachunk_hash.hexdigest().upper(), target_metachunk_hash) def test_upload_0_byte(self): self._test_upload(0) def test_upload_1_byte(self): self._test_upload(1) def test_upload_chunksize_bytes(self): self._test_upload(self.chunk_size) def test_upload_chunksize_plus_1_bytes(self): self._test_upload(self.chunk_size + 1) def test_chunks_cleanup_when_upload_failed(self): data = random_data(2 * self.chunk_size) content = self.content_factory.new(self.container_id, "titi", len(data), "RAIN") self.assertEqual(type(content), RainContent) # set bad url for position 1 for chunk in content.chunks.filter(pos="1.p0"): chunk.url
= "http://127.0.0.1:9/DEADBEEF" self.assertRaises(Exception, content.upload, StringIO.StringIO(data)) for chunk in content.chunks.exclude(pos="1.p0"): self.assertRaises(NotFound, self.blob_client.chunk_head, chunk.url) def _test_rebuild(self, data_size, broken_pos_list): data = os.urandom(data_size) old_content = self.content_factory.new(self.container_id, "titi", len(data), "RAIN") self.assertEqual(type(old_content), RainContent) old_content.upload(StringIO.StringIO(data)) # get the new structure of the uploaded content uploaded_content = self.content_factory.get(self.container_id, old_content.content_id) old_info = {} for pos in broken_pos_list: old_info[pos] = {} c = uploaded_content.chunks.filter(pos=pos)[0] old_info[pos]["url"] = c.url old_info[pos]["id"] = c.id old_info[pos]["hash"] = c.hash chunk_id_to_rebuild = c.id meta, stream = self.blob_client.chunk_get(c.url) old_info[pos]["dl_meta"] = meta old_info[pos]["dl_hash"] = md5_stream(stream) # delete the chunk self.blob_client.chunk_delete(c.url) # rebuild the broken chunks uploaded_content.rebuild_chunk(chunk_id_to_rebuild) # get the new structure of the content rebuilt_content = self.content_factory.get(self.container_id, uploaded_content.content_id) self.assertEqual(type(rebuilt_content), RainContent) for pos in broken_pos_list: c = rebuilt_content.chunks.filter(pos=pos)[0] rebuilt_meta, rebuilt_stream = self.blob_client.chunk_get(c.url) self.assertEqual(rebuilt_meta["chunk_id"], c.id) self.assertEqual(md5_stream(rebuilt_stream), old_info[pos]["dl_hash"]) self.assertEqual(c.hash, old_info[pos]["hash"]) self.assertThat(c.url, NotEquals(old_info[pos]["url"])) del old_info[pos]["dl_meta"]["chunk_id"] del rebuilt_meta["chunk_id"] self.assertEqual(rebuilt_meta, old_info[pos]["dl_meta"]) def test_content_0_byte_rebuild_pos_0_0(self): self._test_rebuild(0, ["0.0"]) def test_content_0_byte_rebuild_pos_0_0_and_0_p0(self): self._test_rebuild(0, ["0.0", "0.p0"]) def test_content_1_byte_rebuild_pos_0_0(self): self._test_rebuild(1, ["0.0"]) def test_content_1_byte_rebuild_pos_0_p0(self): self._test_rebuild(1, ["0.p0"]) def test_content_1_byte_rebuild_pos_0_0_and_0_p0(self): self._test_rebuild(1, ["0.0", "0.p0"]) def test_content_chunksize_bytes_rebuild_pos_0_0(self): self._test_rebuild(self.conf["chunk_size"], ["0.0"]) def test_content_chunksize_bytes_rebuild_pos_0_0_and_0_1(self): self._test_rebuild(self.conf["chunk_size"], ["0.0", "0.1"]) def test_content_chunksize_bytes_rebuild_pos_0_0_and_0_p0(self): self._test_rebuild(self.conf["chunk_size"], ["0.0", "0.p0"]) def test_content_chunksize_bytes_rebuild_pos_0_p0_and_0_p1(self): self._test_rebuild(self.conf["chunk_size"], ["0.p0", "0.p1"]) def test_content_chunksize_bytes_rebuild_more_than_k_chunk(self): self.assertRaises(UnrecoverableContent, self._test_rebuild, self.conf["chunk_size"], ["0.0", "0.1", "0.2"]) def _new_content(self, data, broken_pos_list=[]): old_content = self.content_factory.new(self.container_id, "titi", len(data), "RAIN") self.assertEqual(type(old_content), RainContent) old_content.upload(StringIO.StringIO(data)) for pos in broken_pos_list: c = old_content.chunks.filter(pos=pos)[0] self.blob_client.chunk_delete(c.url) # get the new structure of the uploaded content return self.content_factory.get(self.container_id, old_content.content_id) def test_orphan_chunk(self): content = self._new_content(random_data(10)) self.assertRaises(OrphanChunk, content.rebuild_chunk, "uNkNoWnId") def test_rebuild_on_the_fly(self): data = random_data(self.conf["chunk_size"]) content = 
self._new_content(data, ["0.0", "0.p0"]) stream = content.rebuild_metachunk("0", on_the_fly=True) dl_data = "".join(stream) self.assertEqual(dl_data, data) del_chunk_0_0 = content.chunks.filter(pos="0.0")[0] del_chunk_0_p0 = content.chunks.filter(pos="0.p0")[0] self.assertRaises(NotFound, self.blob_client.chunk_get, del_chunk_0_0.url) self.assertRaises(NotFound, self.blob_client.chunk_get, del_chunk_0_p0.url) def _test_download(self, data_size, broken_pos_list): data = random_data(data_size) content = self._new_content(data, broken_pos_list) downloaded_data = "".join(content.download()) self.assertEqual(downloaded_data, data) for pos in broken_pos_list: c = content.chunks.filter(pos=pos)[0] self.assertRaises(NotFound, self.blob_client.chunk_delete, c.url) def test_download_content_0_byte_without_broken_chunks(self): self._test_download(0, []) def test_download_content_1_byte_without_broken_chunks(self): self._test_download(1, []) def test_download_content_chunksize_bytes_without_broken_chunks(self): self._test_download(self.conf["chunk_size"], []) def test_download_content_chunksize_plus_1_without_broken_chunks(self): self._test_download(self.conf["chunk_size"] + 1, []) def test_download_content_0_byte_with_broken_0_0_and_0_p0(self): self._test_download(0, ["0.0", "0.p0"]) def test_download_content_1_byte_with_broken_0_0_and_0_p0(self): self._test_download(1, ["0.0", "0.p0"]) def test_download_content_2xchunksize_with_broken_0_2_and_1_0(self): self._test_download(2 * self.conf["chunk_size"], ["0.2", "1.0"]) def test_download_content_chunksize_bytes_with_3_broken_chunks(self): data = random_data(self.conf["chunk_size"]) content = self._new_content(data, ["0.0", "0.1", "0.2"]) gen = content.download() self.assertRaises(UnrecoverableContent, gen.next) def test_download_interrupt_close(self): data = random_data(self.conf["chunk_size"]) content = self._new_content(data, ["0.p0"]) download_iter = content.download() self.assertEqual(download_iter.next(), data[0:READ_CHUNK_SIZE - 1]) download_iter.close()
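For reference, the chunk-count bounds asserted in _test_upload above follow from simple arithmetic: every metachunk carries m parity chunks, and only the last metachunk may hold fewer than k data chunks. A standalone restatement of that bound (the helper name is ours, for illustration only):

import math

def rain_chunk_bounds(data_size, chunk_size, k=6, m=2):
    # Bounds asserted by _test_upload() above: each metachunk has m
    # parity chunks; the last metachunk holds between 1 and k data
    # chunks, every other metachunk holds exactly k.
    metachunk_nb = max(1, int(math.ceil(float(data_size) / chunk_size)))
    return (metachunk_nb * (k + m) - (k - 1), metachunk_nb * (k + m))

# rain_chunk_bounds(1, 1048576) -> (3, 8): at least 1 data + 2 parity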
class TestRainContent(BaseTestCase): def setUp(self): super(TestRainContent, self).setUp() if len(self.conf['rawx']) < 12: self.skipTest("Not enough rawx. " "Rain tests need at least 12 rawx to run") self.namespace = self.conf['namespace'] self.account = self.conf['account'] self.chunk_size = self.conf['chunk_size'] self.gridconf = {"namespace": self.namespace} self.content_factory = ContentFactory(self.gridconf) self.container_client = ContainerClient(self.gridconf) self.blob_client = BlobClient() self.container_name = "TestRainContent%f" % time.time() self.container_client.container_create(acct=self.account, ref=self.container_name) self.container_id = cid_from_name(self.account, self.container_name).upper() def tearDown(self): super(TestRainContent, self).tearDown() def _test_upload(self, data_size): data = random_data(data_size) content = self.content_factory.new(self.container_id, "titi", len(data), "RAIN") k = 6 m = 2 self.assertEqual(type(content), RainContent) content.upload(StringIO.StringIO(data)) meta, chunks = self.container_client.content_show( cid=self.container_id, content=content.content_id) chunks = ChunksHelper(chunks) self.assertEqual(meta['hash'], md5_data(data)) self.assertEqual(meta['length'], str(len(data))) self.assertEqual(meta['policy'], "RAIN") self.assertEqual(meta['name'], "titi") metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size)) if metachunk_nb == 0: metachunk_nb = 1 # special case for empty content nb_chunks_min = metachunk_nb * (1 + m) nb_chunks_max = metachunk_nb * (k + m) self.assertGreaterEqual(len(chunks), nb_chunks_min) self.assertLessEqual(len(chunks), nb_chunks_max) for metapos in range(metachunk_nb): chunks_at_pos = content.chunks.filter(metapos=metapos) data_chunks_at_pos = chunks_at_pos.filter(is_parity=False) parity_chunks_at_pos = chunks_at_pos.filter(is_parity=True) self.assertGreaterEqual(len(data_chunks_at_pos), 1) self.assertLessEqual(len(data_chunks_at_pos), k) self.assertEqual(len(parity_chunks_at_pos), m) for chunk in chunks_at_pos: meta, stream = self.blob_client.chunk_get(chunk.url) self.assertEqual(md5_stream(stream), chunk.hash) self.assertEqual(meta['content_size'], str(len(data))) self.assertEqual(meta['content_path'], "titi") self.assertEqual(meta['content_cid'], self.container_id) self.assertEqual(meta['content_id'], content.content_id) self.assertEqual(meta['chunk_id'], chunk.id) self.assertEqual(meta['chunk_pos'], chunk.pos) self.assertEqual(meta['chunk_hash'], chunk.hash) data_begin = metapos * self.chunk_size data_end = metapos * self.chunk_size + self.chunk_size target_metachunk_hash = md5_data(data[data_begin:data_end]) metachunk_hash = hashlib.md5() for chunk in data_chunks_at_pos: meta, stream = self.blob_client.chunk_get(chunk.url) for d in stream: metachunk_hash.update(d) self.assertEqual(metachunk_hash.hexdigest().upper(), target_metachunk_hash) def test_upload_0_byte(self): self._test_upload(0) def test_upload_1_byte(self): self._test_upload(1) def test_upload_chunksize_bytes(self): self._test_upload(self.chunk_size) def test_upload_chunksize_plus_1_bytes(self): self._test_upload(self.chunk_size + 1) def test_chunks_cleanup_when_upload_failed(self): data = random_data(2 * self.chunk_size) content = self.content_factory.new(self.container_id, "titi", len(data), "RAIN") self.assertEqual(type(content), RainContent) # set bad url for position 1 for chunk in content.chunks.filter(pos="1.p0"): chunk.url = "http://127.0.0.1:9/DEADBEEF" self.assertRaises(Exception, content.upload,
StringIO.StringIO(data)) for chunk in content.chunks.exclude(pos="1.p0"): self.assertRaises(NotFound, self.blob_client.chunk_head, chunk.url) def _test_rebuild(self, data_size, broken_pos_list): data = os.urandom(data_size) old_content = self.content_factory.new(self.container_id, "titi", len(data), "RAIN") self.assertEqual(type(old_content), RainContent) old_content.upload(StringIO.StringIO(data)) # get the new structure of the uploaded content uploaded_content = self.content_factory.get(self.container_id, old_content.content_id) old_info = {} for pos in broken_pos_list: old_info[pos] = {} c = uploaded_content.chunks.filter(pos=pos)[0] old_info[pos]["url"] = c.url old_info[pos]["id"] = c.id old_info[pos]["hash"] = c.hash chunk_id_to_rebuild = c.id meta, stream = self.blob_client.chunk_get(c.url) old_info[pos]["dl_meta"] = meta old_info[pos]["dl_hash"] = md5_stream(stream) # delete the chunk self.blob_client.chunk_delete(c.url) # rebuild the broken chunks uploaded_content.rebuild_chunk(chunk_id_to_rebuild) # get the new structure of the content rebuilt_content = self.content_factory.get(self.container_id, uploaded_content.content_id) self.assertEqual(type(rebuilt_content), RainContent) for pos in broken_pos_list: c = rebuilt_content.chunks.filter(pos=pos)[0] rebuilt_meta, rebuilt_stream = self.blob_client.chunk_get(c.url) self.assertEqual(rebuilt_meta["chunk_id"], c.id) self.assertEqual(md5_stream(rebuilt_stream), old_info[pos]["dl_hash"]) self.assertEqual(c.hash, old_info[pos]["hash"]) self.assertThat(c.url, NotEquals(old_info[pos]["url"])) del old_info[pos]["dl_meta"]["chunk_id"] del rebuilt_meta["chunk_id"] self.assertEqual(rebuilt_meta, old_info[pos]["dl_meta"]) def test_content_0_byte_rebuild_pos_0_0(self): self._test_rebuild(0, ["0.0"]) def test_content_0_byte_rebuild_pos_0_0_and_0_p0(self): self._test_rebuild(0, ["0.0", "0.p0"]) def test_content_1_byte_rebuild_pos_0_0(self): self._test_rebuild(1, ["0.0"]) def test_content_1_byte_rebuild_pos_0_p0(self): self._test_rebuild(1, ["0.p0"]) def test_content_1_byte_rebuild_pos_0_0_and_0_p0(self): self._test_rebuild(1, ["0.0", "0.p0"]) def test_content_chunksize_bytes_rebuild_pos_0_0(self): self._test_rebuild(self.conf["chunk_size"], ["0.0"]) def test_content_chunksize_bytes_rebuild_pos_0_0_and_0_1(self): self._test_rebuild(self.conf["chunk_size"], ["0.0", "0.1"]) def test_content_chunksize_bytes_rebuild_pos_0_0_and_0_p0(self): self._test_rebuild(self.conf["chunk_size"], ["0.0", "0.p0"]) def test_content_chunksize_bytes_rebuild_pos_0_p0_and_0_p1(self): self._test_rebuild(self.conf["chunk_size"], ["0.p0", "0.p1"]) def test_content_chunksize_bytes_rebuild_more_than_k_chunk(self): self.assertRaises(UnrecoverableContent, self._test_rebuild, self.conf["chunk_size"], ["0.0", "0.1", "0.2"]) def _new_content(self, data, broken_pos_list=[]): old_content = self.content_factory.new(self.container_id, "titi", len(data), "RAIN") self.assertEqual(type(old_content), RainContent) old_content.upload(StringIO.StringIO(data)) for pos in broken_pos_list: c = old_content.chunks.filter(pos=pos)[0] self.blob_client.chunk_delete(c.url) # get the new structure of the uploaded content return self.content_factory.get(self.container_id, old_content.content_id) def test_orphan_chunk(self): content = self._new_content(random_data(10)) self.assertRaises(OrphanChunk, content.rebuild_chunk, "uNkNoWnId") def test_rebuild_on_the_fly(self): data = random_data(self.conf["chunk_size"]) content = self._new_content(data, ["0.0", "0.p0"]) stream = content.rebuild_metachunk("0", 
on_the_fly=True) dl_data = "".join(stream) self.assertEqual(dl_data, data) del_chunk_0_0 = content.chunks.filter(pos="0.0")[0] del_chunk_0_p0 = content.chunks.filter(pos="0.p0")[0] self.assertRaises(NotFound, self.blob_client.chunk_get, del_chunk_0_0.url) self.assertRaises(NotFound, self.blob_client.chunk_get, del_chunk_0_p0.url) def _test_download(self, data_size, broken_pos_list): data = random_data(data_size) content = self._new_content(data, broken_pos_list) downloaded_data = "".join(content.download()) self.assertEqual(downloaded_data, data) for pos in broken_pos_list: c = content.chunks.filter(pos=pos)[0] self.assertRaises(NotFound, self.blob_client.chunk_delete, c.url) def test_download_content_0_byte_without_broken_chunks(self): self._test_download(0, []) def test_download_content_1_byte_without_broken_chunks(self): self._test_download(1, []) def test_download_content_chunksize_bytes_without_broken_chunks(self): self._test_download(self.conf["chunk_size"], []) def test_download_content_chunksize_plus_1_without_broken_chunks(self): self._test_download(self.conf["chunk_size"] + 1, []) def test_download_content_0_byte_with_broken_0_0_and_0_p0(self): self._test_download(0, ["0.0", "0.p0"]) def test_download_content_1_byte_with_broken_0_0_and_0_p0(self): self._test_download(1, ["0.0", "0.p0"]) def test_download_content_2xchunksize_with_broken_0_2_and_1_0(self): self._test_download(2 * self.conf["chunk_size"], ["0.2", "1.0"]) def test_download_content_chunksize_bytes_with_3_broken_chunks(self): data = random_data(self.conf["chunk_size"]) content = self._new_content(data, ["0.0", "0.1", "0.2"]) gen = content.download() self.assertRaises(UnrecoverableContent, gen.next) def test_download_interrupt_close(self): data = random_data(self.conf["chunk_size"]) content = self._new_content(data, ["0.p0"]) download_iter = content.download() dl_data = "" for buf in download_iter: dl_data += buf self.assertEqual(len(dl_data), len(data)) self.assertEqual(dl_data, data) download_iter.close()
class BlobMoverWorker(object): def __init__(self, conf, logger, volume): self.conf = conf self.logger = logger or get_logger(conf) self.volume = volume self.namespace, self.address = check_volume(self.volume) self.running = False self.run_time = 0 self.passes = 0 self.errors = 0 self.last_reported = 0 self.last_usage_check = 0 self.chunks_run_time = 0 self.bytes_running_time = 0 self.bytes_processed = 0 self.total_bytes_processed = 0 self.total_chunks_processed = 0 self.concurrency = int_value(conf.get('concurrency'), 10) self.usage_target = int_value( conf.get('usage_target'), 0) self.usage_check_interval = int_value( conf.get('usage_check_interval'), 3600) self.report_interval = int_value( conf.get('report_interval'), 3600) self.max_chunks_per_second = int_value( conf.get('chunks_per_second'), 30) self.limit = int_value(conf.get('limit'), 0) self.allow_links = true_value(conf.get('allow_links', True)) self.blob_client = BlobClient(conf) self.container_client = ContainerClient(conf, logger=self.logger) self.content_factory = ContentFactory(conf) def mover_pass(self, **kwargs): start_time = report_time = time.time() total_errors = 0 mover_time = 0 pool = GreenPool(self.concurrency) paths = paths_gen(self.volume) for path in paths: loop_time = time.time() now = time.time() if now - self.last_usage_check >= self.usage_check_interval: free_ratio = statfs(self.volume) usage = (1 - float(free_ratio)) * 100 if usage <= self.usage_target: self.logger.info( 'current usage %.2f%%: target reached (%.2f%%)', usage, self.usage_target) self.last_usage_check = now break # Spawn a chunk move task. # The call will block if no green thread is available. pool.spawn_n(self.safe_chunk_move, path) self.chunks_run_time = ratelimit( self.chunks_run_time, self.max_chunks_per_second ) self.total_chunks_processed += 1 now = time.time() if now - self.last_reported >= self.report_interval: self.logger.info( '%(start_time)s ' '%(passes)d ' '%(errors)d ' '%(c_rate).2f ' '%(b_rate).2f ' '%(total).2f ' '%(mover_time).2f ' '%(mover_rate).2f' % { 'start_time': time.ctime(report_time), 'passes': self.passes, 'errors': self.errors, 'c_rate': self.passes / (now - report_time), 'b_rate': self.bytes_processed / (now - report_time), 'total': (now - start_time), 'mover_time': mover_time, 'mover_rate': mover_time / (now - start_time) } ) report_time = now total_errors += self.errors self.passes = 0 self.bytes_processed = 0 self.last_reported = now mover_time += (now - loop_time) if self.limit != 0 and self.total_chunks_processed >= self.limit: break elapsed = (time.time() - start_time) or 0.000001 self.logger.info( '%(elapsed).02f ' '%(errors)d ' '%(chunk_rate).2f ' '%(bytes_rate).2f ' '%(mover_time).2f ' '%(mover_rate).2f' % { 'elapsed': elapsed, 'errors': total_errors + self.errors, 'chunk_rate': self.total_chunks_processed / elapsed, 'bytes_rate': self.total_bytes_processed / elapsed, 'mover_time': mover_time, 'mover_rate': mover_time / elapsed } ) def safe_chunk_move(self, path): chunk_id = path.rsplit('/', 1)[-1] if len(chunk_id) != STRLEN_CHUNKID: self.logger.warn('WARN Not a chunk %s', path) return for char in chunk_id: if char not in hexdigits: self.logger.warn('WARN Not a chunk %s', path) return try: self.chunk_move(path, chunk_id) except Exception as err: self.errors += 1 self.logger.error('ERROR while moving chunk %s: %s', path, err) self.passes += 1 def load_chunk_metadata(self, path, chunk_id): with open(path) as file_: meta, _ = read_chunk_metadata(file_, chunk_id) return meta def chunk_move(self, path, chunk_id):
meta = self.load_chunk_metadata(path, chunk_id) container_id = meta['container_id'] content_id = meta['content_id'] chunk_id = meta['chunk_id'] try: content = self.content_factory.get(container_id, content_id) except ContentNotFound: raise exc.OrphanChunk('Content not found') new_chunk = content.move_chunk(chunk_id) self.logger.info( 'moved chunk http://%s/%s to %s', self.address, chunk_id, new_chunk['url']) if self.allow_links: old_links = meta['links'] for chunk_id, fullpath in old_links.iteritems(): # pylint: disable=unbalanced-tuple-unpacking account, container, _, _, content_id = \ decode_fullpath(fullpath) container_id = cid_from_name(account, container) try: content = self.content_factory.get(container_id, content_id) except ContentNotFound: raise exc.OrphanChunk('Content not found') new_linked_chunk = content.move_linked_chunk( chunk_id, new_chunk['url']) self.logger.info( 'moved chunk http://%s/%s to %s', self.address, chunk_id, new_linked_chunk['url'])
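The stop condition in mover_pass() is worth spelling out: statfs() is taken here to return the volume's free-space ratio, so the usage is its complement expressed as a percentage. A minimal restatement under that assumption (the helper name is ours):

def should_stop_moving(free_ratio, usage_target):
    # Mirror of the mover_pass() check above: statfs() is assumed to
    # return the fraction of free space on the volume (0.0 - 1.0).
    usage = (1 - float(free_ratio)) * 100
    return usage <= usage_target

# should_stop_moving(0.75, 30) -> True: the volume is only 25% full,
# already at or below the 30% usage target, so the pass can stop.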
class TestDupContent(BaseTestCase): def setUp(self): super(TestDupContent, self).setUp() if len(self.conf['rawx']) < 3: self.skipTest("Not enough rawx. " "Dup tests need at least 3 rawx to run") self.namespace = self.conf['namespace'] self.account = self.conf['account'] self.chunk_size = self.conf['chunk_size'] self.gridconf = {"namespace": self.namespace} self.content_factory = ContentFactory(self.gridconf) self.container_client = ContainerClient(self.gridconf) self.blob_client = BlobClient() self.container_name = "TestDupContent%f" % time.time() self.container_client.container_create(acct=self.account, ref=self.container_name) self.container_id = cid_from_name(self.account, self.container_name).upper() def tearDown(self): super(TestDupContent, self).tearDown() def _test_upload(self, stgpol, data_size): data = random_data(data_size) content = self.content_factory.new(self.container_id, "titi", len(data), stgpol) self.assertEqual(type(content), DupContent) content.upload(StringIO.StringIO(data)) meta, chunks = self.container_client.content_show( cid=self.container_id, content=content.content_id) chunks = ChunksHelper(chunks) self.assertEqual(meta['hash'], md5_data(data)) self.assertEqual(meta['length'], str(len(data))) self.assertEqual(meta['policy'], stgpol) self.assertEqual(meta['name'], "titi") metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size)) if metachunk_nb == 0: metachunk_nb = 1 # special case for empty content if stgpol == "THREECOPIES": nb_copy = 3 elif stgpol == "TWOCOPIES": nb_copy = 2 elif stgpol == "SINGLE": nb_copy = 1 self.assertEqual(len(chunks), metachunk_nb * nb_copy) for pos in range(metachunk_nb): chunks_at_pos = chunks.filter(pos=pos) self.assertEqual(len(chunks_at_pos), nb_copy) data_begin = pos * self.chunk_size data_end = pos * self.chunk_size + self.chunk_size chunk_hash = md5_data(data[data_begin:data_end]) for chunk in chunks_at_pos: meta, stream = self.blob_client.chunk_get(chunk.url) self.assertEqual(md5_stream(stream), chunk_hash) self.assertEqual(meta['content_size'], str(len(data))) self.assertEqual(meta['content_path'], "titi") self.assertEqual(meta['content_cid'], self.container_id) self.assertEqual(meta['content_id'], content.content_id) self.assertEqual(meta['chunk_id'], chunk.id) self.assertEqual(meta['chunk_pos'], str(pos)) self.assertEqual(meta['chunk_hash'], chunk_hash) def test_twocopies_upload_0_byte(self): self._test_upload("TWOCOPIES", 0) def test_twocopies_upload_1_byte(self): self._test_upload("TWOCOPIES", 1) def test_twocopies_upload_chunksize_bytes(self): self._test_upload("TWOCOPIES", self.chunk_size) def test_twocopies_upload_chunksize_plus_1_bytes(self): self._test_upload("TWOCOPIES", self.chunk_size + 1) def test_single_upload_0_byte(self): self._test_upload("SINGLE", 0) def test_single_upload_chunksize_plus_1_bytes(self): self._test_upload("SINGLE", self.chunk_size + 1) def test_chunks_cleanup_when_upload_failed(self): data = random_data(2 * self.chunk_size) content = self.content_factory.new(self.container_id, "titi", len(data), "TWOCOPIES") self.assertEqual(type(content), DupContent) # set bad url for position 1 for chunk in content.chunks.filter(pos=1): chunk.url = "http://127.0.0.1:9/DEADBEEF" self.assertRaises(Exception, content.upload, StringIO.StringIO(data)) for chunk in content.chunks.exclude(pos=1): self.assertRaises(NotFound, self.blob_client.chunk_head, chunk.url) def _new_content(self, stgpol, data, broken_pos_list=[]): old_content = self.content_factory.new(self.container_id, "titi", len(data), stgpol)
self.assertEqual(type(old_content), DupContent) old_content.upload(StringIO.StringIO(data)) broken_chunks_info = {} for pos, idx in broken_pos_list: c = old_content.chunks.filter(pos=pos)[idx] meta, stream = self.blob_client.chunk_get(c.url) if pos not in broken_chunks_info: broken_chunks_info[pos] = {} broken_chunks_info[pos][idx] = { "url": c.url, "id": c.id, "hash": c.hash, "dl_meta": meta, "dl_hash": md5_stream(stream) } self.blob_client.chunk_delete(c.url) # get the new structure of the uploaded content return (self.content_factory.get(self.container_id, old_content.content_id), broken_chunks_info) def _test_rebuild(self, stgpol, data_size, broken_pos_list, full_rebuild_pos): data = random_data(data_size) content, broken_chunks_info = self._new_content( stgpol, data, broken_pos_list) rebuild_pos, rebuild_idx = full_rebuild_pos rebuild_chunk_info = broken_chunks_info[rebuild_pos][rebuild_idx] content.rebuild_chunk(rebuild_chunk_info["id"]) # get the new structure of the content rebuilt_content = self.content_factory.get(self.container_id, content.content_id) self.assertEqual(type(rebuilt_content), DupContent) # find the rebuilt chunk for c in rebuilt_content.chunks.filter(pos=rebuild_pos): if len(content.chunks.filter(id=c.id)) > 0: # not the rebuilt chunk # if this chunk is broken, it must not have been rebuilt for b_c_i in broken_chunks_info[rebuild_pos].values(): if c.id == b_c_i["id"]: with ExpectedException(NotFound): _, _ = self.blob_client.chunk_get(c.url) continue meta, stream = self.blob_client.chunk_get(c.url) self.assertEqual(meta["chunk_id"], c.id) self.assertEqual(md5_stream(stream), rebuild_chunk_info["dl_hash"]) self.assertEqual(c.hash, rebuild_chunk_info["hash"]) self.assertThat(c.url, NotEquals(rebuild_chunk_info["url"])) del meta["chunk_id"] del rebuild_chunk_info["dl_meta"]["chunk_id"] self.assertEqual(meta, rebuild_chunk_info["dl_meta"]) def test_2copies_content_0_byte_1broken_rebuild_pos_0_idx_0(self): self._test_rebuild("TWOCOPIES", 0, [(0, 0)], (0, 0)) def test_2copies_content_1_byte_1broken_rebuild_pos_0_idx_1(self): self._test_rebuild("TWOCOPIES", 1, [(0, 1)], (0, 1)) def test_3copies_content_chunksize_bytes_2broken_rebuild_pos_0_idx_1(self): if len(self.conf['rawx']) <= 3: self.skipTest("Need more than 3 rawx") self._test_rebuild("THREECOPIES", self.chunk_size, [(0, 0), (0, 1)], (0, 1)) def test_3copies_content_2xchksize_bytes_2broken_rebuild_pos_1_idx_2(self): if len(self.conf['rawx']) <= 3: self.skipTest("Need more than 3 rawx") self._test_rebuild("THREECOPIES", 2 * self.chunk_size, [(1, 0), (1, 2)], (1, 2)) def test_2copies_content_0_byte_2broken_rebuild_pos_0_idx_0(self): with ExpectedException(UnrecoverableContent): self._test_rebuild("TWOCOPIES", 0, [(0, 0), (0, 1)], (0, 0)) def _test_download(self, stgpol, data_size, broken_pos_list): data = random_data(data_size) content, _ = self._new_content(stgpol, data, broken_pos_list) downloaded_data = "".join(content.download()) self.assertEqual(downloaded_data, data) for pos, idx in broken_pos_list: # check nothing has been rebuilt c = content.chunks.filter(pos=pos)[0] self.assertRaises(NotFound, self.blob_client.chunk_delete, c.url) def test_twocopies_download_content_0_byte_without_broken_chunks(self): self._test_download("TWOCOPIES", 0, []) def test_twocopies_download_content_0_byte_with_broken_0_0(self): self._test_download("TWOCOPIES", 0, [(0, 0)]) def test_twocopies_download_content_1_byte_without_broken_chunks(self): self._test_download("TWOCOPIES", 1, []) def 
test_twocopies_download_content_1_byte_with_broken_0_0(self): self._test_download("TWOCOPIES", 1, [(0, 0)]) def test_twocopies_download_chunksize_bytes_without_broken_chunks(self): self._test_download("TWOCOPIES", self.chunk_size, []) def test_twocopies_download_2xchuksize_bytes_with_broken_0_0_and_1_0(self): self._test_download("TWOCOPIES", self.chunk_size * 2, [(0, 0), (1, 0)]) def test_twocopies_download_content_chunksize_bytes_2_broken_chunks(self): data = random_data(self.chunk_size) content, _ = self._new_content("TWOCOPIES", data, [(0, 0), (0, 1)]) gen = content.download() self.assertRaises(UnrecoverableContent, gen.next) def test_single_download_content_1_byte_without_broken_chunks(self): self._test_download("SINGLE", 1, []) def test_single_download_chunksize_bytes_plus_1_without_broken_chunk(self): self._test_download("SINGLE", self.chunk_size * 2, [])
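The duplication tests above derive the expected chunk count directly from the policy: one chunk per replica, per metachunk. A standalone restatement of that arithmetic (the helper name and table are ours, for illustration only):

import math

POLICY_COPIES = {'SINGLE': 1, 'TWOCOPIES': 2, 'THREECOPIES': 3}

def expected_chunk_count(data_size, chunk_size, stgpol):
    # Same arithmetic as _test_upload() above: one metachunk per
    # chunk_size slice (at least one, even for empty content),
    # replicated nb_copy times.
    metachunk_nb = max(1, int(math.ceil(float(data_size) / chunk_size)))
    return metachunk_nb * POLICY_COPIES[stgpol]

# expected_chunk_count(0, 1048576, 'TWOCOPIES') -> 2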
class BlobRebuilderWorker(RebuilderWorker): def __init__(self, conf, logger, volume, try_chunk_delete=False, **kwargs): super(BlobRebuilderWorker, self).__init__(conf, logger, **kwargs) self.volume = volume self.bytes_processed = 0 self.total_bytes_processed = 0 self.dry_run = true_value(conf.get('dry_run', False)) self.allow_same_rawx = true_value(conf.get('allow_same_rawx')) self.rdir_client = RdirClient(conf, logger=self.logger) self.content_factory = ContentFactory(conf, logger=self.logger) self.try_chunk_delete = try_chunk_delete def _rebuild_one(self, chunk, **kwargs): cid, content_id, chunk_id_or_pos, _ = chunk if self.dry_run: self.dryrun_chunk_rebuild(cid, content_id, chunk_id_or_pos) else: self.safe_chunk_rebuild(cid, content_id, chunk_id_or_pos) def _get_report(self, num, start_time, end_time, total_time, report_time, **kwargs): return ('RUN %(volume)s ' 'worker=%(num)d ' 'started=%(start_time)s ' 'passes=%(passes)d ' 'errors=%(errors)d ' 'chunks=%(nb_chunks)d %(c_rate).2f/s ' 'bytes=%(nb_bytes)d %(b_rate).2fB/s ' 'waiting_time=%(waiting_time).2f ' 'rebuilder_time=%(rebuilder_time).2f ' 'total_time=%(total_time).2f ' '(rebuilder: %(success_rate).2f%%)' % { 'volume': self.volume, 'num': num, 'start_time': datetime.fromtimestamp(int(report_time)).isoformat(), 'passes': self.passes, 'errors': self.errors, 'nb_chunks': self.total_items_processed, 'nb_bytes': self.total_bytes_processed, 'c_rate': self.passes / (end_time - report_time), 'b_rate': self.bytes_processed / (end_time - report_time), 'waiting_time': self.waiting_time, 'rebuilder_time': self.rebuilder_time, 'total_time': (end_time - start_time), 'success_rate': 100 * ((self.total_items_processed - self.errors) / float(self.total_items_processed or 1)) }) def dryrun_chunk_rebuild(self, container_id, content_id, chunk_id_or_pos): self.logger.info( "[dryrun] Rebuilding " "container %s, content %s, chunk %s", container_id, content_id, chunk_id_or_pos) self.passes += 1 def safe_chunk_rebuild(self, container_id, content_id, chunk_id_or_pos): try: self.chunk_rebuild(container_id, content_id, chunk_id_or_pos) except Exception as e: self.errors += 1 self.logger.error('ERROR while rebuilding chunk %s|%s|%s: %s', container_id, content_id, chunk_id_or_pos, e) self.passes += 1 def chunk_rebuild(self, container_id, content_id, chunk_id_or_pos): self.logger.info('Rebuilding (container %s, content %s, chunk %s)', container_id, content_id, chunk_id_or_pos) try: content = self.content_factory.get(container_id, content_id) except ContentNotFound: raise OrphanChunk('Content not found: possible orphan chunk') chunk_size = 0 chunk_pos = None if len(chunk_id_or_pos) < 32: chunk_pos = chunk_id_or_pos chunk_id = None metapos = int(chunk_pos.split('.', 1)[0]) chunk_size = content.chunks.filter(metapos=metapos).all()[0].size else: if '/' in chunk_id_or_pos: chunk_id = chunk_id_or_pos.rsplit('/', 1)[-1] else: chunk_id = chunk_id_or_pos chunk = content.chunks.filter(id=chunk_id).one() if chunk is None: raise OrphanChunk('Chunk not found in content: possible orphan chunk') elif self.volume and chunk.host != self.volume: raise ValueError("Chunk does not belong to this volume") chunk_size = chunk.size content.rebuild_chunk(chunk_id, allow_same_rawx=self.allow_same_rawx, chunk_pos=chunk_pos) if self.try_chunk_delete: try: content.blob_client.chunk_delete(chunk.url) self.logger.info("Chunk %s deleted", chunk.url) except NotFound as exc: self.logger.debug("Chunk %s: %s", chunk.url, exc) # This call does not raise exception if chunk is not referenced if chunk_id is not None: self.rdir_client.chunk_delete(chunk.host, container_id, content_id, chunk_id) self.bytes_processed += chunk_size self.total_bytes_processed += chunk_size
class TestContentFactory(BaseTestCase): def setUp(self): super(TestContentFactory, self).setUp() self.namespace = self.conf['namespace'] self.chunk_size = self.conf['chunk_size'] self.gridconf = {"namespace": self.namespace} self.content_factory = ContentFactory(self.gridconf) self.container_name = "TestContentFactory%f" % time.time() self.container_client = ContainerClient(self.gridconf) self.container_client.container_create(acct=self.account, ref=self.container_name) self.container_id = cid_from_name(self.account, self.container_name).upper() def tearDown(self): super(TestContentFactory, self).tearDown() def test_extract_datasec(self): self.content_factory.ns_info = { "data_security": { "DUPONETWO": "DUP:distance=1|nb_copy=2", "RAIN": "RAIN:k=6|m=2|algo=liber8tion" }, "storage_policy": { "RAIN": "NONE:RAIN:NONE", "SINGLE": "NONE:NONE:NONE", "TWOCOPIES": "NONE:DUPONETWO:NONE" } } ds_type, ds_args = self.content_factory._extract_datasec("RAIN") self.assertEqual(ds_type, "RAIN") self.assertEqual(ds_args, { "k": "6", "m": "2", "algo": "liber8tion" }) ds_type, ds_args = self.content_factory._extract_datasec("SINGLE") self.assertEqual(ds_type, "DUP") self.assertEqual(ds_args, { "nb_copy": "1", "distance": "0" }) ds_type, ds_args = self.content_factory._extract_datasec("TWOCOPIES") self.assertEqual(ds_type, "DUP") self.assertEqual(ds_args, { "nb_copy": "2", "distance": "1" }) self.assertRaises(InconsistentContent, self.content_factory._extract_datasec, "UnKnOwN") def test_get_rain(self): meta = { "chunk-method": "plain/rain?algo=liber8tion&k=6&m=2", "ctime": "1450176946", "deleted": "False", "hash": "E952A419957A6E405BFC53EC65483F73", "hash-method": "md5", "id": "3FA2C4A1ED2605005335A276890EC458", "length": "658", "mime-type": "application/octet-stream", "name": "tox.ini", "policy": "RAIN", "version": "1450176946676289" } chunks = [ { "url": "http://127.0.0.1:6012/A0A0", "pos": "0.p0", "size": 512, "hash": "E7D4E4AD460971CA2E3141F2102308D4"}, { "url": "http://127.0.0.1:6010/A01", "pos": "0.1", "size": 146, "hash": "760AB5DA7C51A3654F1CA622687CD6C3"}, { "url": "http://127.0.0.1:6011/A00", "pos": "0.0", "size": 512, "hash": "B1D08B86B8CAA90A2092CCA0DF9201DB"}, { "url": "http://127.0.0.1:6013/A0A1", "pos": "0.p1", "size": 512, "hash": "DA9D7F72AEEA5791565724424CE45C16"} ] self.content_factory.container_client.content_show = Mock( return_value=(meta, chunks)) c = self.content_factory.get("xxx_container_id", "xxx_content_id") self.assertEqual(type(c), RainContent) self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458") self.assertEqual(c.length, 658) self.assertEqual(c.path, "tox.ini") self.assertEqual(c.version, "1450176946676289") self.assertEqual(c.algo, "liber8tion") self.assertEqual(c.k, 6) self.assertEqual(c.m, 2) self.assertEqual(len(c.chunks), 4) self.assertEqual(c.chunks[0].raw(), chunks[2]) self.assertEqual(c.chunks[1].raw(), chunks[1]) self.assertEqual(c.chunks[2].raw(), chunks[0]) self.assertEqual(c.chunks[3].raw(), chunks[3]) def test_get_dup(self): meta = { "chunk-method": "plain/bytes", "ctime": "1450176946", "deleted": "False", "hash": "E952A419957A6E405BFC53EC65483F73", "hash-method": "md5", "id": "3FA2C4A1ED2605005335A276890EC458", "length": "658", "mime-type": "application/octet-stream", "name": "tox.ini", "policy": "TWOCOPIES", "version": "1450176946676289" } chunks = [ { "url": "http://127.0.0.1:6010/A0", "pos": "0", "size": 658, "hash": "E952A419957A6E405BFC53EC65483F73"}, { "url": "http://127.0.0.1:6011/A1", "pos": "0", "size": 658, "hash": 
"E952A419957A6E405BFC53EC65483F73"} ] self.content_factory.container_client.content_show = Mock( return_value=(meta, chunks)) c = self.content_factory.get("xxx_container_id", "xxx_content_id") self.assertEqual(type(c), DupContent) self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458") self.assertEqual(c.length, 658) self.assertEqual(c.path, "tox.ini") self.assertEqual(c.version, "1450176946676289") self.assertEqual(c.nb_copy, 2) self.assertEqual(c.distance, 1) self.assertEqual(len(c.chunks), 2) self.assertEqual(c.chunks[0].raw(), chunks[0]) self.assertEqual(c.chunks[1].raw(), chunks[1]) def test_new_rain(self): meta = { "chunk-method": "plain/rain?algo=liber8tion&k=6&m=2", "ctime": "1450341162", "deleted": "False", "hash": "", "hash-method": "md5", "id": "F4B1C8DD132705007DE8B43D0709DAA2", "length": "1000", "mime-type": "application/octet-stream", "name": "titi", "policy": "RAIN", "version": "1450341162332663" } chunks = [ { "url": "http://127.0.0.1:6010/0_p1", "pos": "0.p1", "size": 1048576, "hash": "00000000000000000000000000000000"}, { "url": "http://127.0.0.1:6011/0_p0", "pos": "0.p0", "size": 1048576, "hash": "00000000000000000000000000000000"}, { "url": "http://127.0.0.1:6016/0_1", "pos": "0.1", "size": 1048576, "hash": "00000000000000000000000000000000"}, { "url": "http://127.0.0.1:6017/0_0", "pos": "0.0", "size": 1048576, "hash": "00000000000000000000000000000000"} ] self.content_factory.container_client.content_prepare = Mock( return_value=(meta, chunks)) c = self.content_factory.new("xxx_container_id", "titi", 1000, "RAIN") self.assertEqual(type(c), RainContent) self.assertEqual(c.content_id, "F4B1C8DD132705007DE8B43D0709DAA2") self.assertEqual(c.length, 1000) self.assertEqual(c.path, "titi") self.assertEqual(c.version, "1450341162332663") self.assertEqual(c.algo, "liber8tion") self.assertEqual(c.k, 6) self.assertEqual(c.m, 2) self.assertEqual(len(c.chunks), 4) self.assertEqual(c.chunks[0].raw(), chunks[3]) self.assertEqual(c.chunks[1].raw(), chunks[2]) self.assertEqual(c.chunks[2].raw(), chunks[1]) self.assertEqual(c.chunks[3].raw(), chunks[0]) def _new_content(self, stgpol, data): old_content = self.content_factory.new(self.container_id, "titi", len(data), stgpol) old_content.upload(StringIO.StringIO(data)) return self.content_factory.get(self.container_id, old_content.content_id) def _test_change_policy(self, data_size, old_policy, new_policy): if (old_policy == "RAIN" or new_policy == "RAIN") \ and len(self.conf['rawx']) < 8: self.skipTest("RAIN: Need more than 8 rawx to run") data = random_data(data_size) obj_type = { "SINGLE": DupContent, "TWOCOPIES": DupContent, "THREECOPIES": DupContent, "RAIN": RainContent } old_content = self._new_content(old_policy, data) self.assertEqual(type(old_content), obj_type[old_policy]) changed_content = self.content_factory.change_policy( old_content.container_id, old_content.content_id, new_policy) self.assertRaises(NotFound, self.container_client.content_show, self.account, cid=old_content.container_id, content=old_content.content_id) new_content = self.content_factory.get(self.container_id, changed_content.content_id) self.assertEqual(type(new_content), obj_type[new_policy]) downloaded_data = "".join(new_content.download()) self.assertEqual(downloaded_data, data) def test_change_content_0_byte_policy_single_to_rain(self): self._test_change_policy(0, "SINGLE", "RAIN") def test_change_content_0_byte_policy_rain_to_twocopies(self): self._test_change_policy(0, "RAIN", "TWOCOPIES") def 
test_change_content_1_byte_policy_single_to_rain(self): self._test_change_policy(1, "SINGLE", "RAIN") def test_change_content_chunksize_bytes_policy_twocopies_to_rain(self): self._test_change_policy(self.chunk_size, "TWOCOPIES", "RAIN") def test_change_content_2xchunksize_bytes_policy_threecopies_to_rain(self): self._test_change_policy(self.chunk_size * 2, "THREECOPIES", "RAIN") def test_change_content_1_byte_policy_rain_to_threecopies(self): self._test_change_policy(1, "RAIN", "THREECOPIES") def test_change_content_chunksize_bytes_policy_rain_to_twocopies(self): self._test_change_policy(self.chunk_size, "RAIN", "TWOCOPIES") def test_change_content_2xchunksize_bytes_policy_rain_to_single(self): self._test_change_policy(self.chunk_size * 2, "RAIN", "SINGLE") def test_change_content_0_byte_policy_twocopies_to_threecopies(self): self._test_change_policy(0, "TWOCOPIES", "THREECOPIES") def test_change_content_chunksize_bytes_policy_single_to_twocopies(self): self._test_change_policy(self.chunk_size, "SINGLE", "TWOCOPIES") def test_change_content_2xchunksize_bytes_policy_3copies_to_single(self): self._test_change_policy(self.chunk_size * 2, "THREECOPIES", "SINGLE") def test_change_content_with_same_policy(self): data = random_data(10) old_content = self._new_content("TWOCOPIES", data) changed_content = self.content_factory.change_policy( old_content.container_id, old_content.content_id, "TWOCOPIES") self.assertEqual(old_content.content_id, changed_content.content_id) def test_change_policy_unknown_content(self): self.assertRaises(ContentNotFound, self.content_factory.change_policy, self.container_id, "1234", "SINGLE") def test_change_policy_unknown_storage_policy(self): data = random_data(10) old_content = self._new_content("TWOCOPIES", data) self.assertRaises(ClientException, self.content_factory.change_policy, self.container_id, old_content.content_id, "UnKnOwN")
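The mocked metadata above carries the erasure-coding parameters inside the "chunk-method" string (e.g. "plain/rain?algo=liber8tion&k=6&m=2"). A small sketch of how such a string can be split into a type and its parameters; this mirrors, but is not, the factory's own parsing:

from urlparse import parse_qsl  # urllib.parse.parse_qsl on Python 3

def parse_chunk_method(chunk_method):
    # "plain/rain?algo=liber8tion&k=6&m=2"
    #   -> ('plain/rain', {'algo': 'liber8tion', 'k': '6', 'm': '2'})
    chunk_type, _, params = chunk_method.partition('?')
    return chunk_type, dict(parse_qsl(params))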
class BlobRebuilderWorker(object): def __init__(self, conf, logger, volume): self.conf = conf self.logger = logger or get_logger(conf) self.volume = volume self.run_time = 0 self.passes = 0 self.errors = 0 self.last_reported = 0 self.chunks_run_time = 0 self.bytes_running_time = 0 self.bytes_processed = 0 self.total_bytes_processed = 0 self.total_chunks_processed = 0 self.dry_run = true_value( conf.get('dry_run', False)) self.report_interval = int_value( conf.get('report_interval'), 3600) self.max_chunks_per_second = int_value( conf.get('chunks_per_second'), 30) self.max_bytes_per_second = int_value( conf.get('bytes_per_second'), 10000000) self.rdir_fetch_limit = int_value( conf.get('rdir_fetch_limit'), 100) self.rdir_client = RdirClient(conf) self.content_factory = ContentFactory(conf) def rebuilder_pass_with_lock(self): self.rdir_client.admin_lock(self.volume, "rebuilder on %s" % gethostname()) try: self.rebuilder_pass() finally: self.rdir_client.admin_unlock(self.volume) def rebuilder_pass(self): start_time = report_time = time.time() total_errors = 0 rebuilder_time = 0 chunks = self.rdir_client.chunk_fetch(self.volume, limit=self.rdir_fetch_limit, rebuild=True) for container_id, content_id, chunk_id, data in chunks: loop_time = time.time() if self.dry_run: self.dryrun_chunk_rebuild(container_id, content_id, chunk_id) else: self.safe_chunk_rebuild(container_id, content_id, chunk_id) self.chunks_run_time = ratelimit( self.chunks_run_time, self.max_chunks_per_second ) self.total_chunks_processed += 1 now = time.time() if now - self.last_reported >= self.report_interval: self.logger.info( '%(start_time)s ' '%(passes)d ' '%(errors)d ' '%(c_rate).2f ' '%(b_rate).2f ' '%(total).2f ' '%(rebuilder_time).2f ' '%(rebuilder_rate).2f' % { 'start_time': time.ctime(report_time), 'passes': self.passes, 'errors': self.errors, 'c_rate': self.passes / (now - report_time), 'b_rate': self.bytes_processed / (now - report_time), 'total': (now - start_time), 'rebuilder_time': rebuilder_time, 'rebuilder_rate': rebuilder_time / (now - start_time) } ) report_time = now total_errors += self.errors self.passes = 0 self.bytes_processed = 0 self.last_reported = now rebuilder_time += (now - loop_time) elapsed = (time.time() - start_time) or 0.000001 self.logger.info( '%(elapsed).02f ' '%(errors)d ' '%(chunk_rate).2f ' '%(bytes_rate).2f ' '%(rebuilder_time).2f ' '%(rebuilder_rate).2f' % { 'elapsed': elapsed, 'errors': total_errors + self.errors, 'chunk_rate': self.total_chunks_processed / elapsed, 'bytes_rate': self.total_bytes_processed / elapsed, 'rebuilder_time': rebuilder_time, 'rebuilder_rate': rebuilder_time / elapsed } ) def dryrun_chunk_rebuild(self, container_id, content_id, chunk_id): self.logger.info("[dryrun] Rebuilding " "container %s, content %s, chunk %s", container_id, content_id, chunk_id) self.passes += 1 def safe_chunk_rebuild(self, container_id, content_id, chunk_id): try: self.chunk_rebuild(container_id, content_id, chunk_id) except Exception as e: self.errors += 1 self.logger.error('ERROR while rebuilding chunk %s|%s|%s: %s', container_id, content_id, chunk_id, e) self.passes += 1 def chunk_rebuild(self, container_id, content_id, chunk_id): self.logger.info('Rebuilding (container %s, content %s, chunk %s)', container_id, content_id, chunk_id) try: content = self.content_factory.get(container_id, content_id) except ContentNotFound: raise exc.OrphanChunk('Content not found') chunk = content.chunks.filter(id=chunk_id).one() if chunk is None: raise OrphanChunk("Chunk not found in content")
chunk_size = chunk.size content.rebuild_chunk(chunk_id) self.rdir_client.chunk_push(self.volume, container_id, content_id, chunk_id, rtime=int(time.time())) self.bytes_processed += chunk_size self.total_bytes_processed += chunk_size
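The loop above throttles itself with ratelimit(), feeding back the value returned by the previous call. The real helper lives in the oio common utilities; the stand-in below is only a sketch of the contract assumed here (sleep just long enough to stay under max_rate calls per second), not the actual implementation.

import time

def ratelimit(run_time, max_rate):
    """Illustrative stand-in: keep the caller under max_rate calls/s.

    run_time is the value returned by the previous call; the caller must
    feed the return value back on the next iteration, exactly as
    rebuilder_pass() does with self.chunks_run_time above.
    """
    if max_rate <= 0:
        return time.time()
    min_interval = 1.0 / max_rate
    now = time.time()
    elapsed = now - run_time
    if elapsed < min_interval:
        time.sleep(min_interval - elapsed)
        return run_time + min_interval
    return now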
class BlobRebuilderWorker(RebuilderWorker): def __init__(self, rebuilder, try_chunk_delete=False, **kwargs): super(BlobRebuilderWorker, self).__init__(rebuilder, **kwargs) self.dry_run = true_value(self.rebuilder.conf.get('dry_run', False)) self.allow_same_rawx = true_value( self.rebuilder.conf.get('allow_same_rawx')) self.try_chunk_delete = try_chunk_delete self.rdir_client = self.rebuilder.rdir_client self.content_factory = ContentFactory(self.rebuilder.conf, logger=self.logger) self.sender = None def _rebuild_one(self, chunk, **kwargs): container_id, content_id, chunk_id_or_pos, _ = chunk if self.dry_run: self.dryrun_chunk_rebuild(container_id, content_id, chunk_id_or_pos, **kwargs) return 0 else: return self.chunk_rebuild(container_id, content_id, chunk_id_or_pos, **kwargs) def update_processed(self, chunk, bytes_processed, error=None, **kwargs): container_id, content_id, chunk_id_or_pos, more = chunk if more is not None: reply = more.get('reply', None) if reply is not None: event = { 'rebuilder_id': reply['rebuilder_id'], 'beanstalkd': self.rebuilder.beanstalkd_listener.addr, 'cid': container_id, 'content_id': content_id, 'chunk_id_or_pos': chunk_id_or_pos } if error is not None: event['error'] = error if bytes_processed is not None: event['bytes_processed'] = bytes_processed try: if self.sender is None: self.sender = BeanstalkdSender(reply['addr'], reply['tube'], self.logger, **kwargs) elif self.sender.addr != reply['addr'] \ or self.sender.tube != reply['tube']: self.sender.close() self.sender = BeanstalkdSender(reply['addr'], reply['tube'], self.logger, **kwargs) self.sender.send_event(json.dumps(event)) except BeanstalkError as exc: self.logger.warn( 'reply failed %s: %s', self.rebuilder._item_to_string(chunk, **kwargs), exc) super(BlobRebuilderWorker, self).update_processed(chunk, bytes_processed, error=error, **kwargs) def dryrun_chunk_rebuild(self, container_id, content_id, chunk_id_or_pos, **kwargs): self.logger.info( "[dryrun] Rebuilding " "container %s, content %s, chunk %s", container_id, content_id, chunk_id_or_pos) def chunk_rebuild(self, container_id, content_id, chunk_id_or_pos, **kwargs): self.logger.info('Rebuilding (container %s, content %s, chunk %s)', container_id, content_id, chunk_id_or_pos) try: content = self.content_factory.get(container_id, content_id) except ContentNotFound: raise OrphanChunk('Content not found: possible orphan chunk') chunk_size = 0 chunk_pos = None if len(chunk_id_or_pos) < 32: chunk_pos = chunk_id_or_pos chunk_id = None metapos = int(chunk_pos.split('.', 1)[0]) chunk_size = content.chunks.filter(metapos=metapos).all()[0].size else: if '/' in chunk_id_or_pos: chunk_id = chunk_id_or_pos.rsplit('/', 1)[-1] else: chunk_id = chunk_id_or_pos chunk = content.chunks.filter(id=chunk_id).one() if chunk is None: raise OrphanChunk('Chunk not found in content: ' 'possible orphan chunk') elif self.volume and chunk.host != self.volume: raise ValueError("Chunk does not belong to this volume") chunk_size = chunk.size content.rebuild_chunk(chunk_id, allow_same_rawx=self.allow_same_rawx, chunk_pos=chunk_pos) if self.try_chunk_delete: try: content.blob_client.chunk_delete(chunk.url, **kwargs) self.logger.info("Chunk %s deleted", chunk.url) except NotFound as exc: self.logger.debug("Chunk %s: %s", chunk.url, exc) # This call does not raise exception if chunk is not referenced if chunk_id is not None: self.rdir_client.chunk_delete(chunk.host, container_id, content_id, chunk_id, **kwargs) return chunk_size
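update_processed() above serializes a small JSON event and pushes it back on the requester's beanstalkd tube. A hypothetical consumer of those reply events could look like the sketch below; only the event fields visible above are assumed, and the handler name and the printing are purely illustrative.

import json

def handle_rebuild_reply(job_body):
    # Decode one reply event emitted by update_processed() above.
    event = json.loads(job_body)
    chunk = event.get('chunk_id_or_pos')
    if 'error' in event:
        print('rebuild of %s failed: %s' % (chunk, event['error']))
    else:
        print('rebuilt %s (%s bytes)'
              % (chunk, event.get('bytes_processed')))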
class BlobMoverWorker(object): def __init__(self, conf, logger, volume): self.conf = conf self.logger = logger or get_logger(conf) self.volume = volume self.run_time = 0 self.passes = 0 self.errors = 0 self.last_reported = 0 self.last_usage_check = 0 self.chunks_run_time = 0 self.bytes_running_time = 0 self.bytes_processed = 0 self.total_bytes_processed = 0 self.total_chunks_processed = 0 self.usage_target = int_value( conf.get('usage_target'), 0) self.usage_check_interval = int_value( conf.get('usage_check_interval'), 3600) self.report_interval = int_value( conf.get('report_interval'), 3600) self.max_chunks_per_second = int_value( conf.get('chunks_per_second'), 30) self.max_bytes_per_second = int_value( conf.get('bytes_per_second'), 10000000) self.blob_client = BlobClient() self.container_client = ContainerClient(conf) self.content_factory = ContentFactory(conf) def mover_pass(self): self.namespace, self.address = check_volume(self.volume) start_time = report_time = time.time() total_errors = 0 mover_time = 0 paths = paths_gen(self.volume) for path in paths: loop_time = time.time() now = time.time() if now - self.last_usage_check >= self.usage_check_interval: used, total = statfs(self.volume) usage = (float(used) / total) * 100 if usage <= self.usage_target: self.logger.info( 'current usage %.2f%%: target reached (%.2f%%)', usage, self.usage_target) self.last_usage_check = now break self.safe_chunk_move(path) self.chunks_run_time = ratelimit( self.chunks_run_time, self.max_chunks_per_second ) self.total_chunks_processed += 1 now = time.time() if now - self.last_reported >= self.report_interval: self.logger.info( '%(start_time)s ' '%(passes)d ' '%(errors)d ' '%(c_rate).2f ' '%(b_rate).2f ' '%(total).2f ' '%(mover_time).2f ' '%(mover_rate).2f' % { 'start_time': time.ctime(report_time), 'passes': self.passes, 'errors': self.errors, 'c_rate': self.passes / (now - report_time), 'b_rate': self.bytes_processed / (now - report_time), 'total': (now - start_time), 'mover_time': mover_time, 'mover_rate': mover_time / (now - start_time) } ) report_time = now total_errors += self.errors self.passes = 0 self.bytes_processed = 0 self.last_reported = now mover_time += (now - loop_time) elapsed = (time.time() - start_time) or 0.000001 self.logger.info( '%(elapsed).02f ' '%(errors)d ' '%(chunk_rate).2f ' '%(bytes_rate).2f ' '%(mover_time).2f ' '%(mover_rate).2f' % { 'elapsed': elapsed, 'errors': total_errors + self.errors, 'chunk_rate': self.total_chunks_processed / elapsed, 'bytes_rate': self.total_bytes_processed / elapsed, 'mover_time': mover_time, 'mover_rate': mover_time / elapsed } ) def safe_chunk_move(self, path): try: self.chunk_move(path) except Exception as e: self.errors += 1 self.logger.error('ERROR while moving chunk %s: %s', path, e) self.passes += 1 def load_chunk_metadata(self, path): with open(path) as f: return read_chunk_metadata(f) def chunk_move(self, path): meta = self.load_chunk_metadata(path) content_cid = meta['content_cid'] content_id = meta['content_id'] chunk_id = meta['chunk_id'] chunk_url = 'http://%s/%s' % (self.address, meta['chunk_id']) try: content = self.content_factory.get(content_cid, content_id) except ContentNotFound: raise exc.OrphanChunk('Content not found') new_chunk = content.move_chunk(chunk_id) self.logger.info( 'moved chunk %s to %s', chunk_url, new_chunk['url'])
class BlobMoverWorker(object): def __init__(self, conf, logger, volume): self.conf = conf self.logger = logger or get_logger(conf) self.volume = volume self.run_time = 0 self.passes = 0 self.errors = 0 self.last_reported = 0 self.last_usage_check = 0 self.chunks_run_time = 0 self.bytes_running_time = 0 self.bytes_processed = 0 self.total_bytes_processed = 0 self.total_chunks_processed = 0 self.usage_target = int_value(conf.get("usage_target"), 0) self.usage_check_interval = int_value(conf.get("usage_check_interval"), 3600) self.report_interval = int_value(conf.get("report_interval"), 3600) self.max_chunks_per_second = int_value(conf.get("chunks_per_second"), 30) self.max_bytes_per_second = int_value(conf.get("bytes_per_second"), 10000000) self.blob_client = BlobClient() self.container_client = ContainerClient(conf) self.content_factory = ContentFactory(conf) def mover_pass(self): self.namespace, self.address = check_volume(self.volume) start_time = report_time = time.time() total_errors = 0 mover_time = 0 paths = paths_gen(self.volume) for path in paths: loop_time = time.time() now = time.time() if now - self.last_usage_check >= self.usage_check_interval: used, total = statfs(self.volume) usage = (float(used) / total) * 100 if usage <= self.usage_target: self.logger.info("current usage %.2f%%: target reached (%.2f%%)", usage, self.usage_target) self.last_usage_check = now break self.safe_chunk_move(path) self.chunks_run_time = ratelimit(self.chunks_run_time, self.max_chunks_per_second) self.total_chunks_processed += 1 now = time.time() if now - self.last_reported >= self.report_interval: self.logger.info( "%(start_time)s " "%(passes)d " "%(errors)d " "%(c_rate).2f " "%(b_rate).2f " "%(total).2f " "%(mover_time).2f " "%(mover_rate).2f" % { "start_time": time.ctime(report_time), "passes": self.passes, "errors": self.errors, "c_rate": self.passes / (now - report_time), "b_rate": self.bytes_processed / (now - report_time), "total": (now - start_time), "mover_time": mover_time, "mover_rate": mover_time / (now - start_time), } ) report_time = now total_errors += self.errors self.passes = 0 self.bytes_processed = 0 self.last_reported = now mover_time += now - loop_time elapsed = (time.time() - start_time) or 0.000001 self.logger.info( "%(elapsed).02f " "%(errors)d " "%(chunk_rate).2f " "%(bytes_rate).2f " "%(mover_time).2f " "%(mover_rate).2f" % { "elapsed": elapsed, "errors": total_errors + self.errors, "chunk_rate": self.total_chunks_processed / elapsed, "bytes_rate": self.total_bytes_processed / elapsed, "mover_time": mover_time, "mover_rate": mover_time / elapsed, } ) def safe_chunk_move(self, path): try: self.chunk_move(path) except Exception as e: self.errors += 1 self.logger.error("ERROR while moving chunk %s: %s", path, e) self.passes += 1 def load_chunk_metadata(self, path): with open(path) as f: return read_chunk_metadata(f) def chunk_move(self, path): meta = self.load_chunk_metadata(path) container_id = meta["container_id"] content_id = meta["content_id"] chunk_id = meta["chunk_id"] chunk_url = "http://%s/%s" % (self.address, meta["chunk_id"]) try: content = self.content_factory.get(container_id, content_id) except ContentNotFound: raise exc.OrphanChunk("Content not found") new_chunk = content.move_chunk(chunk_id) self.logger.info("moved chunk %s to %s", chunk_url, new_chunk["url"])
class BlobMoverWorker(object): def __init__(self, conf, logger, volume): self.conf = conf self.logger = logger or get_logger(conf) self.volume = volume self.namespace, self.address = check_volume(self.volume) self.running = False self.run_time = 0 self.passes = 0 self.errors = 0 self.last_reported = 0 self.last_usage_check = 0 self.chunks_run_time = 0 self.bytes_running_time = 0 self.bytes_processed = 0 self.total_bytes_processed = 0 self.total_chunks_processed = 0 self.concurrency = int_value(conf.get('concurrency'), 10) self.usage_target = int_value(conf.get('usage_target'), 0) self.usage_check_interval = int_value(conf.get('usage_check_interval'), 60) self.report_interval = int_value(conf.get('report_interval'), 3600) self.max_chunks_per_second = int_value(conf.get('chunks_per_second'), 30) self.limit = int_value(conf.get('limit'), 0) self.allow_links = true_value(conf.get('allow_links', True)) self.blob_client = BlobClient(conf) self.container_client = ContainerClient(conf, logger=self.logger) self.content_factory = ContentFactory( conf, container_client=self.container_client, blob_client=self.blob_client) self.excluded_rawx = \ [rawx for rawx in conf.get('excluded_rawx', '').split(',') if rawx] self.fake_excluded_chunks = self._generate_fake_excluded_chunks() def _generate_fake_excluded_chunks(self): conscience_client = ConscienceClient(self.conf, logger=self.logger) fake_excluded_chunks = list() fake_chunk_id = '0' * 64 for service_id in self.excluded_rawx: service_addr = conscience_client.resolve_service_id( 'rawx', service_id) chunk = dict() chunk['hash'] = '0000000000000000000000000000000000' chunk['pos'] = '0' chunk['size'] = 1 chunk['score'] = 1 chunk['url'] = 'http://' + service_id + '/' + fake_chunk_id chunk['real_url'] = 'http://' + service_addr + '/' + fake_chunk_id fake_excluded_chunks.append(chunk) return fake_excluded_chunks def mover_pass(self, **kwargs): start_time = report_time = time.time() total_errors = 0 mover_time = 0 pool = GreenPool(self.concurrency) paths = paths_gen(self.volume) for path in paths: loop_time = time.time() now = time.time() if now - self.last_usage_check >= self.usage_check_interval: free_ratio = statfs(self.volume) usage = (1 - float(free_ratio)) * 100 if usage <= self.usage_target: self.logger.info( 'current usage %.2f%%: target reached (%.2f%%)', usage, self.usage_target) break self.last_usage_check = now # Spawn a chunk move task. # The call will block if no green thread is available. 
pool.spawn_n(self.safe_chunk_move, path) self.chunks_run_time = ratelimit(self.chunks_run_time, self.max_chunks_per_second) self.total_chunks_processed += 1 now = time.time() if now - self.last_reported >= self.report_interval: self.logger.info( '%(start_time)s ' '%(passes)d ' '%(errors)d ' '%(c_rate).2f ' '%(b_rate).2f ' '%(total).2f ' '%(mover_time).2f ' '%(mover_rate).2f' % { 'start_time': time.ctime(report_time), 'passes': self.passes, 'errors': self.errors, 'c_rate': self.passes / (now - report_time), 'b_rate': self.bytes_processed / (now - report_time), 'total': (now - start_time), 'mover_time': mover_time, 'mover_rate': mover_time / (now - start_time) }) report_time = now total_errors += self.errors self.passes = 0 self.bytes_processed = 0 self.last_reported = now mover_time += (now - loop_time) if self.limit != 0 and self.total_chunks_processed >= self.limit: break pool.waitall() elapsed = (time.time() - start_time) or 0.000001 self.logger.info( '%(elapsed).02f ' '%(errors)d ' '%(chunk_rate).2f ' '%(bytes_rate).2f ' '%(mover_time).2f ' '%(mover_rate).2f' % { 'elapsed': elapsed, 'errors': total_errors + self.errors, 'chunk_rate': self.total_chunks_processed / elapsed, 'bytes_rate': self.total_bytes_processed / elapsed, 'mover_time': mover_time, 'mover_rate': mover_time / elapsed }) def safe_chunk_move(self, path): chunk_id = path.rsplit('/', 1)[-1] if len(chunk_id) != STRLEN_CHUNKID: self.logger.warn('WARN Not a chunk %s' % path) return for char in chunk_id: if char not in hexdigits: self.logger.warn('WARN Not a chunk %s' % path) return try: self.chunk_move(path, chunk_id) except Exception as err: self.errors += 1 self.logger.error('ERROR while moving chunk %s: %s', path, err) self.passes += 1 def load_chunk_metadata(self, path, chunk_id): with open(path) as file_: meta, _ = read_chunk_metadata(file_, chunk_id) return meta def chunk_move(self, path, chunk_id): meta = self.load_chunk_metadata(path, chunk_id) container_id = meta['container_id'] content_id = meta['content_id'] chunk_id = meta['chunk_id'] # Maybe skip the chunk because it doesn't match the size constraint chunk_size = int(meta['chunk_size']) min_chunk_size = int(self.conf.get('min_chunk_size', 0)) max_chunk_size = int(self.conf.get('max_chunk_size', 0)) if chunk_size < min_chunk_size: self.logger.debug("SKIP %s too small", path) return if max_chunk_size > 0 and chunk_size > max_chunk_size: self.logger.debug("SKIP %s too big", path) return # Start moving the chunk try: content = self.content_factory.get(container_id, content_id) except ContentNotFound: raise exc.OrphanChunk('Content not found') new_chunk = content.move_chunk( chunk_id, fake_excluded_chunks=self.fake_excluded_chunks) self.logger.info('moved chunk http://%s/%s to %s', self.address, chunk_id, new_chunk['url']) if self.allow_links: old_links = meta['links'] for chunk_id, fullpath in old_links.items(): # pylint: disable=unbalanced-tuple-unpacking account, container, _, _, content_id = \ decode_fullpath(fullpath) container_id = cid_from_name(account, container) try: content = self.content_factory.get(container_id, content_id) except ContentNotFound: raise exc.OrphanChunk('Content not found') new_linked_chunk = content.move_linked_chunk( chunk_id, new_chunk['url']) self.logger.info('moved chunk http://%s/%s to %s', self.address, chunk_id, new_linked_chunk['url'])
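The mover stops as soon as disk usage falls under usage_target. Note that the versions above disagree on what statfs() returns: the older one unpacks (used, total), the newer one a free-space ratio. Assuming the newer convention, the check reduces to the small worked example below.

def target_reached(free_ratio, usage_target):
    # usage_target is a percentage; free_ratio is in [0, 1].
    usage = (1 - float(free_ratio)) * 100
    return usage <= usage_target

assert target_reached(0.9, 15)        # volume 10% full, target 15%: stop
assert not target_reached(0.5, 15)    # volume 50% full: keep moving chunks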
class TestContentFactory(BaseTestCase): def setUp(self): super(TestContentFactory, self).setUp() self.namespace = self.conf['namespace'] self.chunk_size = self.conf['chunk_size'] self.gridconf = {"namespace": self.namespace} self.content_factory = ContentFactory(self.gridconf) self.container_name = "TestContentFactory%f" % time.time() self.blob_client = BlobClient() self.container_client = ContainerClient(self.gridconf) self.container_client.container_create(acct=self.account, ref=self.container_name) self.container_id = cid_from_name(self.account, self.container_name).upper() def tearDown(self): super(TestContentFactory, self).tearDown() def test_extract_datasec(self): self.content_factory.ns_info = { "data_security": { "DUPONETWO": "DUP:distance=1|nb_copy=2", "RAIN": "RAIN:k=6|m=2|algo=liber8tion" }, "storage_policy": { "RAIN": "NONE:RAIN:NONE", "SINGLE": "NONE:NONE:NONE", "TWOCOPIES": "NONE:DUPONETWO:NONE" } } ds_type, ds_args = self.content_factory._extract_datasec("RAIN") self.assertEqual(ds_type, "RAIN") self.assertEqual(ds_args, {"k": "6", "m": "2", "algo": "liber8tion"}) ds_type, ds_args = self.content_factory._extract_datasec("SINGLE") self.assertEqual(ds_type, "DUP") self.assertEqual(ds_args, {"nb_copy": "1", "distance": "0"}) ds_type, ds_args = self.content_factory._extract_datasec("TWOCOPIES") self.assertEqual(ds_type, "DUP") self.assertEqual(ds_args, {"nb_copy": "2", "distance": "1"}) self.assertRaises(InconsistentContent, self.content_factory._extract_datasec, "UnKnOwN") def test_get_rain(self): meta = { "chunk-method": "plain/rain?algo=liber8tion&k=6&m=2", "ctime": "1450176946", "deleted": "False", "hash": "E952A419957A6E405BFC53EC65483F73", "hash-method": "md5", "id": "3FA2C4A1ED2605005335A276890EC458", "length": "658", "mime-type": "application/octet-stream", "name": "tox.ini", "policy": "RAIN", "version": "1450176946676289" } chunks = [{ "url": "http://127.0.0.1:6012/A0A0", "pos": "0.p0", "size": 512, "hash": "E7D4E4AD460971CA2E3141F2102308D4" }, { "url": "http://127.0.0.1:6010/A01", "pos": "0.1", "size": 146, "hash": "760AB5DA7C51A3654F1CA622687CD6C3" }, { "url": "http://127.0.0.1:6011/A00", "pos": "0.0", "size": 512, "hash": "B1D08B86B8CAA90A2092CCA0DF9201DB" }, { "url": "http://127.0.0.1:6013/A0A1", "pos": "0.p1", "size": 512, "hash": "DA9D7F72AEEA5791565724424CE45C16" }] self.content_factory.container_client.content_show = Mock( return_value=(meta, chunks)) c = self.content_factory.get("xxx_container_id", "xxx_content_id") self.assertEqual(type(c), RainContent) self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458") self.assertEqual(c.length, 658) self.assertEqual(c.path, "tox.ini") self.assertEqual(c.version, "1450176946676289") self.assertEqual(c.algo, "liber8tion") self.assertEqual(c.k, 6) self.assertEqual(c.m, 2) self.assertEqual(len(c.chunks), 4) self.assertEqual(c.chunks[0].raw(), chunks[2]) self.assertEqual(c.chunks[1].raw(), chunks[1]) self.assertEqual(c.chunks[2].raw(), chunks[0]) self.assertEqual(c.chunks[3].raw(), chunks[3]) def test_get_dup(self): meta = { "chunk-method": "plain/bytes", "ctime": "1450176946", "deleted": "False", "hash": "E952A419957A6E405BFC53EC65483F73", "hash-method": "md5", "id": "3FA2C4A1ED2605005335A276890EC458", "length": "658", "mime-type": "application/octet-stream", "name": "tox.ini", "policy": "TWOCOPIES", "version": "1450176946676289" } chunks = [{ "url": "http://127.0.0.1:6010/A0", "pos": "0", "size": 658, "hash": "E952A419957A6E405BFC53EC65483F73" }, { "url": "http://127.0.0.1:6011/A1", "pos": "0", "size": 658, 
"hash": "E952A419957A6E405BFC53EC65483F73" }] self.content_factory.container_client.content_show = Mock( return_value=(meta, chunks)) c = self.content_factory.get("xxx_container_id", "xxx_content_id") self.assertEqual(type(c), DupContent) self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458") self.assertEqual(c.length, 658) self.assertEqual(c.path, "tox.ini") self.assertEqual(c.version, "1450176946676289") self.assertEqual(c.nb_copy, 2) self.assertEqual(c.distance, 1) self.assertEqual(len(c.chunks), 2) self.assertEqual(c.chunks[0].raw(), chunks[0]) self.assertEqual(c.chunks[1].raw(), chunks[1]) def test_get_unknown_content(self): self.assertRaises(ContentNotFound, self.content_factory.get, self.container_id, "1234") def test_new_rain(self): meta = { "chunk-method": "plain/rain?algo=liber8tion&k=6&m=2", "ctime": "1450341162", "deleted": "False", "hash": "", "hash-method": "md5", "id": "F4B1C8DD132705007DE8B43D0709DAA2", "length": "1000", "mime-type": "application/octet-stream", "name": "titi", "policy": "RAIN", "version": "1450341162332663" } chunks = [{ "url": "http://127.0.0.1:6010/0_p1", "pos": "0.p1", "size": 1048576, "hash": "00000000000000000000000000000000" }, { "url": "http://127.0.0.1:6011/0_p0", "pos": "0.p0", "size": 1048576, "hash": "00000000000000000000000000000000" }, { "url": "http://127.0.0.1:6016/0_1", "pos": "0.1", "size": 1048576, "hash": "00000000000000000000000000000000" }, { "url": "http://127.0.0.1:6017/0_0", "pos": "0.0", "size": 1048576, "hash": "00000000000000000000000000000000" }] self.content_factory.container_client.content_prepare = Mock( return_value=(meta, chunks)) c = self.content_factory.new("xxx_container_id", "titi", 1000, "RAIN") self.assertEqual(type(c), RainContent) self.assertEqual(c.content_id, "F4B1C8DD132705007DE8B43D0709DAA2") self.assertEqual(c.length, 1000) self.assertEqual(c.path, "titi") self.assertEqual(c.version, "1450341162332663") self.assertEqual(c.algo, "liber8tion") self.assertEqual(c.k, 6) self.assertEqual(c.m, 2) self.assertEqual(len(c.chunks), 4) self.assertEqual(c.chunks[0].raw(), chunks[3]) self.assertEqual(c.chunks[1].raw(), chunks[2]) self.assertEqual(c.chunks[2].raw(), chunks[1]) self.assertEqual(c.chunks[3].raw(), chunks[0]) def _new_content(self, stgpol, data, path="titi"): old_content = self.content_factory.new(self.container_id, path, len(data), stgpol) old_content.upload(StringIO.StringIO(data)) return self.content_factory.get(self.container_id, old_content.content_id) def _test_change_policy(self, data_size, old_policy, new_policy): if (old_policy == "RAIN" or new_policy == "RAIN") \ and len(self.conf['rawx']) < 8: self.skipTest("RAIN: Need more than 8 rawx to run") data = random_data(data_size) obj_type = { "SINGLE": DupContent, "TWOCOPIES": DupContent, "THREECOPIES": DupContent, "RAIN": RainContent } old_content = self._new_content(old_policy, data) self.assertEqual(type(old_content), obj_type[old_policy]) changed_content = self.content_factory.change_policy( old_content.container_id, old_content.content_id, new_policy) self.assertRaises(NotFound, self.container_client.content_show, self.account, cid=old_content.container_id, content=old_content.content_id) new_content = self.content_factory.get(self.container_id, changed_content.content_id) self.assertEqual(type(new_content), obj_type[new_policy]) downloaded_data = "".join(new_content.download()) self.assertEqual(downloaded_data, data) def test_change_content_0_byte_policy_single_to_rain(self): self._test_change_policy(0, "SINGLE", "RAIN") def 
test_change_content_0_byte_policy_rain_to_twocopies(self): self._test_change_policy(0, "RAIN", "TWOCOPIES") def test_change_content_1_byte_policy_single_to_rain(self): self._test_change_policy(1, "SINGLE", "RAIN") def test_change_content_chunksize_bytes_policy_twocopies_to_rain(self): self._test_change_policy(self.chunk_size, "TWOCOPIES", "RAIN") def test_change_content_2xchunksize_bytes_policy_threecopies_to_rain(self): self._test_change_policy(self.chunk_size * 2, "THREECOPIES", "RAIN") def test_change_content_1_byte_policy_rain_to_threecopies(self): self._test_change_policy(1, "RAIN", "THREECOPIES") def test_change_content_chunksize_bytes_policy_rain_to_twocopies(self): self._test_change_policy(self.chunk_size, "RAIN", "TWOCOPIES") def test_change_content_2xchunksize_bytes_policy_rain_to_single(self): self._test_change_policy(self.chunk_size * 2, "RAIN", "SINGLE") def test_change_content_0_byte_policy_twocopies_to_threecopies(self): self._test_change_policy(0, "TWOCOPIES", "THREECOPIES") def test_change_content_chunksize_bytes_policy_single_to_twocopies(self): self._test_change_policy(self.chunk_size, "SINGLE", "TWOCOPIES") def test_change_content_2xchunksize_bytes_policy_3copies_to_single(self): self._test_change_policy(self.chunk_size * 2, "THREECOPIES", "SINGLE") def test_change_content_with_same_policy(self): data = random_data(10) old_content = self._new_content("TWOCOPIES", data) changed_content = self.content_factory.change_policy( old_content.container_id, old_content.content_id, "TWOCOPIES") self.assertEqual(old_content.content_id, changed_content.content_id) def test_change_policy_unknown_content(self): self.assertRaises(ContentNotFound, self.content_factory.change_policy, self.container_id, "1234", "SINGLE") def test_change_policy_unknown_storage_policy(self): data = random_data(10) old_content = self._new_content("TWOCOPIES", data) self.assertRaises(ClientException, self.content_factory.change_policy, self.container_id, old_content.content_id, "UnKnOwN") def _test_move_chunk(self, policy): data = random_data(self.chunk_size) content = self._new_content(policy, data) chunk_id = content.chunks.filter(metapos=0)[0].id chunk_url = content.chunks.filter(metapos=0)[0].url chunk_meta, chunk_stream = self.blob_client.chunk_get(chunk_url) chunk_hash = md5_stream(chunk_stream) new_chunk = content.move_chunk(chunk_id) content_updated = self.content_factory.get(self.container_id, content.content_id) hosts = [] for c in content_updated.chunks.filter(metapos=0): self.assertThat(hosts, Not(Contains(c.host))) self.assertNotEquals(c.id, chunk_id) hosts.append(c.host) new_chunk_meta, new_chunk_stream = self.blob_client.chunk_get( new_chunk["url"]) new_chunk_hash = md5_stream(new_chunk_stream) self.assertEqual(new_chunk_hash, chunk_hash) del chunk_meta["chunk_id"] del new_chunk_meta["chunk_id"] self.assertEqual(new_chunk_meta, chunk_meta) def test_single_move_chunk(self): self._test_move_chunk("SINGLE") def test_twocopies_move_chunk(self): self._test_move_chunk("TWOCOPIES") def test_rain_move_chunk(self): if len(self.conf['rawx']) < 9: self.skipTest("Need more than 8 rawx") self._test_move_chunk("RAIN") def test_move_chunk_not_in_content(self): data = random_data(self.chunk_size) content = self._new_content("TWOCOPIES", data) with ExpectedException(OrphanChunk): content.move_chunk("1234") def test_strange_paths(self): for cname in ( "Annual report.txt", "foo+bar=foobar.txt", "100%_bug_free.c", "forward/slash/allowed", "I\\put\\backslashes\\and$dollar$signs$in$file$names", "Je suis tombé sur la 
tête, mais ça va bien.", "%s%f%u%d%%", "carriage\rreturn", "line\nfeed", "ta\tbu\tla\ttion", "controlchars", ): content = self._new_content("SINGLE", "nobody cares", cname) try: self.assertEqual(cname, content.path) finally: pass # TODO: delete the content
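The RAIN fixtures above use positions such as "0.p0" and "0.1", and the rebuild code extracts the metachunk index with chunk_pos.split('.', 1)[0]. A hedged sketch of that notation follows; the parser is illustrative, not an oio helper.

def parse_chunk_pos(pos):
    # "<metapos>" for plain copies, "<metapos>.<subpos>" for RAIN/EC,
    # with a 'p' prefix marking parity subchunks ("0.p1").
    if '.' not in pos:
        return int(pos), None, False
    metapos, subpos = pos.split('.', 1)
    parity = subpos.startswith('p')
    return int(metapos), int(subpos.lstrip('p')), parity

assert parse_chunk_pos('0.p1') == (0, 1, True)
assert parse_chunk_pos('0.1') == (0, 1, False)
assert parse_chunk_pos('2') == (2, None, False)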
class TestContentFactory(BaseTestCase): def setUp(self): super(TestContentFactory, self).setUp() self.namespace = self.conf['namespace'] self.chunk_size = self.conf['chunk_size'] self.gridconf = {"namespace": self.namespace} self.content_factory = ContentFactory(self.gridconf) self.container_name = "TestContentFactory%f" % time.time() self.blob_client = BlobClient() self.container_client = ContainerClient(self.gridconf) self.container_client.container_create(acct=self.account, ref=self.container_name) self.container_id = cid_from_name(self.account, self.container_name).upper() def tearDown(self): super(TestContentFactory, self).tearDown() def test_extract_datasec(self): self.content_factory.ns_info = { "data_security": { "DUPONETWO": "DUP:distance=1|nb_copy=2", "RAIN": "RAIN:k=6|m=2|algo=liber8tion" }, "storage_policy": { "RAIN": "NONE:RAIN:NONE", "SINGLE": "NONE:NONE:NONE", "TWOCOPIES": "NONE:DUPONETWO:NONE" } } ds_type, ds_args = self.content_factory._extract_datasec("RAIN") self.assertEqual(ds_type, "RAIN") self.assertEqual(ds_args, { "k": "6", "m": "2", "algo": "liber8tion" }) ds_type, ds_args = self.content_factory._extract_datasec("SINGLE") self.assertEqual(ds_type, "DUP") self.assertEqual(ds_args, { "nb_copy": "1", "distance": "0" }) ds_type, ds_args = self.content_factory._extract_datasec("TWOCOPIES") self.assertEqual(ds_type, "DUP") self.assertEqual(ds_args, { "nb_copy": "2", "distance": "1" }) self.assertRaises(InconsistentContent, self.content_factory._extract_datasec, "UnKnOwN") def test_get_rain(self): meta = { "chunk-method": "plain/rain?algo=liber8tion&k=6&m=2", "ctime": "1450176946", "deleted": "False", "hash": "E952A419957A6E405BFC53EC65483F73", "hash-method": "md5", "id": "3FA2C4A1ED2605005335A276890EC458", "length": "658", "mime-type": "application/octet-stream", "name": "tox.ini", "policy": "RAIN", "version": "1450176946676289" } chunks = [ { "url": "http://127.0.0.1:6012/A0A0", "pos": "0.p0", "size": 512, "hash": "E7D4E4AD460971CA2E3141F2102308D4"}, { "url": "http://127.0.0.1:6010/A01", "pos": "0.1", "size": 146, "hash": "760AB5DA7C51A3654F1CA622687CD6C3"}, { "url": "http://127.0.0.1:6011/A00", "pos": "0.0", "size": 512, "hash": "B1D08B86B8CAA90A2092CCA0DF9201DB"}, { "url": "http://127.0.0.1:6013/A0A1", "pos": "0.p1", "size": 512, "hash": "DA9D7F72AEEA5791565724424CE45C16"} ] self.content_factory.container_client.content_show = Mock( return_value=(meta, chunks)) c = self.content_factory.get("xxx_container_id", "xxx_content_id") self.assertEqual(type(c), RainContent) self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458") self.assertEqual(c.length, 658) self.assertEqual(c.path, "tox.ini") self.assertEqual(c.version, "1450176946676289") self.assertEqual(c.algo, "liber8tion") self.assertEqual(c.k, 6) self.assertEqual(c.m, 2) self.assertEqual(len(c.chunks), 4) self.assertEqual(c.chunks[0].raw(), chunks[2]) self.assertEqual(c.chunks[1].raw(), chunks[1]) self.assertEqual(c.chunks[2].raw(), chunks[0]) self.assertEqual(c.chunks[3].raw(), chunks[3]) def test_get_dup(self): meta = { "chunk-method": "plain/bytes", "ctime": "1450176946", "deleted": "False", "hash": "E952A419957A6E405BFC53EC65483F73", "hash-method": "md5", "id": "3FA2C4A1ED2605005335A276890EC458", "length": "658", "mime-type": "application/octet-stream", "name": "tox.ini", "policy": "TWOCOPIES", "version": "1450176946676289" } chunks = [ { "url": "http://127.0.0.1:6010/A0", "pos": "0", "size": 658, "hash": "E952A419957A6E405BFC53EC65483F73"}, { "url": "http://127.0.0.1:6011/A1", "pos": "0", "size": 658, 
"hash": "E952A419957A6E405BFC53EC65483F73"} ] self.content_factory.container_client.content_show = Mock( return_value=(meta, chunks)) c = self.content_factory.get("xxx_container_id", "xxx_content_id") self.assertEqual(type(c), DupContent) self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458") self.assertEqual(c.length, 658) self.assertEqual(c.path, "tox.ini") self.assertEqual(c.version, "1450176946676289") self.assertEqual(c.nb_copy, 2) self.assertEqual(c.distance, 1) self.assertEqual(len(c.chunks), 2) self.assertEqual(c.chunks[0].raw(), chunks[0]) self.assertEqual(c.chunks[1].raw(), chunks[1]) def test_get_unknown_content(self): self.assertRaises(ContentNotFound, self.content_factory.get, self.container_id, "1234") def test_new_rain(self): meta = { "chunk-method": "plain/rain?algo=liber8tion&k=6&m=2", "ctime": "1450341162", "deleted": "False", "hash": "", "hash-method": "md5", "id": "F4B1C8DD132705007DE8B43D0709DAA2", "length": "1000", "mime-type": "application/octet-stream", "name": "titi", "policy": "RAIN", "version": "1450341162332663" } chunks = [ { "url": "http://127.0.0.1:6010/0_p1", "pos": "0.p1", "size": 1048576, "hash": "00000000000000000000000000000000"}, { "url": "http://127.0.0.1:6011/0_p0", "pos": "0.p0", "size": 1048576, "hash": "00000000000000000000000000000000"}, { "url": "http://127.0.0.1:6016/0_1", "pos": "0.1", "size": 1048576, "hash": "00000000000000000000000000000000"}, { "url": "http://127.0.0.1:6017/0_0", "pos": "0.0", "size": 1048576, "hash": "00000000000000000000000000000000"} ] self.content_factory.container_client.content_prepare = Mock( return_value=(meta, chunks)) c = self.content_factory.new("xxx_container_id", "titi", 1000, "RAIN") self.assertEqual(type(c), RainContent) self.assertEqual(c.content_id, "F4B1C8DD132705007DE8B43D0709DAA2") self.assertEqual(c.length, 1000) self.assertEqual(c.path, "titi") self.assertEqual(c.version, "1450341162332663") self.assertEqual(c.algo, "liber8tion") self.assertEqual(c.k, 6) self.assertEqual(c.m, 2) self.assertEqual(len(c.chunks), 4) self.assertEqual(c.chunks[0].raw(), chunks[3]) self.assertEqual(c.chunks[1].raw(), chunks[2]) self.assertEqual(c.chunks[2].raw(), chunks[1]) self.assertEqual(c.chunks[3].raw(), chunks[0]) def _new_content(self, stgpol, data, path="titi"): old_content = self.content_factory.new(self.container_id, path, len(data), stgpol) old_content.upload(StringIO.StringIO(data)) return self.content_factory.get(self.container_id, old_content.content_id) def _test_change_policy(self, data_size, old_policy, new_policy): if (old_policy == "RAIN" or new_policy == "RAIN") \ and len(self.conf['rawx']) < 8: self.skipTest("RAIN: Need more than 8 rawx to run") data = random_data(data_size) obj_type = { "SINGLE": DupContent, "TWOCOPIES": DupContent, "THREECOPIES": DupContent, "RAIN": RainContent } old_content = self._new_content(old_policy, data) self.assertEqual(type(old_content), obj_type[old_policy]) changed_content = self.content_factory.change_policy( old_content.container_id, old_content.content_id, new_policy) self.assertRaises(NotFound, self.container_client.content_show, self.account, cid=old_content.container_id, content=old_content.content_id) new_content = self.content_factory.get(self.container_id, changed_content.content_id) self.assertEqual(type(new_content), obj_type[new_policy]) downloaded_data = "".join(new_content.download()) self.assertEqual(downloaded_data, data) def test_change_content_0_byte_policy_single_to_rain(self): self._test_change_policy(0, "SINGLE", "RAIN") def 
test_change_content_0_byte_policy_rain_to_twocopies(self): self._test_change_policy(0, "RAIN", "TWOCOPIES") def test_change_content_1_byte_policy_single_to_rain(self): self._test_change_policy(1, "SINGLE", "RAIN") def test_change_content_chunksize_bytes_policy_twocopies_to_rain(self): self._test_change_policy(self.chunk_size, "TWOCOPIES", "RAIN") def test_change_content_2xchunksize_bytes_policy_threecopies_to_rain(self): self._test_change_policy(self.chunk_size * 2, "THREECOPIES", "RAIN") def test_change_content_1_byte_policy_rain_to_threecopies(self): self._test_change_policy(1, "RAIN", "THREECOPIES") def test_change_content_chunksize_bytes_policy_rain_to_twocopies(self): self._test_change_policy(self.chunk_size, "RAIN", "TWOCOPIES") def test_change_content_2xchunksize_bytes_policy_rain_to_single(self): self._test_change_policy(self.chunk_size * 2, "RAIN", "SINGLE") def test_change_content_0_byte_policy_twocopies_to_threecopies(self): self._test_change_policy(0, "TWOCOPIES", "THREECOPIES") def test_change_content_chunksize_bytes_policy_single_to_twocopies(self): self._test_change_policy(self.chunk_size, "SINGLE", "TWOCOPIES") def test_change_content_2xchunksize_bytes_policy_3copies_to_single(self): self._test_change_policy(self.chunk_size * 2, "THREECOPIES", "SINGLE") def test_change_content_with_same_policy(self): data = random_data(10) old_content = self._new_content("TWOCOPIES", data) changed_content = self.content_factory.change_policy( old_content.container_id, old_content.content_id, "TWOCOPIES") self.assertEqual(old_content.content_id, changed_content.content_id) def test_change_policy_unknown_content(self): self.assertRaises(ContentNotFound, self.content_factory.change_policy, self.container_id, "1234", "SINGLE") def test_change_policy_unknown_storage_policy(self): data = random_data(10) old_content = self._new_content("TWOCOPIES", data) self.assertRaises(ClientException, self.content_factory.change_policy, self.container_id, old_content.content_id, "UnKnOwN") def _test_move_chunk(self, policy): data = random_data(self.chunk_size) content = self._new_content(policy, data) chunk_id = content.chunks.filter(metapos=0)[0].id chunk_url = content.chunks.filter(metapos=0)[0].url chunk_meta, chunk_stream = self.blob_client.chunk_get(chunk_url) chunk_hash = md5_stream(chunk_stream) new_chunk = content.move_chunk(chunk_id) content_updated = self.content_factory.get(self.container_id, content.content_id) hosts = [] for c in content_updated.chunks.filter(metapos=0): self.assertThat(hosts, Not(Contains(c.host))) self.assertNotEquals(c.id, chunk_id) hosts.append(c.host) new_chunk_meta, new_chunk_stream = self.blob_client.chunk_get( new_chunk["url"]) new_chunk_hash = md5_stream(new_chunk_stream) self.assertEqual(new_chunk_hash, chunk_hash) del chunk_meta["chunk_id"] del new_chunk_meta["chunk_id"] self.assertEqual(new_chunk_meta, chunk_meta) def test_single_move_chunk(self): self._test_move_chunk("SINGLE") def test_twocopies_move_chunk(self): self._test_move_chunk("TWOCOPIES") def test_rain_move_chunk(self): if len(self.conf['rawx']) < 9: self.skipTest("Need more than 8 rawx") self._test_move_chunk("RAIN") def test_move_chunk_not_in_content(self): data = random_data(self.chunk_size) content = self._new_content("TWOCOPIES", data) with ExpectedException(OrphanChunk): content.move_chunk("1234") def test_strange_paths(self): strange_paths = [ "Annual report.txt", "foo+bar=foobar.txt", "100%_bug_free.c", "forward/slash/allowed", "I\\put\\backslashes\\and$dollar$signs$in$file$names", "Je suis tombé sur 
la tête, mais ça va bien.", "%s%f%u%d%%", "carriage\rreturn", "line\nfeed", "ta\tbu\tla\ttion", "controlchars", ] answers = dict() for cname in strange_paths: content = self._new_content("SINGLE", "nobody cares", cname) answers[cname] = content listing = self.container_client.container_list(self.account, self.container_name) obj_set = {k["name"].encode("utf8", "ignore") for k in listing["objects"]} try: # Ensure the saved path is the one we gave the object for cname in answers: self.assertEqual(cname, answers[cname].path) # Ensure all objects appear in listing for cname in strange_paths: self.assertIn(cname, obj_set) finally: # Cleanup for cname in answers: try: answers[cname].delete() except Exception: pass
class ChunkOperator(object): """ Execute maintenance operations on chunks. """ def __init__(self, conf, logger=None): self.conf = conf self.logger = logger or get_logger(conf) self.rdir_client = RdirClient(conf, logger=self.logger) self.content_factory = ContentFactory(conf, logger=self.logger) def rebuild(self, container_id, content_id, chunk_id_or_pos, rawx_id=None, try_chunk_delete=False, allow_frozen_container=True, allow_same_rawx=True): """ Try to find the chunk in the metadata of the specified object, then rebuild it. """ try: content = self.content_factory.get(container_id, content_id) except ContentNotFound: raise OrphanChunk('Content not found: possible orphan chunk') chunk_pos = None if looks_like_chunk_position(chunk_id_or_pos): chunk_pos = chunk_id_or_pos chunk_id = None else: if '/' in chunk_id_or_pos: parsed = urlparse(chunk_id_or_pos) chunk_id = parsed.path.lstrip('/') rawx_id = parsed.netloc else: chunk_id = chunk_id_or_pos candidates = content.chunks.filter(id=chunk_id) # FIXME(FVE): if for some reason the chunks have been registered # with an IP address and port instead of an ID, this won't work. if rawx_id: candidates = candidates.filter(host=rawx_id) chunk = candidates.one() if chunk is None: raise OrphanChunk( 'Chunk not found in content: possible orphan chunk: ' + '%s' % (candidates.all(), )) elif rawx_id and chunk.host != rawx_id: raise ValueError('Chunk does not belong to this rawx') rebuilt_bytes = content.rebuild_chunk( chunk_id, service_id=rawx_id, allow_frozen_container=allow_frozen_container, allow_same_rawx=allow_same_rawx, chunk_pos=chunk_pos) if try_chunk_delete: try: content.blob_client.chunk_delete(chunk.url) self.logger.info("Old chunk %s deleted", chunk.url) except Exception as exc: self.logger.warn('Failed to delete old chunk %s: %s', chunk.url, exc) # This call does not raise exception if chunk is not referenced if chunk_id is not None: try: self.rdir_client.chunk_delete(chunk.host, container_id, content_id, chunk_id) except Exception as exc: self.logger.warn( 'Failed to delete chunk entry (%s) from the rdir (%s): %s', chunk_id, chunk.host, exc) return rebuilt_bytes
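A minimal usage sketch for ChunkOperator, assuming a working namespace configuration; the container and content ids below are made-up placeholders. A chunk can be designated either by its id (or URL) or by its position, which is what looks_like_chunk_position() discriminates.

# Hypothetical invocation; the ids are placeholders, not real entries.
conf = {'namespace': 'OPENIO'}
operator = ChunkOperator(conf)
rebuilt_bytes = operator.rebuild(
    container_id='0123456789ABCDEF' * 4,   # placeholder 64-hex container id
    content_id='3FA2C4A1ED2605005335A276890EC458',
    chunk_id_or_pos='0.2',   # a position, hence not a 64-hex chunk id
    try_chunk_delete=True)
print('%d bytes rebuilt' % rebuilt_bytes)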
class TestContentFactory(BaseTestCase): def setUp(self): super(TestContentFactory, self).setUp() self.wait_for_score(('meta2', )) self.namespace = self.conf['namespace'] self.chunk_size = self.conf['chunk_size'] self.gridconf = {"namespace": self.namespace} self.content_factory = ContentFactory(self.gridconf) self.container_name = "TestContentFactory%f" % time.time() self.blob_client = BlobClient(conf=self.conf) self.container_client = ContainerClient(self.gridconf) self.container_client.container_create(account=self.account, reference=self.container_name) self.container_id = cid_from_name(self.account, self.container_name).upper() self.stgpol = "SINGLE" self.stgpol_twocopies = "TWOCOPIES" self.stgpol_threecopies = "THREECOPIES" self.stgpol_ec = "EC" def tearDown(self): super(TestContentFactory, self).tearDown() def test_get_ec(self): meta = { "chunk_method": "ec/algo=liberasurecode_rs_vand,k=6,m=2", "ctime": "1450176946", "deleted": "False", "hash": "E952A419957A6E405BFC53EC65483F73", "hash_method": "md5", "id": "3FA2C4A1ED2605005335A276890EC458", "length": "658", "mime_type": "application/octet-stream", "name": "tox.ini", "policy": self.stgpol_ec, "version": "1450176946676289", "oio_version": "4.2", } chunks = [{ "url": "http://127.0.0.1:6012/A0A0", "pos": "0.0", "size": 512, "hash": "E7D4E4AD460971CA2E3141F2102308D4" }, { "url": "http://127.0.0.1:6010/A01", "pos": "0.1", "size": 146, "hash": "760AB5DA7C51A3654F1CA622687CD6C3" }, { "url": "http://127.0.0.1:6011/A00", "pos": "0.2", "size": 512, "hash": "B1D08B86B8CAA90A2092CCA0DF9201DB" }, { "url": "http://127.0.0.1:6013/A0A1", "pos": "0.3", "size": 512, "hash": "DA9D7F72AEEA5791565724424CE45C16" }] self.content_factory.container_client.content_locate = Mock( return_value=(meta, chunks)) c = self.content_factory.get("xxx_container_id", "xxx_content_id", account=self.account, container_name=self.container_name) self.assertEqual(type(c), ECContent) self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458") self.assertEqual(c.length, 658) self.assertEqual(c.path, "tox.ini") self.assertEqual( c.full_path, encode_fullpath(self.account, self.container_name, "tox.ini", meta['version'], meta['id'])) self.assertEqual(c.version, "1450176946676289") # TODO test storage method self.assertEqual(len(c.chunks), 4) self.assertEqual(c.chunks[0].raw(), chunks[0]) self.assertEqual(c.chunks[1].raw(), chunks[1]) self.assertEqual(c.chunks[2].raw(), chunks[2]) self.assertEqual(c.chunks[3].raw(), chunks[3]) def test_get_plain(self): meta = { "chunk_method": "plain/nb_copy=2", "ctime": "1450176946", "deleted": "False", "hash": "E952A419957A6E405BFC53EC65483F73", "hash_method": "md5", "id": "3FA2C4A1ED2605005335A276890EC458", "length": "658", "mime_type": "application/octet-stream", "name": "tox.ini", "policy": self.stgpol_twocopies, "version": "1450176946676289", "oio_version": "4.2", } chunks = [{ "url": "http://127.0.0.1:6010/A0", "pos": "0", "size": 658, "hash": "E952A419957A6E405BFC53EC65483F73" }, { "url": "http://127.0.0.1:6011/A1", "pos": "0", "size": 658, "hash": "E952A419957A6E405BFC53EC65483F73" }] self.content_factory.container_client.content_locate = Mock( return_value=(meta, chunks)) c = self.content_factory.get("xxx_container_id", "xxx_content_id", account=self.account, container_name=self.container_name) self.assertEqual(type(c), PlainContent) self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458") self.assertEqual(c.length, 658) self.assertEqual(c.path, "tox.ini") self.assertEqual(c.version, "1450176946676289") self.assertEqual( 
c.full_path, encode_fullpath(self.account, self.container_name, "tox.ini", meta['version'], meta['id'])) # TODO test storage_method self.assertEqual(len(c.chunks), 2) self.assertEqual(c.chunks[0].raw(), chunks[0]) self.assertEqual(c.chunks[1].raw(), chunks[1]) def test_get_unknown_content(self): self.assertRaises(ContentNotFound, self.content_factory.get, self.container_id, "1234") def test_new_ec(self): meta = { "chunk_method": "ec/algo=liberasurecode_rs_vand,k=6,m=2", "ctime": "1450341162", "deleted": "False", "hash": "", "hash_method": "md5", "id": "F4B1C8DD132705007DE8B43D0709DAA2", "length": "1000", "mime_type": "application/octet-stream", "name": "titi", "policy": self.stgpol_ec, "version": "1450341162332663", "oio_version": "4.2", } chunks = [{ "url": "http://127.0.0.1:6010/0_p1", "pos": "0.3", "size": 1048576, "hash": "00000000000000000000000000000000" }, { "url": "http://127.0.0.1:6011/0_p0", "pos": "0.2", "size": 1048576, "hash": "00000000000000000000000000000000" }, { "url": "http://127.0.0.1:6016/0_1", "pos": "0.1", "size": 1048576, "hash": "00000000000000000000000000000000" }, { "url": "http://127.0.0.1:6017/0_0", "pos": "0.0", "size": 1048576, "hash": "00000000000000000000000000000000" }] self.content_factory.container_client.content_prepare = Mock( return_value=(meta, chunks)) c = self.content_factory.new("xxx_container_id", "titi", 1000, self.stgpol_ec, account=self.account, container_name=self.container_name) self.assertEqual(type(c), ECContent) self.assertEqual(c.content_id, "F4B1C8DD132705007DE8B43D0709DAA2") self.assertEqual(c.length, 1000) self.assertEqual(c.path, "titi") self.assertEqual(c.version, "1450341162332663") # TODO test storage_method self.assertEqual(len(c.chunks), 4) self.assertEqual(c.chunks[0].raw(), chunks[3]) self.assertEqual(c.chunks[1].raw(), chunks[2]) self.assertEqual(c.chunks[2].raw(), chunks[1]) self.assertEqual(c.chunks[3].raw(), chunks[0]) def _new_content(self, stgpol, data, path="titi", account=None, container_name=None, mime_type=None, properties=None): old_content = self.content_factory.new(self.container_id, path, len(data), stgpol, account=account, container_name=container_name) if properties: old_content.properties = properties if mime_type: old_content.mime_type = mime_type old_content.create(BytesIO(data)) return self.content_factory.get(self.container_id, old_content.content_id) def _test_move_chunk(self, policy): data = random_data(self.chunk_size) content = self._new_content(policy, data) mc = content.chunks.filter(metapos=0) chunk_id = mc[0].id chunk_url = mc[0].url chunk_host = mc[0].host chunk_meta, chunk_stream = self.blob_client.chunk_get(chunk_url) chunk_hash = md5_stream(chunk_stream) new_chunk = content.move_chunk(chunk_id, service_id=chunk_host) content_updated = self.content_factory.get(self.container_id, content.content_id) hosts = [] for c in content_updated.chunks.filter(metapos=0): self.assertThat(hosts, Not(Contains(c.host))) self.assertNotEqual(c.url, chunk_url) hosts.append(c.host) new_chunk_meta, new_chunk_stream = self.blob_client.chunk_get( new_chunk["url"]) new_chunk_hash = md5_stream(new_chunk_stream) self.assertEqual(new_chunk_hash, chunk_hash) self.assertGreaterEqual(new_chunk_meta['chunk_mtime'], chunk_meta['chunk_mtime']) del chunk_meta["chunk_id"] del new_chunk_meta["chunk_id"] del chunk_meta["chunk_mtime"] del new_chunk_meta["chunk_mtime"] self.assertEqual(new_chunk_meta, chunk_meta) def test_single_move_chunk(self): self._test_move_chunk(self.stgpol) def test_twocopies_move_chunk(self): 
self._test_move_chunk(self.stgpol_twocopies) @ec def test_ec_move_chunk(self): self._test_move_chunk(self.stgpol_ec) def test_move_chunk_not_in_content(self): data = random_data(self.chunk_size) content = self._new_content(self.stgpol_twocopies, data) with ExpectedException(OrphanChunk): content.move_chunk("1234") def test_strange_paths(self): answers = dict() for cname in strange_paths: content = self._new_content(self.stgpol, b"nobody cares", cname) answers[cname] = content _, listing = self.container_client.content_list( self.account, self.container_name) if PY2: obj_set = {k["name"].encode('utf-8') for k in listing["objects"]} else: obj_set = {k["name"] for k in listing["objects"]} try: # Ensure the saved path is the one we gave the object for cname in answers: self.assertEqual(cname, answers[cname].path) fullpath = encode_fullpath(self.account, self.container_name, cname, answers[cname].version, answers[cname].content_id) self.assertEqual(answers[cname].full_path, fullpath) # Ensure all objects appear in listing for cname in strange_paths: self.assertIn(cname, obj_set) finally: # Cleanup for cname in answers: try: answers[cname].delete() except Exception: pass
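test_strange_paths checks full_path against encode_fullpath(). The layout these assertions rely on is five '/'-separated, URL-quoted components; the sketch below mirrors that convention and is an illustration, not the canonical oio helper.

try:
    from urllib import quote          # PY2, which this test module supports
except ImportError:
    from urllib.parse import quote    # PY3

def encode_fullpath_sketch(account, container, path, version, content_id):
    # account/container/path/version/content_id, each component quoted
    return '/'.join(quote(str(part), '')
                    for part in (account, container, path,
                                 version, content_id))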
class BlobRebuilderWorker(object): def __init__(self, conf, logger, volume): self.conf = conf self.logger = logger or get_logger(conf) self.volume = volume self.run_time = 0 self.passes = 0 self.errors = 0 self.last_reported = 0 self.chunks_run_time = 0 self.bytes_running_time = 0 self.bytes_processed = 0 self.total_bytes_processed = 0 self.total_chunks_processed = 0 self.dry_run = true_value(conf.get('dry_run', False)) self.report_interval = int_value(conf.get('report_interval'), 3600) self.max_chunks_per_second = int_value(conf.get('chunks_per_second'), 30) self.max_bytes_per_second = int_value(conf.get('bytes_per_second'), 10000000) self.rdir_fetch_limit = int_value(conf.get('rdir_fetch_limit'), 100) self.allow_same_rawx = true_value(conf.get('allow_same_rawx')) self.rdir_client = RdirClient(conf) self.content_factory = ContentFactory(conf) def rebuilder_pass_with_lock(self): self.rdir_client.admin_lock(self.volume, "rebuilder on %s" % gethostname()) try: self.rebuilder_pass() finally: self.rdir_client.admin_unlock(self.volume) def rebuilder_pass(self): start_time = report_time = time.time() total_errors = 0 rebuilder_time = 0 chunks = self.rdir_client.chunk_fetch(self.volume, limit=self.rdir_fetch_limit, rebuild=True) for container_id, content_id, chunk_id, data in chunks: loop_time = time.time() if self.dry_run: self.dryrun_chunk_rebuild(container_id, content_id, chunk_id) else: self.safe_chunk_rebuild(container_id, content_id, chunk_id) self.chunks_run_time = ratelimit(self.chunks_run_time, self.max_chunks_per_second) self.total_chunks_processed += 1 now = time.time() if now - self.last_reported >= self.report_interval: self.logger.info( 'RUN %(volume)s ' 'started=%(start_time)s ' 'passes=%(passes)d ' 'errors=%(errors)d ' 'chunks=%(nb_chunks)d %(c_rate).2f/s ' 'bytes=%(nb_bytes)d %(b_rate).2fB/s ' 'elapsed=%(total).2f ' '(rebuilder: %(rebuilder_rate).2f%%)' % { 'volume': self.volume, 'start_time': datetime.fromtimestamp(int(report_time)).isoformat(), 'passes': self.passes, 'errors': self.errors, 'nb_chunks': self.total_chunks_processed, 'nb_bytes': self.total_bytes_processed, 'c_rate': self.passes / (now - report_time), 'b_rate': self.bytes_processed / (now - report_time), 'total': (now - start_time), 'rebuilder_time': rebuilder_time, 'rebuilder_rate': 100.0 * rebuilder_time / float(now - start_time) }) report_time = now total_errors += self.errors self.passes = 0 self.bytes_processed = 0 self.last_reported = now rebuilder_time += (now - loop_time) end_time = time.time() elapsed = (end_time - start_time) or 0.000001 self.logger.info( 'DONE %(volume)s ' 'started=%(start_time)s ' 'ended=%(end_time)s ' 'elapsed=%(elapsed).02f ' 'errors=%(errors)d ' 'chunks=%(nb_chunks)d %(c_rate).2f/s ' 'bytes=%(nb_bytes)d %(b_rate).2fB/s ' 'elapsed=%(rebuilder_time).2f ' '(rebuilder: %(rebuilder_rate).2f%%)' % { 'volume': self.volume, 'start_time': datetime.fromtimestamp( int(start_time)).isoformat(), 'end_time': datetime.fromtimestamp(int(end_time)).isoformat(), 'elapsed': elapsed, 'errors': total_errors + self.errors, 'nb_chunks': self.total_chunks_processed, 'nb_bytes': self.total_bytes_processed, 'c_rate': self.total_chunks_processed / elapsed, 'b_rate': self.total_bytes_processed / elapsed, 'rebuilder_time': rebuilder_time, 'rebuilder_rate': 100.0 * rebuilder_time / float(elapsed) }) def dryrun_chunk_rebuild(self, container_id, content_id, chunk_id): self.logger.info( "[dryrun] Rebuilding " "container %s, content %s, chunk %s", container_id, content_id, chunk_id) self.passes += 1 def 
safe_chunk_rebuild(self, container_id, content_id, chunk_id): try: self.chunk_rebuild(container_id, content_id, chunk_id) except Exception as e: self.errors += 1 self.logger.error('ERROR while rebuilding chunk %s|%s|%s: %s', container_id, content_id, chunk_id, e) self.passes += 1 def chunk_rebuild(self, container_id, content_id, chunk_id): self.logger.info('Rebuilding (container %s, content %s, chunk %s)', container_id, content_id, chunk_id) try: content = self.content_factory.get(container_id, content_id) except ContentNotFound: raise exc.OrphanChunk('Content not found') chunk = content.chunks.filter(id=chunk_id).one() if chunk is None: raise OrphanChunk("Chunk not found in content") chunk_size = chunk.size content.rebuild_chunk(chunk_id, allow_same_rawx=self.allow_same_rawx) self.rdir_client.chunk_delete(self.volume, container_id, content_id, chunk_id) self.bytes_processed += chunk_size self.total_bytes_processed += chunk_size
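rebuilder_pass_with_lock() wraps a pass in an rdir admin lock so two rebuilders never process the same volume concurrently. The pattern generalizes; a small sketch (admin_lock/admin_unlock are the calls used above, the wrapper itself is illustrative):

from socket import gethostname

def run_with_volume_lock(rdir_client, volume, one_pass):
    # Hold the rdir admin lock for the duration of one pass,
    # releasing it even if the pass raises.
    rdir_client.admin_lock(volume, "rebuilder on %s" % gethostname())
    try:
        return one_pass()
    finally:
        rdir_client.admin_unlock(volume)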
class TestECContent(BaseTestCase): def setUp(self): super(TestECContent, self).setUp() if len(self.conf['services']['rawx']) < 12: self.skipTest("Not enough rawx. " "EC tests need at least 12 rawx to run") self.namespace = self.conf['namespace'] self.account = self.conf['account'] self.chunk_size = self.conf['chunk_size'] self.gridconf = {"namespace": self.namespace} self.content_factory = ContentFactory(self.gridconf) self.container_client = ContainerClient(self.gridconf) self.blob_client = BlobClient(self.conf) self.container_name = "TestECContent%f" % time.time() self.container_client.container_create(account=self.account, reference=self.container_name) self.container_id = cid_from_name(self.account, self.container_name).upper() self.content = "%s-%s" % (self.__class__.__name__, random_str(4)) self.stgpol = "EC" self.size = 1024 * 1024 + 320 self.k = 6 self.m = 3 def tearDown(self): super(TestECContent, self).tearDown() def random_chunks(self, nb): pos = random.sample(xrange(self.k + self.m), nb) return ["0.%s" % i for i in pos] def _test_create(self, data_size): # generate random test data data = random_data(data_size) # using factory create new EC content content = self.content_factory.new(self.container_id, self.content, len(data), self.stgpol) # verify the factory gave us an ECContent self.assertEqual(type(content), ECContent) # perform the content creation content.create(BytesIO(data)) meta, chunks = self.container_client.content_locate( cid=self.container_id, content=content.content_id) # verify metadata chunks = ChunksHelper(chunks) self.assertEqual(meta['hash'], md5_data(data)) self.assertEqual(meta['length'], str(len(data))) self.assertEqual(meta['policy'], self.stgpol) self.assertEqual(meta['name'], self.content) metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size)) \ if len(data) != 0 else 1 offset = 0 # verify each metachunk for metapos in range(metachunk_nb): chunks_at_pos = content.chunks.filter(metapos=metapos) if len(chunks_at_pos) < 1: break metachunk_size = chunks_at_pos[0].size metachunk_hash = md5_data(data[offset:offset + metachunk_size]) for chunk in chunks_at_pos: meta, stream = self.blob_client.chunk_get(chunk.url) self.assertEqual(meta['metachunk_size'], str(chunk.size)) self.assertEqual(meta['metachunk_hash'], chunk.checksum) self.assertEqual(meta['content_path'], self.content) self.assertEqual(meta['container_id'], self.container_id) self.assertEqual(meta['content_id'], content.content_id) self.assertEqual(meta['chunk_id'], chunk.id) self.assertEqual(meta['chunk_pos'], chunk.pos) self.assertEqual(meta['chunk_hash'], md5_stream(stream)) full_path = encode_fullpath(self.account, self.container_name, self.content, meta['content_version'], meta['content_id']) self.assertEqual(meta['full_path'], full_path) self.assertEqual(meta['oio_version'], '4.2') self.assertEqual(metachunk_hash, chunk.checksum) offset += metachunk_size def test_create_0_byte(self): self._test_create(0) def test_create_1_byte(self): self._test_create(1) def test_create(self): self._test_create(DAT_LEGIT_SIZE) def test_create_6294503_bytes(self): self._test_create(6294503) def _test_rebuild(self, data_size, broken_pos_list): # generate test data data = os.urandom(data_size) # create initial content old_content = self.content_factory.new(self.container_id, self.content, len(data), self.stgpol) # verify factory work as intended self.assertEqual(type(old_content), ECContent) # perform initial content creation old_content.create(BytesIO(data)) uploaded_content = 
self.content_factory.get(self.container_id, old_content.content_id) # break the content old_info = {} for pos in broken_pos_list: old_info[pos] = {} c = uploaded_content.chunks.filter(pos=pos)[0] old_info[pos]["url"] = c.url old_info[pos]["id"] = c.id old_info[pos]["hash"] = c.checksum chunk_id_to_rebuild = c.id meta, stream = self.blob_client.chunk_get(c.url) old_info[pos]["dl_meta"] = meta old_info[pos]["dl_hash"] = md5_stream(stream) # delete the chunk self.blob_client.chunk_delete(c.url) # rebuild the broken chunks uploaded_content.rebuild_chunk(chunk_id_to_rebuild) rebuilt_content = self.content_factory.get(self.container_id, uploaded_content.content_id) # sanity check self.assertEqual(type(rebuilt_content), ECContent) # verify rebuild result for pos in broken_pos_list: c = rebuilt_content.chunks.filter(pos=pos)[0] rebuilt_meta, rebuilt_stream = self.blob_client.chunk_get(c.url) self.assertEqual(rebuilt_meta["chunk_id"], c.id) self.assertEqual(md5_stream(rebuilt_stream), old_info[pos]["dl_hash"]) self.assertEqual(c.checksum, old_info[pos]["hash"]) self.assertNotEqual(c.url, old_info[pos]["url"]) self.assertGreaterEqual(rebuilt_meta['chunk_mtime'], old_info[pos]['dl_meta']['chunk_mtime']) del old_info[pos]["dl_meta"]["chunk_mtime"] del rebuilt_meta["chunk_mtime"] del old_info[pos]["dl_meta"]["chunk_id"] del rebuilt_meta["chunk_id"] self.assertEqual(rebuilt_meta, old_info[pos]["dl_meta"]) def test_content_0_byte_rebuild(self): self._test_rebuild(0, self.random_chunks(1)) def test_content_0_byte_rebuild_advanced(self): self._test_rebuild(0, self.random_chunks(3)) def test_content_1_byte_rebuild(self): self._test_rebuild(1, self.random_chunks(1)) def test_content_1_byte_rebuild_advanced(self): self._test_rebuild(1, self.random_chunks(3)) def test_content_rebuild(self): self._test_rebuild(DAT_LEGIT_SIZE, self.random_chunks(1)) def test_content_rebuild_advanced(self): self._test_rebuild(DAT_LEGIT_SIZE, self.random_chunks(3)) def test_content_rebuild_unrecoverable(self): self.assertRaises(UnrecoverableContent, self._test_rebuild, DAT_LEGIT_SIZE, self.random_chunks(4)) def _new_content(self, data, broken_pos_list=[]): old_content = self.content_factory.new(self.container_id, self.content, len(data), self.stgpol) self.assertEqual(type(old_content), ECContent) old_content.create(BytesIO(data)) # break content for pos in broken_pos_list: c = old_content.chunks.filter(pos=pos)[0] self.blob_client.chunk_delete(c.url) # get the new structure of the uploaded content return self.content_factory.get(self.container_id, old_content.content_id) def test_orphan_chunk(self): content = self._new_content(random_data(10)) self.assertRaises(OrphanChunk, content.rebuild_chunk, "invalid") def _test_fetch(self, data_size, broken_pos_list=None): broken_pos_list = broken_pos_list or [] test_data = random_data(data_size) content = self._new_content(test_data, broken_pos_list) data = b''.join(content.fetch()) self.assertEqual(len(data), len(test_data)) self.assertEqual(md5_data(data), md5_data(test_data)) # verify that chunks are broken for pos in broken_pos_list: chunk = content.chunks.filter(pos=pos)[0] self.assertRaises(NotFound, self.blob_client.chunk_delete, chunk.url) def test_fetch_content_0_byte(self): self._test_fetch(0) def test_fetch_content_1_byte(self): self._test_fetch(1) def test_fetch_content(self): self._test_fetch(DAT_LEGIT_SIZE) def test_fetch_content_0_byte_broken(self): self._test_fetch(0, self.random_chunks(3)) def test_fetch_content_1_byte_broken(self): self._test_fetch(1, 
self.random_chunks(3)) def test_fetch_content_broken(self): self._test_fetch(DAT_LEGIT_SIZE, self.random_chunks(3)) def test_fetch_content_unrecoverable(self): broken_chunks = self.random_chunks(4) self.assertRaises(OioException, self._test_fetch, DAT_LEGIT_SIZE, broken_chunks)
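
# Sketch, not in the original tests: the chunk layout the EC assertions
# above rely on. Each metachunk (up to `chunk_size` bytes of user data,
# assuming the default policy layout) is stored as k + m chunks whose
# positions are "metapos.subpos" strings, the same "0.x" values that
# random_chunks() samples from.
def expected_ec_positions(data_size, chunk_size, k, m):
    # empty contents still get one (empty) metachunk, as in _test_create()
    metachunk_nb = int(math.ceil(float(data_size) / chunk_size)) or 1
    return ["%d.%d" % (metapos, subpos)
            for metapos in range(metachunk_nb)
            for subpos in range(k + m)]

# e.g. with 1 MiB chunks, a content of self.size (1 MiB + 320 bytes) spans
# 2 metachunks, hence 2 * (6 + 3) = 18 chunks for k=6, m=3.
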
class TestContentFactory(BaseTestCase):
    def setUp(self):
        super(TestContentFactory, self).setUp()

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_name = "TestContentFactory%f" % time.time()
        self.blob_client = BlobClient()
        self.container_client = ContainerClient(self.gridconf)
        self.container_client.container_create(account=self.account,
                                               reference=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()
        self.stgpol = "SINGLE"
        self.stgpol_twocopies = "TWOCOPIES"
        self.stgpol_threecopies = "THREECOPIES"
        self.stgpol_ec = "EC"

    def tearDown(self):
        super(TestContentFactory, self).tearDown()

    def test_get_ec(self):
        meta = {
            "chunk_method": "ec/algo=liberasurecode_rs_vand,k=6,m=2",
            "ctime": "1450176946",
            "deleted": "False",
            "hash": "E952A419957A6E405BFC53EC65483F73",
            "hash_method": "md5",
            "id": "3FA2C4A1ED2605005335A276890EC458",
            "length": "658",
            "mime_type": "application/octet-stream",
            "name": "tox.ini",
            "policy": self.stgpol_ec,
            "version": "1450176946676289"
        }
        chunks = [{
            "url": "http://127.0.0.1:6012/A0A0",
            "pos": "0.0", "size": 512,
            "hash": "E7D4E4AD460971CA2E3141F2102308D4"
        }, {
            "url": "http://127.0.0.1:6010/A01",
            "pos": "0.1", "size": 146,
            "hash": "760AB5DA7C51A3654F1CA622687CD6C3"
        }, {
            "url": "http://127.0.0.1:6011/A00",
            "pos": "0.2", "size": 512,
            "hash": "B1D08B86B8CAA90A2092CCA0DF9201DB"
        }, {
            "url": "http://127.0.0.1:6013/A0A1",
            "pos": "0.3", "size": 512,
            "hash": "DA9D7F72AEEA5791565724424CE45C16"
        }]
        self.content_factory.container_client.content_locate = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.get("xxx_container_id", "xxx_content_id")
        self.assertEqual(type(c), ECContent)
        self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458")
        self.assertEqual(c.length, 658)
        self.assertEqual(c.path, "tox.ini")
        self.assertEqual(c.version, "1450176946676289")
        # TODO test storage method
        self.assertEqual(len(c.chunks), 4)
        self.assertEqual(c.chunks[0].raw(), chunks[0])
        self.assertEqual(c.chunks[1].raw(), chunks[1])
        self.assertEqual(c.chunks[2].raw(), chunks[2])
        self.assertEqual(c.chunks[3].raw(), chunks[3])

    def test_get_plain(self):
        meta = {
            "chunk_method": "plain/nb_copy=2",
            "ctime": "1450176946",
            "deleted": "False",
            "hash": "E952A419957A6E405BFC53EC65483F73",
            "hash_method": "md5",
            "id": "3FA2C4A1ED2605005335A276890EC458",
            "length": "658",
            "mime_type": "application/octet-stream",
            "name": "tox.ini",
            "policy": self.stgpol_twocopies,
            "version": "1450176946676289"
        }
        chunks = [{
            "url": "http://127.0.0.1:6010/A0",
            "pos": "0", "size": 658,
            "hash": "E952A419957A6E405BFC53EC65483F73"
        }, {
            "url": "http://127.0.0.1:6011/A1",
            "pos": "0", "size": 658,
            "hash": "E952A419957A6E405BFC53EC65483F73"
        }]
        self.content_factory.container_client.content_locate = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.get("xxx_container_id", "xxx_content_id")
        self.assertEqual(type(c), PlainContent)
        self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458")
        self.assertEqual(c.length, 658)
        self.assertEqual(c.path, "tox.ini")
        self.assertEqual(c.version, "1450176946676289")
        # TODO test storage_method
        self.assertEqual(len(c.chunks), 2)
        self.assertEqual(c.chunks[0].raw(), chunks[0])
        self.assertEqual(c.chunks[1].raw(), chunks[1])

    def test_get_unknown_content(self):
        self.assertRaises(ContentNotFound, self.content_factory.get,
                          self.container_id, "1234")

    def test_new_ec(self):
        meta = {
            "chunk_method": "ec/algo=liberasurecode_rs_vand,k=6,m=2",
            "ctime": "1450341162",
            "deleted": "False",
            "hash": "",
            "hash_method": "md5",
            "id": "F4B1C8DD132705007DE8B43D0709DAA2",
            "length": "1000",
            "mime_type": "application/octet-stream",
            "name": "titi",
            "policy": self.stgpol_ec,
            "version": "1450341162332663"
        }
        chunks = [{
            "url": "http://127.0.0.1:6010/0_p1",
            "pos": "0.3", "size": 1048576,
            "hash": "00000000000000000000000000000000"
        }, {
            "url": "http://127.0.0.1:6011/0_p0",
            "pos": "0.2", "size": 1048576,
            "hash": "00000000000000000000000000000000"
        }, {
            "url": "http://127.0.0.1:6016/0_1",
            "pos": "0.1", "size": 1048576,
            "hash": "00000000000000000000000000000000"
        }, {
            "url": "http://127.0.0.1:6017/0_0",
            "pos": "0.0", "size": 1048576,
            "hash": "00000000000000000000000000000000"
        }]
        self.content_factory.container_client.content_prepare = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.new("xxx_container_id", "titi", 1000,
                                     self.stgpol_ec)
        self.assertEqual(type(c), ECContent)
        self.assertEqual(c.content_id, "F4B1C8DD132705007DE8B43D0709DAA2")
        self.assertEqual(c.length, 1000)
        self.assertEqual(c.path, "titi")
        self.assertEqual(c.version, "1450341162332663")
        # TODO test storage_method
        self.assertEqual(len(c.chunks), 4)
        self.assertEqual(c.chunks[0].raw(), chunks[3])
        self.assertEqual(c.chunks[1].raw(), chunks[2])
        self.assertEqual(c.chunks[2].raw(), chunks[1])
        self.assertEqual(c.chunks[3].raw(), chunks[0])

    def _new_content(self, stgpol, data, path="titi"):
        old_content = self.content_factory.new(self.container_id, path,
                                               len(data), stgpol)

        old_content.create(BytesIO(data))
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def _test_change_policy(self, data_size, old_policy, new_policy):
        data = random_data(data_size)
        obj_type = {
            self.stgpol: PlainContent,
            self.stgpol_twocopies: PlainContent,
            self.stgpol_threecopies: PlainContent,
            self.stgpol_ec: ECContent
        }

        old_content = self._new_content(old_policy, data)
        self.assertEqual(type(old_content), obj_type[old_policy])

        changed_content = self.content_factory.change_policy(
            old_content.container_id, old_content.content_id, new_policy)

        self.assertRaises(NotFound, self.container_client.content_show,
                          self.account,
                          cid=old_content.container_id,
                          content=old_content.content_id)

        new_content = self.content_factory.get(self.container_id,
                                               changed_content.content_id)
        self.assertEqual(type(new_content), obj_type[new_policy])

        downloaded_data = "".join(new_content.fetch())

        self.assertEqual(downloaded_data, data)

    @ec
    def test_change_content_0_byte_policy_single_to_ec(self):
        self._test_change_policy(0, self.stgpol, self.stgpol_ec)

    @ec
    def test_change_content_0_byte_policy_ec_to_twocopies(self):
        self._test_change_policy(0, self.stgpol_ec, self.stgpol_twocopies)

    @ec
    def test_change_content_1_byte_policy_single_to_ec(self):
        self._test_change_policy(1, self.stgpol, self.stgpol_ec)

    @ec
    def test_change_content_chunksize_bytes_policy_twocopies_to_ec(self):
        self._test_change_policy(self.chunk_size, self.stgpol_twocopies,
                                 self.stgpol_ec)

    @ec
    def test_change_content_2xchunksize_bytes_policy_threecopies_to_ec(self):
        self._test_change_policy(self.chunk_size * 2, self.stgpol_threecopies,
                                 self.stgpol_ec)

    @ec
    def test_change_content_1_byte_policy_ec_to_threecopies(self):
        self._test_change_policy(1, self.stgpol_ec, self.stgpol_threecopies)

    @ec
    def test_change_content_chunksize_bytes_policy_ec_to_twocopies(self):
        self._test_change_policy(self.chunk_size, self.stgpol_ec,
                                 self.stgpol_twocopies)

    @ec
    def test_change_content_2xchunksize_bytes_policy_ec_to_single(self):
        self._test_change_policy(self.chunk_size * 2, self.stgpol_ec,
                                 self.stgpol)

    def test_change_content_0_byte_policy_twocopies_to_threecopies(self):
        self._test_change_policy(0, self.stgpol_twocopies,
                                 self.stgpol_threecopies)

    def test_change_content_chunksize_bytes_policy_single_to_twocopies(self):
        self._test_change_policy(self.chunk_size, self.stgpol,
                                 self.stgpol_twocopies)

    def test_change_content_2xchunksize_bytes_policy_3copies_to_single(self):
        self._test_change_policy(self.chunk_size * 2,
                                 self.stgpol_threecopies, self.stgpol)

    def test_change_content_with_same_policy(self):
        data = random_data(10)
        old_content = self._new_content(self.stgpol_twocopies, data)
        changed_content = self.content_factory.change_policy(
            old_content.container_id, old_content.content_id,
            self.stgpol_twocopies)
        self.assertEqual(old_content.content_id, changed_content.content_id)

    def test_change_policy_unknown_content(self):
        self.assertRaises(ContentNotFound,
                          self.content_factory.change_policy,
                          self.container_id, "1234", self.stgpol)

    def test_change_policy_unknown_storage_policy(self):
        data = random_data(10)
        old_content = self._new_content(self.stgpol_twocopies, data)
        self.assertRaises(ClientException,
                          self.content_factory.change_policy,
                          self.container_id, old_content.content_id,
                          "UnKnOwN")

    def _test_move_chunk(self, policy):
        data = random_data(self.chunk_size)
        content = self._new_content(policy, data)

        chunk_id = content.chunks.filter(metapos=0)[0].id
        chunk_url = content.chunks.filter(metapos=0)[0].url
        chunk_meta, chunk_stream = self.blob_client.chunk_get(chunk_url)
        chunk_hash = md5_stream(chunk_stream)
        new_chunk = content.move_chunk(chunk_id)

        content_updated = self.content_factory.get(self.container_id,
                                                   content.content_id)

        hosts = []
        for c in content_updated.chunks.filter(metapos=0):
            self.assertThat(hosts, Not(Contains(c.host)))
            self.assertNotEqual(c.id, chunk_id)
            hosts.append(c.host)

        new_chunk_meta, new_chunk_stream = self.blob_client.chunk_get(
            new_chunk["url"])
        new_chunk_hash = md5_stream(new_chunk_stream)

        self.assertEqual(new_chunk_hash, chunk_hash)

        del chunk_meta["chunk_id"]
        del new_chunk_meta["chunk_id"]
        self.assertEqual(new_chunk_meta, chunk_meta)

    def test_single_move_chunk(self):
        self._test_move_chunk(self.stgpol)

    def test_twocopies_move_chunk(self):
        self._test_move_chunk(self.stgpol_twocopies)

    @ec
    def test_ec_move_chunk(self):
        self._test_move_chunk(self.stgpol_ec)

    def test_move_chunk_not_in_content(self):
        data = random_data(self.chunk_size)
        content = self._new_content(self.stgpol_twocopies, data)
        with ExpectedException(OrphanChunk):
            content.move_chunk("1234")

    def test_strange_paths(self):
        strange_paths = [
            "Annual report.txt",
            "foo+bar=foobar.txt",
            "100%_bug_free.c",
            "forward/slash/allowed",
            "I\\put\\backslashes\\and$dollar$signs$in$file$names",
            "Je suis tombé sur la tête, mais ça va bien.",
            "%s%f%u%d%%",
            "carriage\rreturn",
            "line\nfeed",
            "ta\tbu\tla\ttion",
            "controlchars",
        ]
        answers = dict()
        for cname in strange_paths:
            content = self._new_content(self.stgpol, "nobody cares", cname)
            answers[cname] = content

        _, listing = self.container_client.content_list(
            self.account, self.container_name)
        obj_set = {
            k["name"].encode("utf8", "ignore")
            for k in listing["objects"]
        }
        try:
            # Ensure the saved path is the one we gave the object
            for cname in answers:
                self.assertEqual(cname, answers[cname].path)
            # Ensure all objects appear in listing
            for cname in strange_paths:
                self.assertIn(cname, obj_set)
        finally:
            # Cleanup
            for cname in answers:
                try:
                    answers[cname].delete()
                except Exception:
                    pass
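
# Sketch, not in the original module: the change_policy() flow exercised
# above, assuming an existing ContentFactory instance (`factory`) and a
# caller-chosen target policy name. change_policy() writes a new content
# with the target policy and unreferences the old one, so the old
# content_id stops resolving and the returned object carries a new one
# (unless the policy is unchanged, in which case the id is kept).
def migrate_policy(factory, container_id, content_id, new_policy="EC"):
    changed = factory.change_policy(container_id, content_id, new_policy)
    # re-load the metadata, then stream the data back through the new chunks
    new_content = factory.get(container_id, changed.content_id)
    return "".join(new_content.fetch())
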
class TestPlainContent(BaseTestCase):
    def setUp(self):
        super(TestPlainContent, self).setUp()

        if len(self.conf['services']['rawx']) < 4:
            self.skipTest(
                "Plain tests need at least 4 rawx to run")

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient(self.conf)
        self.container_name = "TestPlainContent-%f" % time.time()
        self.container_client.container_create(account=self.account,
                                               reference=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()
        self.content = random_str(64)
        self.stgpol = "SINGLE"
        self.stgpol_twocopies = "TWOCOPIES"
        self.stgpol_threecopies = "THREECOPIES"

    def _test_create(self, stgpol, data_size):
        data = random_data(data_size)
        content = self.content_factory.new(self.container_id, self.content,
                                           len(data), stgpol)

        content.create(BytesIO(data))

        meta, chunks = self.container_client.content_locate(
            cid=self.container_id, content=content.content_id)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], stgpol)
        self.assertEqual(meta['name'], self.content)

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
        if metachunk_nb == 0:
            metachunk_nb = 1  # special case for empty content

        chunks = ChunksHelper(chunks)

        # TODO: read nb_copy from the storage policy instead of hardcoding it
        if stgpol == self.stgpol_threecopies:
            nb_copy = 3
        elif stgpol == self.stgpol_twocopies:
            nb_copy = 2
        elif stgpol == self.stgpol:
            nb_copy = 1

        self.assertEqual(len(chunks), metachunk_nb * nb_copy)

        for pos in range(metachunk_nb):
            chunks_at_pos = chunks.filter(pos=pos)
            self.assertEqual(len(chunks_at_pos), nb_copy)

            data_begin = pos * self.chunk_size
            data_end = pos * self.chunk_size + self.chunk_size
            chunk_hash = md5_data(data[data_begin:data_end])

            for chunk in chunks_at_pos:
                meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(md5_stream(stream), chunk_hash)
                self.assertEqual(meta['content_path'], self.content)
                self.assertEqual(meta['container_id'], self.container_id)
                self.assertEqual(meta['content_id'], content.content_id)
                self.assertEqual(meta['chunk_id'], chunk.id)
                self.assertEqual(meta['chunk_pos'], str(pos))
                # Check that chunk data matches chunk hash from xattr
                self.assertEqual(meta['chunk_hash'], chunk_hash)
                # Check that chunk data matches chunk hash from database
                self.assertEqual(chunk.checksum, chunk_hash)
                full_path = encode_fullpath(
                    self.account, self.container_name, self.content,
                    meta['content_version'], meta['content_id'])
                self.assertEqual(meta['full_path'], full_path)
                self.assertEqual(meta['oio_version'], '4.2')

    def test_twocopies_create_0_byte(self):
        self._test_create(self.stgpol_twocopies, 0)

    def test_twocopies_create_1_byte(self):
        self._test_create(self.stgpol_twocopies, 1)

    def test_twocopies_create_chunksize_bytes(self):
        self._test_create(self.stgpol_twocopies, self.chunk_size)

    def test_twocopies_create_chunksize_plus_1_bytes(self):
        self._test_create(self.stgpol_twocopies, self.chunk_size + 1)

    def test_twocopies_create_6294503_bytes(self):
        self._test_create(self.stgpol_twocopies, 6294503)

    def test_single_create_0_byte(self):
        self._test_create(self.stgpol, 0)

    def test_single_create_chunksize_plus_1_bytes(self):
        self._test_create(self.stgpol, self.chunk_size + 1)

    def _new_content(self, stgpol, data, broken_pos_list=[]):
        old_content = self.content_factory.new(
            self.container_id, self.content, len(data), stgpol)

        old_content.create(BytesIO(data))

        broken_chunks_info = {}
        for pos, idx in broken_pos_list:
            c = old_content.chunks.filter(pos=pos)[idx]
            meta, stream = self.blob_client.chunk_get(c.url)
            if pos not in broken_chunks_info:
                broken_chunks_info[pos] = {}
            broken_chunks_info[pos][idx] = {
                "url": c.url,
                "id": c.id,
                "hash": c.checksum,
                "dl_meta": meta,
                "dl_hash": md5_stream(stream)
            }
            self.blob_client.chunk_delete(c.url)

        # get the new structure of the uploaded content
        return (self.content_factory.get(
            self.container_id, old_content.content_id), broken_chunks_info)

    def _rebuild_and_check(self, content, broken_chunks_info,
                           full_rebuild_pos, allow_frozen_container=False):
        rebuild_pos, rebuild_idx = full_rebuild_pos
        rebuild_chunk_info = broken_chunks_info[rebuild_pos][rebuild_idx]
        content.rebuild_chunk(rebuild_chunk_info["id"],
                              allow_frozen_container=allow_frozen_container)

        # get the new structure of the content
        rebuilt_content = self.content_factory.get(self.container_id,
                                                   content.content_id)

        # find the rebuilt chunk
        for c in rebuilt_content.chunks.filter(pos=rebuild_pos):
            if len(content.chunks.filter(id=c.id)) > 0:
                # not the rebuilt chunk
                # if this chunk is broken, it must not have been rebuilt
                for b_c_i in broken_chunks_info[rebuild_pos].values():
                    if c.id == b_c_i["id"]:
                        with ExpectedException(NotFound):
                            _, _ = self.blob_client.chunk_get(c.url)
                continue
            meta, stream = self.blob_client.chunk_get(c.url)
            self.assertEqual(meta["chunk_id"], c.id)
            self.assertEqual(md5_stream(stream),
                             rebuild_chunk_info["dl_hash"])
            self.assertEqual(c.checksum, rebuild_chunk_info["hash"])
            self.assertThat(c.url, NotEquals(rebuild_chunk_info["url"]))
            del meta["chunk_id"]
            del rebuild_chunk_info["dl_meta"]["chunk_id"]
            self.assertEqual(meta, rebuild_chunk_info["dl_meta"])

    def _test_rebuild(self, stgpol, data_size, broken_pos_list,
                      full_rebuild_pos):
        data = random_data(data_size)
        content, broken_chunks_info = self._new_content(
            stgpol, data, broken_pos_list)

        self._rebuild_and_check(content, broken_chunks_info,
                                full_rebuild_pos)

    def test_2copies_content_0_byte_1broken_rebuild_pos_0_idx_0(self):
        self._test_rebuild(self.stgpol_twocopies, 0, [(0, 0)], (0, 0))

    def test_2copies_content_1_byte_1broken_rebuild_pos_0_idx_1(self):
        self._test_rebuild(self.stgpol_twocopies, 1, [(0, 1)], (0, 1))

    def test_3copies_content_chunksize_bytes_2broken_rebuild_pos_0_idx_1(self):
        if len(self.conf['services']['rawx']) <= 3:
            self.skipTest("Need more than 3 rawx")
        self._test_rebuild(self.stgpol_threecopies, self.chunk_size,
                           [(0, 0), (0, 1)], (0, 1))

    def test_3copies_content_2xchksize_bytes_2broken_rebuild_pos_1_idx_2(self):
        self._test_rebuild(self.stgpol_threecopies, 2 * self.chunk_size,
                           [(1, 0), (1, 2)], (1, 2))

    def test_2copies_content_0_byte_2broken_rebuild_pos_0_idx_0(self):
        with ExpectedException(UnrecoverableContent):
            self._test_rebuild(
                self.stgpol_twocopies, 0, [(0, 0), (0, 1)], (0, 0))

    def test_rebuild_chunk_in_frozen_container(self):
        data = random_data(self.chunk_size)
        content, broken_chunks_info = self._new_content(
            self.stgpol_twocopies, data, [(0, 0)])
        system = dict()
        system['sys.status'] = str(OIO_DB_FROZEN)
        self.container_client.container_set_properties(
            self.account, self.container_name, None, system=system)

        try:
            full_rebuild_pos = (0, 0)
            rebuild_pos, rebuild_idx = full_rebuild_pos
            rebuild_chunk_info = broken_chunks_info[rebuild_pos][rebuild_idx]
            self.assertRaises(ServiceBusy,
                              content.rebuild_chunk,
                              rebuild_chunk_info["id"])
        finally:
            system['sys.status'] = str(OIO_DB_ENABLED)
            self.container_client.container_set_properties(
                self.account, self.container_name, None, system=system)

        self._rebuild_and_check(content, broken_chunks_info,
                                full_rebuild_pos,
                                allow_frozen_container=True)

    def _test_fetch(self, stgpol, data_size, broken_pos_list):
        data = random_data(data_size)
        content, _ = self._new_content(stgpol, data, broken_pos_list)

        fetched_data = "".join(content.fetch())

        self.assertEqual(fetched_data, data)

        for pos, idx in broken_pos_list:
            # check that nothing has been rebuilt
            c = content.chunks.filter(pos=pos)[0]
            self.assertRaises(NotFound, self.blob_client.chunk_delete, c.url)

    def test_twocopies_fetch_content_0_byte_without_broken_chunks(self):
        self._test_fetch(self.stgpol_twocopies, 0, [])

    def test_twocopies_fetch_content_0_byte_with_broken_0_0(self):
        self._test_fetch(self.stgpol_twocopies, 0, [(0, 0)])

    def test_twocopies_fetch_content_1_byte_without_broken_chunks(self):
        self._test_fetch(self.stgpol_twocopies, 1, [])

    def test_twocopies_fetch_content_1_byte_with_broken_0_0(self):
        self._test_fetch(self.stgpol_twocopies, 1, [(0, 0)])

    def test_twocopies_fetch_chunksize_bytes_without_broken_chunks(self):
        self._test_fetch(self.stgpol_twocopies, self.chunk_size, [])

    def test_twocopies_fetch_2xchuksize_bytes_with_broken_0_0_and_1_0(self):
        self._test_fetch(
            self.stgpol_twocopies, self.chunk_size * 2, [(0, 0), (1, 0)])

    def test_twocopies_fetch_content_chunksize_bytes_2_broken_chunks(self):
        data = random_data(self.chunk_size)
        content, _ = self._new_content(
            self.stgpol_twocopies, data, [(0, 0), (0, 1)])
        gen = content.fetch()
        self.assertRaises(UnrecoverableContent, gen.next)

    def test_single_fetch_content_1_byte_without_broken_chunks(self):
        self._test_fetch(self.stgpol, 1, [])

    def test_single_fetch_chunksize_bytes_plus_1_without_broken_chunk(self):
        self._test_fetch(self.stgpol, self.chunk_size + 1, [])
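
# Sketch, not in the original tests: the frozen-container behaviour that
# test_rebuild_chunk_in_frozen_container() checks. Rebuilding a chunk of a
# frozen container raises ServiceBusy unless allow_frozen_container=True
# is passed, so a repair tool has to opt in explicitly. Names reuse the
# module's imports; `content` and `chunk_id` are assumed inputs.
def rebuild_even_if_frozen(content, chunk_id):
    try:
        content.rebuild_chunk(chunk_id)
    except ServiceBusy:
        # the container is frozen: retry with the explicit opt-in
        content.rebuild_chunk(chunk_id, allow_frozen_container=True)
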