class TestAccountClient(BaseTestCase): def setUp(self): super(TestAccountClient, self).setUp() self.account_id = "test_account_%f" % time.time() self.account_client = AccountClient(self.conf) self.container_client = ContainerClient(self.conf) self.account_client.account_create(self.account_id) self.container_client.container_create(acct=self.account_id, ref="container1") self.container_client.container_create(acct=self.account_id, ref="container2") time.sleep(0.5) # ensure container event have been processed def test_containers_list(self): resp = self.account_client.containers_list(self.account_id) self.assertEquals(resp["containers"], 2) self.assertEqual(resp["listing"], [["container1", 0, 0, 0], ["container2", 0, 0, 0]]) resp = self.account_client.containers_list(self.account_id, limit=1) self.assertEquals(resp["containers"], 2) self.assertEqual(resp["listing"], [["container1", 0, 0, 0]]) resp = self.account_client.containers_list(self.account_id, marker="container1", limit=1) self.assertEquals(resp["containers"], 2) self.assertEqual(resp["listing"], [["container2", 0, 0, 0]]) resp = self.account_client.containers_list(self.account_id, marker="container2", limit=1) self.assertEquals(resp["containers"], 2) self.assertEqual(resp["listing"], [])
class TestFilters(BaseTestCase): def setUp(self): with mock.patch('oio.container.client.gen_headers', gen_headers_mock): super(TestFilters, self).setUp() self.account = self.conf['account'] self.namespace = self.conf['namespace'] self.chunk_size = self.conf['chunk_size'] self.gridconf = {'namespace': self.namespace} self.content_factory = ContentFactory(self.gridconf) self.container_name = 'TestFilter%f' % time.time() self.blob_client = BlobClient() self.container_client = ContainerClient(self.gridconf) self.container_client.container_create(acct=self.account, ref=self.container_name) self.container_id = cid_from_name(self.account, self.container_name).upper() self.stgpol = "SINGLE" def _new_content(self, data, path): old_content = self.content_factory.new(self.container_id, path, len(data), self.stgpol) old_content.create(StringIO.StringIO(data)) return self.content_factory.get(self.container_id, old_content.content_id) def test_slave_and_admin(self): if not os.getenv("SLAVE"): self.skipTest("must be in slave mode") data = random_data(10) path = 'test_slave' try: self._new_content(data, path) self.assertTrue(None) except ClientException as e: print str(e) self.assertTrue(str(e).find('NS slave!') != -1) with mock.patch('oio.container.client.gen_headers', gen_headers_mock): content = self._new_content(data, path) content.delete() def test_worm_and_admin(self): if not os.getenv("WORM"): self.skipTest("must be in worm mode") data = random_data(10) path = 'test_worm' content = self._new_content(data, path) try: content.delete() self.assertTrue(None) except ClientException as e: self.assertTrue(str(e).find('NS wormed!') != -1) downloaded_data = ''.join(content.fetch()) self.assertEqual(downloaded_data, data) with mock.patch('oio.container.client.gen_headers', gen_headers_mock): content.delete()
class TestAccountClient(BaseTestCase): def setUp(self): super(TestAccountClient, self).setUp() self.account_id = "test_account_%f" % time.time() self.account_client = AccountClient(self.conf) self.container_client = ContainerClient(self.conf) retry = 3 for i in range(retry+1): try: self.account_client.account_create(self.account_id) break except ClientException: if i < retry: time.sleep(2) else: raise self.container_client.container_create(acct=self.account_id, ref="container1") self.container_client.container_create(acct=self.account_id, ref="container2") time.sleep(.5) # ensure container event have been processed def test_containers_list(self): resp = self.account_client.containers_list(self.account_id) self.assertEquals(resp["containers"], 2) self.assertEqual(resp["listing"], [ ["container1", 0, 0, 0], ["container2", 0, 0, 0] ]) resp = self.account_client.containers_list(self.account_id, limit=1) self.assertEquals(resp["containers"], 2) self.assertEqual(resp["listing"], [ ["container1", 0, 0, 0] ]) resp = self.account_client.containers_list(self.account_id, marker="container1", limit=1) self.assertEquals(resp["containers"], 2) self.assertEqual(resp["listing"], [ ["container2", 0, 0, 0] ]) resp = self.account_client.containers_list(self.account_id, marker="container2", limit=1) self.assertEquals(resp["containers"], 2) self.assertEqual(resp["listing"], [])
def __init__(self, conf, logger, volume): self.conf = conf self.logger = logger or get_logger(conf) self.volume = volume self.run_time = 0 self.passes = 0 self.errors = 0 self.last_reported = 0 self.chunks_run_time = 0 self.bytes_running_time = 0 self.bytes_processed = 0 self.total_bytes_processed = 0 self.total_chunks_processed = 0 self.dry_run = true_value( conf.get('dry_run', False)) self.report_interval = int_value( conf.get('report_interval'), 3600) self.max_chunks_per_second = int_value( conf.get('chunks_per_second'), 30) self.max_bytes_per_second = int_value( conf.get('bytes_per_second'), 10000000) self.rdir_fetch_limit = int_value( conf.get('rdir_fetch_limit'), 100) self.blob_client = BlobClient() self.container_client = ContainerClient(conf) self.rdir_client = RdirClient(conf)
def __init__(self, conf, logger, volume): self.conf = conf self.logger = logger or get_logger(conf) self.volume = volume self.run_time = 0 self.passes = 0 self.errors = 0 self.last_reported = 0 self.last_usage_check = 0 self.chunks_run_time = 0 self.bytes_running_time = 0 self.bytes_processed = 0 self.total_bytes_processed = 0 self.total_chunks_processed = 0 self.usage_target = int_value( conf.get('usage_target'), 0) self.usage_check_interval = int_value( conf.get('usage_check_interval'), 3600) self.report_interval = int_value( conf.get('report_interval'), 3600) self.max_chunks_per_second = int_value( conf.get('chunks_per_second'), 30) self.max_bytes_per_second = int_value( conf.get('bytes_per_second'), 10000000) self.blob_client = BlobClient() self.container_client = ContainerClient(conf)
def setUp(self): super(TestBlobAuditorFunctional, self).setUp() self.namespace = self.conf['namespace'] self.account = self.conf['account'] self.test_dir = self.conf['sds_path'] self.chars = string.ascii_lowercase + string.ascii_uppercase +\ string.digits self.chars_id = string.digits + 'ABCDEF' self.rawx = 'http://' + self.conf["rawx"][0]['addr'] self.h = hashlib.new('md5') conf = {"namespace": self.namespace} self.auditor = BlobAuditorWorker(conf, get_logger(None), None) self.container_c = ContainerClient(conf) self.blob_c = BlobClient() self.ref = rand_generator(self.chars, 8) self.container_c.container_create(self.account, self.ref) self.url_rand = rand_generator(self.chars_id, 64) self.data = rand_generator(self.chars, 1280) self.h.update(self.data) self.hash_rand = self.h.hexdigest().lower() self.content = TestContent( rand_generator(self.chars, 6), len(self.data), self.url_rand, 1) self.content.id_container = cid_from_name( self.account, self.ref).upper() self.chunk = TestChunk(self.content.size, self.url_rand, 0, self.hash_rand) self.chunk_url = "%s/%s" % (self.rawx, self.chunk.id_chunk) self.chunk_proxy = {"hash": self.chunk.md5, "pos": "0", "size": self.chunk.size, "url": self.chunk_url} chunk_meta = {'content_size': self.content.size, 'content_chunksnb': self.content.nb_chunks, 'content_path': self.content.path, 'content_cid': self.content.id_container, 'content_mimetype': 'application/octet-stream', 'content_chunkmethod': 'bytes', 'content_policy': 'TESTPOLICY', 'content_id': '0000', 'content_version': 1, 'chunk_id': self.chunk.id_chunk, 'chunk_pos': self.chunk.pos} self.blob_c.chunk_put(self.chunk_url, chunk_meta, self.data) self.chunk_path = self.test_dir + '/data/NS-rawx-1/' +\ self.chunk.id_chunk[0:2] + "/" + self.chunk.id_chunk self.bad_container_id = '0'*64
def setUp(self): super(TestBlobAuditorFunctional, self).setUp() self.namespace = self.conf["namespace"] self.account = self.conf["account"] self.test_dir = self.conf["sds_path"] rawx_num, rawx_path, rawx_addr = self.get_service_url("rawx") self.rawx = "http://" + rawx_addr self.h = hashlib.new("md5") conf = {"namespace": self.namespace} self.auditor = BlobAuditorWorker(conf, get_logger(None), None) self.container_c = ContainerClient(conf) self.blob_c = BlobClient() self.ref = random_str(8) self.container_c.container_create(self.account, self.ref) self.url_rand = random_id(64) self.data = random_str(1280) self.h.update(self.data) self.hash_rand = self.h.hexdigest().lower() self.content = TestContent(random_str(6), len(self.data), self.url_rand, 1) self.content.id_container = cid_from_name(self.account, self.ref).upper() self.chunk = TestChunk(self.content.size, self.url_rand, 0, self.hash_rand) self.chunk_url = "%s/%s" % (self.rawx, self.chunk.id_chunk) self.chunk_proxy = {"hash": self.chunk.md5, "pos": "0", "size": self.chunk.size, "url": self.chunk_url} chunk_meta = { "content_path": self.content.path, "container_id": self.content.id_container, "chunk_method": "plain/nb_copy=3", "policy": "TESTPOLICY", "id": "0000", "version": 1, "chunk_id": self.chunk.id_chunk, "chunk_pos": self.chunk.pos, "chunk_hash": self.chunk.md5, } self.blob_c.chunk_put(self.chunk_url, chunk_meta, self.data) self.chunk_path = ( self.test_dir + "/data/" + self.namespace + "-rawx-1/" + self.chunk.id_chunk[0:3] + "/" + self.chunk.id_chunk ) self.bad_container_id = "0" * 64
class ContentFactory(object): DEFAULT_DATASEC = "plain", {"nb_copy": "1", "distance": "0"} def __init__(self, conf): self.conf = conf self.logger = get_logger(conf) self.container_client = ContainerClient(conf) def get(self, container_id, content_id): try: meta, chunks = self.container_client.content_show( cid=container_id, content=content_id) except NotFound: raise ContentNotFound("Content %s/%s not found" % (container_id, content_id)) chunk_method = meta['chunk_method'] storage_method = STORAGE_METHODS.load(chunk_method) cls = ECContent if storage_method.ec else PlainContent return cls(self.conf, container_id, meta, chunks, storage_method) def new(self, container_id, path, size, policy): meta, chunks = self.container_client.content_prepare( cid=container_id, path=path, size=size, stgpol=policy) chunk_method = meta['chunk_method'] storage_method = STORAGE_METHODS.load(chunk_method) cls = ECContent if storage_method.ec else PlainContent return cls(self.conf, container_id, meta, chunks, storage_method) def change_policy(self, container_id, content_id, new_policy): old_content = self.get(container_id, content_id) if old_content.stgpol == new_policy: return old_content new_content = self.new(container_id, old_content.path, old_content.length, new_policy) stream = old_content.fetch() new_content.create(GeneratorIO(stream)) # the old content is automatically deleted because the new content has # the same name (but not the same id) return new_content
def setUp(self): super(TestAccountClient, self).setUp() self.account_id = "test_account_%f" % time.time() self.account_client = AccountClient(self.conf) self.container_client = ContainerClient(self.conf) self.account_client.account_create(self.account_id) self.container_client.container_create(acct=self.account_id, ref="container1") self.container_client.container_create(acct=self.account_id, ref="container2") time.sleep(0.5) # ensure container event have been processed
def setUp(self): super(TestBlobAuditorFunctional, self).setUp() self.namespace = self.conf['namespace'] self.account = self.conf['account'] self.test_dir = self.conf['sds_path'] rawx_num, rawx_path, rawx_addr = self.get_service_url('rawx') self.rawx = 'http://' + rawx_addr self.h = hashlib.new('md5') conf = {"namespace": self.namespace} self.auditor = BlobAuditorWorker(conf, get_logger(None), None) self.container_c = ContainerClient(conf) self.blob_c = BlobClient() self.ref = random_str(8) self.container_c.container_create(self.account, self.ref) self.url_rand = random_id(64) self.data = random_str(1280) self.h.update(self.data) self.hash_rand = self.h.hexdigest().lower() self.content = TestContent( random_str(6), len(self.data), self.url_rand, 1) self.content.id_container = cid_from_name( self.account, self.ref).upper() self.chunk = TestChunk(self.content.size, self.url_rand, 0, self.hash_rand) self.chunk_url = "%s/%s" % (self.rawx, self.chunk.id_chunk) self.chunk_proxy = {"hash": self.chunk.md5, "pos": "0", "size": self.chunk.size, "url": self.chunk_url} chunk_meta = {'content_path': self.content.path, 'container_id': self.content.id_container, 'content_chunkmethod': 'plain/nb_copy=3', 'content_policy': 'TESTPOLICY', 'content_id': '0000', 'content_version': 1, 'chunk_id': self.chunk.id_chunk, 'chunk_pos': self.chunk.pos} self.blob_c.chunk_put(self.chunk_url, chunk_meta, self.data) self.chunk_path = self.test_dir + '/data/' + self.namespace + \ '-rawx-1/' + self.chunk.id_chunk[0:3] + "/" + self.chunk.id_chunk self.bad_container_id = '0'*64
def setUp(self): super(TestContentFactory, self).setUp() self.namespace = self.conf['namespace'] self.chunk_size = self.conf['chunk_size'] self.gridconf = {"namespace": self.namespace} self.content_factory = ContentFactory(self.gridconf) self.container_name = "TestContentFactory%f" % time.time() self.container_client = ContainerClient(self.gridconf) self.container_client.container_create(acct=self.account, ref=self.container_name) self.container_id = cid_from_name(self.account, self.container_name).upper()
def setUp(self): super(TestStorageTierer, self).setUp() self.namespace = self.conf['namespace'] self.test_account = "test_storage_tiering_%f" % time.time() self.gridconf = {"namespace": self.namespace, "container_fetch_limit": 2, "content_fetch_limit": 2, "account": self.test_account, "outdated_threshold": 0, "new_policy": "RAIN"} self.content_factory = ContentFactory(self.gridconf) self.container_client = ContainerClient(self.gridconf) self._populate()
def setUp(self): super(TestRebuilderCrawler, self).setUp() self.namespace = self.conf['namespace'] self.account = self.conf['account'] self.gridconf = {"namespace": self.namespace} self.container_client = ContainerClient(self.gridconf) self.blob_client = BlobClient() self.container_name = "TestRebuilderCrawler%d" % int(time.time()) self.container_client.container_create(acct=self.account, ref=self.container_name)
def setUp(self): with mock.patch('oio.container.client.gen_headers', gen_headers_mock): super(TestFilters, self).setUp() self.account = self.conf['account'] self.namespace = self.conf['namespace'] self.chunk_size = self.conf['chunk_size'] self.gridconf = {'namespace': self.namespace} self.content_factory = ContentFactory(self.gridconf) self.container_name = 'TestFilter%f' % time.time() self.blob_client = BlobClient() self.container_client = ContainerClient(self.gridconf) self.container_client.container_create(acct=self.account, ref=self.container_name) self.container_id = cid_from_name(self.account, self.container_name).upper() self.stgpol = "SINGLE"
def setUp(self): super(TestContentFactory, self).setUp() self.namespace = self.conf['namespace'] self.chunk_size = self.conf['chunk_size'] self.gridconf = {"namespace": self.namespace} self.content_factory = ContentFactory(self.gridconf) self.container_name = "TestContentFactory%f" % time.time() self.blob_client = BlobClient() self.container_client = ContainerClient(self.gridconf) self.container_client.container_create(acct=self.account, ref=self.container_name) self.container_id = cid_from_name(self.account, self.container_name).upper() self.stgpol = "SINGLE" self.stgpol_twocopies = "TWOCOPIES" self.stgpol_threecopies = "THREECOPIES" self.stgpol_ec = "EC"
def setUp(self): super(TestDupContent, self).setUp() if len(self.conf['rawx']) < 3: self.skipTest("Not enough rawx. " "Dup tests needs more than 2 rawx to run") self.namespace = self.conf['namespace'] self.account = self.conf['account'] self.chunk_size = self.conf['chunk_size'] self.gridconf = {"namespace": self.namespace} self.content_factory = ContentFactory(self.gridconf) self.container_client = ContainerClient(self.gridconf) self.blob_client = BlobClient() self.container_name = "TestDupContent%f" % time.time() self.container_client.container_create(acct=self.account, ref=self.container_name) self.container_id = cid_from_name(self.account, self.container_name).upper()
def __init__(self, conf, container_id, metadata, chunks, stgpol_args): self.conf = conf self.container_id = container_id self.metadata = metadata self.chunks = ChunksHelper(chunks) self.stgpol_args = stgpol_args self.logger = get_logger(self.conf) self.cs_client = ConscienceClient(conf) self.container_client = ContainerClient(self.conf) self.blob_client = BlobClient() self.session = requests.Session() self.content_id = self.metadata["id"] self.stgpol_name = self.metadata["policy"] self.path = self.metadata["name"] self.length = int(self.metadata["length"]) self.version = self.metadata["version"] self.hash = self.metadata["hash"] self.mime_type = self.metadata["mime-type"] self.chunk_method = self.metadata["chunk-method"]
def __init__(self, conf, logger, volume): self.conf = conf self.logger = logger self.volume = volume self.run_time = 0 self.passes = 0 self.errors = 0 self.orphan_chunks = 0 self.faulty_chunks = 0 self.corrupted_chunks = 0 self.last_reported = 0 self.chunks_run_time = 0 self.bytes_running_time = 0 self.bytes_processed = 0 self.total_bytes_processed = 0 self.total_chunks_processed = 0 self.report_interval = int_value(conf.get("report_interval"), 3600) self.max_chunks_per_second = int_value(conf.get("chunks_per_second"), 30) self.max_bytes_per_second = int_value(conf.get("bytes_per_second"), 10000000) self.container_client = ContainerClient(conf)
def setUp(self): super(TestAccountClient, self).setUp() self.account_id = "test_account_%f" % time.time() self.account_client = AccountClient(self.conf) self.container_client = ContainerClient(self.conf) retry = 3 for i in range(retry+1): try: self.account_client.account_create(self.account_id) break except ClientException: if i < retry: time.sleep(2) else: raise self.container_client.container_create(acct=self.account_id, ref="container1") self.container_client.container_create(acct=self.account_id, ref="container2") time.sleep(.5) # ensure container event have been processed
def __init__(self, conf, logger): self.conf = conf self.logger = logger self.account = conf[CONF_ACCOUNT] self.container_client = ContainerClient(self.conf) self.account_client = AccountClient(self.conf) self.content_factory = ContentFactory(self.conf) self.passes = 0 self.errors = 0 self.last_reported = 0 self.contents_run_time = 0 self.total_contents_processed = 0 self.report_interval = int_value( conf.get('report_interval'), 3600) self.max_contents_per_second = int_value( conf.get('contents_per_second'), 30) self.container_fetch_limit = int_value( conf.get('container_fetch_limit'), 100) self.content_fetch_limit = int_value( conf.get('content_fetch_limit'), 100) self.outdated_threshold = int_value( conf.get(CONF_OUTDATED_THRESHOLD), 9999999999) self.new_policy = conf.get(CONF_NEW_POLICY)
def setUp(self): super(TestPlainContent, self).setUp() if len(self.conf['services']['rawx']) < 4: self.skipTest( "Plain tests needs more than 3 rawx to run") self.namespace = self.conf['namespace'] self.account = self.conf['account'] self.chunk_size = self.conf['chunk_size'] self.gridconf = {"namespace": self.namespace} self.content_factory = ContentFactory(self.gridconf) self.container_client = ContainerClient(self.gridconf) self.blob_client = BlobClient() self.container_name = "TestPlainContent-%f" % time.time() self.container_client.container_create(acct=self.account, ref=self.container_name) self.container_id = cid_from_name(self.account, self.container_name).upper() self.content = random_str(64) self.stgpol = "SINGLE" self.stgpol_twocopies = "TWOCOPIES" self.stgpol_threecopies = "THREECOPIES"
def setUp(self): super(TestECContent, self).setUp() if len(self.conf['services']['rawx']) < 12: self.skipTest("Not enough rawx. " "EC tests needs at least 12 rawx to run") self.namespace = self.conf['namespace'] self.account = self.conf['account'] self.chunk_size = self.conf['chunk_size'] self.gridconf = {"namespace": self.namespace} self.content_factory = ContentFactory(self.gridconf) self.container_client = ContainerClient(self.gridconf) self.blob_client = BlobClient() self.container_name = "TestECContent%f" % time.time() self.container_client.container_create(acct=self.account, ref=self.container_name) self.container_id = cid_from_name(self.account, self.container_name).upper() self.content = random_str(64) self.stgpol = "EC" self.size = 1024*1024 + 320 self.k = 6 self.m = 3
class TestDupContent(BaseTestCase): def setUp(self): super(TestDupContent, self).setUp() if len(self.conf['rawx']) < 3: self.skipTest("Not enough rawx. " "Dup tests needs more than 2 rawx to run") self.namespace = self.conf['namespace'] self.account = self.conf['account'] self.chunk_size = self.conf['chunk_size'] self.gridconf = {"namespace": self.namespace} self.content_factory = ContentFactory(self.gridconf) self.container_client = ContainerClient(self.gridconf) self.blob_client = BlobClient() self.container_name = "TestDupContent%f" % time.time() self.container_client.container_create(acct=self.account, ref=self.container_name) self.container_id = cid_from_name(self.account, self.container_name).upper() def tearDown(self): super(TestDupContent, self).tearDown() def _test_upload(self, stgpol, data_size): data = random_data(data_size) content = self.content_factory.new(self.container_id, "titi", len(data), stgpol) self.assertEqual(type(content), DupContent) content.upload(StringIO.StringIO(data)) meta, chunks = self.container_client.content_show( cid=self.container_id, content=content.content_id) chunks = ChunksHelper(chunks) self.assertEqual(meta['hash'], md5_data(data)) self.assertEqual(meta['length'], str(len(data))) self.assertEqual(meta['policy'], stgpol) self.assertEqual(meta['name'], "titi") metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size)) if metachunk_nb == 0: metachunk_nb = 1 # special case for empty content if stgpol == "THREECOPIES": nb_copy = 3 elif stgpol == "TWOCOPIES": nb_copy = 2 elif stgpol == "SINGLE": nb_copy = 1 self.assertEqual(len(chunks), metachunk_nb * nb_copy) for pos in range(metachunk_nb): chunks_at_pos = chunks.filter(pos=pos) self.assertEqual(len(chunks_at_pos), nb_copy) data_begin = pos * self.chunk_size data_end = pos * self.chunk_size + self.chunk_size chunk_hash = md5_data(data[data_begin:data_end]) for chunk in chunks_at_pos: meta, stream = self.blob_client.chunk_get(chunk.url) self.assertEqual(md5_stream(stream), chunk_hash) self.assertEqual(meta['content_size'], str(len(data))) self.assertEqual(meta['content_path'], "titi") self.assertEqual(meta['content_cid'], self.container_id) self.assertEqual(meta['content_id'], meta['content_id']) self.assertEqual(meta['chunk_id'], chunk.id) self.assertEqual(meta['chunk_pos'], str(pos)) self.assertEqual(meta['chunk_hash'], chunk_hash) def test_twocopies_upload_0_byte(self): self._test_upload("TWOCOPIES", 0) def test_twocopies_upload_1_byte(self): self._test_upload("TWOCOPIES", 1) def test_twocopies_upload_chunksize_bytes(self): self._test_upload("TWOCOPIES", self.chunk_size) def test_twocopies_upload_chunksize_plus_1_bytes(self): self._test_upload("TWOCOPIES", self.chunk_size + 1) def test_single_upload_0_byte(self): self._test_upload("SINGLE", 0) def test_single_upload_chunksize_plus_1_bytes(self): self._test_upload("SINGLE", self.chunk_size + 1) def test_chunks_cleanup_when_upload_failed(self): data = random_data(2 * self.chunk_size) content = self.content_factory.new(self.container_id, "titi", len(data), "TWOCOPIES") self.assertEqual(type(content), DupContent) # set bad url for position 1 for chunk in content.chunks.filter(pos=1): chunk.url = "http://127.0.0.1:9/DEADBEEF" self.assertRaises(Exception, content.upload, StringIO.StringIO(data)) for chunk in content.chunks.exclude(pos=1): self.assertRaises(NotFound, self.blob_client.chunk_head, chunk.url) def _new_content(self, stgpol, data, broken_pos_list=[]): old_content = self.content_factory.new(self.container_id, "titi", len(data), stgpol) self.assertEqual(type(old_content), DupContent) old_content.upload(StringIO.StringIO(data)) broken_chunks_info = {} for pos, idx in broken_pos_list: c = old_content.chunks.filter(pos=pos)[idx] meta, stream = self.blob_client.chunk_get(c.url) if pos not in broken_chunks_info: broken_chunks_info[pos] = {} broken_chunks_info[pos][idx] = { "url": c.url, "id": c.id, "hash": c.hash, "dl_meta": meta, "dl_hash": md5_stream(stream) } self.blob_client.chunk_delete(c.url) # get the new structure of the uploaded content return (self.content_factory.get( self.container_id, old_content.content_id), broken_chunks_info) def _test_rebuild(self, stgpol, data_size, broken_pos_list, full_rebuild_pos): data = random_data(data_size) content, broken_chunks_info = self._new_content(stgpol, data, broken_pos_list) rebuild_pos, rebuild_idx = full_rebuild_pos rebuild_chunk_info = broken_chunks_info[rebuild_pos][rebuild_idx] content.rebuild_chunk(rebuild_chunk_info["id"]) # get the new structure of the content rebuilt_content = self.content_factory.get(self.container_id, content.content_id) self.assertEqual(type(rebuilt_content), DupContent) # find the rebuilt chunk for c in rebuilt_content.chunks.filter(pos=rebuild_pos): if len(content.chunks.filter(id=c.id)) > 0: # not the rebuilt chunk # if this chunk is broken, it must not have been rebuilt for b_c_i in broken_chunks_info[rebuild_pos].values(): if c.id == b_c_i["id"]: with ExpectedException(NotFound): _, _ = self.blob_client.chunk_get(c.url) continue meta, stream = self.blob_client.chunk_get(c.url) self.assertEqual(meta["chunk_id"], c.id) self.assertEqual(md5_stream(stream), rebuild_chunk_info["dl_hash"]) self.assertEqual(c.hash, rebuild_chunk_info["hash"]) self.assertThat(c.url, NotEquals(rebuild_chunk_info["url"])) del meta["chunk_id"] del rebuild_chunk_info["dl_meta"]["chunk_id"] self.assertEqual(meta, rebuild_chunk_info["dl_meta"]) def test_2copies_content_0_byte_1broken_rebuild_pos_0_idx_0(self): self._test_rebuild("TWOCOPIES", 0, [(0, 0)], (0, 0)) def test_2copies_content_1_byte_1broken_rebuild_pos_0_idx_1(self): self._test_rebuild("TWOCOPIES", 1, [(0, 1)], (0, 1)) def test_3copies_content_chunksize_bytes_2broken_rebuild_pos_0_idx_1(self): if len(self.conf['rawx']) <= 3: self.skipTest("Need more than 3 rawx") self._test_rebuild("THREECOPIES", self.chunk_size, [(0, 0), (0, 1)], (0, 1)) def test_3copies_content_2xchksize_bytes_2broken_rebuild_pos_1_idx_2(self): if len(self.conf['rawx']) <= 3: self.skipTest("Need more than 3 rawx") self._test_rebuild("THREECOPIES", 2 * self.chunk_size, [(1, 0), (1, 2)], (1, 2)) def test_2copies_content_0_byte_2broken_rebuild_pos_0_idx_0(self): with ExpectedException(UnrecoverableContent): self._test_rebuild("TWOCOPIES", 0, [(0, 0), (0, 1)], (0, 0)) def _test_download(self, stgpol, data_size, broken_pos_list): data = random_data(data_size) content, _ = self._new_content(stgpol, data, broken_pos_list) downloaded_data = "".join(content.download()) self.assertEqual(downloaded_data, data) for pos, idx in broken_pos_list: # check nothing has been rebuilt c = content.chunks.filter(pos=pos)[0] self.assertRaises(NotFound, self.blob_client.chunk_delete, c.url) def test_twocopies_download_content_0_byte_without_broken_chunks(self): self._test_download("TWOCOPIES", 0, []) def test_twocopies_download_content_0_byte_with_broken_0_0(self): self._test_download("TWOCOPIES", 0, [(0, 0)]) def test_twocopies_download_content_1_byte_without_broken_chunks(self): self._test_download("TWOCOPIES", 1, []) def test_twocopies_download_content_1_byte_with_broken_0_0(self): self._test_download("TWOCOPIES", 1, [(0, 0)]) def test_twocopies_download_chunksize_bytes_without_broken_chunks(self): self._test_download("TWOCOPIES", self.chunk_size, []) def test_twocopies_download_2xchuksize_bytes_with_broken_0_0_and_1_0(self): self._test_download("TWOCOPIES", self.chunk_size * 2, [(0, 0), (1, 0)]) def test_twocopies_download_content_chunksize_bytes_2_broken_chunks(self): data = random_data(self.chunk_size) content, _ = self._new_content("TWOCOPIES", data, [(0, 0), (0, 1)]) gen = content.download() self.assertRaises(UnrecoverableContent, gen.next) def test_single_download_content_1_byte_without_broken_chunks(self): self._test_download("SINGLE", 1, []) def test_single_download_chunksize_bytes_plus_1_without_broken_chunk(self): self._test_download("SINGLE", self.chunk_size * 2, [])
class Content(object): def __init__(self, conf, container_id, metadata, chunks, stgpol_args): self.conf = conf self.container_id = container_id self.metadata = metadata self.chunks = ChunksHelper(chunks) self.stgpol_args = stgpol_args self.logger = get_logger(self.conf) self.cs_client = ConscienceClient(conf) self.container_client = ContainerClient(self.conf) self.blob_client = BlobClient() self.session = requests.Session() self.content_id = self.metadata["id"] self.stgpol_name = self.metadata["policy"] self.path = self.metadata["name"] self.length = int(self.metadata["length"]) self.version = self.metadata["version"] self.hash = self.metadata["hash"] self.mime_type = self.metadata["mime-type"] self.chunk_method = self.metadata["chunk-method"] def _meta2_get_spare_chunk(self, chunks_notin, chunks_broken): spare_data = { "notin": ChunksHelper(chunks_notin, False).raw(), "broken": ChunksHelper(chunks_broken, False).raw() } try: spare_resp = self.container_client.content_spare( cid=self.container_id, content=self.content_id, data=spare_data, stgpol=self.stgpol_name) except ClientException as e: raise exc.SpareChunkException("No spare chunk (%s)" % e.message) url_list = [] for c in spare_resp["chunks"]: url_list.append(c["id"]) return url_list def _meta2_update_spare_chunk(self, current_chunk, new_url): old = [{ 'type': 'chunk', 'id': current_chunk.url, 'hash': current_chunk.hash, 'size': current_chunk.size, 'pos': current_chunk.pos, 'content': self.content_id }] new = [{ 'type': 'chunk', 'id': new_url, 'hash': current_chunk.hash, 'size': current_chunk.size, 'pos': current_chunk.pos, 'content': self.content_id }] update_data = {'old': old, 'new': new} self.container_client.container_raw_update(cid=self.container_id, data=update_data) def _meta2_create_object(self): self.container_client.content_create(cid=self.container_id, path=self.path, content_id=self.content_id, stgpol=self.stgpol_name, size=self.length, checksum=self.hash, version=self.version, chunk_method=self.chunk_method, mime_type=self.mime_type, data=self.chunks.raw()) def rebuild_chunk(self, chunk_id): raise NotImplementedError() def upload(self, stream): try: self._upload(stream) except Exception as e: for chunk in self.chunks: try: self.blob_client.chunk_delete(chunk.url) except: pass raise e def _upload(self, stream): raise NotImplementedError() def download(self): raise NotImplementedError()
def init(self): self.container_client = ContainerClient(self.conf, logger=self.logger) self.endpoint = self.conf.get('endpoint') # TODO configure pool manager self.http = get_pool_manager()
class ObjectStorageApi(object): """ The Object Storage API. High level API that wraps `AccountClient`, `ContainerClient` and `DirectoryClient` classes. """ def __init__(self, namespace, **kwargs): """ Initialize the object storage API. :param namespace: name of the namespace to interract with :type namespace: `str` :keyword connection_timeout: connection timeout towards rawx services :type connection_timeout: `float` seconds :keyword read_timeout: timeout for rawx responses and data reads from the caller (when uploading) :type read_timeout: `float` seconds :keyword write_timeout: timeout for rawx write requests :type write_timeout: `float` seconds """ self.namespace = namespace self.connection_timeout = utils.float_value( kwargs.get("connection_timeout"), None) self.read_timeout = utils.float_value(kwargs.get("read_timeout"), None) self.write_timeout = utils.float_value(kwargs.get("write_timeout"), None) # FIXME: share session between all the clients self.directory = DirectoryClient({"namespace": self.namespace}, **kwargs) self.account = AccountClient({"namespace": self.namespace}, **kwargs) self.container = ContainerClient({"namespace": self.namespace}, **kwargs) def account_create(self, account, headers=None): """ Create an account. :param account: name of the account to create :type account: `str` :returns: `True` if the account has been created """ return self.account.account_create(account, headers=headers) @handle_account_not_found def account_delete(self, account, headers=None): """ Delete an account. :param account: name of the account to delete :type account: `str` """ self.account.account_delete(account, headers=headers) @handle_account_not_found def account_show(self, account, headers=None): """ Get information about an account. """ return self.account.account_show(account, headers=headers) def account_list(self, headers=None): """ List accounts """ return self.account.account_list(headers=headers) # FIXME: @handle_account_not_found def account_update(self, account, metadata, to_delete=None, headers=None): self.account.account_update(account, metadata, to_delete, headers=headers) @handle_account_not_found def account_set_properties(self, account, properties, headers=None): self.account_update(account, properties, headers=headers) @handle_account_not_found def account_del_properties(self, account, properties, headers=None): self.account_update(account, None, properties, headers=headers) def container_create(self, account, container, properties=None, headers=None, **kwargs): """ Create a container. :param account: account in which to create the container :type account: `str` :param container: name of the container :type container: `str` :param properties: properties to set on the container :type properties: `dict` :keyword headers: extra headers to send to the proxy :type headers: `dict` :returns: True if the container has been created, False if it already exists """ return self.container.container_create(account, container, properties=properties, headers=headers, autocreate=True, **kwargs) @handle_container_not_found def container_touch(self, account, container, headers=None, **kwargs): """ Trigger a notification about the container state. :param account: account from which to delete the container :type account: `str` :param container: name of the container :type container: `str` :keyword headers: extra headers to send to the proxy :type headers: `dict` """ if not headers: headers = dict() if 'X-oio-req-id' not in headers: headers['X-oio-req-id'] = utils.request_id() self.container.container_touch(account, container, headers=headers, **kwargs) def container_create_many(self, account, containers, properties=None, headers=None, **kwargs): """ Create Many containers :param account: account in which to create the containers :type account: `str` :param containers: names of the containers :type containers: `list` :param properties: properties to set on the containers :type properties: `dict` :keyword headers: extra headers to send to the proxy :type headers: `dict` """ return self.container.container_create_many(account, containers, properties=properties, headers=headers, autocreate=True, **kwargs) @handle_container_not_found def container_delete(self, account, container, headers=None, **kwargs): """ Delete a container. :param account: account from which to delete the container :type account: `str` :param container: name of the container :type container: `str` :keyword headers: extra headers to send to the proxy :type headers: `dict` """ self.container.container_delete(account, container, headers=headers, **kwargs) @handle_account_not_found def container_list(self, account, limit=None, marker=None, end_marker=None, prefix=None, delimiter=None, headers=None): """ Get the list of containers of an account. :param account: account from which to get the container list :type account: `str` :keyword limit: maximum number of results to return :type limit: `int` :keyword marker: name of the container from where to start the listing :type marker: `str` :keyword end_marker: :keyword prefix: :keyword delimiter: :keyword headers: extra headers to send to the proxy :type headers: `dict` """ resp = self.account.container_list(account, limit=limit, marker=marker, end_marker=end_marker, prefix=prefix, delimiter=delimiter, headers=headers) return resp["listing"] @handle_container_not_found def container_show(self, account, container, headers=None): """ Get information about a container (user properties). :param account: account in which the container is :type account: `str` :param container: name of the container :type container: `str` :keyword headers: extra headers to send to the proxy :type headers: `dict` :returns: a `dict` with "properties" containing a `dict` of user properties. """ return self.container.container_show(account, container, headers=headers) @handle_container_not_found def container_get_properties(self, account, container, properties=None, headers=None): """ Get information about a container (user and system properties). :param account: account in which the container is :type account: `str` :param container: name of the container :type container: `str` :param properties: *ignored* :keyword headers: extra headers to send to the proxy :type headers: `dict` :returns: a `dict` with "properties" and "system" entries, containing respectively a `dict` of user properties and a `dict` of system properties. """ return self.container.container_get_properties(account, container, properties=properties, headers=headers) @handle_container_not_found def container_set_properties(self, account, container, properties=None, clear=False, headers=None, **kwargs): """ Set properties on a container. :param account: name of the account :type account: `str` :param container: name of the container where to set properties :type container: `str` :param properties: a dictionary of properties :type properties: `dict` :param clear: :type clear: `bool` :param headers: extra headers to pass to the proxy :type headers: `dict` :keyword system: dictionary of system properties to set """ return self.container.container_set_properties(account, container, properties, clear=clear, headers=headers, **kwargs) @handle_container_not_found def container_del_properties(self, account, container, properties, headers=None, **kwargs): return self.container.container_del_properties(account, container, properties, headers=headers, **kwargs) def container_update(self, account, container, metadata, clear=False, headers=None): if not metadata: self.container_del_properties(account, container, [], headers=headers) else: self.container_set_properties(account, container, metadata, clear, headers=headers) @handle_container_not_found def object_create(self, account, container, file_or_path=None, data=None, etag=None, obj_name=None, mime_type=None, metadata=None, policy=None, headers=None, key_file=None, **_kwargs): """ Create an object in *container* of *account* with data taken from either *data* (`str` or `generator`) or *file_or_path* (path to a file or file-like object). The object will be named after *obj_name* if specified, or after the base name of *file_or_path*. :param account: name of the account where to create the object :type account: `str` :param container: name of the container where to create the object :type container: `str` :param file_or_path: file-like object or path to a file from which to read object data :type file_or_path: `str` or file-like object :param data: object data (if `file_or_path` is not set) :type data: `str` or `generator` :keyword etag: entity tag of the object :type etag: `str` :keyword obj_name: name of the object to create. If not set, will use the base name of `file_or_path`. :keyword mime_type: MIME type of the object :type mime_type: `str` :keyword properties: a dictionary of properties :type properties: `dict` :keyword policy: name of the storage policy :type policy: `str` :param headers: extra headers to pass to the proxy :type headers: `dict` :keyword key_file: """ if (data, file_or_path) == (None, None): raise exc.MissingData() src = data if data is not None else file_or_path if src is file_or_path: if isinstance(file_or_path, basestring): if not os.path.exists(file_or_path): raise exc.FileNotFound("File '%s' not found." % file_or_path) file_name = os.path.basename(file_or_path) else: try: file_name = os.path.basename(file_or_path.name) except AttributeError: file_name = None obj_name = obj_name or file_name elif isgenerator(src): file_or_path = utils.GeneratorIO(src) src = file_or_path if not obj_name: raise exc.MissingName("No name for the object has been specified") sysmeta = {'mime_type': mime_type, 'etag': etag} if not headers: headers = dict() if 'X-oio-req-id' not in headers: headers['X-oio-req-id'] = utils.request_id() if src is data: return self._object_create(account, container, obj_name, BytesIO(data), sysmeta, properties=metadata, policy=policy, headers=headers, key_file=key_file) elif hasattr(file_or_path, "read"): return self._object_create(account, container, obj_name, src, sysmeta, properties=metadata, policy=policy, headers=headers, key_file=key_file) else: with open(file_or_path, "rb") as f: return self._object_create(account, container, obj_name, f, sysmeta, properties=metadata, policy=policy, headers=headers, key_file=key_file) def object_touch(self, account, container, obj, headers=None, **kwargs): """ Trigger a notification about an object (as if it just had been created). :param account: name of the account where to create the object :type account: `str` :param container: name of the container where to create the object :type container: `str` :param obj: name of the object to touch :param headers: extra headers to pass to the proxy """ if not headers: headers = dict() if 'X-oio-req-id' not in headers: headers['X-oio-req-id'] = utils.request_id() self.container.content_touch(account, container, obj, headers=headers, **kwargs) @handle_object_not_found def object_delete(self, account, container, obj, headers=None, **kwargs): if not headers: headers = dict() if 'X-oio-req-id' not in headers: headers['X-oio-req-id'] = utils.request_id() return self.container.content_delete(account, container, obj, headers=headers, **kwargs) def object_delete_many(self, account, container, objs, headers=None, **kwargs): if not headers: headers = dict() if 'X-oio-req-id' not in headers: headers['X-oio-req-id'] = utils.request_id() return self.container.content_delete_many(account, container, objs, headers=headers, **kwargs) @handle_container_not_found def object_list(self, account, container, limit=None, marker=None, delimiter=None, prefix=None, end_marker=None, headers=None, properties=False, **kwargs): """ Lists objects inside a container. :returns: a dict which contains * 'objects': the list of objects * 'prefixes': common prefixes (only if delimiter and prefix are set) * 'properties': a dict of container properties * 'system': system metadata """ _, resp_body = self.container.content_list(account, container, limit=limit, marker=marker, end_marker=end_marker, prefix=prefix, delimiter=delimiter, properties=properties, headers=headers, **kwargs) for obj in resp_body['objects']: mtype = obj.get('mime-type') if mtype: obj['mime_type'] = mtype del obj['mime-type'] return resp_body # FIXME: @handle_object_not_found def object_locate(self, account, container, obj, headers=None): obj_meta, body = self.container.content_locate(account, container, obj) return obj_meta, body def object_analyze(self, *args, **kwargs): """ :deprecated: use `object_locate` """ return self.object_locate(*args, **kwargs) def object_fetch(self, account, container, obj, ranges=None, headers=None, key_file=None): if not headers: headers = dict() if 'X-oio-req-id' not in headers: headers['X-oio-req-id'] = utils.request_id() meta, raw_chunks = self.object_locate(account, container, obj, headers=headers) chunk_method = meta['chunk_method'] storage_method = STORAGE_METHODS.load(chunk_method) chunks = _sort_chunks(raw_chunks, storage_method.ec) meta['container_id'] = utils.name2cid(account, container).upper() meta['ns'] = self.namespace if storage_method.ec: stream = self._fetch_stream_ec(meta, chunks, ranges, storage_method, headers) elif storage_method.backblaze: stream = self._fetch_stream_backblaze(meta, chunks, ranges, storage_method, key_file) else: stream = self._fetch_stream(meta, chunks, ranges, storage_method, headers) return meta, stream @handle_object_not_found def object_get_properties(self, account, container, obj, headers=None): return self.container.content_get_properties(account, container, obj) def object_show(self, account, container, obj, headers=None): """ Get a description of the content along with its user properties. :param account: name of the account in which the object is stored :param container: name of the container in which the object is stored :param obj: name of the object to query :returns: a `dict` describing the object .. python:: {'hash': '6BF60C17CC15EEA108024903B481738F', 'ctime': '1481031763', 'deleted': 'False', 'properties': { u'projet': u'OpenIO-SDS'}, 'length': '43518', 'hash_method': 'md5', 'chunk_method': 'ec/algo=liberasurecode_rs_vand,k=6,m=3', 'version': '1481031762951972', 'policy': 'EC', 'id': '20BF2194FD420500CD4729AE0B5CBC07', 'mime_type': 'application/octet-stream', 'name': 'Makefile'} """ return self.container.content_show(account, container, obj, headers=headers) def object_update(self, account, container, obj, metadata, clear=False, headers=None): if clear: self.object_del_properties(account, container, obj, [], headers=headers) if metadata: self.object_set_properties(account, container, obj, metadata, headers=headers) @handle_object_not_found def object_set_properties(self, account, container, obj, properties, clear=False, headers=None, **kwargs): return self.container.content_set_properties( account, container, obj, properties={'properties': properties}, headers=headers, **kwargs) @handle_object_not_found def object_del_properties(self, account, container, obj, properties, headers=None, **kwargs): return self.container.content_del_properties(account, container, obj, properties=properties, headers=headers, **kwargs) # FIXME: remove and call self.container.content_prepare() directly def _content_prepare(self, account, container, obj_name, size, policy=None, headers=None): return self.container.content_prepare(account, container, obj_name, size, stgpol=policy, autocreate=True, headers=headers) def _content_preparer(self, account, container, obj_name, policy=None, headers=None): # TODO: optimize by asking more than one metachunk at a time obj_meta, first_body = self.container.content_prepare(account, container, obj_name, size=1, stgpol=policy, autocreate=True, headers=headers) storage_method = STORAGE_METHODS.load(obj_meta['chunk_method']) def _fix_mc_pos(chunks, mc_pos): for chunk in chunks: raw_pos = chunk["pos"].split(".") if storage_method.ec: chunk['num'] = int(raw_pos[1]) chunk["pos"] = "%d.%d" % (mc_pos, chunk['num']) else: chunk["pos"] = str(mc_pos) def _metachunk_preparer(): mc_pos = 0 _fix_mc_pos(first_body, mc_pos) yield first_body while True: mc_pos += 1 _, next_body = self._content_prepare(account, container, obj_name, 1, policy, headers) _fix_mc_pos(next_body, mc_pos) yield next_body return obj_meta, _metachunk_preparer def _object_create(self, account, container, obj_name, source, sysmeta, properties=None, policy=None, headers=None, key_file=None): obj_meta, chunk_prep = self._content_preparer(account, container, obj_name, policy=policy, headers=headers) obj_meta.update(sysmeta) obj_meta['content_path'] = obj_name obj_meta['container_id'] = utils.name2cid(account, container).upper() obj_meta['ns'] = self.namespace storage_method = STORAGE_METHODS.load(obj_meta['chunk_method']) if storage_method.ec: handler = ECWriteHandler( source, obj_meta, chunk_prep, storage_method, headers=headers, write_timeout=self.write_timeout, read_timeout=self.read_timeout, connection_timeout=self.connection_timeout) elif storage_method.backblaze: backblaze_info = self._b2_credentials(storage_method, key_file) handler = BackblazeWriteHandler(source, obj_meta, chunk_prep, storage_method, headers, backblaze_info) else: handler = ReplicatedWriteHandler( source, obj_meta, chunk_prep, storage_method, headers=headers, write_timeout=self.write_timeout, read_timeout=self.read_timeout, connection_timeout=self.connection_timeout) final_chunks, bytes_transferred, content_checksum = handler.stream() etag = obj_meta.get('etag') if etag and etag.lower() != content_checksum.lower(): raise exc.EtagMismatch("given etag %s != computed %s" % (etag, content_checksum)) obj_meta['etag'] = content_checksum data = {'chunks': final_chunks, 'properties': properties or {}} # FIXME: we may just pass **obj_meta self.container.content_create(account, container, obj_name, size=bytes_transferred, checksum=content_checksum, data=data, content_id=obj_meta['id'], stgpol=obj_meta['policy'], version=obj_meta['version'], mime_type=obj_meta['mime_type'], chunk_method=obj_meta['chunk_method'], headers=headers) return final_chunks, bytes_transferred, content_checksum def _fetch_stream(self, meta, chunks, ranges, storage_method, headers): total_bytes = 0 headers = headers or {} ranges = ranges or [(None, None)] meta_range_list = get_meta_ranges(ranges, chunks) for meta_range_dict in meta_range_list: for pos, meta_range in meta_range_dict.iteritems(): meta_start, meta_end = meta_range if meta_start is not None and meta_end is not None: headers['Range'] = http_header_from_ranges([meta_range]) reader = io.ChunkReader( iter(chunks[pos]), io.READ_CHUNK_SIZE, headers, connection_timeout=self.connection_timeout, response_timeout=self.read_timeout, read_timeout=self.read_timeout) try: it = reader.get_iter() except Exception as err: raise exc.OioException( "Error while downloading position %d: %s" % (pos, err)) for part in it: for d in part['iter']: total_bytes += len(d) yield d def _fetch_stream_ec(self, meta, chunks, ranges, storage_method, headers): ranges = ranges or [(None, None)] meta_range_list = get_meta_ranges(ranges, chunks) for meta_range_dict in meta_range_list: for pos, meta_range in meta_range_dict.iteritems(): meta_start, meta_end = meta_range handler = ECChunkDownloadHandler( storage_method, chunks[pos], meta_start, meta_end, headers, connection_timeout=self.connection_timeout, response_timeout=self.read_timeout, read_timeout=self.read_timeout) stream = handler.get_stream() for part_info in stream: for d in part_info['iter']: yield d stream.close() def _b2_credentials(self, storage_method, key_file): key_file = key_file or '/etc/oio/sds/b2-appkey.conf' try: return BackblazeUtils.get_credentials(storage_method, key_file) except BackblazeUtilsException as err: raise exc.ConfigurationException(str(err)) def _fetch_stream_backblaze(self, meta, chunks, ranges, storage_method, key_file): backblaze_info = self._b2_credentials(storage_method, key_file) total_bytes = 0 current_offset = 0 size = None offset = 0 for pos in range(len(chunks)): if ranges: offset = ranges[pos][0] size = ranges[pos][1] if size is None: size = int(meta["length"]) chunk_size = int(chunks[pos][0]["size"]) if total_bytes >= size: break if current_offset + chunk_size > offset: if current_offset < offset: _offset = offset - current_offset else: _offset = 0 if chunk_size + total_bytes > size: _size = size - total_bytes else: _size = chunk_size handler = BackblazeChunkDownloadHandler( meta, chunks[pos], _offset, _size, backblaze_info=backblaze_info) stream = handler.get_stream() if not stream: raise exc.OioException("Error while downloading") total_bytes += len(stream) yield stream current_offset += chunk_size
class BlobAuditorWorker(object): def __init__(self, conf, logger, volume): self.conf = conf self.logger = logger self.volume = volume self.run_time = 0 self.passes = 0 self.errors = 0 self.orphan_chunks = 0 self.faulty_chunks = 0 self.corrupted_chunks = 0 self.last_reported = 0 self.chunks_run_time = 0 self.bytes_running_time = 0 self.bytes_processed = 0 self.total_bytes_processed = 0 self.total_chunks_processed = 0 self.report_interval = int_value(conf.get('report_interval'), 3600) self.max_chunks_per_second = int_value(conf.get('chunks_per_second'), 30) self.max_bytes_per_second = int_value(conf.get('bytes_per_second'), 10000000) self.container_client = ContainerClient(conf, logger=self.logger) def audit_pass(self): self.namespace, self.address = check_volume(self.volume) start_time = report_time = time.time() total_errors = 0 total_corrupted = 0 total_orphans = 0 total_faulty = 0 audit_time = 0 paths = paths_gen(self.volume) for path in paths: loop_time = time.time() self.safe_chunk_audit(path) self.chunks_run_time = ratelimit(self.chunks_run_time, self.max_chunks_per_second) self.total_chunks_processed += 1 now = time.time() if now - self.last_reported >= self.report_interval: self.logger.info( '%(start_time)s ' '%(passes)d ' '%(corrupted)d ' '%(faulty)d ' '%(orphans)d ' '%(errors)d ' '%(c_rate).2f ' '%(b_rate).2f ' '%(total).2f ' '%(audit_time).2f' '%(audit_rate).2f' % { 'start_time': time.ctime(report_time), 'passes': self.passes, 'corrupted': self.corrupted_chunks, 'faulty': self.faulty_chunks, 'orphans': self.orphan_chunks, 'errors': self.errors, 'c_rate': self.passes / (now - report_time), 'b_rate': self.bytes_processed / (now - report_time), 'total': (now - start_time), 'audit_time': audit_time, 'audit_rate': audit_time / (now - start_time) }) report_time = now total_corrupted += self.corrupted_chunks total_orphans += self.orphan_chunks total_faulty += self.faulty_chunks total_errors += self.errors self.passes = 0 self.corrupted_chunks = 0 self.orphan_chunks = 0 self.faulty_chunks = 0 self.errors = 0 self.bytes_processed = 0 self.last_reported = now audit_time += (now - loop_time) elapsed = (time.time() - start_time) or 0.000001 self.logger.info( '%(elapsed).02f ' '%(corrupted)d ' '%(faulty)d ' '%(orphans)d ' '%(errors)d ' '%(chunk_rate).2f ' '%(bytes_rate).2f ' '%(audit_time).2f ' '%(audit_rate).2f' % { 'elapsed': elapsed, 'corrupted': total_corrupted + self.corrupted_chunks, 'faulty': total_faulty + self.faulty_chunks, 'orphans': total_orphans + self.orphan_chunks, 'errors': total_errors + self.errors, 'chunk_rate': self.total_chunks_processed / elapsed, 'bytes_rate': self.total_bytes_processed / elapsed, 'audit_time': audit_time, 'audit_rate': audit_time / elapsed }) def safe_chunk_audit(self, path): try: self.chunk_audit(path) except exc.FaultyChunk as err: self.faulty_chunks += 1 self.logger.error('ERROR faulty chunk %s: %s', path, err) except exc.CorruptedChunk as err: self.corrupted_chunks += 1 self.logger.error('ERROR corrupted chunk %s: %s', path, err) except exc.OrphanChunk as err: self.orphan_chunks += 1 self.logger.error('ERROR orphan chunk %s: %s', path, err) except Exception: self.errors += 1 self.logger.exception('ERROR while auditing chunk %s', path) self.passes += 1 def chunk_audit(self, path): with open(path) as f: try: meta = read_chunk_metadata(f) except exc.MissingAttribute as e: raise exc.FaultyChunk('Missing extended attribute %s' % e) size = int(meta['chunk_size']) md5_checksum = meta['chunk_hash'].lower() reader = ChunkReader(f, size, md5_checksum) with closing(reader): for buf in reader: buf_len = len(buf) self.bytes_running_time = ratelimit( self.bytes_running_time, self.max_bytes_per_second, increment=buf_len) self.bytes_processed += buf_len self.total_bytes_processed += buf_len try: container_id = meta['container_id'] content_path = meta['content_path'] _obj_meta, data = self.container_client.content_locate( cid=container_id, path=content_path, properties=False) # Check chunk data chunk_data = None metachunks = set() for c in data: if c['url'].endswith(meta['chunk_id']): metachunks.add(c['pos'].split('.', 2)[0]) chunk_data = c if not chunk_data: raise exc.OrphanChunk('Not found in content') if chunk_data['size'] != int(meta['chunk_size']): raise exc.FaultyChunk('Invalid chunk size found') if chunk_data['hash'] != meta['chunk_hash']: raise exc.FaultyChunk('Invalid chunk hash found') if chunk_data['pos'] != meta['chunk_pos']: raise exc.FaultyChunk('Invalid chunk position found') except exc.NotFound: raise exc.OrphanChunk('Chunk not found in container')
class TestECContent(BaseTestCase): def setUp(self): super(TestECContent, self).setUp() if len(self.conf['services']['rawx']) < 12: self.skipTest("Not enough rawx. " "EC tests needs at least 12 rawx to run") self.namespace = self.conf['namespace'] self.account = self.conf['account'] self.chunk_size = self.conf['chunk_size'] self.gridconf = {"namespace": self.namespace} self.content_factory = ContentFactory(self.gridconf) self.container_client = ContainerClient(self.gridconf) self.blob_client = BlobClient() self.container_name = "TestECContent%f" % time.time() self.container_client.container_create(account=self.account, reference=self.container_name) self.container_id = cid_from_name(self.account, self.container_name).upper() self.content = random_str(64) self.stgpol = "EC" self.size = 1024 * 1024 + 320 self.k = 6 self.m = 3 def tearDown(self): super(TestECContent, self).tearDown() def _generate_fullpath(self, account, container_name, path, version): return [ '{0}/{1}/{2}/{3}'.format(quote_plus(account), quote_plus(container_name), quote_plus(path), version) ] def random_chunks(self, nb): l = random.sample(xrange(self.k + self.m), nb) return ["0.%s" % i for i in l] def _test_create(self, data_size): # generate random test data data = random_data(data_size) # using factory create new EC content content = self.content_factory.new(self.container_id, self.content, len(data), self.stgpol) # verify the factory gave us an ECContent self.assertEqual(type(content), ECContent) # perform the content creation content.create(BytesIO(data)) meta, chunks = self.container_client.content_locate( cid=self.container_id, content=content.content_id) # verify metadata chunks = ChunksHelper(chunks) self.assertEqual(meta['hash'], md5_data(data)) self.assertEqual(meta['length'], str(len(data))) self.assertEqual(meta['policy'], self.stgpol) self.assertEqual(meta['name'], self.content) metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size)) \ if len(data) != 0 else 1 offset = 0 # verify each metachunk for metapos in range(metachunk_nb): chunks_at_pos = content.chunks.filter(metapos=metapos) if len(chunks_at_pos) < 1: break metachunk_size = chunks_at_pos[0].size metachunk_hash = md5_data(data[offset:offset + metachunk_size]) for chunk in chunks_at_pos: meta, stream = self.blob_client.chunk_get(chunk.url) self.assertEqual(meta['metachunk_size'], str(chunk.size)) self.assertEqual(meta['metachunk_hash'], chunk.checksum) self.assertEqual(meta['content_path'], self.content) self.assertEqual(meta['container_id'], self.container_id) self.assertEqual(meta['content_id'], meta['content_id']) self.assertEqual(meta['chunk_id'], chunk.id) self.assertEqual(meta['chunk_pos'], chunk.pos) self.assertEqual(meta['chunk_hash'], md5_stream(stream)) full_path = self._generate_fullpath(self.account, self.container_name, self.content, meta['content_version']) self.assertEqual(meta['full_path'], full_path) self.assertEqual(meta['oio_version'], '4.0') self.assertEqual(metachunk_hash, chunk.checksum) offset += metachunk_size def test_create_0_byte(self): self._test_create(0) def test_create_1_byte(self): self._test_create(1) def test_create(self): self._test_create(DAT_LEGIT_SIZE) def test_create_6294503_bytes(self): self._test_create(6294503) def _test_rebuild(self, data_size, broken_pos_list): # generate test data data = os.urandom(data_size) # create initial content old_content = self.content_factory.new(self.container_id, self.content, len(data), self.stgpol) # verify factory work as intended self.assertEqual(type(old_content), ECContent) # perform initial content creation old_content.create(BytesIO(data)) uploaded_content = self.content_factory.get(self.container_id, old_content.content_id) # break the content old_info = {} for pos in broken_pos_list: old_info[pos] = {} c = uploaded_content.chunks.filter(pos=pos)[0] old_info[pos]["url"] = c.url old_info[pos]["id"] = c.id old_info[pos]["hash"] = c.checksum chunk_id_to_rebuild = c.id meta, stream = self.blob_client.chunk_get(c.url) old_info[pos]["dl_meta"] = meta old_info[pos]["dl_hash"] = md5_stream(stream) # delete the chunk self.blob_client.chunk_delete(c.url) # rebuild the broken chunks uploaded_content.rebuild_chunk(chunk_id_to_rebuild) rebuilt_content = self.content_factory.get(self.container_id, uploaded_content.content_id) # sanity check self.assertEqual(type(rebuilt_content), ECContent) # verify rebuild result for pos in broken_pos_list: c = rebuilt_content.chunks.filter(pos=pos)[0] rebuilt_meta, rebuilt_stream = self.blob_client.chunk_get(c.url) self.assertEqual(rebuilt_meta["chunk_id"], c.id) self.assertEqual(md5_stream(rebuilt_stream), old_info[pos]["dl_hash"]) self.assertEqual(c.checksum, old_info[pos]["hash"]) self.assertNotEqual(c.url, old_info[pos]["url"]) del old_info[pos]["dl_meta"]["chunk_id"] del rebuilt_meta["chunk_id"] self.assertEqual(rebuilt_meta, old_info[pos]["dl_meta"]) def test_content_0_byte_rebuild(self): self._test_rebuild(0, self.random_chunks(1)) def test_content_0_byte_rebuild_advanced(self): self._test_rebuild(0, self.random_chunks(3)) def test_content_1_byte_rebuild(self): self._test_rebuild(1, self.random_chunks(1)) def test_content_1_byte_rebuild_advanced(self): self._test_rebuild(1, self.random_chunks(3)) def test_content_rebuild(self): self._test_rebuild(DAT_LEGIT_SIZE, self.random_chunks(1)) def test_content_rebuild_advanced(self): self._test_rebuild(DAT_LEGIT_SIZE, self.random_chunks(3)) def test_content_rebuild_unrecoverable(self): self.assertRaises(UnrecoverableContent, self._test_rebuild, DAT_LEGIT_SIZE, self.random_chunks(4)) def _new_content(self, data, broken_pos_list=[]): old_content = self.content_factory.new(self.container_id, self.content, len(data), self.stgpol) self.assertEqual(type(old_content), ECContent) old_content.create(BytesIO(data)) # break content for pos in broken_pos_list: c = old_content.chunks.filter(pos=pos)[0] self.blob_client.chunk_delete(c.url) # get the new structure of the uploaded content return self.content_factory.get(self.container_id, old_content.content_id) def test_orphan_chunk(self): content = self._new_content(random_data(10)) self.assertRaises(OrphanChunk, content.rebuild_chunk, "invalid") def _test_fetch(self, data_size, broken_pos_list=None): broken_pos_list = broken_pos_list or [] test_data = random_data(data_size) content = self._new_content(test_data, broken_pos_list) data = "".join(content.fetch()) self.assertEqual(len(data), len(test_data)) self.assertEqual(md5_data(data), md5_data(test_data)) # verify that chunks are broken for pos in broken_pos_list: chunk = content.chunks.filter(pos=pos)[0] self.assertRaises(NotFound, self.blob_client.chunk_delete, chunk.url) def test_fetch_content_0_byte(self): self._test_fetch(0) def test_fetch_content_1_byte(self): self._test_fetch(1) def test_fetch_content(self): self._test_fetch(DAT_LEGIT_SIZE) def test_fetch_content_0_byte_broken(self): self._test_fetch(0, self.random_chunks(3)) def test_fetch_content_1_byte_broken(self): self._test_fetch(1, self.random_chunks(3)) def test_fetch_content_broken(self): self._test_fetch(DAT_LEGIT_SIZE, self.random_chunks(3)) def test_fetch_content_unrecoverable(self): broken_chunks = self.random_chunks(4) self.assertRaises(OioException, self._test_fetch, DAT_LEGIT_SIZE, broken_chunks)
def __init__(self, conf, **kwargs): self.conf = conf self.logger = get_logger(conf) self.container_client = ContainerClient(conf, logger=self.logger, **kwargs)
class ContentFactory(object): DEFAULT_DATASEC = "plain", {"nb_copy": "1", "distance": "0"} def __init__(self, conf, **kwargs): self.conf = conf self.logger = get_logger(conf) self.container_client = ContainerClient(conf, logger=self.logger, **kwargs) def get(self, container_id, content_id, account=None, container_name=None): try: meta, chunks = self.container_client.content_locate( cid=container_id, content=content_id) except NotFound: raise ContentNotFound("Content %s/%s not found" % (container_id, content_id)) chunk_method = meta['chunk_method'] storage_method = STORAGE_METHODS.load(chunk_method) if not account or not container_name: container_info = self.container_client.container_get_properties( cid=container_id)['system'] if not account: account = container_info['sys.account'] if not container_name: container_name = container_info['sys.user.name'] cls = ECContent if storage_method.ec else PlainContent return cls(self.conf, container_id, meta, chunks, storage_method, account, container_name, container_client=self.container_client) def new(self, container_id, path, size, policy, account=None, container_name=None): meta, chunks = self.container_client.content_prepare(cid=container_id, path=path, size=size, stgpol=policy) chunk_method = meta['chunk_method'] storage_method = STORAGE_METHODS.load(chunk_method) if not account or not container_name: container_info = self.container_client.container_get_properties( cid=container_id)['system'] if not account: account = container_info['sys.account'] if not container_name: container_name = container_info['sys.user.name'] cls = ECContent if storage_method.ec else PlainContent return cls(self.conf, container_id, meta, chunks, storage_method, account, container_name) def copy(self, origin, policy=None): if not policy: policy = origin.policy metadata = origin.metadata.copy() new_metadata, chunks = self.container_client.content_prepare( cid=origin.container_id, path=metadata['name'], size=metadata['length'], stgpol=policy) metadata['chunk_method'] = new_metadata['chunk_method'] metadata['chunk_size'] = new_metadata['chunk_size'] # We must use a new content_id since we change the data metadata['id'] = new_metadata['id'] # We may want to keep the same version, but it is denied by meta2 metadata['version'] = int(metadata['version']) + 1 metadata['policy'] = new_metadata['policy'] # FIXME: meta2 does not allow us to set ctime # and thus the object will appear as new. storage_method = STORAGE_METHODS.load(metadata['chunk_method']) cls = ECContent if storage_method.ec else PlainContent return cls(self.conf, origin.container_id, metadata, chunks, storage_method, origin.account, origin.container_name) def change_policy(self, container_id, content_id, new_policy): old_content = self.get(container_id, content_id) if old_content.policy == new_policy: return old_content new_content = self.copy(old_content, policy=new_policy) stream = old_content.fetch() new_content.create(GeneratorIO(stream)) # the old content is automatically deleted because the new content has # the same name (but not the same id) return new_content
class TestContentFactory(BaseTestCase): def setUp(self): super(TestContentFactory, self).setUp() self.namespace = self.conf['namespace'] self.chunk_size = self.conf['chunk_size'] self.gridconf = {"namespace": self.namespace} self.content_factory = ContentFactory(self.gridconf) self.container_name = "TestContentFactory%f" % time.time() self.blob_client = BlobClient() self.container_client = ContainerClient(self.gridconf) self.container_client.container_create(acct=self.account, ref=self.container_name) self.container_id = cid_from_name(self.account, self.container_name).upper() def tearDown(self): super(TestContentFactory, self).tearDown() def test_extract_datasec(self): self.content_factory.ns_info = { "data_security": { "DUPONETWO": "DUP:distance=1|nb_copy=2", "RAIN": "RAIN:k=6|m=2|algo=liber8tion" }, "storage_policy": { "RAIN": "NONE:RAIN:NONE", "SINGLE": "NONE:NONE:NONE", "TWOCOPIES": "NONE:DUPONETWO:NONE" } } ds_type, ds_args = self.content_factory._extract_datasec("RAIN") self.assertEqual(ds_type, "RAIN") self.assertEqual(ds_args, {"k": "6", "m": "2", "algo": "liber8tion"}) ds_type, ds_args = self.content_factory._extract_datasec("SINGLE") self.assertEqual(ds_type, "DUP") self.assertEqual(ds_args, {"nb_copy": "1", "distance": "0"}) ds_type, ds_args = self.content_factory._extract_datasec("TWOCOPIES") self.assertEqual(ds_type, "DUP") self.assertEqual(ds_args, {"nb_copy": "2", "distance": "1"}) self.assertRaises(InconsistentContent, self.content_factory._extract_datasec, "UnKnOwN") def test_get_rain(self): meta = { "chunk-method": "plain/rain?algo=liber8tion&k=6&m=2", "ctime": "1450176946", "deleted": "False", "hash": "E952A419957A6E405BFC53EC65483F73", "hash-method": "md5", "id": "3FA2C4A1ED2605005335A276890EC458", "length": "658", "mime-type": "application/octet-stream", "name": "tox.ini", "policy": "RAIN", "version": "1450176946676289" } chunks = [{ "url": "http://127.0.0.1:6012/A0A0", "pos": "0.p0", "size": 512, "hash": "E7D4E4AD460971CA2E3141F2102308D4" }, { "url": "http://127.0.0.1:6010/A01", "pos": "0.1", "size": 146, "hash": "760AB5DA7C51A3654F1CA622687CD6C3" }, { "url": "http://127.0.0.1:6011/A00", "pos": "0.0", "size": 512, "hash": "B1D08B86B8CAA90A2092CCA0DF9201DB" }, { "url": "http://127.0.0.1:6013/A0A1", "pos": "0.p1", "size": 512, "hash": "DA9D7F72AEEA5791565724424CE45C16" }] self.content_factory.container_client.content_show = Mock( return_value=(meta, chunks)) c = self.content_factory.get("xxx_container_id", "xxx_content_id") self.assertEqual(type(c), RainContent) self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458") self.assertEqual(c.length, 658) self.assertEqual(c.path, "tox.ini") self.assertEqual(c.version, "1450176946676289") self.assertEqual(c.algo, "liber8tion") self.assertEqual(c.k, 6) self.assertEqual(c.m, 2) self.assertEqual(len(c.chunks), 4) self.assertEqual(c.chunks[0].raw(), chunks[2]) self.assertEqual(c.chunks[1].raw(), chunks[1]) self.assertEqual(c.chunks[2].raw(), chunks[0]) self.assertEqual(c.chunks[3].raw(), chunks[3]) def test_get_dup(self): meta = { "chunk-method": "plain/bytes", "ctime": "1450176946", "deleted": "False", "hash": "E952A419957A6E405BFC53EC65483F73", "hash-method": "md5", "id": "3FA2C4A1ED2605005335A276890EC458", "length": "658", "mime-type": "application/octet-stream", "name": "tox.ini", "policy": "TWOCOPIES", "version": "1450176946676289" } chunks = [{ "url": "http://127.0.0.1:6010/A0", "pos": "0", "size": 658, "hash": "E952A419957A6E405BFC53EC65483F73" }, { "url": "http://127.0.0.1:6011/A1", "pos": "0", "size": 658, "hash": "E952A419957A6E405BFC53EC65483F73" }] self.content_factory.container_client.content_show = Mock( return_value=(meta, chunks)) c = self.content_factory.get("xxx_container_id", "xxx_content_id") self.assertEqual(type(c), DupContent) self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458") self.assertEqual(c.length, 658) self.assertEqual(c.path, "tox.ini") self.assertEqual(c.version, "1450176946676289") self.assertEqual(c.nb_copy, 2) self.assertEqual(c.distance, 1) self.assertEqual(len(c.chunks), 2) self.assertEqual(c.chunks[0].raw(), chunks[0]) self.assertEqual(c.chunks[1].raw(), chunks[1]) def test_get_unknown_content(self): self.assertRaises(ContentNotFound, self.content_factory.get, self.container_id, "1234") def test_new_rain(self): meta = { "chunk-method": "plain/rain?algo=liber8tion&k=6&m=2", "ctime": "1450341162", "deleted": "False", "hash": "", "hash-method": "md5", "id": "F4B1C8DD132705007DE8B43D0709DAA2", "length": "1000", "mime-type": "application/octet-stream", "name": "titi", "policy": "RAIN", "version": "1450341162332663" } chunks = [{ "url": "http://127.0.0.1:6010/0_p1", "pos": "0.p1", "size": 1048576, "hash": "00000000000000000000000000000000" }, { "url": "http://127.0.0.1:6011/0_p0", "pos": "0.p0", "size": 1048576, "hash": "00000000000000000000000000000000" }, { "url": "http://127.0.0.1:6016/0_1", "pos": "0.1", "size": 1048576, "hash": "00000000000000000000000000000000" }, { "url": "http://127.0.0.1:6017/0_0", "pos": "0.0", "size": 1048576, "hash": "00000000000000000000000000000000" }] self.content_factory.container_client.content_prepare = Mock( return_value=(meta, chunks)) c = self.content_factory.new("xxx_container_id", "titi", 1000, "RAIN") self.assertEqual(type(c), RainContent) self.assertEqual(c.content_id, "F4B1C8DD132705007DE8B43D0709DAA2") self.assertEqual(c.length, 1000) self.assertEqual(c.path, "titi") self.assertEqual(c.version, "1450341162332663") self.assertEqual(c.algo, "liber8tion") self.assertEqual(c.k, 6) self.assertEqual(c.m, 2) self.assertEqual(len(c.chunks), 4) self.assertEqual(c.chunks[0].raw(), chunks[3]) self.assertEqual(c.chunks[1].raw(), chunks[2]) self.assertEqual(c.chunks[2].raw(), chunks[1]) self.assertEqual(c.chunks[3].raw(), chunks[0]) def _new_content(self, stgpol, data, path="titi"): old_content = self.content_factory.new(self.container_id, path, len(data), stgpol) old_content.upload(StringIO.StringIO(data)) return self.content_factory.get(self.container_id, old_content.content_id) def _test_change_policy(self, data_size, old_policy, new_policy): if (old_policy == "RAIN" or new_policy == "RAIN") \ and len(self.conf['rawx']) < 8: self.skipTest("RAIN: Need more than 8 rawx to run") data = random_data(data_size) obj_type = { "SINGLE": DupContent, "TWOCOPIES": DupContent, "THREECOPIES": DupContent, "RAIN": RainContent } old_content = self._new_content(old_policy, data) self.assertEqual(type(old_content), obj_type[old_policy]) changed_content = self.content_factory.change_policy( old_content.container_id, old_content.content_id, new_policy) self.assertRaises(NotFound, self.container_client.content_show, self.account, cid=old_content.container_id, content=old_content.content_id) new_content = self.content_factory.get(self.container_id, changed_content.content_id) self.assertEqual(type(new_content), obj_type[new_policy]) downloaded_data = "".join(new_content.download()) self.assertEqual(downloaded_data, data) def test_change_content_0_byte_policy_single_to_rain(self): self._test_change_policy(0, "SINGLE", "RAIN") def test_change_content_0_byte_policy_rain_to_twocopies(self): self._test_change_policy(0, "RAIN", "TWOCOPIES") def test_change_content_1_byte_policy_single_to_rain(self): self._test_change_policy(1, "SINGLE", "RAIN") def test_change_content_chunksize_bytes_policy_twocopies_to_rain(self): self._test_change_policy(self.chunk_size, "TWOCOPIES", "RAIN") def test_change_content_2xchunksize_bytes_policy_threecopies_to_rain(self): self._test_change_policy(self.chunk_size * 2, "THREECOPIES", "RAIN") def test_change_content_1_byte_policy_rain_to_threecopies(self): self._test_change_policy(1, "RAIN", "THREECOPIES") def test_change_content_chunksize_bytes_policy_rain_to_twocopies(self): self._test_change_policy(self.chunk_size, "RAIN", "TWOCOPIES") def test_change_content_2xchunksize_bytes_policy_rain_to_single(self): self._test_change_policy(self.chunk_size * 2, "RAIN", "SINGLE") def test_change_content_0_byte_policy_twocopies_to_threecopies(self): self._test_change_policy(0, "TWOCOPIES", "THREECOPIES") def test_change_content_chunksize_bytes_policy_single_to_twocopies(self): self._test_change_policy(self.chunk_size, "SINGLE", "TWOCOPIES") def test_change_content_2xchunksize_bytes_policy_3copies_to_single(self): self._test_change_policy(self.chunk_size * 2, "THREECOPIES", "SINGLE") def test_change_content_with_same_policy(self): data = random_data(10) old_content = self._new_content("TWOCOPIES", data) changed_content = self.content_factory.change_policy( old_content.container_id, old_content.content_id, "TWOCOPIES") self.assertEqual(old_content.content_id, changed_content.content_id) def test_change_policy_unknown_content(self): self.assertRaises(ContentNotFound, self.content_factory.change_policy, self.container_id, "1234", "SINGLE") def test_change_policy_unknown_storage_policy(self): data = random_data(10) old_content = self._new_content("TWOCOPIES", data) self.assertRaises(ClientException, self.content_factory.change_policy, self.container_id, old_content.content_id, "UnKnOwN") def _test_move_chunk(self, policy): data = random_data(self.chunk_size) content = self._new_content(policy, data) chunk_id = content.chunks.filter(metapos=0)[0].id chunk_url = content.chunks.filter(metapos=0)[0].url chunk_meta, chunk_stream = self.blob_client.chunk_get(chunk_url) chunk_hash = md5_stream(chunk_stream) new_chunk = content.move_chunk(chunk_id) content_updated = self.content_factory.get(self.container_id, content.content_id) hosts = [] for c in content_updated.chunks.filter(metapos=0): self.assertThat(hosts, Not(Contains(c.host))) self.assertNotEquals(c.id, chunk_id) hosts.append(c.host) new_chunk_meta, new_chunk_stream = self.blob_client.chunk_get( new_chunk["url"]) new_chunk_hash = md5_stream(new_chunk_stream) self.assertEqual(new_chunk_hash, chunk_hash) del chunk_meta["chunk_id"] del new_chunk_meta["chunk_id"] self.assertEqual(new_chunk_meta, chunk_meta) def test_single_move_chunk(self): self._test_move_chunk("SINGLE") def test_twocopies_move_chunk(self): self._test_move_chunk("TWOCOPIES") def test_rain_move_chunk(self): if len(self.conf['rawx']) < 9: self.skipTest("Need more than 8 rawx") self._test_move_chunk("RAIN") def test_move_chunk_not_in_content(self): data = random_data(self.chunk_size) content = self._new_content("TWOCOPIES", data) with ExpectedException(OrphanChunk): content.move_chunk("1234") def test_strange_paths(self): for cname in ( "Annual report.txt", "foo+bar=foobar.txt", "100%_bug_free.c", "forward/slash/allowed", "I\\put\\backslashes\\and$dollar$signs$in$file$names", "Je suis tombé sur la tête, mais ça va bien.", "%s%f%u%d%%", "carriage\rreturn", "line\nfeed", "ta\tbu\tla\ttion", "controlchars", ): content = self._new_content("SINGLE", "nobody cares", cname) try: self.assertEqual(cname, content.path) finally: pass # TODO: delete the content
class TestMeta2Indexing(BaseTestCase): def setUp(self): super(TestMeta2Indexing, self).setUp() self.rdir_client = RdirClient(self.conf) self.directory_client = DirectoryClient(self.conf) self.container_client = ContainerClient(self.conf) self.containers = [random_str(14) for _ in range(0, randint(1, 10))] self.containers_svcs = {} self.event_agent_name = 'event-agent-1' def tearDown(self): super(TestMeta2Indexing, self).tearDown() self._containers_cleanup() self._service(self.event_agent_name, 'start', wait=3) def _containers_cleanup(self): for container in self.containers: self.container_client.container_delete(self.account, container) for svc in self.containers_svcs[container]: self.rdir_client.meta2_index_delete( volume_id=svc['host'], container_path="{0}/{1}/{2}".format( self.ns, self.account, container), container_id=cid_from_name(self.account, container)) def _filter_by_managing_svc(self, all_containers, svc_of_interest): """ Filters through the containers returning only those that have svc_of_interest in their list of managing services. """ containers_list = [] for key in all_containers.keys(): if svc_of_interest in [x['host'] for x in all_containers[key]]: containers_list.append(key) return sorted(containers_list) def test_volume_indexing_worker(self): """ Test steps: - Generate a list of container names and create them - Collect their respective meta2 servers - For each meta2 server: - Run a meta2 indexing worker - List all rdir index records and match then with the services we're expecting. :return: """ self._service(self.event_agent_name, "stop", wait=3) for container in self.containers: self.container_client.container_create(account=self.account, reference=container) for container in self.containers: self.containers_svcs[container] = [ x for x in self.directory_client.list(account=self.account, reference=container)['srv'] if x['type'] == 'meta2' ] meta2_data_paths = {} for svc in self.conf['services']['meta2']: svc_host = svc.get('service_id', svc['addr']) meta2_data_paths[svc_host] = svc['path'] distinct_meta2_servers = set() for svc_list in self.containers_svcs.values(): for svc in svc_list: distinct_meta2_servers.add(svc['host']) for svc in distinct_meta2_servers: expected_containers = self._filter_by_managing_svc( self.containers_svcs, svc) worker = Meta2IndexingWorker(meta2_data_paths[svc], self.conf) worker.crawl_volume() indexed_containers = sorted([ x['container_url'].split('/')[-1] for x in self.rdir_client.meta2_index_fetch_all(volume_id=svc) ]) for cont in expected_containers: self.assertIn(cont, indexed_containers)
class TestContentFactory(BaseTestCase): def setUp(self): super(TestContentFactory, self).setUp() self.namespace = self.conf['namespace'] self.chunk_size = self.conf['chunk_size'] self.gridconf = {"namespace": self.namespace} self.content_factory = ContentFactory(self.gridconf) self.container_name = "TestContentFactory%f" % time.time() self.blob_client = BlobClient() self.container_client = ContainerClient(self.gridconf) self.container_client.container_create(account=self.account, reference=self.container_name) self.container_id = cid_from_name(self.account, self.container_name).upper() self.stgpol = "SINGLE" self.stgpol_twocopies = "TWOCOPIES" self.stgpol_threecopies = "THREECOPIES" self.stgpol_ec = "EC" def tearDown(self): super(TestContentFactory, self).tearDown() def _generate_fullpath(self, account, container_name, path, version): return [ '{0}/{1}/{2}/{3}'.format(quote_plus(account), quote_plus(container_name), quote_plus(path), version) ] def test_get_ec(self): meta = { "chunk_method": "ec/algo=liberasurecode_rs_vand,k=6,m=2", "ctime": "1450176946", "deleted": "False", "hash": "E952A419957A6E405BFC53EC65483F73", "hash_method": "md5", "id": "3FA2C4A1ED2605005335A276890EC458", "length": "658", "mime_type": "application/octet-stream", "name": "tox.ini", "policy": self.stgpol_ec, "version": "1450176946676289", "oio_version": "4.0", } chunks = [{ "url": "http://127.0.0.1:6012/A0A0", "pos": "0.0", "size": 512, "hash": "E7D4E4AD460971CA2E3141F2102308D4" }, { "url": "http://127.0.0.1:6010/A01", "pos": "0.1", "size": 146, "hash": "760AB5DA7C51A3654F1CA622687CD6C3" }, { "url": "http://127.0.0.1:6011/A00", "pos": "0.2", "size": 512, "hash": "B1D08B86B8CAA90A2092CCA0DF9201DB" }, { "url": "http://127.0.0.1:6013/A0A1", "pos": "0.3", "size": 512, "hash": "DA9D7F72AEEA5791565724424CE45C16" }] self.content_factory.container_client.content_locate = Mock( return_value=(meta, chunks)) c = self.content_factory.get("xxx_container_id", "xxx_content_id", account=self.account, container_name=self.container_name) self.assertEqual(type(c), ECContent) self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458") self.assertEqual(c.length, 658) self.assertEqual(c.path, "tox.ini") self.assertEqual( c.full_path, self._generate_fullpath(self.account, self.container_name, "tox.ini", meta['version'])) self.assertEqual(c.version, "1450176946676289") # TODO test storage method self.assertEqual(len(c.chunks), 4) self.assertEqual(c.chunks[0].raw(), chunks[0]) self.assertEqual(c.chunks[1].raw(), chunks[1]) self.assertEqual(c.chunks[2].raw(), chunks[2]) self.assertEqual(c.chunks[3].raw(), chunks[3]) def test_get_plain(self): meta = { "chunk_method": "plain/nb_copy=2", "ctime": "1450176946", "deleted": "False", "hash": "E952A419957A6E405BFC53EC65483F73", "hash_method": "md5", "id": "3FA2C4A1ED2605005335A276890EC458", "length": "658", "mime_type": "application/octet-stream", "name": "tox.ini", "policy": self.stgpol_twocopies, "version": "1450176946676289", "oio_version": "4.0", } chunks = [{ "url": "http://127.0.0.1:6010/A0", "pos": "0", "size": 658, "hash": "E952A419957A6E405BFC53EC65483F73" }, { "url": "http://127.0.0.1:6011/A1", "pos": "0", "size": 658, "hash": "E952A419957A6E405BFC53EC65483F73" }] self.content_factory.container_client.content_locate = Mock( return_value=(meta, chunks)) c = self.content_factory.get("xxx_container_id", "xxx_content_id", account=self.account, container_name=self.container_name) self.assertEqual(type(c), PlainContent) self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458") self.assertEqual(c.length, 658) self.assertEqual(c.path, "tox.ini") self.assertEqual(c.version, "1450176946676289") self.assertEqual( c.full_path, self._generate_fullpath(self.account, self.container_name, "tox.ini", meta['version'])) # TODO test storage_method self.assertEqual(len(c.chunks), 2) self.assertEqual(c.chunks[0].raw(), chunks[0]) self.assertEqual(c.chunks[1].raw(), chunks[1]) def test_get_unknown_content(self): self.assertRaises(ContentNotFound, self.content_factory.get, self.container_id, "1234") def test_new_ec(self): meta = { "chunk_method": "ec/algo=liberasurecode_rs_vand,k=6,m=2", "ctime": "1450341162", "deleted": "False", "hash": "", "hash_method": "md5", "id": "F4B1C8DD132705007DE8B43D0709DAA2", "length": "1000", "mime_type": "application/octet-stream", "name": "titi", "policy": self.stgpol_ec, "version": "1450341162332663", "oio_version": "4.0", } chunks = [{ "url": "http://127.0.0.1:6010/0_p1", "pos": "0.3", "size": 1048576, "hash": "00000000000000000000000000000000" }, { "url": "http://127.0.0.1:6011/0_p0", "pos": "0.2", "size": 1048576, "hash": "00000000000000000000000000000000" }, { "url": "http://127.0.0.1:6016/0_1", "pos": "0.1", "size": 1048576, "hash": "00000000000000000000000000000000" }, { "url": "http://127.0.0.1:6017/0_0", "pos": "0.0", "size": 1048576, "hash": "00000000000000000000000000000000" }] self.content_factory.container_client.content_prepare = Mock( return_value=(meta, chunks)) c = self.content_factory.new("xxx_container_id", "titi", 1000, self.stgpol_ec, account=self.account, container_name=self.container_name) self.assertEqual(type(c), ECContent) self.assertEqual(c.content_id, "F4B1C8DD132705007DE8B43D0709DAA2") self.assertEqual(c.length, 1000) self.assertEqual(c.path, "titi") self.assertEqual(c.version, "1450341162332663") # TODO test storage_method self.assertEqual(len(c.chunks), 4) self.assertEqual(c.chunks[0].raw(), chunks[3]) self.assertEqual(c.chunks[1].raw(), chunks[2]) self.assertEqual(c.chunks[2].raw(), chunks[1]) self.assertEqual(c.chunks[3].raw(), chunks[0]) def _new_content(self, stgpol, data, path="titi", account=None, container_name=None, mime_type=None, properties=None): old_content = self.content_factory.new(self.container_id, path, len(data), stgpol, account=account, container_name=container_name) if properties: old_content.properties = properties if mime_type: old_content.mime_type = mime_type old_content.create(BytesIO(data)) return self.content_factory.get(self.container_id, old_content.content_id) def _test_change_policy(self, data_size, old_policy, new_policy): data = random_data(data_size) obj_type = { self.stgpol: PlainContent, self.stgpol_twocopies: PlainContent, self.stgpol_threecopies: PlainContent, self.stgpol_ec: ECContent } mime_type = "application/test" props = {'a': 'b'} old_content = self._new_content(old_policy, data, mime_type=mime_type, properties=props) self.assertEqual(type(old_content), obj_type[old_policy]) changed_content = self.content_factory.change_policy( old_content.container_id, old_content.content_id, new_policy) self.assertRaises(NotFound, self.container_client.content_show, self.account, cid=old_content.container_id, content=old_content.content_id) new_content = self.content_factory.get(self.container_id, changed_content.content_id) self.assertEqual(type(new_content), obj_type[new_policy]) downloaded_data = "".join(new_content.fetch()) self.assertEqual(downloaded_data, data) self.assertEqual(mime_type, new_content.mime_type) self.assertDictEqual(props, new_content.properties) @ec def test_change_content_0_byte_policy_single_to_ec(self): self._test_change_policy(0, self.stgpol, self.stgpol_ec) @ec def test_change_content_0_byte_policy_ec_to_twocopies(self): self._test_change_policy(0, self.stgpol_ec, self.stgpol_twocopies) @ec def test_change_content_1_byte_policy_single_to_ec(self): self._test_change_policy(1, self.stgpol, self.stgpol_ec) @ec def test_change_content_chunksize_bytes_policy_twocopies_to_ec(self): self._test_change_policy(self.chunk_size, self.stgpol_twocopies, self.stgpol_ec) @ec def test_change_content_2xchunksize_bytes_policy_threecopies_to_ec(self): self._test_change_policy(self.chunk_size * 2, self.stgpol_threecopies, self.stgpol_ec) @ec def test_change_content_1_byte_policy_ec_to_threecopies(self): self._test_change_policy(1, self.stgpol_ec, self.stgpol_threecopies) @ec def test_change_content_chunksize_bytes_policy_ec_to_twocopies(self): self._test_change_policy(self.chunk_size, self.stgpol_ec, self.stgpol_twocopies) @ec def test_change_content_2xchunksize_bytes_policy_ec_to_single(self): self._test_change_policy(self.chunk_size * 2, self.stgpol_ec, self.stgpol) def test_change_content_0_byte_policy_twocopies_to_threecopies(self): self._test_change_policy(0, self.stgpol_twocopies, self.stgpol_threecopies) def test_change_content_chunksize_bytes_policy_single_to_twocopies(self): self._test_change_policy(self.chunk_size, self.stgpol, self.stgpol_twocopies) def test_change_content_2xchunksize_bytes_policy_3copies_to_single(self): self._test_change_policy(self.chunk_size * 2, self.stgpol_threecopies, self.stgpol) def test_change_content_with_same_policy(self): data = random_data(10) old_content = self._new_content(self.stgpol_twocopies, data) changed_content = self.content_factory.change_policy( old_content.container_id, old_content.content_id, self.stgpol_twocopies) self.assertEqual(old_content.content_id, changed_content.content_id) def test_change_policy_unknown_content(self): self.assertRaises(ContentNotFound, self.content_factory.change_policy, self.container_id, "1234", self.stgpol) def test_change_policy_unknown_storage_policy(self): data = random_data(10) old_content = self._new_content(self.stgpol_twocopies, data) self.assertRaises(ClientException, self.content_factory.change_policy, self.container_id, old_content.content_id, "UnKnOwN") def _test_move_chunk(self, policy): data = random_data(self.chunk_size) content = self._new_content(policy, data) chunk_id = content.chunks.filter(metapos=0)[0].id chunk_url = content.chunks.filter(metapos=0)[0].url chunk_meta, chunk_stream = self.blob_client.chunk_get(chunk_url) chunk_hash = md5_stream(chunk_stream) new_chunk = content.move_chunk(chunk_id) content_updated = self.content_factory.get(self.container_id, content.content_id) hosts = [] for c in content_updated.chunks.filter(metapos=0): self.assertThat(hosts, Not(Contains(c.host))) self.assertNotEqual(c.id, chunk_id) hosts.append(c.host) new_chunk_meta, new_chunk_stream = self.blob_client.chunk_get( new_chunk["url"]) new_chunk_hash = md5_stream(new_chunk_stream) self.assertEqual(new_chunk_hash, chunk_hash) del chunk_meta["chunk_id"] del new_chunk_meta["chunk_id"] self.assertEqual(new_chunk_meta, chunk_meta) def test_single_move_chunk(self): self._test_move_chunk(self.stgpol) def test_twocopies_move_chunk(self): self._test_move_chunk(self.stgpol_twocopies) @ec def test_ec_move_chunk(self): self._test_move_chunk(self.stgpol_ec) def test_move_chunk_not_in_content(self): data = random_data(self.chunk_size) content = self._new_content(self.stgpol_twocopies, data) with ExpectedException(OrphanChunk): content.move_chunk("1234") def test_strange_paths(self): strange_paths = [ "Annual report.txt", "foo+bar=foobar.txt", "100%_bug_free.c", "forward/slash/allowed", "I\\put\\backslashes\\and$dollar$signs$in$file$names", "Je suis tombé sur la tête, mais ça va bien.", "%s%f%u%d%%", "{1},{0},{3}", "carriage\rreturn", "line\nfeed", "ta\tbu\tla\ttion", "controlchars", "//azeaze\\//azeaz\\//azea" ] answers = dict() for cname in strange_paths: content = self._new_content(self.stgpol, "nobody cares", cname) answers[cname] = content _, listing = self.container_client.content_list( self.account, self.container_name) obj_set = { k["name"].encode("utf8", "ignore") for k in listing["objects"] } try: # Ensure the saved path is the one we gave the object for cname in answers: self.assertEqual(cname, answers[cname].path) full_path = self._generate_fullpath(self.account, self.container_name, cname, answers[cname].version) self.assertEqual(answers[cname].full_path, full_path) # Ensure all objects appear in listing for cname in strange_paths: self.assertIn(cname, obj_set) finally: # Cleanup for cname in answers: try: content.delete() except Exception: pass
class TestDupContent(BaseTestCase): def setUp(self): super(TestDupContent, self).setUp() if len(self.conf['rawx']) < 3: self.skipTest("Not enough rawx. " "Dup tests needs more than 2 rawx to run") self.namespace = self.conf['namespace'] self.account = self.conf['account'] self.chunk_size = self.conf['chunk_size'] self.gridconf = {"namespace": self.namespace} self.content_factory = ContentFactory(self.gridconf) self.container_client = ContainerClient(self.gridconf) self.blob_client = BlobClient() self.container_name = "TestDupContent%f" % time.time() self.container_client.container_create(acct=self.account, ref=self.container_name) self.container_id = cid_from_name(self.account, self.container_name).upper() def tearDown(self): super(TestDupContent, self).tearDown() def _test_upload(self, stgpol, data_size): data = random_data(data_size) content = self.content_factory.new(self.container_id, "titi", len(data), stgpol) self.assertEqual(type(content), DupContent) content.upload(StringIO.StringIO(data)) meta, chunks = self.container_client.content_show( cid=self.container_id, content=content.content_id) chunks = ChunksHelper(chunks) self.assertEqual(meta['hash'], md5_data(data)) self.assertEqual(meta['length'], str(len(data))) self.assertEqual(meta['policy'], stgpol) self.assertEqual(meta['name'], "titi") metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size)) if metachunk_nb == 0: metachunk_nb = 1 # special case for empty content if stgpol == "THREECOPIES": nb_copy = 3 elif stgpol == "TWOCOPIES": nb_copy = 2 elif stgpol == "SINGLE": nb_copy = 1 self.assertEqual(len(chunks), metachunk_nb * nb_copy) for pos in range(metachunk_nb): chunks_at_pos = chunks.filter(pos=pos) self.assertEqual(len(chunks_at_pos), nb_copy) data_begin = pos * self.chunk_size data_end = pos * self.chunk_size + self.chunk_size chunk_hash = md5_data(data[data_begin:data_end]) for chunk in chunks_at_pos: meta, stream = self.blob_client.chunk_get(chunk.url) self.assertEqual(md5_stream(stream), chunk_hash) self.assertEqual(meta['content_size'], str(len(data))) self.assertEqual(meta['content_path'], "titi") self.assertEqual(meta['content_cid'], self.container_id) self.assertEqual(meta['content_id'], meta['content_id']) self.assertEqual(meta['chunk_id'], chunk.id) self.assertEqual(meta['chunk_pos'], str(pos)) self.assertEqual(meta['chunk_hash'], chunk_hash) def test_twocopies_upload_0_byte(self): self._test_upload("TWOCOPIES", 0) def test_twocopies_upload_1_byte(self): self._test_upload("TWOCOPIES", 1) def test_twocopies_upload_chunksize_bytes(self): self._test_upload("TWOCOPIES", self.chunk_size) def test_twocopies_upload_chunksize_plus_1_bytes(self): self._test_upload("TWOCOPIES", self.chunk_size + 1) def test_single_upload_0_byte(self): self._test_upload("SINGLE", 0) def test_single_upload_chunksize_plus_1_bytes(self): self._test_upload("SINGLE", self.chunk_size + 1) def test_chunks_cleanup_when_upload_failed(self): data = random_data(2 * self.chunk_size) content = self.content_factory.new(self.container_id, "titi", len(data), "TWOCOPIES") self.assertEqual(type(content), DupContent) # set bad url for position 1 for chunk in content.chunks.filter(pos=1): chunk.url = "http://127.0.0.1:9/DEADBEEF" self.assertRaises(Exception, content.upload, StringIO.StringIO(data)) for chunk in content.chunks.exclude(pos=1): self.assertRaises(NotFound, self.blob_client.chunk_head, chunk.url) def _new_content(self, stgpol, data, broken_pos_list=[]): old_content = self.content_factory.new(self.container_id, "titi", len(data), stgpol) self.assertEqual(type(old_content), DupContent) old_content.upload(StringIO.StringIO(data)) broken_chunks_info = {} for pos, idx in broken_pos_list: c = old_content.chunks.filter(pos=pos)[idx] meta, stream = self.blob_client.chunk_get(c.url) if pos not in broken_chunks_info: broken_chunks_info[pos] = {} broken_chunks_info[pos][idx] = { "url": c.url, "id": c.id, "hash": c.hash, "dl_meta": meta, "dl_hash": md5_stream(stream) } self.blob_client.chunk_delete(c.url) # get the new structure of the uploaded content return (self.content_factory.get(self.container_id, old_content.content_id), broken_chunks_info) def _test_rebuild(self, stgpol, data_size, broken_pos_list, full_rebuild_pos): data = random_data(data_size) content, broken_chunks_info = self._new_content( stgpol, data, broken_pos_list) rebuild_pos, rebuild_idx = full_rebuild_pos rebuild_chunk_info = broken_chunks_info[rebuild_pos][rebuild_idx] content.rebuild_chunk(rebuild_chunk_info["id"]) # get the new structure of the content rebuilt_content = self.content_factory.get(self.container_id, content.content_id) self.assertEqual(type(rebuilt_content), DupContent) # find the rebuilt chunk for c in rebuilt_content.chunks.filter(pos=rebuild_pos): if len(content.chunks.filter(id=c.id)) > 0: # not the rebuilt chunk # if this chunk is broken, it must not have been rebuilt for b_c_i in broken_chunks_info[rebuild_pos].values(): if c.id == b_c_i["id"]: with ExpectedException(NotFound): _, _ = self.blob_client.chunk_get(c.url) continue meta, stream = self.blob_client.chunk_get(c.url) self.assertEqual(meta["chunk_id"], c.id) self.assertEqual(md5_stream(stream), rebuild_chunk_info["dl_hash"]) self.assertEqual(c.hash, rebuild_chunk_info["hash"]) self.assertThat(c.url, NotEquals(rebuild_chunk_info["url"])) del meta["chunk_id"] del rebuild_chunk_info["dl_meta"]["chunk_id"] self.assertEqual(meta, rebuild_chunk_info["dl_meta"]) def test_2copies_content_0_byte_1broken_rebuild_pos_0_idx_0(self): self._test_rebuild("TWOCOPIES", 0, [(0, 0)], (0, 0)) def test_2copies_content_1_byte_1broken_rebuild_pos_0_idx_1(self): self._test_rebuild("TWOCOPIES", 1, [(0, 1)], (0, 1)) def test_3copies_content_chunksize_bytes_2broken_rebuild_pos_0_idx_1(self): if len(self.conf['rawx']) <= 3: self.skipTest("Need more than 3 rawx") self._test_rebuild("THREECOPIES", self.chunk_size, [(0, 0), (0, 1)], (0, 1)) def test_3copies_content_2xchksize_bytes_2broken_rebuild_pos_1_idx_2(self): if len(self.conf['rawx']) <= 3: self.skipTest("Need more than 3 rawx") self._test_rebuild("THREECOPIES", 2 * self.chunk_size, [(1, 0), (1, 2)], (1, 2)) def test_2copies_content_0_byte_2broken_rebuild_pos_0_idx_0(self): with ExpectedException(UnrecoverableContent): self._test_rebuild("TWOCOPIES", 0, [(0, 0), (0, 1)], (0, 0)) def _test_download(self, stgpol, data_size, broken_pos_list): data = random_data(data_size) content, _ = self._new_content(stgpol, data, broken_pos_list) downloaded_data = "".join(content.download()) self.assertEqual(downloaded_data, data) for pos, idx in broken_pos_list: # check nothing has been rebuilt c = content.chunks.filter(pos=pos)[0] self.assertRaises(NotFound, self.blob_client.chunk_delete, c.url) def test_twocopies_download_content_0_byte_without_broken_chunks(self): self._test_download("TWOCOPIES", 0, []) def test_twocopies_download_content_0_byte_with_broken_0_0(self): self._test_download("TWOCOPIES", 0, [(0, 0)]) def test_twocopies_download_content_1_byte_without_broken_chunks(self): self._test_download("TWOCOPIES", 1, []) def test_twocopies_download_content_1_byte_with_broken_0_0(self): self._test_download("TWOCOPIES", 1, [(0, 0)]) def test_twocopies_download_chunksize_bytes_without_broken_chunks(self): self._test_download("TWOCOPIES", self.chunk_size, []) def test_twocopies_download_2xchuksize_bytes_with_broken_0_0_and_1_0(self): self._test_download("TWOCOPIES", self.chunk_size * 2, [(0, 0), (1, 0)]) def test_twocopies_download_content_chunksize_bytes_2_broken_chunks(self): data = random_data(self.chunk_size) content, _ = self._new_content("TWOCOPIES", data, [(0, 0), (0, 1)]) gen = content.download() self.assertRaises(UnrecoverableContent, gen.next) def test_single_download_content_1_byte_without_broken_chunks(self): self._test_download("SINGLE", 1, []) def test_single_download_chunksize_bytes_plus_1_without_broken_chunk(self): self._test_download("SINGLE", self.chunk_size * 2, [])
class TestBlobAuditorFunctional(BaseTestCase): def setUp(self): super(TestBlobAuditorFunctional, self).setUp() self.namespace = self.conf['namespace'] self.account = self.conf['account'] self.ref = random_str(8) _, rawx_loc, rawx_addr, rawx_uuid = \ self.get_service_url('rawx') self.rawx_id = 'http://' + (rawx_uuid if rawx_uuid else rawx_addr) self.auditor = BlobAuditorWorker(self.conf, get_logger(None), None) self.container_client = ContainerClient(self.conf) self.blob_client = BlobClient(conf=self.conf) self.container_client.container_create(self.account, self.ref) self.content = TestContent(self.account, self.ref) self.chunk = TestChunk(self.rawx_id, rawx_loc, self.content.size, self.content.hash) chunk_meta = { 'container_id': self.content.cid, 'content_path': self.content.path, 'version': self.content.version, 'id': self.content.id, 'full_path': self.content.fullpath, 'chunk_method': 'plain/nb_copy=3', 'policy': 'TESTPOLICY', 'chunk_id': self.chunk.id, 'chunk_pos': self.chunk.pos, 'chunk_hash': self.chunk.metachunk_hash, 'chunk_size': self.chunk.metachunk_size, 'metachunk_hash': self.chunk.metachunk_hash, 'metachunk_size': self.chunk.metachunk_size, 'oio_version': OIO_VERSION } self.blob_client.chunk_put(self.chunk.url, chunk_meta, self.content.data) def tearDown(self): super(TestBlobAuditorFunctional, self).tearDown() try: self.container_client.content_delete(self.account, self.ref, self.content.path) except Exception: pass try: self.container_client.container_delete(self.account, self.ref) except Exception: pass try: os.remove(self.chunk.path) except Exception: pass def init_content(self): chunk_proxy = { "url": self.chunk.url, "pos": str(self.chunk.pos), "hash": self.chunk.metachunk_hash, "size": self.chunk.metachunk_size } self.container_client.content_create(self.account, self.ref, self.content.path, version=int(time.time() * 1000000), content_id=self.content.id, size=self.content.size, checksum=self.content.hash, data={'chunks': [chunk_proxy]}, stgpol="SINGLE") def test_chunk_audit(self): self.init_content() self.auditor.chunk_audit(self.chunk.path, self.chunk.id) def test_content_deleted(self): self.assertRaises(exc.OrphanChunk, self.auditor.chunk_audit, self.chunk.path, self.chunk.id) def test_container_deleted(self): self.container_client.container_delete(self.account, self.ref) self.assertRaises(exc.OrphanChunk, self.auditor.chunk_audit, self.chunk.path, self.chunk.id) def test_chunk_corrupted(self): self.init_content() with open(self.chunk.path, "wb") as outf: outf.write(os.urandom(1280)) self.assertRaises(exc.CorruptedChunk, self.auditor.chunk_audit, self.chunk.path, self.chunk.id) def test_chunk_bad_chunk_size(self): self.init_content() with open(self.chunk.path, "wb") as outf: outf.write(os.urandom(320)) exc_class = (exc.FaultyChunk, exc.CorruptedChunk) self.assertRaises(exc_class, self.auditor.chunk_audit, self.chunk.path, self.chunk.id) def test_xattr_bad_xattr_metachunk_size(self): self.init_content() xattr.setxattr(self.chunk.path, 'user.' + chunk_xattr_keys['metachunk_size'], b'320') self.assertRaises(exc.FaultyChunk, self.auditor.chunk_audit, self.chunk.path, self.chunk.id) def test_xattr_bad_xattr_metachunk_hash(self): self.init_content() xattr.setxattr(self.chunk.path, 'user.' + chunk_xattr_keys['metachunk_hash'], b'0123456789ABCDEF0123456789ABCDEF') self.assertRaises(exc.FaultyChunk, self.auditor.chunk_audit, self.chunk.path, self.chunk.id) def test_xattr_bad_xattr_chunk_id(self): self.init_content() xattr.removexattr( self.chunk.path, 'user.' + CHUNK_XATTR_CONTENT_FULLPATH_PREFIX + str(self.chunk.id)) xattr.setxattr( self.chunk.path, 'user.' + CHUNK_XATTR_CONTENT_FULLPATH_PREFIX + 'WRONG_ID', self.content.fullpath.encode('utf-8')) self.assertRaises(exc.FaultyChunk, self.auditor.chunk_audit, self.chunk.path, self.chunk.id) def test_xattr_bad_xattr_content_container(self): self.init_content() xattr.setxattr( self.chunk.path, 'user.' + CHUNK_XATTR_CONTENT_FULLPATH_PREFIX + str(self.chunk.id), encode_fullpath(self.account, 'WRONG_REF', self.content.path, self.content.version, self.content.id).encode('utf-8')) self.assertRaises(exc.OrphanChunk, self.auditor.chunk_audit, self.chunk.path, self.chunk.id) def test_xattr_bad_xattr_content_id(self): self.init_content() xattr.setxattr( self.chunk.path, 'user.' + CHUNK_XATTR_CONTENT_FULLPATH_PREFIX + str(self.chunk.id), encode_fullpath(self.account, self.ref, self.content.path, self.content.version, '0123456789ABCDEF').encode('utf-8')) self.assertRaises(exc.OrphanChunk, self.auditor.chunk_audit, self.chunk.path, self.chunk.id) def test_xattr_bad_xattr_chunk_position(self): self.init_content() xattr.setxattr(self.chunk.path, 'user.' + chunk_xattr_keys['chunk_pos'], b'42') self.assertRaises(exc.FaultyChunk, self.auditor.chunk_audit, self.chunk.path, self.chunk.id) def test_chunk_bad_meta2_metachunk_size(self): self.content.size = 320 self.chunk.metachunk_size = 320 self.init_content() self.assertRaises(exc.FaultyChunk, self.auditor.chunk_audit, self.chunk.path, self.chunk.id) def test_chunk_bad_meta2_metachunk_hash(self): self.chunk.metachunk_hash = '0123456789ABCDEF0123456789ABCDEF' self.init_content() self.assertRaises(exc.FaultyChunk, self.auditor.chunk_audit, self.chunk.path, self.chunk.id) def test_chunk_bad_meta2_chunk_url(self): self.chunk.url = '%s/0123456789ABCDEF' % self.rawx_id self.init_content() self.assertRaises(exc.OrphanChunk, self.auditor.chunk_audit, self.chunk.path, self.chunk.id)
class Checker(object): def __init__(self, namespace, concurrency=50, error_file=None, rebuild_file=None): self.pool = GreenPool(concurrency) self.error_file = error_file if self.error_file: f = open(self.error_file, 'a') self.error_writer = csv.writer(f, delimiter=' ') self.rebuild_file = rebuild_file if self.rebuild_file: fd = open(self.rebuild_file, 'a') self.rebuild_writer = csv.writer(fd, delimiter='|') conf = {'namespace': namespace} self.account_client = AccountClient(conf) self.container_client = ContainerClient(conf) self.blob_client = BlobClient() self.accounts_checked = 0 self.containers_checked = 0 self.objects_checked = 0 self.chunks_checked = 0 self.account_not_found = 0 self.container_not_found = 0 self.object_not_found = 0 self.chunk_not_found = 0 self.account_exceptions = 0 self.container_exceptions = 0 self.object_exceptions = 0 self.chunk_exceptions = 0 self.list_cache = {} self.running = {} def write_error(self, target): error = [target.account] if target.container: error.append(target.container) if target.obj: error.append(target.obj) if target.chunk: error.append(target.chunk) self.error_writer.writerow(error) def write_rebuilder_input(self, target, obj_meta, ct_meta): cid = ct_meta['properties']['sys.name'].split('.', 1)[0] self.rebuild_writer.writerow((cid, obj_meta['id'], target.chunk)) def _check_chunk_xattr(self, target, obj_meta, xattr_meta): error = False # Composed position -> erasure coding attr_prefix = 'meta' if '.' in obj_meta['pos'] else '' attr_key = attr_prefix + 'chunk_size' if str(obj_meta['size']) != xattr_meta.get(attr_key): print(" Chunk %s '%s' xattr (%s) " "differs from size in meta2 (%s)" % (target, attr_key, xattr_meta.get(attr_key), obj_meta['size'])) error = True attr_key = attr_prefix + 'chunk_hash' if obj_meta['hash'] != xattr_meta.get(attr_key): print(" Chunk %s '%s' xattr (%s) " "differs from hash in meta2 (%s)" % (target, attr_key, xattr_meta.get(attr_key), obj_meta['hash'])) error = True return error def check_chunk(self, target): chunk = target.chunk obj_listing, obj_meta = self.check_obj(target) error = False if chunk not in obj_listing: print(' Chunk %s missing from object listing' % target) error = True db_meta = dict() else: db_meta = obj_listing[chunk] try: xattr_meta = self.blob_client.chunk_head(chunk) except exc.NotFound as e: self.chunk_not_found += 1 error = True print(' Not found chunk "%s": %s' % (target, str(e))) except Exception as e: self.chunk_exceptions += 1 error = True print(' Exception chunk "%s": %s' % (target, str(e))) else: if db_meta: error = self._check_chunk_xattr(target, db_meta, xattr_meta) if error: if self.error_file: self.write_error(target) if self.rebuild_file: self.write_rebuilder_input( target, obj_meta, self.list_cache[(target.account, target.container)][1]) self.chunks_checked += 1 def check_obj(self, target, recurse=False): account = target.account container = target.container obj = target.obj if (account, container, obj) in self.running: self.running[(account, container, obj)].wait() if (account, container, obj) in self.list_cache: return self.list_cache[(account, container, obj)] self.running[(account, container, obj)] = Event() print('Checking object "%s"' % target) container_listing, ct_meta = self.check_container(target) error = False if obj not in container_listing: print(' Object %s missing from container listing' % target) error = True # checksum = None else: # TODO check checksum match # checksum = container_listing[obj]['hash'] pass results = [] meta = dict() try: meta, results = self.container_client.content_show( acct=account, ref=container, path=obj) except exc.NotFound as e: self.object_not_found += 1 error = True print(' Not found object "%s": %s' % (target, str(e))) except Exception as e: self.object_exceptions += 1 error = True print(' Exception object "%s": %s' % (target, str(e))) chunk_listing = dict() for chunk in results: chunk_listing[chunk['url']] = chunk self.objects_checked += 1 self.list_cache[(account, container, obj)] = (chunk_listing, meta) self.running[(account, container, obj)].send(True) del self.running[(account, container, obj)] if recurse: for chunk in chunk_listing: t = target.copy() t.chunk = chunk self.pool.spawn_n(self.check_chunk, t) if error and self.error_file: self.write_error(target) return chunk_listing, meta def check_container(self, target, recurse=False): account = target.account container = target.container if (account, container) in self.running: self.running[(account, container)].wait() if (account, container) in self.list_cache: return self.list_cache[(account, container)] self.running[(account, container)] = Event() print('Checking container "%s"' % target) account_listing = self.check_account(target) error = False if container not in account_listing: error = True print(' Container %s missing from account listing' % target) marker = None results = [] ct_meta = dict() while True: try: resp = self.container_client.container_list( acct=account, ref=container, marker=marker) except exc.NotFound as e: self.container_not_found += 1 error = True print(' Not found container "%s": %s' % (target, str(e))) break except Exception as e: self.container_exceptions += 1 error = True print(' Exception container "%s": %s' % (target, str(e))) break if resp['objects']: marker = resp['objects'][-1]['name'] results.extend(resp['objects']) else: ct_meta = resp ct_meta.pop('objects') break container_listing = dict() for obj in results: container_listing[obj['name']] = obj self.containers_checked += 1 self.list_cache[(account, container)] = container_listing, ct_meta self.running[(account, container)].send(True) del self.running[(account, container)] if recurse: for obj in container_listing: t = target.copy() t.obj = obj self.pool.spawn_n(self.check_obj, t, True) if error and self.error_file: self.write_error(target) return container_listing, ct_meta def check_account(self, target, recurse=False): account = target.account if account in self.running: self.running[account].wait() if account in self.list_cache: return self.list_cache[account] self.running[account] = Event() print('Checking account "%s"' % target) error = False marker = None results = [] while True: try: resp = self.account_client.containers_list( account, marker=marker) except Exception as e: self.account_exceptions += 1 error = True print(' Exception account "%s": %s' % (target, str(e))) break if resp['listing']: marker = resp['listing'][-1][0] else: break results.extend(resp['listing']) containers = dict() for e in results: containers[e[0]] = (e[1], e[2]) self.list_cache[account] = containers self.running[account].send(True) del self.running[account] self.accounts_checked += 1 if recurse: for container in containers: t = target.copy() t.container = container self.pool.spawn_n(self.check_container, t, True) if error and self.error_file: self.write_error(target) return containers def check(self, target): if target.chunk and target.obj and target.container: self.pool.spawn_n(self.check_chunk, target) elif target.obj and target.container: self.pool.spawn_n(self.check_obj, target, True) elif target.container: self.pool.spawn_n(self.check_container, target, True) else: self.pool.spawn_n(self.check_account, target, True) def wait(self): self.pool.waitall() def report(self): def _report_stat(name, stat): print("{0:18}: {1}".format(name, stat)) print() print('Report') _report_stat("Accounts checked", self.accounts_checked) if self.account_not_found: _report_stat("Missing accounts", self.account_not_found) if self.account_exceptions: _report_stat("Exceptions", self.account_not_found) print() _report_stat("Containers checked", self.containers_checked) if self.container_not_found: _report_stat("Missing containers", self.container_not_found) if self.container_exceptions: _report_stat("Exceptions", self.container_exceptions) print() _report_stat("Objects checked", self.objects_checked) if self.object_not_found: _report_stat("Missing objects", self.object_not_found) if self.object_exceptions: _report_stat("Exceptions", self.object_exceptions) print() _report_stat("Chunks checked", self.chunks_checked) if self.chunk_not_found: _report_stat("Missing chunks", self.chunk_not_found) if self.chunk_exceptions: _report_stat("Exceptions", self.chunk_exceptions)
class TestBlobAuditorFunctional(BaseTestCase): def setUp(self): super(TestBlobAuditorFunctional, self).setUp() self.namespace = self.conf['namespace'] self.account = self.conf['account'] self.test_dir = self.conf['sds_path'] rawx_num, rawx_path, rawx_addr = self.get_service_url('rawx') self.rawx = 'http://' + rawx_addr self.h = hashlib.new('md5') conf = {"namespace": self.namespace} self.auditor = BlobAuditorWorker(conf, get_logger(None), None) self.container_c = ContainerClient(conf) self.blob_c = BlobClient() self.ref = random_str(8) self.container_c.container_create(self.account, self.ref) self.url_rand = random_id(64) self.data = random_str(1280) self.h.update(self.data) self.hash_rand = self.h.hexdigest().lower() self.content = TestContent(random_str(6), len(self.data), self.url_rand, 1) self.content.id_container = cid_from_name(self.account, self.ref).upper() self.chunk = TestChunk(self.content.size, self.url_rand, 0, self.hash_rand) self.chunk_url = "%s/%s" % (self.rawx, self.chunk.id_chunk) self.chunk_proxy = { "hash": self.chunk.md5, "pos": "0", "size": self.chunk.size, "url": self.chunk_url } chunk_meta = { 'content_path': self.content.path, 'container_id': self.content.id_container, 'chunk_method': 'plain/nb_copy=3', 'policy': 'TESTPOLICY', 'id': '0000', 'version': 1, 'chunk_id': self.chunk.id_chunk, 'chunk_pos': self.chunk.pos, 'chunk_hash': self.chunk.md5, } self.blob_c.chunk_put(self.chunk_url, chunk_meta, self.data) self.chunk_path = self.test_dir + '/data/' + self.namespace + \ '-rawx-1/' + self.chunk.id_chunk[0:3] + "/" + self.chunk.id_chunk self.bad_container_id = '0' * 64 def tearDown(self): super(TestBlobAuditorFunctional, self).tearDown() try: self.container_c.content_delete(self.account, self.ref, self.content.path) except Exception: pass try: self.container_c.container_destroy(self.account, self.ref) except Exception: pass try: os.remove(self.chunk_path) except Exception: pass def init_content(self): self.container_c.content_create(self.account, self.ref, self.content.path, self.chunk.size, self.hash_rand, data=[self.chunk_proxy]) def test_chunk_audit(self): self.init_content() self.auditor.chunk_audit(self.chunk_path) def test_content_deleted(self): self.assertRaises(exc.OrphanChunk, self.auditor.chunk_audit, self.chunk_path) def test_container_deleted(self): self.container_c.container_delete(self.account, self.ref) self.assertRaises(exc.OrphanChunk, self.auditor.chunk_audit, self.chunk_path) def test_chunk_corrupted(self): self.init_content() with open(self.chunk_path, "w") as f: f.write(random_str(1280)) self.assertRaises(exc.CorruptedChunk, self.auditor.chunk_audit, self.chunk_path) def test_chunk_bad_size(self): self.init_content() with open(self.chunk_path, "w") as f: f.write(random_str(320)) self.assertRaises(exc.FaultyChunk, self.auditor.chunk_audit, self.chunk_path) def test_xattr_bad_chunk_size(self): self.init_content() xattr.setxattr(self.chunk_path, 'user.' + chunk_xattr_keys['chunk_size'], '-1') self.assertRaises(exc.FaultyChunk, self.auditor.chunk_audit, self.chunk_path) def test_xattr_bad_chunk_hash(self): self.init_content() xattr.setxattr(self.chunk_path, 'user.' + chunk_xattr_keys['chunk_hash'], 'WRONG_HASH') self.assertRaises(exc.CorruptedChunk, self.auditor.chunk_audit, self.chunk_path) def test_xattr_bad_content_path(self): self.init_content() xattr.setxattr(self.chunk_path, 'user.' + chunk_xattr_keys['content_path'], 'WRONG_PATH') self.assertRaises(exc.OrphanChunk, self.auditor.chunk_audit, self.chunk_path) def test_xattr_bad_chunk_id(self): self.init_content() xattr.setxattr(self.chunk_path, 'user.' + chunk_xattr_keys['chunk_id'], 'WRONG_ID') self.assertRaises(exc.OrphanChunk, self.auditor.chunk_audit, self.chunk_path) def test_xattr_bad_content_container(self): self.init_content() xattr.setxattr(self.chunk_path, 'user.' + chunk_xattr_keys['container_id'], self.bad_container_id) self.assertRaises(exc.OrphanChunk, self.auditor.chunk_audit, self.chunk_path) def test_xattr_bad_chunk_position(self): self.init_content() xattr.setxattr(self.chunk_path, 'user.grid.chunk.position', '42') xattr.setxattr(self.chunk_path, 'user.' + chunk_xattr_keys['chunk_pos'], '42') self.assertRaises(exc.FaultyChunk, self.auditor.chunk_audit, self.chunk_path) def test_chunk_bad_hash(self): self.h.update(self.data) self.hash_rand = self.h.hexdigest().lower() self.chunk.md5 = self.hash_rand self.chunk_proxy['hash'] = self.chunk.md5 self.init_content() self.assertRaises(exc.FaultyChunk, self.auditor.chunk_audit, self.chunk_path) def test_chunk_bad_length(self): self.chunk.size = 320 self.chunk_proxy['size'] = self.chunk.size self.init_content() self.assertRaises(exc.FaultyChunk, self.auditor.chunk_audit, self.chunk_path) def test_chunk_bad_chunk_size(self): self.chunk.size = 320 self.chunk_proxy['size'] = self.chunk.size self.init_content() self.assertRaises(exc.FaultyChunk, self.auditor.chunk_audit, self.chunk_path) def test_chunk_bad_url(self): self.chunk_proxy['url'] = '%s/WRONG_ID' % self.rawx self.init_content() self.assertRaises(exc.OrphanChunk, self.auditor.chunk_audit, self.chunk_path)
def __init__(self, conf): self.conf = conf self.logger = get_logger(conf) self.container_client = ContainerClient(conf)
class Checker(object): def __init__(self, namespace, concurrency=50, error_file=None): self.pool = GreenPool(concurrency) self.error_file = error_file if self.error_file: f = open(self.error_file, 'a') self.error_writer = csv.writer(f, delimiter=' ') conf = {'namespace': namespace} self.account_client = AccountClient(conf) self.container_client = ContainerClient(conf) self.blob_client = BlobClient() self.accounts_checked = 0 self.containers_checked = 0 self.objects_checked = 0 self.chunks_checked = 0 self.account_not_found = 0 self.container_not_found = 0 self.object_not_found = 0 self.chunk_not_found = 0 self.account_exceptions = 0 self.container_exceptions = 0 self.object_exceptions = 0 self.chunk_exceptions = 0 self.list_cache = {} self.running = {} def write_error(self, target): error = [target.account] if target.container: error.append(target.container) if target.obj: error.append(target.obj) if target.chunk: error.append(target.chunk) self.error_writer.writerow(error) def check_chunk(self, target): chunk = target.chunk obj_listing = self.check_obj(target) error = False if chunk not in obj_listing: print(' Chunk %s missing in object listing' % target) error = True # checksum = None else: # TODO check checksum match # checksum = obj_listing[chunk]['hash'] pass try: self.blob_client.chunk_head(chunk) except exc.NotFound as e: self.chunk_not_found += 1 error = True print(' Not found chunk "%s": %s' % (target, str(e))) except Exception as e: self.chunk_exceptions += 1 error = True print(' Exception chunk "%s": %s' % (target, str(e))) if error and self.error_file: self.write_error(target) self.chunks_checked += 1 def check_obj(self, target, recurse=False): account = target.account container = target.container obj = target.obj if (account, container, obj) in self.running: self.running[(account, container, obj)].wait() if (account, container, obj) in self.list_cache: return self.list_cache[(account, container, obj)] self.running[(account, container, obj)] = Event() print('Checking object "%s"' % target) container_listing = self.check_container(target) error = False if obj not in container_listing: print(' Object %s missing in container listing' % target) error = True # checksum = None else: # TODO check checksum match # checksum = container_listing[obj]['hash'] pass results = [] try: _, resp = self.container_client.content_show(acct=account, ref=container, path=obj) except exc.NotFound as e: self.object_not_found += 1 error = True print(' Not found object "%s": %s' % (target, str(e))) except Exception as e: self.object_exceptions += 1 error = True print(' Exception object "%s": %s' % (target, str(e))) else: results = resp chunk_listing = dict() for chunk in results: chunk_listing[chunk['url']] = chunk self.objects_checked += 1 self.list_cache[(account, container, obj)] = chunk_listing self.running[(account, container, obj)].send(True) del self.running[(account, container, obj)] if recurse: for chunk in chunk_listing: t = target.copy() t.chunk = chunk self.pool.spawn_n(self.check_chunk, t) if error and self.error_file: self.write_error(target) return chunk_listing def check_container(self, target, recurse=False): account = target.account container = target.container if (account, container) in self.running: self.running[(account, container)].wait() if (account, container) in self.list_cache: return self.list_cache[(account, container)] self.running[(account, container)] = Event() print('Checking container "%s"' % target) account_listing = self.check_account(target) error = False if container not in account_listing: error = True print(' Container %s missing in account listing' % target) marker = None results = [] while True: try: resp = self.container_client.container_list(acct=account, ref=container, marker=marker) except exc.NotFound as e: self.container_not_found += 1 error = True print(' Not found container "%s": %s' % (target, str(e))) break except Exception as e: self.container_exceptions += 1 error = True print(' Exception container "%s": %s' % (target, str(e))) break if resp['objects']: marker = resp['objects'][-1]['name'] else: break results.extend(resp['objects']) container_listing = dict() for obj in results: container_listing[obj['name']] = obj self.containers_checked += 1 self.list_cache[(account, container)] = container_listing self.running[(account, container)].send(True) del self.running[(account, container)] if recurse: for obj in container_listing: t = target.copy() t.obj = obj self.pool.spawn_n(self.check_obj, t, True) if error and self.error_file: self.write_error(target) return container_listing def check_account(self, target, recurse=False): account = target.account if account in self.running: self.running[account].wait() if account in self.list_cache: return self.list_cache[account] self.running[account] = Event() print('Checking account "%s"' % target) error = False marker = None results = [] while True: try: resp = self.account_client.containers_list(account, marker=marker) except Exception as e: self.account_exceptions += 1 error = True print(' Exception account "%s": %s' % (target, str(e))) break if resp['listing']: marker = resp['listing'][-1][0] else: break results.extend(resp['listing']) containers = dict() for e in results: containers[e[0]] = (e[1], e[2]) self.list_cache[account] = containers self.running[account].send(True) del self.running[account] self.accounts_checked += 1 if recurse: for container in containers: t = target.copy() t.container = container self.pool.spawn_n(self.check_container, t, True) if error and self.error_file: self.write_error(target) return containers def check(self, target): if target.chunk and target.obj and target.container: self.pool.spawn_n(self.check_chunk, target) elif target.obj and target.container: self.pool.spawn_n(self.check_obj, target, True) elif target.container: self.pool.spawn_n(self.check_container, target, True) else: self.pool.spawn_n(self.check_account, target, True) def wait(self): self.pool.waitall() def report(self): def _report_stat(name, stat): print("{0:18}: {1}".format(name, stat)) print() print('Report') _report_stat("Accounts checked", self.accounts_checked) if self.account_not_found: _report_stat("Missing accounts", self.account_not_found) if self.account_exceptions: _report_stat("Exceptions", self.account_not_found) print() _report_stat("Containers checked", self.containers_checked) if self.container_not_found: _report_stat("Missing containers", self.container_not_found) if self.container_exceptions: _report_stat("Exceptions", self.container_exceptions) print() _report_stat("Objects checked", self.objects_checked) if self.object_not_found: _report_stat("Missing objects", self.object_not_found) if self.object_exceptions: _report_stat("Exceptions", self.object_exceptions) print() _report_stat("Chunks checked", self.chunks_checked) if self.chunk_not_found: _report_stat("Missing chunks", self.chunk_not_found) if self.chunk_exceptions: _report_stat("Exceptions", self.chunk_exceptions)
class TestContentFactory(BaseTestCase): def setUp(self): super(TestContentFactory, self).setUp() self.wait_for_score(('meta2', )) self.namespace = self.conf['namespace'] self.chunk_size = self.conf['chunk_size'] self.gridconf = {"namespace": self.namespace} self.content_factory = ContentFactory(self.gridconf) self.container_name = "TestContentFactory%f" % time.time() self.blob_client = BlobClient(conf=self.conf) self.container_client = ContainerClient(self.gridconf) self.container_client.container_create(account=self.account, reference=self.container_name) self.container_id = cid_from_name(self.account, self.container_name).upper() self.stgpol = "SINGLE" self.stgpol_twocopies = "TWOCOPIES" self.stgpol_threecopies = "THREECOPIES" self.stgpol_ec = "EC" def tearDown(self): super(TestContentFactory, self).tearDown() def test_get_ec(self): meta = { "chunk_method": "ec/algo=liberasurecode_rs_vand,k=6,m=2", "ctime": "1450176946", "deleted": "False", "hash": "E952A419957A6E405BFC53EC65483F73", "hash_method": "md5", "id": "3FA2C4A1ED2605005335A276890EC458", "length": "658", "mime_type": "application/octet-stream", "name": "tox.ini", "policy": self.stgpol_ec, "version": "1450176946676289", "oio_version": "4.2", } chunks = [{ "url": "http://127.0.0.1:6012/A0A0", "pos": "0.0", "size": 512, "hash": "E7D4E4AD460971CA2E3141F2102308D4" }, { "url": "http://127.0.0.1:6010/A01", "pos": "0.1", "size": 146, "hash": "760AB5DA7C51A3654F1CA622687CD6C3" }, { "url": "http://127.0.0.1:6011/A00", "pos": "0.2", "size": 512, "hash": "B1D08B86B8CAA90A2092CCA0DF9201DB" }, { "url": "http://127.0.0.1:6013/A0A1", "pos": "0.3", "size": 512, "hash": "DA9D7F72AEEA5791565724424CE45C16" }] self.content_factory.container_client.content_locate = Mock( return_value=(meta, chunks)) c = self.content_factory.get("xxx_container_id", "xxx_content_id", account=self.account, container_name=self.container_name) self.assertEqual(type(c), ECContent) self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458") self.assertEqual(c.length, 658) self.assertEqual(c.path, "tox.ini") self.assertEqual( c.full_path, encode_fullpath(self.account, self.container_name, "tox.ini", meta['version'], meta['id'])) self.assertEqual(c.version, "1450176946676289") # TODO test storage method self.assertEqual(len(c.chunks), 4) self.assertEqual(c.chunks[0].raw(), chunks[0]) self.assertEqual(c.chunks[1].raw(), chunks[1]) self.assertEqual(c.chunks[2].raw(), chunks[2]) self.assertEqual(c.chunks[3].raw(), chunks[3]) def test_get_plain(self): meta = { "chunk_method": "plain/nb_copy=2", "ctime": "1450176946", "deleted": "False", "hash": "E952A419957A6E405BFC53EC65483F73", "hash_method": "md5", "id": "3FA2C4A1ED2605005335A276890EC458", "length": "658", "mime_type": "application/octet-stream", "name": "tox.ini", "policy": self.stgpol_twocopies, "version": "1450176946676289", "oio_version": "4.2", } chunks = [{ "url": "http://127.0.0.1:6010/A0", "pos": "0", "size": 658, "hash": "E952A419957A6E405BFC53EC65483F73" }, { "url": "http://127.0.0.1:6011/A1", "pos": "0", "size": 658, "hash": "E952A419957A6E405BFC53EC65483F73" }] self.content_factory.container_client.content_locate = Mock( return_value=(meta, chunks)) c = self.content_factory.get("xxx_container_id", "xxx_content_id", account=self.account, container_name=self.container_name) self.assertEqual(type(c), PlainContent) self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458") self.assertEqual(c.length, 658) self.assertEqual(c.path, "tox.ini") self.assertEqual(c.version, "1450176946676289") self.assertEqual( c.full_path, encode_fullpath(self.account, self.container_name, "tox.ini", meta['version'], meta['id'])) # TODO test storage_method self.assertEqual(len(c.chunks), 2) self.assertEqual(c.chunks[0].raw(), chunks[0]) self.assertEqual(c.chunks[1].raw(), chunks[1]) def test_get_unknown_content(self): self.assertRaises(ContentNotFound, self.content_factory.get, self.container_id, "1234") def test_new_ec(self): meta = { "chunk_method": "ec/algo=liberasurecode_rs_vand,k=6,m=2", "ctime": "1450341162", "deleted": "False", "hash": "", "hash_method": "md5", "id": "F4B1C8DD132705007DE8B43D0709DAA2", "length": "1000", "mime_type": "application/octet-stream", "name": "titi", "policy": self.stgpol_ec, "version": "1450341162332663", "oio_version": "4.2", } chunks = [{ "url": "http://127.0.0.1:6010/0_p1", "pos": "0.3", "size": 1048576, "hash": "00000000000000000000000000000000" }, { "url": "http://127.0.0.1:6011/0_p0", "pos": "0.2", "size": 1048576, "hash": "00000000000000000000000000000000" }, { "url": "http://127.0.0.1:6016/0_1", "pos": "0.1", "size": 1048576, "hash": "00000000000000000000000000000000" }, { "url": "http://127.0.0.1:6017/0_0", "pos": "0.0", "size": 1048576, "hash": "00000000000000000000000000000000" }] self.content_factory.container_client.content_prepare = Mock( return_value=(meta, chunks)) c = self.content_factory.new("xxx_container_id", "titi", 1000, self.stgpol_ec, account=self.account, container_name=self.container_name) self.assertEqual(type(c), ECContent) self.assertEqual(c.content_id, "F4B1C8DD132705007DE8B43D0709DAA2") self.assertEqual(c.length, 1000) self.assertEqual(c.path, "titi") self.assertEqual(c.version, "1450341162332663") # TODO test storage_method self.assertEqual(len(c.chunks), 4) self.assertEqual(c.chunks[0].raw(), chunks[3]) self.assertEqual(c.chunks[1].raw(), chunks[2]) self.assertEqual(c.chunks[2].raw(), chunks[1]) self.assertEqual(c.chunks[3].raw(), chunks[0]) def _new_content(self, stgpol, data, path="titi", account=None, container_name=None, mime_type=None, properties=None): old_content = self.content_factory.new(self.container_id, path, len(data), stgpol, account=account, container_name=container_name) if properties: old_content.properties = properties if mime_type: old_content.mime_type = mime_type old_content.create(BytesIO(data)) return self.content_factory.get(self.container_id, old_content.content_id) def _test_move_chunk(self, policy): data = random_data(self.chunk_size) content = self._new_content(policy, data) chunk_id = content.chunks.filter(metapos=0)[0].id chunk_url = content.chunks.filter(metapos=0)[0].url chunk_meta, chunk_stream = self.blob_client.chunk_get(chunk_url) chunk_hash = md5_stream(chunk_stream) new_chunk = content.move_chunk(chunk_id) content_updated = self.content_factory.get(self.container_id, content.content_id) hosts = [] for c in content_updated.chunks.filter(metapos=0): self.assertThat(hosts, Not(Contains(c.host))) self.assertNotEqual(c.id, chunk_id) hosts.append(c.host) new_chunk_meta, new_chunk_stream = self.blob_client.chunk_get( new_chunk["url"]) new_chunk_hash = md5_stream(new_chunk_stream) self.assertEqual(new_chunk_hash, chunk_hash) del chunk_meta["chunk_id"] del new_chunk_meta["chunk_id"] self.assertEqual(new_chunk_meta, chunk_meta) def test_single_move_chunk(self): self._test_move_chunk(self.stgpol) def test_twocopies_move_chunk(self): self._test_move_chunk(self.stgpol_twocopies) @ec def test_ec_move_chunk(self): self._test_move_chunk(self.stgpol_ec) def test_move_chunk_not_in_content(self): data = random_data(self.chunk_size) content = self._new_content(self.stgpol_twocopies, data) with ExpectedException(OrphanChunk): content.move_chunk("1234") def test_strange_paths(self): answers = dict() for cname in strange_paths: content = self._new_content(self.stgpol, b"nobody cares", cname) answers[cname] = content _, listing = self.container_client.content_list( self.account, self.container_name) if PY2: obj_set = {k["name"].encode('utf-8') for k in listing["objects"]} else: obj_set = {k["name"] for k in listing["objects"]} try: # Ensure the saved path is the one we gave the object for cname in answers: self.assertEqual(cname, answers[cname].path) fullpath = encode_fullpath(self.account, self.container_name, cname, answers[cname].version, answers[cname].content_id) self.assertEqual(answers[cname].full_path, fullpath) # Ensure all objects appear in listing for cname in strange_paths: self.assertIn(cname, obj_set) finally: # Cleanup for cname in answers: try: content.delete() except Exception: pass
class BlobRegistratorWorker(object): def __init__(self, conf, logger, volume): self.conf = conf self.logger = logger self.volume = volume self.namespace = self.conf["namespace"] self.volume_ns, self.volume_id = check_volume(self.volume) c = dict() c['namespace'] = self.namespace self.client = ContainerClient(c, logger=self.logger) self.report_interval = conf.get( "report_period", default_report_interval) actions = { 'update': BlobRegistratorWorker._update_chunk, 'insert': BlobRegistratorWorker._insert_chunk, 'check': BlobRegistratorWorker._check_chunk, } self.action = actions[conf.get("action", "check")] def pass_with_lock(self): with lock_volume(self.volume): return self.pass_without_lock() def pass_without_lock(self): last_report = now() count, success, fail = 0, 0, 0 if self.namespace != self.volume_ns: self.logger.warn("Forcing the NS to [%s] (previously [%s])", self.namespace, self.volume_ns) self.logger.info("START %s", self.volume) paths = paths_gen(self.volume) for path in paths: # Action try: with open(path) as f: meta = read_chunk_metadata(f) self.action(self, path, f, meta) success = success + 1 except NotFound as e: fail = fail + 1 self.logger.info("ORPHAN %s/%s in %s/%s %s", meta['content_id'], meta['chunk_id'], meta['container_id'], meta['content_path'], str(e)) except Conflict as e: fail = fail + 1 self.logger.info("ALREADY %s/%s in %s/%s %s", meta['content_id'], meta['chunk_id'], meta['container_id'], meta['content_path'], str(e)) except Exception as e: fail = fail + 1 self.logger.warn("ERROR %s/%s in %s/%s %s", meta['content_id'], meta['chunk_id'], meta['container_id'], meta['content_path'], str(e)) count = count + 1 # TODO(jfs): do the throttling # periodical reporting t = now() if t - last_report > self.report_interval: self.logger.info("STEP %d ok %d ko %d", count, success, fail) self.logger.info("FINAL %s %d ok %d ko %d", self.volume, count, success, fail) def _check_chunk(self, path, f, meta): raise Exception("CHECK not yet implemented") def _insert_chunk(self, path, f, meta): cid = meta['container_id'] chunkid = basename(path) bean = meta2bean(self.volume_id, meta) self.client.container_raw_insert(bean, cid=cid) self.logger.info("inserted %s/%s in %s/%s", meta['content_id'], chunkid, cid, meta['content_path']) def _update_chunk(self, path, f, meta): cid = meta['container_id'] chunkid = basename(path) if str(meta['chunk_pos']).startswith('0'): if not self.conf['first']: self.logger.info("skip %s/%s from %s/%s", meta['content_id'], chunkid, cid, meta['content_path']) return pre = meta2bean(self.volume_id, meta) post = meta2bean(self.volume_id, meta) self.client.container_raw_update(pre, post, cid=cid) self.logger.info("updated %s/%s in %s/%s", meta['content_id'], chunkid, cid, meta['content_path'])
class WebhookFilter(Filter): def init(self): self.container_client = ContainerClient(self.conf, logger=self.logger) self.endpoint = self.conf.get('endpoint') # TODO configure pool manager self.http = get_pool_manager() def _request(self, data): try: resp = self.http.request( "POST", self.endpoint, headers={'Content-Type': 'application/json'}, body=data) if resp.status >= 400: raise exceptions.from_response(resp) except urllib3.exceptions.HTTPError as exc: oio_exception_from_httperror(exc) def process(self, env, cb): event = Event(env) url = env['url'] alias, content_header, properties = extract_data_from_event(env) data = { 'id': url['content'], 'account': url['account'], 'container': url['user'], 'name': url['path'], } if all((alias, content_header)) \ and event.event_type == EventTypes.CONTENT_NEW: data.update({ 'md5Hash': content_header['hash'], 'contentType': content_header['mime-type'], 'policy': content_header['policy'], 'chunkMethod': content_header['chunk-method'], 'size': content_header['size'], 'creationTime': alias['ctime'], 'modificationTime': alias['mtime'], 'version': alias['version'], 'metadata': properties, }) elif event.event_type not in (EventTypes.CONTENT_DELETED, EventTypes.CONTENT_APPEND): all_properties = self.container_client.content_get_properties( account=url['account'], reference=url['user'], content=url['content']) data.update({ 'md5Hash': all_properties['hash'], 'contentType': all_properties['mime_type'], 'policy': all_properties['policy'], 'chunkMethod': all_properties['chunk_method'], 'size': all_properties['length'], 'creationTime': all_properties['ctime'], 'modificationTime': all_properties['mtime'], 'version': all_properties['version'], 'metadata': all_properties['properties'], }) body = { 'eventId': env['job_id'], 'eventType': env['event'], 'timestamp': env['when'], 'data': data } try: data = json.dumps(body) self._request(data) except exceptions.OioException as exc: return EventError(event=event, body='webhook error: %s' % exc)(env, cb) return self.app(env, cb)
class TestFilters(BaseTestCase): def setUp(self): super(TestFilters, self).setUp() self.account = self.conf['account'] self.namespace = self.conf['namespace'] self.chunk_size = self.conf['chunk_size'] self.gridconf = {'namespace': self.namespace} self.content_factory = ContentFactory(self.gridconf) self.container_name = 'TestFilter%f' % time.time() self.container_client = ContainerClient(self.gridconf) self.container_client.container_create(account=self.account, reference=self.container_name, admin_mode=True) self.container_id = cid_from_name(self.account, self.container_name).upper() self.stgpol = "SINGLE" def _prepare_content(self, path, content_id, admin_mode): self.container_client.content_prepare(account=self.account, reference=self.container_name, path=path, content_id=content_id, size=1, stgpol=self.stgpol, autocreate=True, admin_mode=admin_mode) def _new_content(self, data, path, admin_mode): old_content = self.content_factory.new(self.container_id, path, len(data), self.stgpol, admin_mode=admin_mode) old_content.create(BytesIO(data), admin_mode=admin_mode) return self.content_factory.get(self.container_id, old_content.content_id) def test_slave_and_admin(self): if not os.getenv("SLAVE"): self.skipTest("must be in slave mode") data = random_data(10) path = 'test_slave' try: self._new_content(data, path, False) except ClientException as exc: self.assertIn('NS slave!', text_type(exc)) else: self.fail("New content: no exception") content = self._new_content(data, path, True) content.delete(admin_mode=True) def test_worm_and_admin(self): if not os.getenv("WORM"): self.skipTest("must be in worm mode") data = random_data(10) path = 'test_worm' content = self._new_content(data, path, True) # Prepare without admin mode: # Since the 'prepare' step is done in the proxy, there is no check # on the pre-existence of the content. The subsequent prepare MUST # now work despite the presence of the content. self._prepare_content(path, None, False) self._prepare_content(path, content.content_id, False) self._prepare_content('test_worm_prepare', content.content_id, False) self._prepare_content(path, random_id(32), False) # Overwrite without admin mode data2 = random_data(11) self.assertRaises(Conflict, self._new_content, data2, path, False) # Prepare with admin mode self._prepare_content(path, None, True) self._prepare_content(path, content.content_id, True) self._prepare_content('test_worm_prepare', content.content_id, True) self._prepare_content(path, random_id(32), True) # Overwrite with admin mode content = self._new_content(data2, path, True) # Delete without admin mode try: content.delete() except ClientException as exc: self.assertIn('worm', str(exc)) else: self.fail("Delete without admin mode: no exception") downloaded_data = ''.join(content.fetch()) self.assertEqual(downloaded_data, data2) # Delete with admin mode content.delete(admin_mode=True)
class TestContentFactory(BaseTestCase): def setUp(self): super(TestContentFactory, self).setUp() self.namespace = self.conf['namespace'] self.chunk_size = self.conf['chunk_size'] self.gridconf = {"namespace": self.namespace} self.content_factory = ContentFactory(self.gridconf) self.container_name = "TestContentFactory%f" % time.time() self.blob_client = BlobClient() self.container_client = ContainerClient(self.gridconf) self.container_client.container_create(acct=self.account, ref=self.container_name) self.container_id = cid_from_name(self.account, self.container_name).upper() def tearDown(self): super(TestContentFactory, self).tearDown() def test_extract_datasec(self): self.content_factory.ns_info = { "data_security": { "DUPONETWO": "DUP:distance=1|nb_copy=2", "RAIN": "RAIN:k=6|m=2|algo=liber8tion" }, "storage_policy": { "RAIN": "NONE:RAIN:NONE", "SINGLE": "NONE:NONE:NONE", "TWOCOPIES": "NONE:DUPONETWO:NONE" } } ds_type, ds_args = self.content_factory._extract_datasec("RAIN") self.assertEqual(ds_type, "RAIN") self.assertEqual(ds_args, { "k": "6", "m": "2", "algo": "liber8tion" }) ds_type, ds_args = self.content_factory._extract_datasec("SINGLE") self.assertEqual(ds_type, "DUP") self.assertEqual(ds_args, { "nb_copy": "1", "distance": "0" }) ds_type, ds_args = self.content_factory._extract_datasec("TWOCOPIES") self.assertEqual(ds_type, "DUP") self.assertEqual(ds_args, { "nb_copy": "2", "distance": "1" }) self.assertRaises(InconsistentContent, self.content_factory._extract_datasec, "UnKnOwN") def test_get_rain(self): meta = { "chunk-method": "plain/rain?algo=liber8tion&k=6&m=2", "ctime": "1450176946", "deleted": "False", "hash": "E952A419957A6E405BFC53EC65483F73", "hash-method": "md5", "id": "3FA2C4A1ED2605005335A276890EC458", "length": "658", "mime-type": "application/octet-stream", "name": "tox.ini", "policy": "RAIN", "version": "1450176946676289" } chunks = [ { "url": "http://127.0.0.1:6012/A0A0", "pos": "0.p0", "size": 512, "hash": "E7D4E4AD460971CA2E3141F2102308D4"}, { "url": "http://127.0.0.1:6010/A01", "pos": "0.1", "size": 146, "hash": "760AB5DA7C51A3654F1CA622687CD6C3"}, { "url": "http://127.0.0.1:6011/A00", "pos": "0.0", "size": 512, "hash": "B1D08B86B8CAA90A2092CCA0DF9201DB"}, { "url": "http://127.0.0.1:6013/A0A1", "pos": "0.p1", "size": 512, "hash": "DA9D7F72AEEA5791565724424CE45C16"} ] self.content_factory.container_client.content_show = Mock( return_value=(meta, chunks)) c = self.content_factory.get("xxx_container_id", "xxx_content_id") self.assertEqual(type(c), RainContent) self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458") self.assertEqual(c.length, 658) self.assertEqual(c.path, "tox.ini") self.assertEqual(c.version, "1450176946676289") self.assertEqual(c.algo, "liber8tion") self.assertEqual(c.k, 6) self.assertEqual(c.m, 2) self.assertEqual(len(c.chunks), 4) self.assertEqual(c.chunks[0].raw(), chunks[2]) self.assertEqual(c.chunks[1].raw(), chunks[1]) self.assertEqual(c.chunks[2].raw(), chunks[0]) self.assertEqual(c.chunks[3].raw(), chunks[3]) def test_get_dup(self): meta = { "chunk-method": "plain/bytes", "ctime": "1450176946", "deleted": "False", "hash": "E952A419957A6E405BFC53EC65483F73", "hash-method": "md5", "id": "3FA2C4A1ED2605005335A276890EC458", "length": "658", "mime-type": "application/octet-stream", "name": "tox.ini", "policy": "TWOCOPIES", "version": "1450176946676289" } chunks = [ { "url": "http://127.0.0.1:6010/A0", "pos": "0", "size": 658, "hash": "E952A419957A6E405BFC53EC65483F73"}, { "url": "http://127.0.0.1:6011/A1", "pos": "0", "size": 658, "hash": "E952A419957A6E405BFC53EC65483F73"} ] self.content_factory.container_client.content_show = Mock( return_value=(meta, chunks)) c = self.content_factory.get("xxx_container_id", "xxx_content_id") self.assertEqual(type(c), DupContent) self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458") self.assertEqual(c.length, 658) self.assertEqual(c.path, "tox.ini") self.assertEqual(c.version, "1450176946676289") self.assertEqual(c.nb_copy, 2) self.assertEqual(c.distance, 1) self.assertEqual(len(c.chunks), 2) self.assertEqual(c.chunks[0].raw(), chunks[0]) self.assertEqual(c.chunks[1].raw(), chunks[1]) def test_get_unknown_content(self): self.assertRaises(ContentNotFound, self.content_factory.get, self.container_id, "1234") def test_new_rain(self): meta = { "chunk-method": "plain/rain?algo=liber8tion&k=6&m=2", "ctime": "1450341162", "deleted": "False", "hash": "", "hash-method": "md5", "id": "F4B1C8DD132705007DE8B43D0709DAA2", "length": "1000", "mime-type": "application/octet-stream", "name": "titi", "policy": "RAIN", "version": "1450341162332663" } chunks = [ { "url": "http://127.0.0.1:6010/0_p1", "pos": "0.p1", "size": 1048576, "hash": "00000000000000000000000000000000"}, { "url": "http://127.0.0.1:6011/0_p0", "pos": "0.p0", "size": 1048576, "hash": "00000000000000000000000000000000"}, { "url": "http://127.0.0.1:6016/0_1", "pos": "0.1", "size": 1048576, "hash": "00000000000000000000000000000000"}, { "url": "http://127.0.0.1:6017/0_0", "pos": "0.0", "size": 1048576, "hash": "00000000000000000000000000000000"} ] self.content_factory.container_client.content_prepare = Mock( return_value=(meta, chunks)) c = self.content_factory.new("xxx_container_id", "titi", 1000, "RAIN") self.assertEqual(type(c), RainContent) self.assertEqual(c.content_id, "F4B1C8DD132705007DE8B43D0709DAA2") self.assertEqual(c.length, 1000) self.assertEqual(c.path, "titi") self.assertEqual(c.version, "1450341162332663") self.assertEqual(c.algo, "liber8tion") self.assertEqual(c.k, 6) self.assertEqual(c.m, 2) self.assertEqual(len(c.chunks), 4) self.assertEqual(c.chunks[0].raw(), chunks[3]) self.assertEqual(c.chunks[1].raw(), chunks[2]) self.assertEqual(c.chunks[2].raw(), chunks[1]) self.assertEqual(c.chunks[3].raw(), chunks[0]) def _new_content(self, stgpol, data, path="titi"): old_content = self.content_factory.new(self.container_id, path, len(data), stgpol) old_content.upload(StringIO.StringIO(data)) return self.content_factory.get(self.container_id, old_content.content_id) def _test_change_policy(self, data_size, old_policy, new_policy): if (old_policy == "RAIN" or new_policy == "RAIN") \ and len(self.conf['rawx']) < 8: self.skipTest("RAIN: Need more than 8 rawx to run") data = random_data(data_size) obj_type = { "SINGLE": DupContent, "TWOCOPIES": DupContent, "THREECOPIES": DupContent, "RAIN": RainContent } old_content = self._new_content(old_policy, data) self.assertEqual(type(old_content), obj_type[old_policy]) changed_content = self.content_factory.change_policy( old_content.container_id, old_content.content_id, new_policy) self.assertRaises(NotFound, self.container_client.content_show, self.account, cid=old_content.container_id, content=old_content.content_id) new_content = self.content_factory.get(self.container_id, changed_content.content_id) self.assertEqual(type(new_content), obj_type[new_policy]) downloaded_data = "".join(new_content.download()) self.assertEqual(downloaded_data, data) def test_change_content_0_byte_policy_single_to_rain(self): self._test_change_policy(0, "SINGLE", "RAIN") def test_change_content_0_byte_policy_rain_to_twocopies(self): self._test_change_policy(0, "RAIN", "TWOCOPIES") def test_change_content_1_byte_policy_single_to_rain(self): self._test_change_policy(1, "SINGLE", "RAIN") def test_change_content_chunksize_bytes_policy_twocopies_to_rain(self): self._test_change_policy(self.chunk_size, "TWOCOPIES", "RAIN") def test_change_content_2xchunksize_bytes_policy_threecopies_to_rain(self): self._test_change_policy(self.chunk_size * 2, "THREECOPIES", "RAIN") def test_change_content_1_byte_policy_rain_to_threecopies(self): self._test_change_policy(1, "RAIN", "THREECOPIES") def test_change_content_chunksize_bytes_policy_rain_to_twocopies(self): self._test_change_policy(self.chunk_size, "RAIN", "TWOCOPIES") def test_change_content_2xchunksize_bytes_policy_rain_to_single(self): self._test_change_policy(self.chunk_size * 2, "RAIN", "SINGLE") def test_change_content_0_byte_policy_twocopies_to_threecopies(self): self._test_change_policy(0, "TWOCOPIES", "THREECOPIES") def test_change_content_chunksize_bytes_policy_single_to_twocopies(self): self._test_change_policy(self.chunk_size, "SINGLE", "TWOCOPIES") def test_change_content_2xchunksize_bytes_policy_3copies_to_single(self): self._test_change_policy(self.chunk_size * 2, "THREECOPIES", "SINGLE") def test_change_content_with_same_policy(self): data = random_data(10) old_content = self._new_content("TWOCOPIES", data) changed_content = self.content_factory.change_policy( old_content.container_id, old_content.content_id, "TWOCOPIES") self.assertEqual(old_content.content_id, changed_content.content_id) def test_change_policy_unknown_content(self): self.assertRaises(ContentNotFound, self.content_factory.change_policy, self.container_id, "1234", "SINGLE") def test_change_policy_unknown_storage_policy(self): data = random_data(10) old_content = self._new_content("TWOCOPIES", data) self.assertRaises(ClientException, self.content_factory.change_policy, self.container_id, old_content.content_id, "UnKnOwN") def _test_move_chunk(self, policy): data = random_data(self.chunk_size) content = self._new_content(policy, data) chunk_id = content.chunks.filter(metapos=0)[0].id chunk_url = content.chunks.filter(metapos=0)[0].url chunk_meta, chunk_stream = self.blob_client.chunk_get(chunk_url) chunk_hash = md5_stream(chunk_stream) new_chunk = content.move_chunk(chunk_id) content_updated = self.content_factory.get(self.container_id, content.content_id) hosts = [] for c in content_updated.chunks.filter(metapos=0): self.assertThat(hosts, Not(Contains(c.host))) self.assertNotEquals(c.id, chunk_id) hosts.append(c.host) new_chunk_meta, new_chunk_stream = self.blob_client.chunk_get( new_chunk["url"]) new_chunk_hash = md5_stream(new_chunk_stream) self.assertEqual(new_chunk_hash, chunk_hash) del chunk_meta["chunk_id"] del new_chunk_meta["chunk_id"] self.assertEqual(new_chunk_meta, chunk_meta) def test_single_move_chunk(self): self._test_move_chunk("SINGLE") def test_twocopies_move_chunk(self): self._test_move_chunk("TWOCOPIES") def test_rain_move_chunk(self): if len(self.conf['rawx']) < 9: self.skipTest("Need more than 8 rawx") self._test_move_chunk("RAIN") def test_move_chunk_not_in_content(self): data = random_data(self.chunk_size) content = self._new_content("TWOCOPIES", data) with ExpectedException(OrphanChunk): content.move_chunk("1234") def test_strange_paths(self): strange_paths = [ "Annual report.txt", "foo+bar=foobar.txt", "100%_bug_free.c", "forward/slash/allowed", "I\\put\\backslashes\\and$dollar$signs$in$file$names", "Je suis tombé sur la tête, mais ça va bien.", "%s%f%u%d%%", "carriage\rreturn", "line\nfeed", "ta\tbu\tla\ttion", "controlchars", ] answers = dict() for cname in strange_paths: content = self._new_content("SINGLE", "nobody cares", cname) answers[cname] = content listing = self.container_client.container_list(self.account, self.container_name) obj_set = {k["name"].encode("utf8", "ignore") for k in listing["objects"]} try: # Ensure the saved path is the one we gave the object for cname in answers: self.assertEqual(cname, answers[cname].path) # Ensure all objects appear in listing for cname in strange_paths: self.assertIn(cname, obj_set) finally: # Cleanup for cname in answers: try: content.delete() except: pass
class TestRebuilderCrawler(BaseTestCase): def setUp(self): super(TestRebuilderCrawler, self).setUp() self.namespace = self.conf['namespace'] self.account = self.conf['account'] self.gridconf = {"namespace": self.namespace} self.container_client = ContainerClient(self.gridconf) self.blob_client = BlobClient() self.container_name = "TestRebuilderCrawler%d" % int(time.time()) self.container_client.container_create(acct=self.account, ref=self.container_name) def _push_content(self, content): for c in content.chunks: self.blob_client.chunk_put(c.url, c.get_create_xattr(), c.data) self.container_client.content_create(acct=content.account, ref=content.container_name, path=content.content_name, size=content.size, checksum=content.hash, content_id=content.content_id, stgpol=content.stgpol, data=content.get_create_meta2()) def tearDown(self): super(TestRebuilderCrawler, self).tearDown() def test_rebuild_chunk(self): # push a new content content = TestContent(self.conf, self.account, self.container_name, "mycontent", "TWOCOPIES") data = "azerty" content.add_chunk(data, pos='0', rawx=0) content.add_chunk(data, pos='0', rawx=1) self._push_content(content) # rebuild the first rawx rebuilder = BlobRebuilderWorker(self.gridconf, None, self.conf['rawx'][0]['addr']) rebuilder.chunk_rebuild(content.container_id, content.content_id, content.chunks[0].id) # check meta2 information _, res = self.container_client.content_show(acct=content.account, ref=content.container_name, content=content.content_id) new_chunk_info = None for c in res: if (c['url'] != content.chunks[0].url and c['url'] != content.chunks[1].url): new_chunk_info = c new_chunk_id = new_chunk_info['url'].split('/')[-1] self.assertEqual(new_chunk_info['hash'], content.chunks[0].hash) self.assertEqual(new_chunk_info['pos'], content.chunks[0].pos) self.assertEqual(new_chunk_info['size'], content.chunks[0].size) # check chunk information meta, stream = self.blob_client.chunk_get(new_chunk_info['url']) self.assertEqual(meta['content_size'], str(content.chunks[0].size)) self.assertEqual(meta['content_path'], content.content_name) self.assertEqual(meta['content_cid'], content.container_id) self.assertEqual(meta['content_id'], content.content_id) self.assertEqual(meta['chunk_id'], new_chunk_id) self.assertEqual(meta['chunk_pos'], content.chunks[0].pos) self.assertEqual(meta['content_version'], content.version) self.assertEqual(meta['chunk_hash'], content.chunks[0].hash) self.assertEqual(stream.next(), content.chunks[0].data) # check rtime flag in rdir rdir_client = RdirClient(self.gridconf) res = rdir_client.chunk_fetch(self.conf['rawx'][0]['addr']) key = (content.container_id, content.content_id, content.chunks[0].id) for i_container, i_content, i_chunk, i_value in res: if (i_container, i_content, i_chunk) == key: check_value = i_value self.assertIsNotNone(check_value.get('rtime')) @unittest.skipIf(len(get_config()['rawx']) != 3, "The number of rawx must be 3") def test_rebuild_no_spare(self): # push a new content content = TestContent(self.conf, self.account, self.container_name, "mycontent", "THREECOPIES") data = "azerty" content.add_chunk(data, pos='0', rawx=0) content.add_chunk(data, pos='0', rawx=1) content.add_chunk(data, pos='0', rawx=2) self._push_content(content) # rebuild the first rawx rebuilder = BlobRebuilderWorker(self.gridconf, None, self.conf['rawx'][0]['addr']) self.assertRaises(SpareChunkException, rebuilder.chunk_rebuild, content.container_id, content.content_id, content.chunks[0].id) def test_rebuild_upload_failed(self): # push a new content content = TestContent(self.conf, self.account, self.container_name, "mycontent", "TWOCOPIES") data = "azerty" content.add_chunk(data, pos='0', rawx=0) content.add_chunk(data, pos='0', rawx=1) self._push_content(content) # rebuild the first rawx rebuilder = BlobRebuilderWorker(self.gridconf, None, self.conf['rawx'][0]['addr']) # Force upload to raise an exception with patch('oio.content.content.BlobClient') as MockClass: instance = MockClass.return_value instance.chunk_copy.side_effect = Exception("xx") self.assertRaises(UnrecoverableContent, rebuilder.chunk_rebuild, content.container_id, content.content_id, content.chunks[0].id) def test_rebuild_nonexistent_chunk(self): rebuilder = BlobRebuilderWorker(self.gridconf, None, self.conf['rawx'][0]['addr']) # try to rebuild an nonexistant chunk self.assertRaises(OrphanChunk, rebuilder.chunk_rebuild, 64 * '0', 32 * '0', 64 * '0') def test_rebuild_orphan_chunk(self): # push a new content content = TestContent(self.conf, self.account, self.container_name, "mycontent", "TWOCOPIES") data = "azerty" content.add_chunk(data, pos='0', rawx=0) content.add_chunk(data, pos='0', rawx=1) self._push_content(content) # rebuild the first rawx rebuilder = BlobRebuilderWorker(self.gridconf, None, self.conf['rawx'][0]['addr']) # try to rebuild an nonexistant chunk self.assertRaises(OrphanChunk, rebuilder.chunk_rebuild, content.container_id, content.content_id, 64 * '0') def test_rebuild_with_no_copy(self): # push a new content content = TestContent(self.conf, self.account, self.container_name, "mycontent", "SINGLE") data = "azerty" content.add_chunk(data, pos='0', rawx=0) self._push_content(content) # rebuild the first rawx rebuilder = BlobRebuilderWorker(self.gridconf, None, self.conf['rawx'][0]['addr']) # try to rebuild chunk without copy self.assertRaises(UnrecoverableContent, rebuilder.chunk_rebuild, content.container_id, content.content_id, content.chunks[0].id)
class BlobMoverWorker(object): def __init__(self, conf, logger, volume): self.conf = conf self.logger = logger or get_logger(conf) self.volume = volume self.run_time = 0 self.passes = 0 self.errors = 0 self.last_reported = 0 self.last_usage_check = 0 self.chunks_run_time = 0 self.bytes_running_time = 0 self.bytes_processed = 0 self.total_bytes_processed = 0 self.total_chunks_processed = 0 self.usage_target = int_value( conf.get('usage_target'), 0) self.usage_check_interval = int_value( conf.get('usage_check_interval'), 3600) self.report_interval = int_value( conf.get('report_interval'), 3600) self.max_chunks_per_second = int_value( conf.get('chunks_per_second'), 30) self.max_bytes_per_second = int_value( conf.get('bytes_per_second'), 10000000) self.blob_client = BlobClient() self.container_client = ContainerClient(conf) def mover_pass(self): self.namespace, self.address = check_volume(self.volume) start_time = report_time = time.time() total_errors = 0 mover_time = 0 paths = paths_gen(self.volume) for path in paths: loop_time = time.time() now = time.time() if now - self.last_usage_check >= self.usage_check_interval: used, total = statfs(self.volume) usage = (float(used) / total) * 100 if usage <= self.usage_target: self.logger.info( 'current usage %.2f%%: target reached (%.2f%%)', usage, self.usage_target) self.last_usage_check = now break self.safe_chunk_move(path) self.chunks_run_time = ratelimit( self.chunks_run_time, self.max_chunks_per_second ) self.total_chunks_processed += 1 now = time.time() if now - self.last_reported >= self.report_interval: self.logger.info( '%(start_time)s ' '%(passes)d ' '%(errors)d ' '%(c_rate).2f ' '%(b_rate).2f ' '%(total).2f ' '%(mover_time).2f' '%(mover_rate).2f' % { 'start_time': time.ctime(report_time), 'passes': self.passes, 'errors': self.errors, 'c_rate': self.passes / (now - report_time), 'b_rate': self.bytes_processed / (now - report_time), 'total': (now - start_time), 'mover_time': mover_time, 'mover_rate': mover_time / (now - start_time) } ) report_time = now total_errors += self.errors self.passes = 0 self.bytes_processed = 0 self.last_reported = now mover_time += (now - loop_time) elapsed = (time.time() - start_time) or 0.000001 self.logger.info( '%(elapsed).02f ' '%(errors)d ' '%(chunk_rate).2f ' '%(bytes_rate).2f ' '%(mover_time).2f ' '%(mover_rate).2f' % { 'elapsed': elapsed, 'errors': total_errors + self.errors, 'chunk_rate': self.total_chunks_processed / elapsed, 'bytes_rate': self.total_bytes_processed / elapsed, 'mover_time': mover_time, 'mover_rate': mover_time / elapsed } ) def safe_chunk_move(self, path): try: self.chunk_move(path) except Exception as e: self.errors += 1 self.logger.error('ERROR while moving chunk %s: %s', path, e) self.passes += 1 def load_chunk_metadata(self, path): with open(path) as f: return read_chunk_metadata(f) def chunk_move(self, path): meta = self.load_chunk_metadata(path) content_cid = meta['content_cid'] content_path = meta['content_path'] chunk_url = 'http://%s/%s' % \ (self.address, meta['chunk_id']) try: data = self.container_client.content_show( cid=content_cid, path=content_path) except exc.NotFound: raise exc.OrphanChunk('Content not found') current_chunk = None notin = [] for c in data: if c['pos'] == meta['chunk_pos']: notin.append(c) for c in notin: if c['url'] == chunk_url: current_chunk = c notin.remove(c) if not current_chunk: raise exc.OrphanChunk('Chunk not found in content') spare_data = {'notin': notin, 'broken': [current_chunk], 'size': 0} spare_resp = self.container_client.content_spare( cid=content_cid, path=content_path, data=spare_data) new_chunk = spare_resp['chunks'][0] self.blob_client.chunk_copy( current_chunk['url'], new_chunk['id']) old = [{'type': 'chunk', 'id': current_chunk['url'], 'hash': meta['chunk_hash'], 'size': int(meta['chunk_size'])}] new = [{'type': 'chunk', 'id': new_chunk['id'], 'hash': meta['chunk_hash'], 'size': int(meta['chunk_size'])}] update_data = {'old': old, 'new': new} self.container_client.container_raw_update( cid=content_cid, data=update_data) self.blob_client.chunk_delete(current_chunk['url']) self.logger.info( 'moved chunk %s to %s', current_chunk['url'], new_chunk['id'])
class TestBlobAuditorFunctional(BaseTestCase): def setUp(self): super(TestBlobAuditorFunctional, self).setUp() self.namespace = self.conf['namespace'] self.account = self.conf['account'] self.test_dir = self.conf['sds_path'] self.chars = string.ascii_lowercase + string.ascii_uppercase +\ string.digits self.chars_id = string.digits + 'ABCDEF' self.rawx = 'http://' + self.conf["rawx"][0]['addr'] self.h = hashlib.new('md5') conf = {"namespace": self.namespace} self.auditor = BlobAuditorWorker(conf, get_logger(None), None) self.container_c = ContainerClient(conf) self.blob_c = BlobClient() self.ref = rand_generator(self.chars, 8) self.container_c.container_create(self.account, self.ref) self.url_rand = rand_generator(self.chars_id, 64) self.data = rand_generator(self.chars, 1280) self.h.update(self.data) self.hash_rand = self.h.hexdigest().lower() self.content = TestContent(rand_generator(self.chars, 6), len(self.data), self.url_rand, 1) self.content.id_container = cid_from_name(self.account, self.ref).upper() self.chunk = TestChunk(self.content.size, self.url_rand, 0, self.hash_rand) self.chunk_url = "%s/%s" % (self.rawx, self.chunk.id_chunk) self.chunk_proxy = { "hash": self.chunk.md5, "pos": "0", "size": self.chunk.size, "url": self.chunk_url } chunk_meta = { 'content_size': self.content.size, 'content_chunksnb': self.content.nb_chunks, 'content_path': self.content.path, 'content_cid': self.content.id_container, 'content_mimetype': 'application/octet-stream', 'content_chunkmethod': 'bytes', 'content_policy': 'TESTPOLICY', 'content_id': '0000', 'content_version': 1, 'chunk_id': self.chunk.id_chunk, 'chunk_pos': self.chunk.pos } self.blob_c.chunk_put(self.chunk_url, chunk_meta, self.data) self.chunk_path = self.test_dir + '/data/NS-rawx-1/' +\ self.chunk.id_chunk[0:2] + "/" + self.chunk.id_chunk self.bad_container_id = '0' * 64 def tearDown(self): super(TestBlobAuditorFunctional, self).tearDown() try: self.container_c.content_delete(self.account, self.ref, self.content.path) except Exception: pass try: self.container_c.container_destroy(self.account, self.ref) except Exception: pass try: os.remove(self.chunk_path) except Exception: pass def init_content(self): self.container_c.content_create(self.account, self.ref, self.content.path, self.chunk.size, self.hash_rand, data=[self.chunk_proxy]) def test_chunk_audit(self): self.init_content() self.auditor.chunk_audit(self.chunk_path) def test_content_deleted(self): self.assertRaises(exc.OrphanChunk, self.auditor.chunk_audit, self.chunk_path) def test_container_deleted(self): self.container_c.container_destroy(self.account, self.ref) self.assertRaises(exc.OrphanChunk, self.auditor.chunk_audit, self.chunk_path) def test_chunk_corrupted(self): self.init_content() with open(self.chunk_path, "w") as f: f.write(rand_generator(self.chars, 1280)) self.assertRaises(exc.CorruptedChunk, self.auditor.chunk_audit, self.chunk_path) def test_chunk_bad_size(self): self.init_content() with open(self.chunk_path, "w") as f: f.write(rand_generator(self.chars, 320)) self.assertRaises(exc.FaultyChunk, self.auditor.chunk_audit, self.chunk_path) def test_xattr_bad_content_nbchunk(self): self.init_content() xattr.setxattr(self.chunk_path, 'user.grid.content.nbchunk', '42') self.assertRaises(exc.FaultyChunk, self.auditor.chunk_audit, self.chunk_path) def test_xattr_bad_chunk_size(self): self.init_content() xattr.setxattr(self.chunk_path, 'user.grid.chunk.size', '-1') self.assertRaises(exc.FaultyChunk, self.auditor.chunk_audit, self.chunk_path) def test_xattr_bad_chunk_hash(self): self.init_content() xattr.setxattr(self.chunk_path, 'user.grid.chunk.hash', 'WRONG_HASH') self.assertRaises(exc.CorruptedChunk, self.auditor.chunk_audit, self.chunk_path) def test_xattr_bad_content_size(self): self.init_content() xattr.setxattr(self.chunk_path, 'user.grid.content.size', '-1') self.assertRaises(exc.FaultyChunk, self.auditor.chunk_audit, self.chunk_path) def test_xattr_bad_content_path(self): self.init_content() xattr.setxattr(self.chunk_path, 'user.grid.content.path', 'WRONG_PATH') self.assertRaises(exc.OrphanChunk, self.auditor.chunk_audit, self.chunk_path) def test_xattr_bad_chunk_id(self): self.init_content() xattr.setxattr(self.chunk_path, 'user.grid.chunk.id', 'WRONG_ID') self.assertRaises(exc.OrphanChunk, self.auditor.chunk_audit, self.chunk_path) def test_xattr_bad_content_container(self): self.init_content() xattr.setxattr(self.chunk_path, 'user.grid.content.container', self.bad_container_id) self.assertRaises(exc.OrphanChunk, self.auditor.chunk_audit, self.chunk_path) def test_xattr_bad_chunk_position(self): self.init_content() xattr.setxattr(self.chunk_path, 'user.grid.chunk.position', '42') self.assertRaises(exc.FaultyChunk, self.auditor.chunk_audit, self.chunk_path) def test_chunk_bad_hash(self): self.h.update(self.data) self.hash_rand = self.h.hexdigest().lower() self.chunk.md5 = self.hash_rand self.chunk_proxy['hash'] = self.chunk.md5 self.init_content() self.assertRaises(exc.FaultyChunk, self.auditor.chunk_audit, self.chunk_path) def test_chunk_bad_length(self): self.chunk.size = 320 self.chunk_proxy['size'] = self.chunk.size self.init_content() self.assertRaises(exc.FaultyChunk, self.auditor.chunk_audit, self.chunk_path) def test_chunk_bad_chunk_size(self): self.chunk.size = 320 self.chunk_proxy['size'] = self.chunk.size self.init_content() self.assertRaises(exc.FaultyChunk, self.auditor.chunk_audit, self.chunk_path) def test_chunk_bad_url(self): self.chunk_proxy['url'] = '%s/WRONG_ID' % self.rawx self.init_content() self.assertRaises(exc.OrphanChunk, self.auditor.chunk_audit, self.chunk_path) def test_content_bad_path(self): self.content.path = 'BAD_PATH' self.init_content() self.assertRaises(exc.OrphanChunk, self.auditor.chunk_audit, self.chunk_path)
def setUp(self): super(TestBlobAuditorFunctional, self).setUp() self.namespace = self.conf['namespace'] self.account = self.conf['account'] self.test_dir = self.conf['sds_path'] self.chars = string.ascii_lowercase + string.ascii_uppercase +\ string.digits self.chars_id = string.digits + 'ABCDEF' self.rawx = 'http://' + self.conf["rawx"][0]['addr'] self.h = hashlib.new('md5') conf = {"namespace": self.namespace} self.auditor = BlobAuditorWorker(conf, get_logger(None), None) self.container_c = ContainerClient(conf) self.blob_c = BlobClient() self.ref = rand_generator(self.chars, 8) self.container_c.container_create(self.account, self.ref) self.url_rand = rand_generator(self.chars_id, 64) self.data = rand_generator(self.chars, 1280) self.h.update(self.data) self.hash_rand = self.h.hexdigest().lower() self.content = TestContent(rand_generator(self.chars, 6), len(self.data), self.url_rand, 1) self.content.id_container = cid_from_name(self.account, self.ref).upper() self.chunk = TestChunk(self.content.size, self.url_rand, 0, self.hash_rand) self.chunk_url = "%s/%s" % (self.rawx, self.chunk.id_chunk) self.chunk_proxy = { "hash": self.chunk.md5, "pos": "0", "size": self.chunk.size, "url": self.chunk_url } chunk_meta = { 'content_size': self.content.size, 'content_chunksnb': self.content.nb_chunks, 'content_path': self.content.path, 'content_cid': self.content.id_container, 'content_mimetype': 'application/octet-stream', 'content_chunkmethod': 'bytes', 'content_policy': 'TESTPOLICY', 'content_id': '0000', 'content_version': 1, 'chunk_id': self.chunk.id_chunk, 'chunk_pos': self.chunk.pos } self.blob_c.chunk_put(self.chunk_url, chunk_meta, self.data) self.chunk_path = self.test_dir + '/data/NS-rawx-1/' +\ self.chunk.id_chunk[0:2] + "/" + self.chunk.id_chunk self.bad_container_id = '0' * 64
class TestRebuilderCrawler(BaseTestCase): def setUp(self): super(TestRebuilderCrawler, self).setUp() self.namespace = self.conf['namespace'] self.account = self.conf['account'] self.gridconf = {"namespace": self.namespace} self.container_client = ContainerClient(self.gridconf) self.blob_client = BlobClient() self.container_name = "TestRebuilderCrawler%d" % int(time.time()) self.container_client.container_create(acct=self.account, ref=self.container_name) def _push_content(self, content): for c in content.chunks: self.blob_client.chunk_put(c.url, c.get_create_xattr(), c.data) self.container_client.content_create(acct=content.account, ref=content.container_name, path=content.content_name, size=content.size, checksum=content.hash, content_id=content.content_id, stgpol=content.stgpol, data=content.get_create_meta2()) def tearDown(self): super(TestRebuilderCrawler, self).tearDown() def test_rebuild_chunk(self): # push a new content content = TestContent(self.conf, self.account, self.container_name, "mycontent", "TWOCOPIES") data = "azerty" content.add_chunk(data, pos='0', rawx=0) content.add_chunk(data, pos='0', rawx=1) self._push_content(content) # rebuild the first rawx rebuilder = BlobRebuilderWorker(self.gridconf, None, self.conf['rawx'][0]['addr']) rebuilder.chunk_rebuild(content.container_id, content.content_id, content.chunks[0].id) # check meta2 information _, res = self.container_client.content_show(acct=content.account, ref=content.container_name, content=content.content_id) new_chunk_info = None for c in res: if (c['url'] != content.chunks[0].url and c['url'] != content.chunks[1].url): new_chunk_info = c new_chunk_id = new_chunk_info['url'].split('/')[-1] self.assertEqual(new_chunk_info['hash'], content.chunks[0].hash) self.assertEqual(new_chunk_info['pos'], content.chunks[0].pos) self.assertEqual(new_chunk_info['size'], content.chunks[0].size) # check chunk information meta, stream = self.blob_client.chunk_get(new_chunk_info['url']) self.assertEqual(meta['content_size'], str(content.chunks[0].size)) self.assertEqual(meta['content_path'], content.content_name) self.assertEqual(meta['content_cid'], content.container_id) self.assertEqual(meta['content_id'], content.content_id) self.assertEqual(meta['chunk_id'], new_chunk_id) self.assertEqual(meta['chunk_pos'], content.chunks[0].pos) self.assertEqual(meta['content_version'], content.version) self.assertEqual(meta['chunk_hash'], content.chunks[0].hash) self.assertEqual(stream.next(), content.chunks[0].data) # check rtime flag in rdir rdir_client = RdirClient(self.gridconf) res = rdir_client.chunk_fetch(self.conf['rawx'][0]['addr']) key = (content.container_id, content.content_id, content.chunks[0].id) for i_container, i_content, i_chunk, i_value in res: if (i_container, i_content, i_chunk) == key: check_value = i_value self.assertIsNotNone(check_value.get('rtime')) @unittest.skipIf( len(get_config()['rawx']) != 3, "The number of rawx must be 3") def test_rebuild_no_spare(self): # push a new content content = TestContent(self.conf, self.account, self.container_name, "mycontent", "THREECOPIES") data = "azerty" content.add_chunk(data, pos='0', rawx=0) content.add_chunk(data, pos='0', rawx=1) content.add_chunk(data, pos='0', rawx=2) self._push_content(content) # rebuild the first rawx rebuilder = BlobRebuilderWorker(self.gridconf, None, self.conf['rawx'][0]['addr']) self.assertRaises(SpareChunkException, rebuilder.chunk_rebuild, content.container_id, content.content_id, content.chunks[0].id) def test_rebuild_upload_failed(self): # push a new content content = TestContent(self.conf, self.account, self.container_name, "mycontent", "TWOCOPIES") data = "azerty" content.add_chunk(data, pos='0', rawx=0) content.add_chunk(data, pos='0', rawx=1) self._push_content(content) # rebuild the first rawx rebuilder = BlobRebuilderWorker(self.gridconf, None, self.conf['rawx'][0]['addr']) # Force upload to raise an exception with patch('oio.content.content.BlobClient') as MockClass: instance = MockClass.return_value instance.chunk_copy.side_effect = Exception("xx") self.assertRaises(UnrecoverableContent, rebuilder.chunk_rebuild, content.container_id, content.content_id, content.chunks[0].id) def test_rebuild_nonexistent_chunk(self): rebuilder = BlobRebuilderWorker(self.gridconf, None, self.conf['rawx'][0]['addr']) # try to rebuild an nonexistant chunk self.assertRaises(OrphanChunk, rebuilder.chunk_rebuild, 64 * '0', 32 * '0', 64 * '0') def test_rebuild_orphan_chunk(self): # push a new content content = TestContent(self.conf, self.account, self.container_name, "mycontent", "TWOCOPIES") data = "azerty" content.add_chunk(data, pos='0', rawx=0) content.add_chunk(data, pos='0', rawx=1) self._push_content(content) # rebuild the first rawx rebuilder = BlobRebuilderWorker(self.gridconf, None, self.conf['rawx'][0]['addr']) # try to rebuild an nonexistant chunk self.assertRaises(OrphanChunk, rebuilder.chunk_rebuild, content.container_id, content.content_id, 64 * '0') def test_rebuild_with_no_copy(self): # push a new content content = TestContent(self.conf, self.account, self.container_name, "mycontent", "SINGLE") data = "azerty" content.add_chunk(data, pos='0', rawx=0) self._push_content(content) # rebuild the first rawx rebuilder = BlobRebuilderWorker(self.gridconf, None, self.conf['rawx'][0]['addr']) # try to rebuild chunk without copy self.assertRaises(UnrecoverableContent, rebuilder.chunk_rebuild, content.container_id, content.content_id, content.chunks[0].id)
class TestPlainContent(BaseTestCase): def setUp(self): super(TestPlainContent, self).setUp() if len(self.conf['services']['rawx']) < 4: self.skipTest("Plain tests needs more than 3 rawx to run") self.namespace = self.conf['namespace'] self.account = self.conf['account'] self.chunk_size = self.conf['chunk_size'] self.gridconf = {"namespace": self.namespace} self.content_factory = ContentFactory(self.gridconf, logger=self.logger) self.container_client = ContainerClient(self.gridconf, logger=self.logger) self.blob_client = BlobClient(self.conf, logger=self.logger) self.container_name = "TestPlainContent-%f" % time.time() self.container_client.container_create(account=self.account, reference=self.container_name) self.container_id = cid_from_name(self.account, self.container_name).upper() self.content = random_str(64) self.stgpol = "SINGLE" self.stgpol_twocopies = "TWOCOPIES" self.stgpol_threecopies = "THREECOPIES" def _test_create(self, stgpol, data_size): data = random_data(data_size) content = self.content_factory.new(self.container_id, self.content, len(data), stgpol) content.create(BytesIO(data)) meta, chunks = self.container_client.content_locate( cid=self.container_id, content=content.content_id) self.assertEqual(meta['hash'], md5_data(data)) self.assertEqual(meta['length'], str(len(data))) self.assertEqual(meta['policy'], stgpol) self.assertEqual(meta['name'], self.content) metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size)) if metachunk_nb == 0: metachunk_nb = 1 # special case for empty content chunks = ChunksHelper(chunks) # TODO NO NO NO if stgpol == self.stgpol_threecopies: nb_copy = 3 elif stgpol == self.stgpol_twocopies: nb_copy = 2 elif stgpol == self.stgpol: nb_copy = 1 self.assertEqual(len(chunks), metachunk_nb * nb_copy) for pos in range(metachunk_nb): chunks_at_pos = chunks.filter(pos=pos) self.assertEqual(len(chunks_at_pos), nb_copy) data_begin = pos * self.chunk_size data_end = pos * self.chunk_size + self.chunk_size chunk_hash = md5_data(data[data_begin:data_end]) for chunk in chunks_at_pos: meta, stream = self.blob_client.chunk_get(chunk.url) self.assertEqual(md5_stream(stream), chunk_hash) self.assertEqual(meta['content_path'], self.content) self.assertEqual(meta['container_id'], self.container_id) self.assertEqual(meta['content_id'], meta['content_id']) self.assertEqual(meta['chunk_id'], chunk.id) self.assertEqual(meta['chunk_pos'], str(pos)) # Check that chunk data matches chunk hash from xattr self.assertEqual(meta['chunk_hash'], chunk_hash) # Check that chunk data matches chunk hash from database self.assertEqual(chunk.checksum, chunk_hash) full_path = encode_fullpath(self.account, self.container_name, self.content, meta['content_version'], meta['content_id']) self.assertEqual(meta['full_path'], full_path) self.assertEqual(meta['oio_version'], '4.2') def test_twocopies_create_0_byte(self): self._test_create(self.stgpol_twocopies, 0) def test_twocopies_create_1_byte(self): self._test_create(self.stgpol_twocopies, 1) def test_twocopies_create_chunksize_bytes(self): self._test_create(self.stgpol_twocopies, self.chunk_size) def test_twocopies_create_chunksize_plus_1_bytes(self): self._test_create(self.stgpol_twocopies, self.chunk_size + 1) def test_twocopies_create_6294503_bytes(self): self._test_create(self.stgpol_twocopies, 6294503) def test_single_create_0_byte(self): self._test_create(self.stgpol, 0) def test_single_create_chunksize_plus_1_bytes(self): self._test_create(self.stgpol, self.chunk_size + 1) def _new_content(self, stgpol, data, broken_pos_list=[]): old_content = self.content_factory.new(self.container_id, self.content, len(data), stgpol) old_content.create(BytesIO(data)) broken_chunks_info = {} for pos, idx in broken_pos_list: c = old_content.chunks.filter(pos=pos)[idx] meta, stream = self.blob_client.chunk_get(c.url) if pos not in broken_chunks_info: broken_chunks_info[pos] = {} broken_chunks_info[pos][idx] = { "url": c.url, "id": c.id, "hash": c.checksum, "dl_meta": meta, "dl_hash": md5_stream(stream) } self.blob_client.chunk_delete(c.url) # get the new structure of the uploaded content return (self.content_factory.get(self.container_id, old_content.content_id), broken_chunks_info) def _rebuild_and_check(self, content, broken_chunks_info, full_rebuild_pos, allow_frozen_container=False): rebuild_pos, rebuild_idx = full_rebuild_pos rebuild_chunk_info = broken_chunks_info[rebuild_pos][rebuild_idx] content.rebuild_chunk(rebuild_chunk_info["id"], allow_frozen_container=allow_frozen_container) # get the new structure of the content rebuilt_content = self.content_factory.get(self.container_id, content.content_id) # find the rebuilt chunk for c in rebuilt_content.chunks.filter(pos=rebuild_pos): if len(content.chunks.filter(id=c.id)) > 0: # not the rebuilt chunk # if this chunk is broken, it must not have been rebuilt for b_c_i in broken_chunks_info[rebuild_pos].values(): if c.id == b_c_i["id"]: with ExpectedException(NotFound): _, _ = self.blob_client.chunk_get(c.url) continue meta, stream = self.blob_client.chunk_get(c.url) self.assertEqual(meta["chunk_id"], c.id) self.assertEqual(md5_stream(stream), rebuild_chunk_info["dl_hash"]) self.assertEqual(c.checksum, rebuild_chunk_info["hash"]) self.assertThat(c.url, NotEquals(rebuild_chunk_info["url"])) del meta["chunk_id"] del rebuild_chunk_info["dl_meta"]["chunk_id"] self.assertEqual(meta, rebuild_chunk_info["dl_meta"]) def _test_rebuild(self, stgpol, data_size, broken_pos_list, full_rebuild_pos): data = random_data(data_size) content, broken_chunks_info = self._new_content( stgpol, data, broken_pos_list) self._rebuild_and_check(content, broken_chunks_info, full_rebuild_pos) def test_2copies_content_0_byte_1broken_rebuild_pos_0_idx_0(self): self._test_rebuild(self.stgpol_twocopies, 0, [(0, 0)], (0, 0)) def test_2copies_content_1_byte_1broken_rebuild_pos_0_idx_1(self): self._test_rebuild(self.stgpol_twocopies, 1, [(0, 1)], (0, 1)) def test_3copies_content_chunksize_bytes_2broken_rebuild_pos_0_idx_1(self): if len(self.conf['services']['rawx']) <= 3: self.skipTest("Need more than 3 rawx") self._test_rebuild(self.stgpol_threecopies, self.chunk_size, [(0, 0), (0, 1)], (0, 1)) def test_3copies_content_2xchksize_bytes_2broken_rebuild_pos_1_idx_2(self): self._test_rebuild(self.stgpol_threecopies, 2 * self.chunk_size, [(1, 0), (1, 2)], (1, 2)) def test_2copies_content_0_byte_2broken_rebuild_pos_0_idx_0(self): with ExpectedException(UnrecoverableContent): self._test_rebuild(self.stgpol_twocopies, 0, [(0, 0), (0, 1)], (0, 0)) def test_rebuild_chunk_in_frozen_container(self): data = random_data(self.chunk_size) content, broken_chunks_info = self._new_content( self.stgpol_twocopies, data, [(0, 0)]) system = dict() system['sys.status'] = str(OIO_DB_FROZEN) self.container_client.container_set_properties(self.account, self.container_name, None, system=system) try: full_rebuild_pos = (0, 0) rebuild_pos, rebuild_idx = full_rebuild_pos rebuild_chunk_info = broken_chunks_info[rebuild_pos][rebuild_idx] self.assertRaises(ServiceBusy, content.rebuild_chunk, rebuild_chunk_info["id"]) finally: system['sys.status'] = str(OIO_DB_ENABLED) self.container_client.container_set_properties(self.account, self.container_name, None, system=system) self._rebuild_and_check(content, broken_chunks_info, full_rebuild_pos, allow_frozen_container=True) def _test_fetch(self, stgpol, data_size, broken_pos_list): data = random_data(data_size) content, _ = self._new_content(stgpol, data, broken_pos_list) fetched_data = b''.join(content.fetch()) self.assertEqual(fetched_data, data) for pos, idx in broken_pos_list: # check nothing has been rebuilt c = content.chunks.filter(pos=pos)[0] self.assertRaises(NotFound, self.blob_client.chunk_delete, c.url) def test_twocopies_fetch_content_0_byte_without_broken_chunks(self): self._test_fetch(self.stgpol_twocopies, 0, []) def test_twocopies_fetch_content_0_byte_with_broken_0_0(self): self._test_fetch(self.stgpol_twocopies, 0, [(0, 0)]) def test_twocopies_fetch_content_1_byte_without_broken_chunks(self): self._test_fetch(self.stgpol_twocopies, 1, []) def test_twocopies_fetch_content_1_byte_with_broken_0_0(self): self._test_fetch(self.stgpol_twocopies, 1, [(0, 0)]) def test_twocopies_fetch_chunksize_bytes_without_broken_chunks(self): self._test_fetch(self.stgpol_twocopies, self.chunk_size, []) def test_twocopies_fetch_2xchuksize_bytes_with_broken_0_0_and_1_0(self): self._test_fetch(self.stgpol_twocopies, self.chunk_size * 2, [(0, 0), (1, 0)]) def test_twocopies_fetch_content_chunksize_bytes_2_broken_chunks(self): data = random_data(self.chunk_size) content, _ = self._new_content(self.stgpol_twocopies, data, [(0, 0), (0, 1)]) gen = content.fetch() self.assertRaises(UnrecoverableContent, lambda: next(gen)) def test_single_fetch_content_1_byte_without_broken_chunks(self): self._test_fetch(self.stgpol, 1, []) def test_single_fetch_chunksize_bytes_plus_1_without_broken_chunk(self): self._test_fetch(self.stgpol, self.chunk_size * 2, [])
class TestContentFactory(BaseTestCase): def setUp(self): super(TestContentFactory, self).setUp() self.namespace = self.conf['namespace'] self.chunk_size = self.conf['chunk_size'] self.gridconf = {"namespace": self.namespace} self.content_factory = ContentFactory(self.gridconf) self.container_name = "TestContentFactory%f" % time.time() self.container_client = ContainerClient(self.gridconf) self.container_client.container_create(acct=self.account, ref=self.container_name) self.container_id = cid_from_name(self.account, self.container_name).upper() def tearDown(self): super(TestContentFactory, self).tearDown() def test_extract_datasec(self): self.content_factory.ns_info = { "data_security": { "DUPONETWO": "DUP:distance=1|nb_copy=2", "RAIN": "RAIN:k=6|m=2|algo=liber8tion" }, "storage_policy": { "RAIN": "NONE:RAIN:NONE", "SINGLE": "NONE:NONE:NONE", "TWOCOPIES": "NONE:DUPONETWO:NONE" } } ds_type, ds_args = self.content_factory._extract_datasec("RAIN") self.assertEqual(ds_type, "RAIN") self.assertEqual(ds_args, {"k": "6", "m": "2", "algo": "liber8tion"}) ds_type, ds_args = self.content_factory._extract_datasec("SINGLE") self.assertEqual(ds_type, "DUP") self.assertEqual(ds_args, {"nb_copy": "1", "distance": "0"}) ds_type, ds_args = self.content_factory._extract_datasec("TWOCOPIES") self.assertEqual(ds_type, "DUP") self.assertEqual(ds_args, {"nb_copy": "2", "distance": "1"}) self.assertRaises(InconsistentContent, self.content_factory._extract_datasec, "UnKnOwN") def test_get_rain(self): meta = { "chunk-method": "plain/rain?algo=liber8tion&k=6&m=2", "ctime": "1450176946", "deleted": "False", "hash": "E952A419957A6E405BFC53EC65483F73", "hash-method": "md5", "id": "3FA2C4A1ED2605005335A276890EC458", "length": "658", "mime-type": "application/octet-stream", "name": "tox.ini", "policy": "RAIN", "version": "1450176946676289" } chunks = [{ "url": "http://127.0.0.1:6012/A0A0", "pos": "0.p0", "size": 512, "hash": "E7D4E4AD460971CA2E3141F2102308D4" }, { "url": "http://127.0.0.1:6010/A01", "pos": "0.1", "size": 146, "hash": "760AB5DA7C51A3654F1CA622687CD6C3" }, { "url": "http://127.0.0.1:6011/A00", "pos": "0.0", "size": 512, "hash": "B1D08B86B8CAA90A2092CCA0DF9201DB" }, { "url": "http://127.0.0.1:6013/A0A1", "pos": "0.p1", "size": 512, "hash": "DA9D7F72AEEA5791565724424CE45C16" }] self.content_factory.container_client.content_show = Mock( return_value=(meta, chunks)) c = self.content_factory.get("xxx_container_id", "xxx_content_id") self.assertEqual(type(c), RainContent) self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458") self.assertEqual(c.length, 658) self.assertEqual(c.path, "tox.ini") self.assertEqual(c.version, "1450176946676289") self.assertEqual(c.algo, "liber8tion") self.assertEqual(c.k, 6) self.assertEqual(c.m, 2) self.assertEqual(len(c.chunks), 4) self.assertEqual(c.chunks[0].raw(), chunks[2]) self.assertEqual(c.chunks[1].raw(), chunks[1]) self.assertEqual(c.chunks[2].raw(), chunks[0]) self.assertEqual(c.chunks[3].raw(), chunks[3]) def test_get_dup(self): meta = { "chunk-method": "plain/bytes", "ctime": "1450176946", "deleted": "False", "hash": "E952A419957A6E405BFC53EC65483F73", "hash-method": "md5", "id": "3FA2C4A1ED2605005335A276890EC458", "length": "658", "mime-type": "application/octet-stream", "name": "tox.ini", "policy": "TWOCOPIES", "version": "1450176946676289" } chunks = [{ "url": "http://127.0.0.1:6010/A0", "pos": "0", "size": 658, "hash": "E952A419957A6E405BFC53EC65483F73" }, { "url": "http://127.0.0.1:6011/A1", "pos": "0", "size": 658, "hash": "E952A419957A6E405BFC53EC65483F73" }] self.content_factory.container_client.content_show = Mock( return_value=(meta, chunks)) c = self.content_factory.get("xxx_container_id", "xxx_content_id") self.assertEqual(type(c), DupContent) self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458") self.assertEqual(c.length, 658) self.assertEqual(c.path, "tox.ini") self.assertEqual(c.version, "1450176946676289") self.assertEqual(c.nb_copy, 2) self.assertEqual(c.distance, 1) self.assertEqual(len(c.chunks), 2) self.assertEqual(c.chunks[0].raw(), chunks[0]) self.assertEqual(c.chunks[1].raw(), chunks[1]) def test_new_rain(self): meta = { "chunk-method": "plain/rain?algo=liber8tion&k=6&m=2", "ctime": "1450341162", "deleted": "False", "hash": "", "hash-method": "md5", "id": "F4B1C8DD132705007DE8B43D0709DAA2", "length": "1000", "mime-type": "application/octet-stream", "name": "titi", "policy": "RAIN", "version": "1450341162332663" } chunks = [{ "url": "http://127.0.0.1:6010/0_p1", "pos": "0.p1", "size": 1048576, "hash": "00000000000000000000000000000000" }, { "url": "http://127.0.0.1:6011/0_p0", "pos": "0.p0", "size": 1048576, "hash": "00000000000000000000000000000000" }, { "url": "http://127.0.0.1:6016/0_1", "pos": "0.1", "size": 1048576, "hash": "00000000000000000000000000000000" }, { "url": "http://127.0.0.1:6017/0_0", "pos": "0.0", "size": 1048576, "hash": "00000000000000000000000000000000" }] self.content_factory.container_client.content_prepare = Mock( return_value=(meta, chunks)) c = self.content_factory.new("xxx_container_id", "titi", 1000, "RAIN") self.assertEqual(type(c), RainContent) self.assertEqual(c.content_id, "F4B1C8DD132705007DE8B43D0709DAA2") self.assertEqual(c.length, 1000) self.assertEqual(c.path, "titi") self.assertEqual(c.version, "1450341162332663") self.assertEqual(c.algo, "liber8tion") self.assertEqual(c.k, 6) self.assertEqual(c.m, 2) self.assertEqual(len(c.chunks), 4) self.assertEqual(c.chunks[0].raw(), chunks[3]) self.assertEqual(c.chunks[1].raw(), chunks[2]) self.assertEqual(c.chunks[2].raw(), chunks[1]) self.assertEqual(c.chunks[3].raw(), chunks[0]) def _new_content(self, stgpol, data): old_content = self.content_factory.new(self.container_id, "titi", len(data), stgpol) old_content.upload(StringIO.StringIO(data)) return self.content_factory.get(self.container_id, old_content.content_id) def _test_change_policy(self, data_size, old_policy, new_policy): if (old_policy == "RAIN" or new_policy == "RAIN") \ and len(self.conf['rawx']) < 8: self.skipTest("RAIN: Need more than 8 rawx to run") data = random_data(data_size) obj_type = { "SINGLE": DupContent, "TWOCOPIES": DupContent, "THREECOPIES": DupContent, "RAIN": RainContent } old_content = self._new_content(old_policy, data) self.assertEqual(type(old_content), obj_type[old_policy]) changed_content = self.content_factory.change_policy( old_content.container_id, old_content.content_id, new_policy) self.assertRaises(NotFound, self.container_client.content_show, self.account, cid=old_content.container_id, content=old_content.content_id) new_content = self.content_factory.get(self.container_id, changed_content.content_id) self.assertEqual(type(new_content), obj_type[new_policy]) downloaded_data = "".join(new_content.download()) self.assertEqual(downloaded_data, data) def test_change_content_0_byte_policy_single_to_rain(self): self._test_change_policy(0, "SINGLE", "RAIN") def test_change_content_0_byte_policy_rain_to_twocopies(self): self._test_change_policy(0, "RAIN", "TWOCOPIES") def test_change_content_1_byte_policy_single_to_rain(self): self._test_change_policy(1, "SINGLE", "RAIN") def test_change_content_chunksize_bytes_policy_twocopies_to_rain(self): self._test_change_policy(self.chunk_size, "TWOCOPIES", "RAIN") def test_change_content_2xchunksize_bytes_policy_threecopies_to_rain(self): self._test_change_policy(self.chunk_size * 2, "THREECOPIES", "RAIN") def test_change_content_1_byte_policy_rain_to_threecopies(self): self._test_change_policy(1, "RAIN", "THREECOPIES") def test_change_content_chunksize_bytes_policy_rain_to_twocopies(self): self._test_change_policy(self.chunk_size, "RAIN", "TWOCOPIES") def test_change_content_2xchunksize_bytes_policy_rain_to_single(self): self._test_change_policy(self.chunk_size * 2, "RAIN", "SINGLE") def test_change_content_0_byte_policy_twocopies_to_threecopies(self): self._test_change_policy(0, "TWOCOPIES", "THREECOPIES") def test_change_content_chunksize_bytes_policy_single_to_twocopies(self): self._test_change_policy(self.chunk_size, "SINGLE", "TWOCOPIES") def test_change_content_2xchunksize_bytes_policy_3copies_to_single(self): self._test_change_policy(self.chunk_size * 2, "THREECOPIES", "SINGLE") def test_change_content_with_same_policy(self): data = random_data(10) old_content = self._new_content("TWOCOPIES", data) changed_content = self.content_factory.change_policy( old_content.container_id, old_content.content_id, "TWOCOPIES") self.assertEqual(old_content.content_id, changed_content.content_id) def test_change_policy_unknown_content(self): self.assertRaises(ContentNotFound, self.content_factory.change_policy, self.container_id, "1234", "SINGLE") def test_change_policy_unknown_storage_policy(self): data = random_data(10) old_content = self._new_content("TWOCOPIES", data) self.assertRaises(ClientException, self.content_factory.change_policy, self.container_id, old_content.content_id, "UnKnOwN")
class TestStorageTierer(BaseTestCase): def setUp(self): super(TestStorageTierer, self).setUp() self.namespace = self.conf['namespace'] self.test_account = "test_storage_tiering_%f" % time.time() self.gridconf = {"namespace": self.namespace, "container_fetch_limit": 2, "content_fetch_limit": 2, "account": self.test_account, "outdated_threshold": 0, "new_policy": "RAIN"} self.content_factory = ContentFactory(self.gridconf) self.container_client = ContainerClient(self.gridconf) self._populate() def _populate(self): self.container_0_name = "container_empty" self.container_0_id = cid_from_name( self.test_account, self.container_0_name) self.container_client.container_create( acct=self.test_account, ref=self.container_0_name) self.container_1_name = "container_with_1_content" self.container_1_id = cid_from_name( self.test_account, self.container_1_name) self.container_client.container_create( acct=self.test_account, ref=self.container_1_name) self.container_1_content_0_name = "container_1_content_0" self.container_1_content_0 = self._new_content( self.container_1_id, self.container_1_content_0_name, "SINGLE") self.container_2_name = "container_with_2_contents" self.container_2_id = cid_from_name( self.test_account, self.container_2_name) self.container_client.container_create( acct=self.test_account, ref=self.container_2_name) self.container_2_content_0_name = "container_2_content_0" self.container_2_content_0 = self._new_content( self.container_2_id, self.container_2_content_0_name, "SINGLE") self.container_2_content_1_name = "container_2_content_1" self.container_2_content_1 = self._new_content( self.container_2_id, self.container_2_content_1_name, "TWOCOPIES") def _new_content(self, container_id, content_name, stgpol): data = random_data(10) content = self.content_factory.new(container_id, content_name, len(data), stgpol) content.upload(StringIO.StringIO(data)) return content def tearDown(self): super(TestStorageTierer, self).tearDown() def test_iter_container_list(self): worker = StorageTiererWorker(self.gridconf, Mock()) gen = worker._list_containers() self.assertEqual(gen.next(), self.container_0_name) self.assertEqual(gen.next(), self.container_1_name) self.assertEqual(gen.next(), self.container_2_name) self.assertRaises(StopIteration, gen.next) def test_iter_content_list_outdated_threshold_0(self): self.gridconf["outdated_threshold"] = 0 worker = StorageTiererWorker(self.gridconf, Mock()) gen = worker._list_contents() self.assertEqual(gen.next(), ( self.container_1_id, self.container_1_content_0.content_id)) self.assertEqual(gen.next(), ( self.container_2_id, self.container_2_content_0.content_id)) self.assertEqual(gen.next(), ( self.container_2_id, self.container_2_content_1.content_id)) self.assertRaises(StopIteration, gen.next) def test_iter_content_list_outdated_threshold_9999999999(self): self.gridconf["outdated_threshold"] = 9999999999 worker = StorageTiererWorker(self.gridconf, Mock()) gen = worker._list_contents() self.assertRaises(StopIteration, gen.next) def test_iter_content_list_outdated_threshold_2(self): # add a new content created after the three previous contents now = int(time.time()) time.sleep(2) self._new_content(self.container_2_id, "titi", "TWOCOPIES") self.gridconf["outdated_threshold"] = 2 worker = StorageTiererWorker(self.gridconf, Mock()) with mock.patch('oio.crawler.storage_tierer.time.time', mock.MagicMock(return_value=now+2)): gen = worker._list_contents() self.assertEqual(gen.next(), ( self.container_1_id, self.container_1_content_0.content_id)) self.assertEqual(gen.next(), ( self.container_2_id, self.container_2_content_0.content_id)) self.assertEqual(gen.next(), ( self.container_2_id, self.container_2_content_1.content_id)) self.assertRaises(StopIteration, gen.next) def test_iter_content_list_skip_good_policy(self): self.gridconf["new_policy"] = "SINGLE" worker = StorageTiererWorker(self.gridconf, Mock()) gen = worker._list_contents() self.assertEqual(gen.next(), ( self.container_2_id, self.container_2_content_1.content_id)) self.assertRaises(StopIteration, gen.next)
class TestContentRebuildFilter(BaseTestCase): def setUp(self): super(TestContentRebuildFilter, self).setUp() self.namespace = self.conf['namespace'] self.gridconf = {"namespace": self.namespace} self.container = "TestContentRebuildFilter%f" % time.time() self.ref = self.container self.container_client = ContainerClient(self.conf) self.container_client.container_create(self.account, self.container) syst = self.container_client.container_get_properties( self.account, self.container)['system'] self.container_id = syst['sys.name'].split('.', 1)[0] self.object_storage_api = ObjectStorageApi(namespace=self.namespace) queue_addr = choice(self.conf['services']['beanstalkd'])['addr'] self.queue_url = queue_addr self.conf['queue_url'] = 'beanstalk://' + self.queue_url self.conf['tube'] = DEFAULT_REBUILDER_TUBE self.notify_filter = NotifyFilter(app=_App, conf=self.conf) bt = Beanstalk.from_url(self.conf['queue_url']) bt.drain_tube(DEFAULT_REBUILDER_TUBE) bt.close() def _create_event(self, content_name, present_chunks, missing_chunks, content_id): event = {} event["when"] = time.time() event["event"] = "storage.content.broken" event["data"] = { "present_chunks": present_chunks, "missing_chunks": missing_chunks } event["url"] = { "ns": self.namespace, "account": self.account, "user": self.container, "path": content_name, "id": self.container_id, "content": content_id } return event def _is_chunks_created(self, previous, after, pos_created): remain = list(after) for p in previous: for r in remain: if p["url"] == r["url"]: remain.remove(r) break if len(remain) != len(pos_created): return False for r in remain: if r["pos"] in pos_created: remain.remove(r) else: return False return True def _rebuild(self, event, job_id=0): self.blob_rebuilder = subprocess.Popen([ 'oio-blob-rebuilder', self.namespace, '--beanstalkd=' + self.queue_url ]) time.sleep(3) self.blob_rebuilder.kill() def _remove_chunks(self, chunks, content_id): if not chunks: return for chunk in chunks: chunk['id'] = chunk['url'] chunk['content'] = content_id chunk['type'] = 'chunk' self.container_client.container_raw_delete(self.account, self.container, data=chunks) def _check_rebuild(self, content_name, chunks, missing_pos, meta, chunks_to_remove, chunk_created=True): self._remove_chunks(chunks_to_remove, meta['id']) event = self._create_event(content_name, chunks, missing_pos, meta['id']) self.notify_filter.process(env=event, cb=None) self._rebuild(event) _, after = self.object_storage_api.object_locate( container=self.container, obj=content_name, account=self.account) self.assertIs(chunk_created, self._is_chunks_created(chunks, after, missing_pos)) def test_nothing_missing(self): content_name = "test_nothing_missing" self.object_storage_api.object_create(account=self.account, container=self.container, data="test", policy="THREECOPIES", obj_name=content_name) meta, chunks = self.object_storage_api.object_locate( container=self.container, obj=content_name, account=self.account) chunks_to_remove = [] for chunk in chunks: chunk.pop('score', None) missing_pos = [] self._check_rebuild(content_name, chunks, missing_pos, meta, chunks_to_remove, chunk_created=True) def test_missing_1_chunk(self): content_name = "test_missing_1_chunk" self.object_storage_api.object_create(account=self.account, container=self.container, data="test", policy="THREECOPIES", obj_name=content_name) meta, chunks = self.object_storage_api.object_locate( container=self.container, obj=content_name, account=self.account) chunks_to_remove = [] chunks_to_remove.append(chunks.pop(0)) for chunk in chunks: chunk.pop('score', None) missing_pos = ["0"] self._check_rebuild(content_name, chunks, missing_pos, meta, chunks_to_remove) def test_missing_last_chunk(self): content_name = "test_missing_last_chunk" data = random_str(1024 * 1024 * 4) self.object_storage_api.object_create(account=self.account, container=self.container, data=data, policy="THREECOPIES", obj_name=content_name) meta, chunks = self.object_storage_api.object_locate( container=self.container, obj=content_name, account=self.account) chunks_to_remove = [] chunks_to_remove.append(chunks.pop(0)) for chunk in chunks: chunk.pop('score', None) missing_pos = ["3"] self._check_rebuild(content_name, chunks, missing_pos, meta, chunks_to_remove) def test_missing_2_chunks(self): content_name = "test_missing_2_chunks" self.object_storage_api.object_create(account=self.account, container=self.container, data="test", policy="THREECOPIES", obj_name=content_name) meta, chunks = self.object_storage_api.object_locate( container=self.container, obj=content_name, account=self.account) chunks_to_remove = [] for i in range(0, 2): chunks_to_remove.append(chunks.pop(0)) for chunk in chunks: chunk.pop('score', None) missing_pos = ["0", "0"] self._check_rebuild(content_name, chunks, missing_pos, meta, chunks_to_remove) def test_missing_all_chunks(self): content_name = "test_missing_all_chunks" self.object_storage_api.object_create(account=self.account, container=self.container, data="test", policy="SINGLE", obj_name=content_name) meta, chunks = self.object_storage_api.object_locate( container=self.container, obj=content_name, account=self.account) chunks_to_remove = [] chunks_to_remove.append(chunks.pop(0)) for chunk in chunks: chunk.pop('score', None) missing_pos = ["0"] self._check_rebuild(content_name, chunks, missing_pos, meta, chunks_to_remove, chunk_created=False) def test_missing_all_chunks_of_a_pos(self): content_name = "test_missing_2_chunks" self.object_storage_api.object_create(account=self.account, container=self.container, data="test", policy="THREECOPIES", obj_name=content_name) meta, chunks = self.object_storage_api.object_locate( container=self.container, obj=content_name, account=self.account) chunks_to_remove = [] for i in range(0, 3): chunks_to_remove.append(chunks.pop(0)) for chunk in chunks: chunk.pop('score', None) missing_pos = ["0"] self._check_rebuild(content_name, chunks, missing_pos, meta, chunks_to_remove, chunk_created=False) def test_missing_multiple_chunks(self): content_name = "test_missing_multiple_chunks" data = random_str(1024 * 1024 * 4) self.object_storage_api.object_create(account=self.account, container=self.container, data=data, policy="THREECOPIES", obj_name=content_name) meta, chunks = self.object_storage_api.object_locate( container=self.container, obj=content_name, account=self.account) chunks_to_remove = [] chunks_to_remove.append(chunks.pop(9)) chunks_to_remove.append(chunks.pop(6)) chunks_to_remove.append(chunks.pop(4)) chunks_to_remove.append(chunks.pop(0)) for chunk in chunks: chunk.pop('score', None) missing_pos = ["0", "1", "2", "3"] self._check_rebuild(content_name, chunks, missing_pos, meta, chunks_to_remove) def test_missing_1_chunk_ec(self): if len(self.conf['services']['rawx']) < 9: self.skipTest("Not enough rawx. " "EC tests needs at least 9 rawx to run") content_name = "test_missing_1_chunk_ec" self.object_storage_api.object_create(account=self.account, container=self.container, data="test", policy="EC", obj_name=content_name) meta, chunks = self.object_storage_api.object_locate( container=self.container, obj=content_name, account=self.account) chunks_to_remove = [] chunks_to_remove.append(chunks.pop(0)) for chunk in chunks: chunk.pop('score', None) missing_pos = ["0.1"] self._check_rebuild(content_name, chunks, missing_pos, meta, chunks_to_remove) def test_missing_m_chunk_ec(self): if len(self.conf['services']['rawx']) < 9: self.skipTest("Not enough rawx. " "EC tests needs at least 9 rawx to run") content_name = "test_missing_m_chunk_ec" self.object_storage_api.object_create(account=self.account, container=self.container, data="test", policy="EC", obj_name=content_name) meta, chunks = self.object_storage_api.object_locate( container=self.container, obj=content_name, account=self.account) chunks_to_remove = [] for i in range(0, 3): chunks_to_remove.append(chunks.pop(0)) for chunk in chunks: chunk.pop('score', None) missing_pos = ["0.1", "0.2", "0.3"] self._check_rebuild(content_name, chunks, missing_pos, meta, chunks_to_remove) def test_missing_m_chunk_ec_2(self): if len(self.conf['services']['rawx']) < 9: self.skipTest("Not enough rawx. " "EC tests needs at least 9 rawx to run") content_name = "test_missing_m_chunk_ec" self.object_storage_api.object_create(account=self.account, container=self.container, data="test", policy="EC", obj_name=content_name) meta, chunks = self.object_storage_api.object_locate( container=self.container, obj=content_name, account=self.account) chunks_to_remove = [] chunks_to_remove.append(chunks.pop(0)) chunks_to_remove.append(chunks.pop(3)) chunks_to_remove.append(chunks.pop(5)) for chunk in chunks: chunk.pop('score', None) missing_pos = ["0.1", "0.5", "0.8"] self._check_rebuild(content_name, chunks, missing_pos, meta, chunks_to_remove) def test_missing_m1_chunk_ec(self): if len(self.conf['services']['rawx']) < 9: self.skipTest("Not enough rawx. " "EC tests needs at least 9 rawx to run") content_name = "test_missing_m1_chunk_ec" self.object_storage_api.object_create(account=self.account, container=self.container, data="test", policy="EC", obj_name=content_name) meta, chunks = self.object_storage_api.object_locate( container=self.container, obj=content_name, account=self.account) chunks_to_remove = [] chunks_to_remove.append(chunks.pop(0)) chunks_to_remove.append(chunks.pop(0)) chunks_to_remove.append(chunks.pop(0)) chunks_to_remove.append(chunks.pop(0)) for chunk in chunks: chunk.pop('score', None) missing_pos = ["0.1", "0.2", "0.3", "0.4"] self._check_rebuild(content_name, chunks, missing_pos, meta, chunks_to_remove, chunk_created=False)
class TestStorageTierer(BaseTestCase): def setUp(self): super(TestStorageTierer, self).setUp() self.namespace = self.conf['namespace'] self.test_account = "test_storage_tiering_%f" % time.time() self.gridconf = { "namespace": self.namespace, "container_fetch_limit": 2, "content_fetch_limit": 2, "account": self.test_account, "outdated_threshold": 0, "new_policy": "EC" } self.content_factory = ContentFactory(self.gridconf) self.container_client = ContainerClient(self.gridconf) self._populate() def _populate(self): self.container_0_name = "container_empty" self.container_0_id = cid_from_name(self.test_account, self.container_0_name) self.container_client.container_create(account=self.test_account, reference=self.container_0_name) self.container_1_name = "container_with_1_content" self.container_1_id = cid_from_name(self.test_account, self.container_1_name) self.container_client.container_create(account=self.test_account, reference=self.container_1_name) self.container_1_content_0_name = "container_1_content_0" self.container_1_content_0 = self._new_content( self.container_1_id, self.container_1_content_0_name, "SINGLE", self.test_account, self.container_0_name) self.container_2_name = "container_with_2_contents" self.container_2_id = cid_from_name(self.test_account, self.container_2_name) self.container_client.container_create(account=self.test_account, reference=self.container_2_name) self.container_2_content_0_name = "container_2_content_0" self.container_2_content_0 = self._new_content( self.container_2_id, self.container_2_content_0_name, "SINGLE", self.test_account, self.container_0_name) self.container_2_content_1_name = "container_2_content_1" self.container_2_content_1 = self._new_content( self.container_2_id, self.container_2_content_1_name, "TWOCOPIES", self.test_account, self.container_0_name) def _new_content(self, container_id, content_name, stgpol, account, reference): data = random_data(10) content = self.content_factory.new(container_id, content_name, len(data), stgpol, account=account, container_name=reference) content.create(BytesIO(data)) return content def tearDown(self): super(TestStorageTierer, self).tearDown() def test_iter_container_list(self): worker = StorageTiererWorker(self.gridconf, Mock()) api = ObjectStorageApi(self.namespace) actual = [x[0] for x in api.container_list(self.test_account)] if len(actual) < 3: print "Slow event propagation!" # account events have not yet propagated time.sleep(3.0) actual = [x[0] for x in api.container_list(self.test_account)[0]] gen = worker._list_containers() self.assertListEqual(list(gen), actual) def test_iter_content_list_outdated_threshold_0(self): self.gridconf["outdated_threshold"] = 0 worker = StorageTiererWorker(self.gridconf, Mock()) gen = worker._list_contents() self.assertEqual( gen.next(), (self.container_1_id, self.container_1_content_0.content_id)) self.assertEqual( gen.next(), (self.container_2_id, self.container_2_content_0.content_id)) self.assertEqual( gen.next(), (self.container_2_id, self.container_2_content_1.content_id)) self.assertRaises(StopIteration, gen.next) def test_iter_content_list_outdated_threshold_9999999999(self): self.gridconf["outdated_threshold"] = 9999999999 worker = StorageTiererWorker(self.gridconf, Mock()) gen = worker._list_contents() self.assertRaises(StopIteration, gen.next) def test_iter_content_list_outdated_threshold_2(self): # add a new content created after the three previous contents now = int(time.time()) time.sleep(2) self._new_content(self.container_2_id, "titi", "TWOCOPIES", self.test_account, self.container_2_name) self.gridconf["outdated_threshold"] = 2 worker = StorageTiererWorker(self.gridconf, Mock()) with mock.patch('oio.crawler.storage_tierer.time.time', mock.MagicMock(return_value=now + 1)): gen = worker._list_contents() self.assertEqual( gen.next(), (self.container_1_id, self.container_1_content_0.content_id)) self.assertEqual( gen.next(), (self.container_2_id, self.container_2_content_0.content_id)) self.assertEqual( gen.next(), (self.container_2_id, self.container_2_content_1.content_id)) self.assertRaises(StopIteration, gen.next) def test_iter_content_list_skip_good_policy(self): self.gridconf["new_policy"] = "SINGLE" worker = StorageTiererWorker(self.gridconf, Mock()) gen = worker._list_contents() self.assertEqual( gen.next(), (self.container_2_id, self.container_2_content_1.content_id)) self.assertRaises(StopIteration, gen.next)
class TestAccountClient(BaseTestCase): def setUp(self): super(TestAccountClient, self).setUp() self.account_id = "test_account_%f" % time.time() self.account_client = AccountClient(self.conf) self.container_client = ContainerClient(self.conf) retry = 3 for i in range(retry + 1): try: self.account_client.account_create(self.account_id) break except ClientException: if i < retry: time.sleep(2) else: raise self.container_client.container_create(account=self.account_id, reference="container1") self.container_client.container_create(account=self.account_id, reference="container2") time.sleep(.5) # ensure container event have been processed def test_container_list(self): resp = self.account_client.container_list(self.account_id) self.assertEquals(resp["containers"], 2) self.assertEqual(resp["listing"], [["container1", 0, 0, 0], ["container2", 0, 0, 0]]) resp = self.account_client.container_list(self.account_id, limit=1) self.assertEquals(resp["containers"], 2) self.assertEqual(resp["listing"], [["container1", 0, 0, 0]]) resp = self.account_client.container_list(self.account_id, marker="container1", limit=1) self.assertEquals(resp["containers"], 2) self.assertEqual(resp["listing"], [["container2", 0, 0, 0]]) resp = self.account_client.container_list(self.account_id, marker="container2", limit=1) self.assertEquals(resp["containers"], 2) self.assertEqual(resp["listing"], []) # TODO: move this test somewhere under tests/unit/ def test_account_service_refresh(self): self.account_client.endpoint = "126.0.0.1:6666" self.account_client._last_refresh = time.time() self.account_client._get_account_addr = Mock( return_value="126.0.0.1:6667") self.assertRaises(OioNetworkException, self.account_client.account_list) self.account_client._get_account_addr.assert_called_once() self.assertIn("126.0.0.1:6667", self.account_client.endpoint) def test_container_reset(self): metadata = dict() metadata["mtime"] = time.time() metadata["bytes"] = 42 metadata["objects"] = 12 self.account_client.container_update(self.account_id, "container1", metadata=metadata) self.account_client.container_reset(self.account_id, "container1", time.time()) resp = self.account_client.container_list(self.account_id, prefix="container1") for container in resp["listing"]: name, nb_objects, nb_bytes, _ = container if name == 'container1': self.assertEqual(nb_objects, 0) self.assertEqual(nb_bytes, 0) return self.fail("No container container1") def test_account_refresh(self): metadata = dict() metadata["mtime"] = time.time() metadata["bytes"] = 42 metadata["objects"] = 12 self.account_client.container_update(self.account_id, "container1", metadata=metadata) self.account_client.account_refresh(self.account_id) resp = self.account_client.account_show(self.account_id) self.assertEqual(resp["bytes"], 42) self.assertEqual(resp["objects"], 12) def test_account_flush(self): metadata = dict() metadata["mtime"] = time.time() metadata["bytes"] = 42 metadata["objects"] = 12 self.account_client.container_update(self.account_id, "container1", metadata=metadata) self.account_client.account_flush(self.account_id) resp = self.account_client.account_show(self.account_id) self.assertEqual(resp["bytes"], 0) self.assertEqual(resp["objects"], 0) resp = self.account_client.container_list(self.account_id) self.assertEqual(len(resp["listing"]), 0)
class BlobAuditorWorker(object): def __init__(self, conf, logger, volume): self.conf = conf self.logger = logger self.volume = volume self.run_time = 0 self.passes = 0 self.errors = 0 self.orphan_chunks = 0 self.faulty_chunks = 0 self.corrupted_chunks = 0 self.last_reported = 0 self.chunks_run_time = 0 self.bytes_running_time = 0 self.bytes_processed = 0 self.total_bytes_processed = 0 self.total_chunks_processed = 0 self.report_interval = int_value(conf.get("report_interval"), 3600) self.max_chunks_per_second = int_value(conf.get("chunks_per_second"), 30) self.max_bytes_per_second = int_value(conf.get("bytes_per_second"), 10000000) self.container_client = ContainerClient(conf) def audit_pass(self): self.namespace, self.address = check_volume(self.volume) start_time = report_time = time.time() total_errors = 0 total_corrupted = 0 total_orphans = 0 total_faulty = 0 audit_time = 0 paths = paths_gen(self.volume) for path in paths: loop_time = time.time() self.safe_chunk_audit(path) self.chunks_run_time = ratelimit(self.chunks_run_time, self.max_chunks_per_second) self.total_chunks_processed += 1 now = time.time() if now - self.last_reported >= self.report_interval: self.logger.info( "%(start_time)s " "%(passes)d " "%(corrupted)d " "%(faulty)d " "%(orphans)d " "%(errors)d " "%(c_rate).2f " "%(b_rate).2f " "%(total).2f " "%(audit_time).2f" "%(audit_rate).2f" % { "start_time": time.ctime(report_time), "passes": self.passes, "corrupted": self.corrupted_chunks, "faulty": self.faulty_chunks, "orphans": self.orphan_chunks, "errors": self.errors, "c_rate": self.passes / (now - report_time), "b_rate": self.bytes_processed / (now - report_time), "total": (now - start_time), "audit_time": audit_time, "audit_rate": audit_time / (now - start_time), } ) report_time = now total_corrupted += self.corrupted_chunks total_orphans += self.orphan_chunks total_faulty += self.faulty_chunks total_errors += self.errors self.passes = 0 self.corrupted_chunks = 0 self.orphan_chunks = 0 self.faulty_chunks = 0 self.errors = 0 self.bytes_processed = 0 self.last_reported = now audit_time += now - loop_time elapsed = (time.time() - start_time) or 0.000001 self.logger.info( "%(elapsed).02f " "%(corrupted)d " "%(faulty)d " "%(orphans)d " "%(errors)d " "%(chunk_rate).2f " "%(bytes_rate).2f " "%(audit_time).2f " "%(audit_rate).2f" % { "elapsed": elapsed, "corrupted": total_corrupted + self.corrupted_chunks, "faulty": total_faulty + self.faulty_chunks, "orphans": total_orphans + self.orphan_chunks, "errors": total_errors + self.errors, "chunk_rate": self.total_chunks_processed / elapsed, "bytes_rate": self.total_bytes_processed / elapsed, "audit_time": audit_time, "audit_rate": audit_time / elapsed, } ) def safe_chunk_audit(self, path): try: self.chunk_audit(path) except exc.FaultyChunk as err: self.faulty_chunks += 1 self.logger.error("ERROR faulty chunk %s: %s", path, err) except exc.CorruptedChunk as err: self.corrupted_chunks += 1 self.logger.error("ERROR corrupted chunk %s: %s", path, err) except exc.OrphanChunk as err: self.orphan_chunks += 1 self.logger.error("ERROR orphan chunk %s: %s", path, err) except Exception: self.errors += 1 self.logger.exception("ERROR while auditing chunk %s", path) self.passes += 1 def chunk_audit(self, path): with open(path) as f: try: meta = read_chunk_metadata(f) except exc.MissingAttribute as e: raise exc.FaultyChunk("Missing extended attribute %s" % e) size = int(meta["chunk_size"]) md5_checksum = meta["chunk_hash"].lower() reader = ChunkReader(f, size, md5_checksum) with closing(reader): for buf in reader: buf_len = len(buf) self.bytes_running_time = ratelimit( self.bytes_running_time, self.max_bytes_per_second, increment=buf_len ) self.bytes_processed += buf_len self.total_bytes_processed += buf_len try: container_id = meta["container_id"] content_path = meta["content_path"] content_attr, data = self.container_client.content_show(cid=container_id, path=content_path) # Check chunk data chunk_data = None metachunks = set() for c in data: if c["url"].endswith(meta["chunk_id"]): metachunks.add(c["pos"].split(".", 2)[0]) chunk_data = c if not chunk_data: raise exc.OrphanChunk("Not found in content") if chunk_data["size"] != int(meta["chunk_size"]): raise exc.FaultyChunk("Invalid chunk size found") if chunk_data["hash"] != meta["chunk_hash"]: raise exc.FaultyChunk("Invalid chunk hash found") if chunk_data["pos"] != meta["chunk_pos"]: raise exc.FaultyChunk("Invalid chunk position found") except exc.NotFound: raise exc.OrphanChunk("Chunk not found in container")
class ContentRepairerWorker(ToolWorker): def __init__(self, tool, queue_workers, queue_reply): super(ContentRepairerWorker, self).__init__(tool, queue_workers, queue_reply) self.chunk_operator = ChunkOperator(self.conf, logger=self.logger) self.blob_client = BlobClient(self.conf) self.container_client = ContainerClient(self.conf, logger=self.logger) def _safe_chunk_rebuild(self, item, content_id, chunk_id_or_pos, **kwargs): _, account, container, _, _ = item try: container_id = cid_from_name(account, container) self.chunk_operator.rebuild(container_id, content_id, chunk_id_or_pos, **kwargs) except Exception as exc: # pylint: disable=broad-except self.logger.error('ERROR when rebuilding chunk %s (%s): %s', self.tool.string_from_item(item), chunk_id_or_pos, exc) return exc def _repair_metachunk(self, item, content_id, stg_met, pos, chunks): """ Check that a metachunk has the right number of chunks. :returns: the list (generator) of missing chunks """ exceptions = list() required = stg_met.expected_chunks if len(chunks) < required: if stg_met.ec: subs = {x['num'] for x in chunks} for sub in range(required): if sub not in subs: exc = self._safe_chunk_rebuild(item, content_id, "%d.%d" % (pos, sub)) if exc: exceptions.append(exc) else: missing_chunks = required - len(chunks) for _ in range(missing_chunks): exc = self._safe_chunk_rebuild(item, content_id, pos) if exc: exceptions.append(exc) for chunk in chunks: try: self.blob_client.chunk_head(chunk['url'], xattr=True, check_hash=True) except (NotFound, ClientPreconditionFailed) as e: kwargs = { 'try_chunk_delete': isinstance(e, ClientPreconditionFailed) } exc = self._safe_chunk_rebuild(item, content_id, chunk['url'], **kwargs) if exc: exceptions.append(exc) except Exception as exc: # pylint: disable=broad-except self.logger.error('ERROR when checking chunk %s (%s): %s', self.tool.string_from_item(item), chunk['url'], exc) exceptions.append(exc) return exceptions def _process_item(self, item): namespace, account, container, obj_name, version = item if namespace != self.tool.namespace: raise ValueError('Invalid namespace (actual=%s, expected=%s)' % (namespace, self.tool.namespace)) obj_meta, chunks = self.container_client.content_locate( account=account, reference=container, path=obj_name, version=version, properties=False) content_id = obj_meta['id'] exceptions = list() stg_met = STORAGE_METHODS.load(obj_meta['chunk_method']) chunks_by_pos = _sort_chunks(chunks, stg_met.ec) for pos, chunks in iteritems(chunks_by_pos): try: exceptions += self._repair_metachunk(item, content_id, stg_met, pos, chunks) except Exception as exc: # pylint: disable=broad-except self.logger.error('ERROR when repair metachunk %s (%d): %s', self.tool.string_from_item(item), pos, exc) exceptions.append(exc) if exceptions: raise Exception(exceptions) self.container_client.content_touch(account=account, reference=container, path=obj_name, version=version)
class BlobAuditorWorker(object): def __init__(self, conf, logger, volume): self.conf = conf self.logger = logger self.volume = volume self.run_time = 0 self.passes = 0 self.errors = 0 self.orphan_chunks = 0 self.faulty_chunks = 0 self.corrupted_chunks = 0 self.last_reported = 0 self.chunks_run_time = 0 self.bytes_running_time = 0 self.bytes_processed = 0 self.total_bytes_processed = 0 self.total_chunks_processed = 0 self.report_interval = int_value( conf.get('report_interval'), 3600) self.max_chunks_per_second = int_value( conf.get('chunks_per_second'), 30) self.max_bytes_per_second = int_value( conf.get('bytes_per_second'), 10000000) self.container_client = ContainerClient(conf) def audit_pass(self): self.namespace, self.address = check_volume(self.volume) start_time = report_time = time.time() total_errors = 0 total_corrupted = 0 total_orphans = 0 total_faulty = 0 audit_time = 0 paths = paths_gen(self.volume) for path in paths: loop_time = time.time() self.safe_chunk_audit(path) self.chunks_run_time = ratelimit( self.chunks_run_time, self.max_chunks_per_second ) self.total_chunks_processed += 1 now = time.time() if now - self.last_reported >= self.report_interval: self.logger.info( '%(start_time)s ' '%(passes)d ' '%(corrupted)d ' '%(faulty)d ' '%(orphans)d ' '%(errors)d ' '%(c_rate).2f ' '%(b_rate).2f ' '%(total).2f ' '%(audit_time).2f' '%(audit_rate).2f' % { 'start_time': time.ctime(report_time), 'passes': self.passes, 'corrupted': self.corrupted_chunks, 'faulty': self.faulty_chunks, 'orphans': self.orphan_chunks, 'errors': self.errors, 'c_rate': self.passes / (now - report_time), 'b_rate': self.bytes_processed / (now - report_time), 'total': (now - start_time), 'audit_time': audit_time, 'audit_rate': audit_time / (now - start_time) } ) report_time = now total_corrupted += self.corrupted_chunks total_orphans += self.orphan_chunks total_faulty += self.faulty_chunks total_errors += self.errors self.passes = 0 self.corrupted_chunks = 0 self.orphan_chunks = 0 self.faulty_chunks = 0 self.errors = 0 self.bytes_processed = 0 self.last_reported = now audit_time += (now - loop_time) elapsed = (time.time() - start_time) or 0.000001 self.logger.info( '%(elapsed).02f ' '%(corrupted)d ' '%(faulty)d ' '%(orphans)d ' '%(errors)d ' '%(chunk_rate).2f ' '%(bytes_rate).2f ' '%(audit_time).2f ' '%(audit_rate).2f' % { 'elapsed': elapsed, 'corrupted': total_corrupted + self.corrupted_chunks, 'faulty': total_faulty + self.faulty_chunks, 'orphans': total_orphans + self.orphan_chunks, 'errors': total_errors + self.errors, 'chunk_rate': self.total_chunks_processed / elapsed, 'bytes_rate': self.total_bytes_processed / elapsed, 'audit_time': audit_time, 'audit_rate': audit_time / elapsed } ) def safe_chunk_audit(self, path): try: self.chunk_audit(path) except exc.FaultyChunk as err: self.faulty_chunks += 1 self.logger.error('ERROR faulty chunk %s: %s', path, err) except exc.CorruptedChunk as err: self.corrupted_chunks += 1 self.logger.error('ERROR corrupted chunk %s: %s', path, err) except exc.OrphanChunk as err: self.orphan_chunks += 1 self.logger.error('ERROR orphan chunk %s: %s', path, err) except Exception: self.errors += 1 self.logger.exception('ERROR while auditing chunk %s', path) self.passes += 1 def chunk_audit(self, path): with open(path) as f: try: meta = read_chunk_metadata(f) except exc.MissingAttribute as e: raise exc.FaultyChunk( 'Missing extended attribute %s' % e) size = int(meta['chunk_size']) md5_checksum = meta['chunk_hash'].lower() reader = ChunkReader(f, size, md5_checksum) with closing(reader): for buf in reader: buf_len = len(buf) self.bytes_running_time = ratelimit( self.bytes_running_time, self.max_bytes_per_second, increment=buf_len) self.bytes_processed += buf_len self.total_bytes_processed += buf_len try: content_cid = meta['content_cid'] content_path = meta['content_path'] content_attr, data = self.container_client.content_show( cid=content_cid, path=content_path) # Check chunk data chunk_data = None metachunks = set() for c in data: if c['url'].endswith(meta['chunk_id']): metachunks.add(c['pos'].split('.', 2)[0]) chunk_data = c if not chunk_data: raise exc.OrphanChunk('Not found in content') if chunk_data['size'] != int(meta['chunk_size']): raise exc.FaultyChunk('Invalid chunk size found') if chunk_data['hash'] != meta['chunk_hash']: raise exc.FaultyChunk('Invalid chunk hash found') if chunk_data['pos'] != meta['chunk_pos']: raise exc.FaultyChunk('Invalid chunk position found') # Check content data if content_attr['length'] != meta['content_size']: raise exc.FaultyChunk('Invalid content size found') if len(metachunks) != int(meta['content_chunksnb']): self.logger.warn('Invalid number of chunks found') raise exc.FaultyChunk('Invalid number of chunks found') except exc.NotFound: raise exc.OrphanChunk('Chunk not found in container')
class Checker(object): def __init__(self, namespace, concurrency=50, error_file=None, rebuild_file=None, full=True, limit_listings=0, request_attempts=1): self.pool = GreenPool(concurrency) self.error_file = error_file self.full = bool(full) # Optimisation for when we are only checking one object # or one container. # 0 -> do not limit # 1 -> limit account listings (list of containers) # 2 -> limit container listings (list of objects) self.limit_listings = limit_listings if self.error_file: f = open(self.error_file, 'a') self.error_writer = csv.writer(f, delimiter=' ') self.rebuild_file = rebuild_file if self.rebuild_file: fd = open(self.rebuild_file, 'a') self.rebuild_writer = csv.writer(fd, delimiter='|') conf = {'namespace': namespace} self.account_client = AccountClient( conf, max_retries=request_attempts - 1) self.container_client = ContainerClient( conf, max_retries=request_attempts - 1, request_attempts=request_attempts) self.blob_client = BlobClient() self.accounts_checked = 0 self.containers_checked = 0 self.objects_checked = 0 self.chunks_checked = 0 self.account_not_found = 0 self.container_not_found = 0 self.object_not_found = 0 self.chunk_not_found = 0 self.account_exceptions = 0 self.container_exceptions = 0 self.object_exceptions = 0 self.chunk_exceptions = 0 self.list_cache = {} self.running = {} def write_error(self, target): error = [target.account] if target.container: error.append(target.container) if target.obj: error.append(target.obj) if target.chunk: error.append(target.chunk) self.error_writer.writerow(error) def write_rebuilder_input(self, target, obj_meta, ct_meta): try: cid = ct_meta['system']['sys.name'].split('.', 1)[0] except KeyError: cid = ct_meta['properties']['sys.name'].split('.', 1)[0] self.rebuild_writer.writerow((cid, obj_meta['id'], target.chunk)) def write_chunk_error(self, target, obj_meta, chunk=None): if chunk is not None: target = target.copy() target.chunk = chunk if self.error_file: self.write_error(target) if self.rebuild_file: self.write_rebuilder_input( target, obj_meta, self.list_cache[(target.account, target.container)][1]) def _check_chunk_xattr(self, target, obj_meta, xattr_meta): error = False # Composed position -> erasure coding attr_prefix = 'meta' if '.' in obj_meta['pos'] else '' attr_key = attr_prefix + 'chunk_size' if str(obj_meta['size']) != xattr_meta.get(attr_key): print(" Chunk %s '%s' xattr (%s) " "differs from size in meta2 (%s)" % (target, attr_key, xattr_meta.get(attr_key), obj_meta['size'])) error = True attr_key = attr_prefix + 'chunk_hash' if obj_meta['hash'] != xattr_meta.get(attr_key): print(" Chunk %s '%s' xattr (%s) " "differs from hash in meta2 (%s)" % (target, attr_key, xattr_meta.get(attr_key), obj_meta['hash'])) error = True return error def check_chunk(self, target): chunk = target.chunk obj_listing, obj_meta = self.check_obj(target) error = False if chunk not in obj_listing: print(' Chunk %s missing from object listing' % target) error = True db_meta = dict() else: db_meta = obj_listing[chunk] try: xattr_meta = self.blob_client.chunk_head(chunk, xattr=self.full) except exc.NotFound as e: self.chunk_not_found += 1 error = True print(' Not found chunk "%s": %s' % (target, str(e))) except Exception as e: self.chunk_exceptions += 1 error = True print(' Exception chunk "%s": %s' % (target, str(e))) else: if db_meta and self.full: error = self._check_chunk_xattr(target, db_meta, xattr_meta) if error: self.write_chunk_error(target, obj_meta) self.chunks_checked += 1 def check_obj_policy(self, target, obj_meta, chunks): """ Check that the list of chunks of an object matches the object's storage policy. """ stg_met = STORAGE_METHODS.load(obj_meta['chunk_method']) chunks_by_pos = _sort_chunks(chunks, stg_met.ec) if stg_met.ec: required = stg_met.ec_nb_data + stg_met.ec_nb_parity else: required = stg_met.nb_copy for pos, clist in chunks_by_pos.iteritems(): if len(clist) < required: print(' Missing %d chunks at position %s of %s' % ( required - len(clist), pos, target)) if stg_met.ec: subs = {x['num'] for x in clist} for sub in range(required): if sub not in subs: self.write_chunk_error(target, obj_meta, '%d.%d' % (pos, sub)) else: self.write_chunk_error(target, obj_meta, str(pos)) def check_obj(self, target, recurse=False): account = target.account container = target.container obj = target.obj if (account, container, obj) in self.running: self.running[(account, container, obj)].wait() if (account, container, obj) in self.list_cache: return self.list_cache[(account, container, obj)] self.running[(account, container, obj)] = Event() print('Checking object "%s"' % target) container_listing, ct_meta = self.check_container(target) error = False if obj not in container_listing: print(' Object %s missing from container listing' % target) error = True # checksum = None else: # TODO check checksum match # checksum = container_listing[obj]['hash'] pass results = [] meta = dict() try: meta, results = self.container_client.content_locate( account=account, reference=container, path=obj, properties=False) except exc.NotFound as e: self.object_not_found += 1 error = True print(' Not found object "%s": %s' % (target, str(e))) except Exception as e: self.object_exceptions += 1 error = True print(' Exception object "%s": %s' % (target, str(e))) chunk_listing = dict() for chunk in results: chunk_listing[chunk['url']] = chunk # Skip the check if we could not locate the object if meta: self.check_obj_policy(target.copy(), meta, results) self.list_cache[(account, container, obj)] = (chunk_listing, meta) self.objects_checked += 1 self.running[(account, container, obj)].send(True) del self.running[(account, container, obj)] if recurse: for chunk in chunk_listing: t = target.copy() t.chunk = chunk self.pool.spawn_n(self.check_chunk, t) if error and self.error_file: self.write_error(target) return chunk_listing, meta def check_container(self, target, recurse=False): account = target.account container = target.container if (account, container) in self.running: self.running[(account, container)].wait() if (account, container) in self.list_cache: return self.list_cache[(account, container)] self.running[(account, container)] = Event() print('Checking container "%s"' % target) account_listing = self.check_account(target) error = False if container not in account_listing: error = True print(' Container %s missing from account listing' % target) marker = None results = [] ct_meta = dict() extra_args = dict() if self.limit_listings > 1 and target.obj: # When we are explicitly checking one object, start the listing # where this object is supposed to be, and list only one object. extra_args['prefix'] = target.obj extra_args['limit'] = 1 while True: try: _, resp = self.container_client.content_list( account=account, reference=container, marker=marker, **extra_args) except exc.NotFound as e: self.container_not_found += 1 error = True print(' Not found container "%s": %s' % (target, str(e))) break except Exception as e: self.container_exceptions += 1 error = True print(' Exception container "%s": %s' % (target, str(e))) break if resp['objects']: marker = resp['objects'][-1]['name'] results.extend(resp['objects']) if self.limit_listings > 1: break else: ct_meta = resp ct_meta.pop('objects') break container_listing = dict() for obj in results: container_listing[obj['name']] = obj if self.limit_listings <= 1: # We just listed the whole container, keep the result in a cache self.containers_checked += 1 self.list_cache[(account, container)] = container_listing, ct_meta self.running[(account, container)].send(True) del self.running[(account, container)] if recurse: for obj in container_listing: t = target.copy() t.obj = obj self.pool.spawn_n(self.check_obj, t, True) if error and self.error_file: self.write_error(target) return container_listing, ct_meta def check_account(self, target, recurse=False): account = target.account if account in self.running: self.running[account].wait() if account in self.list_cache: return self.list_cache[account] self.running[account] = Event() print('Checking account "%s"' % target) error = False marker = None results = [] extra_args = dict() if self.limit_listings > 0 and target.container: # When we are explicitly checking one container, start the listing # where this container is supposed to be, and list only one # container. extra_args['prefix'] = target.container extra_args['limit'] = 1 while True: try: resp = self.account_client.container_list( account, marker=marker, **extra_args) except Exception as e: self.account_exceptions += 1 error = True print(' Exception account "%s": %s' % (target, str(e))) break if resp['listing']: marker = resp['listing'][-1][0] results.extend(resp['listing']) if self.limit_listings > 0: break else: break containers = dict() for e in results: containers[e[0]] = (e[1], e[2]) if self.limit_listings <= 0: # We just listed the whole account, keep the result in a cache self.accounts_checked += 1 self.list_cache[account] = containers self.running[account].send(True) del self.running[account] if recurse: for container in containers: t = target.copy() t.container = container self.pool.spawn_n(self.check_container, t, True) if error and self.error_file: self.write_error(target) return containers def check(self, target): if target.chunk and target.obj and target.container: self.pool.spawn_n(self.check_chunk, target) elif target.obj and target.container: self.pool.spawn_n(self.check_obj, target, True) elif target.container: self.pool.spawn_n(self.check_container, target, True) else: self.pool.spawn_n(self.check_account, target, True) def wait(self): self.pool.waitall() def report(self): def _report_stat(name, stat): print("{0:18}: {1}".format(name, stat)) print() print('Report') _report_stat("Accounts checked", self.accounts_checked) if self.account_not_found: _report_stat("Missing accounts", self.account_not_found) if self.account_exceptions: _report_stat("Exceptions", self.account_not_found) print() _report_stat("Containers checked", self.containers_checked) if self.container_not_found: _report_stat("Missing containers", self.container_not_found) if self.container_exceptions: _report_stat("Exceptions", self.container_exceptions) print() _report_stat("Objects checked", self.objects_checked) if self.object_not_found: _report_stat("Missing objects", self.object_not_found) if self.object_exceptions: _report_stat("Exceptions", self.object_exceptions) print() _report_stat("Chunks checked", self.chunks_checked) if self.chunk_not_found: _report_stat("Missing chunks", self.chunk_not_found) if self.chunk_exceptions: _report_stat("Exceptions", self.chunk_exceptions)