def test_blob_indexer_with_old_chunk(self):
    """Check that the blob indexer re-registers a chunk whose xattr
    have been converted to the legacy (old-style) format, and that the
    rdir entry disappears after the chunk is deleted."""
    (exp_account, exp_container, exp_cid, exp_path,
     exp_version, exp_content_id, exp_chunk_id) = self._put_chunk()

    def fetch_entries():
        # Materialize the rdir listing for this rawx service.
        return list(self.rdir_client.chunk_fetch(self.rawx_id))

    entries = fetch_entries()
    self.assertEqual(1, len(entries))
    cid, content_id, chunk_id, _ = entries[0]
    self.assertEqual(exp_cid, cid)
    self.assertEqual(exp_content_id, content_id)
    self.assertEqual(exp_chunk_id, chunk_id)

    # Rewrite the chunk's extended attributes in the legacy format,
    # then wipe the rdir database and run one indexer pass over it.
    convert_to_old_chunk(
        self._chunk_path(chunk_id), exp_account, exp_container,
        exp_path, exp_version, exp_content_id)
    self.rdir_client.admin_clear(self.rawx_id, clear_all=True)
    self.blob_indexer.index_pass()
    self.assertEqual(1, self.blob_indexer.successes)
    self.assertEqual(0, self.blob_indexer.errors)

    # The old-style chunk must have been re-indexed identically.
    entries = fetch_entries()
    self.assertEqual(1, len(entries))
    cid, content_id, chunk_id, _ = entries[0]
    self.assertEqual(exp_cid, cid)
    self.assertEqual(exp_content_id, content_id)
    self.assertEqual(exp_chunk_id, chunk_id)

    # Deleting the chunk must empty the rdir listing.
    self._delete_chunk(exp_chunk_id)
    self.assertEqual(0, len(fetch_entries()))
def test_converter_old_chunk_with_link_on_same_object(self):
    """Convert chunks of an object and of a link made on that same
    object (link chunks land on the same rawx volumes), and check the
    converter rewrites both entries found in each chunk's xattr."""
    # Create a first link of the object.
    self.api.object_link(
        self.account, self.container, self.path,
        self.account, self.container, self.path + '.link')
    linked_meta, linked_chunks = self.api.object_locate(
        self.account, self.container, self.path + '.link')
    # The link is a distinct content with its own content id.
    self.assertNotEqual(self.content_id, linked_meta['id'])
    # Downgrade the link's chunks to old-style xattr with a bogus
    # version/content id, keeping the old fullpath xattr around.
    for c in linked_chunks:
        convert_to_old_chunk(
            self._chunk_path(c), self.account, self.container,
            self.path + '.link', 'None',
            '0123456789ABCDEF0123456789ABCDEF', add_old_fullpath=True)
    # Downgrade the original object's chunks with their real metadata.
    for c in self.chunks:
        convert_to_old_chunk(
            self._chunk_path(c), self.account, self.container, self.path,
            self.version, self.content_id, add_old_fullpath=True)
    # Re-link the link onto itself, producing yet another content id.
    self.api.object_link(
        self.account, self.container, self.path + '.link',
        self.account, self.container, self.path + '.link')
    linked_meta, linked_chunks = self.api.object_locate(
        self.account, self.container, self.path + '.link')
    self.assertNotEqual(self.content_id, linked_meta['id'])
    # Pick one original chunk and find the link chunk that shares its
    # rawx volume (linked chunks are co-located with the originals).
    chunk = random.choice(self.chunks)
    chunk_volume = chunk['url'].split('/')[2]
    chunk_id = chunk['url'].split('/')[3]
    chunk_path = self._chunk_path(chunk)
    for c in linked_chunks:
        if chunk_volume == c['url'].split('/')[2]:
            linked_chunk_id2 = c['url'].split('/')[3]
            break
    # Symmetrically, pick one link chunk and find the original chunk
    # on the same volume.
    linked_chunk = random.choice(linked_chunks)
    linked_chunk_volume = linked_chunk['url'].split('/')[2]
    linked_chunk_id = linked_chunk['url'].split('/')[3]
    linked_chunk_path = self._chunk_path(linked_chunk)
    for c in self.chunks:
        if linked_chunk_volume == c['url'].split('/')[2]:
            chunk_id2 = c['url'].split('/')[3]
            break
    # Converting either chunk file must fix both the original entry
    # and the co-located link entry.
    self._convert_and_check(
        chunk_volume, chunk_path,
        {chunk_id: (self.account, self.container, self.path,
                    self.version, self.content_id),
         linked_chunk_id2: (self.account, self.container,
                            self.path + '.link', linked_meta['version'],
                            linked_meta['id'])})
    self._convert_and_check(
        linked_chunk_volume, linked_chunk_path,
        {chunk_id2: (self.account, self.container, self.path,
                     self.version, self.content_id),
         linked_chunk_id: (self.account, self.container,
                           self.path + '.link', linked_meta['version'],
                           linked_meta['id'])})
def test_converter_old_chunk_with_link_on_same_object(self):
    """Downgrade the object's chunks to old-style xattr, link the
    object onto its own path, and check that converting a chunk of the
    new link reports one error (the stale old xattr are not removed)."""
    for c in self.chunks:
        convert_to_old_chunk(
            self._chunk_path(c), self.account, self.container, self.path,
            self.version, self.content_id)
    # Link the object onto the same path: the link replaces the object
    # under a new content id, while reusing the same chunk files.
    self.api.object_link(
        self.account, self.container, self.path,
        self.account, self.container, self.path)
    linked_meta, linked_chunks = self.api.object_locate(
        self.account, self.container, self.path)
    self.assertNotEqual(self.content_id, linked_meta['id'])
    linked_chunk = random.choice(linked_chunks)
    linked_chunk_volume = linked_chunk['url'].split('/')[2]
    linked_chunk_id = linked_chunk['url'].split('/')[3]
    linked_chunk_path = self._chunk_path(linked_chunk)
    # old xattr not removed: build the expected raw metadata from the
    # current on-disk state, only bumping the oio_version xattr, since
    # the converter leaves the obsolete keys in place here.
    _, expected_raw_meta = read_chunk_metadata(linked_chunk_path,
                                               linked_chunk_id)
    expected_raw_meta[chunk_xattr_keys['oio_version']] = OIO_VERSION
    # One conversion error is expected for the shadowed old entry.
    self._convert_and_check(
        linked_chunk_volume, linked_chunk_path,
        {linked_chunk_id: (self.account, self.container, self.path,
                           linked_meta['version'], linked_meta['id'])},
        expected_raw_meta=expected_raw_meta, expected_errors=1)
def test_rebuild_old_chunk(self):
    """Remove one chunk of an object whose chunks carry old-style
    xattr, rebuild it with the BlobRebuilder, and check the rebuilt
    chunk matches the original (data, position, size, hash, headers)."""
    for c in self.chunks:
        convert_to_old_chunk(
            self._chunk_path(c), self.account, self.container, self.path,
            self.version, self.content_id)
    chunk = random.choice(self.chunks)
    chunk_volume = chunk['url'].split('/')[2]
    chunk_id = chunk['url'].split('/')[3]
    # Read the victim's data/headers before destroying it.
    chunk_headers, chunk_stream = self.blob_client.chunk_get(
        chunk['url'], check_headers=False)
    os.remove(self._chunk_path(chunk))
    chunks_kept = list(self.chunks)
    chunks_kept.remove(chunk)
    conf = self.conf.copy()
    # Allow the rebuilt copy to land on the same rawx service.
    conf['allow_same_rawx'] = True
    rebuilder = BlobRebuilder(conf, service_id=chunk_volume)
    rebuilder_worker = rebuilder.create_worker(None, None)
    rebuilder_worker._process_item(
        (self.ns, self.cid, self.content_id, chunk_id))
    _, new_chunks = self.api.object_locate(
        self.account, self.container, self.path)
    # Fixed: the original code assigned list(new_chunks) to 'new_chunk',
    # a dead store immediately overwritten below; the intent was to
    # materialize 'new_chunks' before taking its length.
    new_chunks = list(new_chunks)
    self.assertEqual(len(new_chunks), len(chunks_kept) + 1)
    url_kept = [c['url'] for c in chunks_kept]
    new_chunk = None
    for c in new_chunks:
        if c['url'] not in url_kept:
            # There must be exactly one chunk we did not keep.
            self.assertIsNone(new_chunk)
            new_chunk = c
    self.assertNotEqual(chunk['real_url'], new_chunk['real_url'])
    self.assertNotEqual(chunk['url'], new_chunk['url'])
    self.assertEqual(chunk['pos'], new_chunk['pos'])
    self.assertEqual(chunk['size'], new_chunk['size'])
    self.assertEqual(chunk['hash'], new_chunk['hash'])
    new_chunk_headers, new_chunk_stream = self.blob_client.chunk_get(
        new_chunk['url'])
    chunk_data = b''.join(chunk_stream)
    new_chunk_data = b''.join(new_chunk_stream)
    self.assertEqual(chunk_data, new_chunk_data)
    # The rebuilt chunk carries a new-style fullpath xattr.
    fullpath = encode_fullpath(self.account, self.container, self.path,
                               self.version, self.content_id)
    self.assertEqual(fullpath, new_chunk_headers['full_path'])
    del new_chunk_headers['full_path']
    self.assertNotEqual(chunk_headers['chunk_id'],
                        new_chunk_headers['chunk_id'])
    new_chunk_id = new_chunk['url'].split('/')[3]
    self.assertEqual(new_chunk_id, new_chunk_headers['chunk_id'])
    del chunk_headers['chunk_id']
    del new_chunk_headers['chunk_id']
    self.assertEqual(OIO_VERSION, new_chunk_headers['oio_version'])
    del chunk_headers['oio_version']
    del new_chunk_headers['oio_version']
    # All remaining headers must be identical.
    self.assertEqual(chunk_headers, new_chunk_headers)
def test_recover_missing_old_fullpath(self):
    """Converter must recover a chunk downgraded to old-style xattr
    even when the old fullpath xattr was never written."""
    for old in self.chunks:
        convert_to_old_chunk(
            self._chunk_path(old), self.account, self.container,
            self.path, self.version, self.content_id)
    self._test_converter_single_chunk(random.choice(self.chunks))
def test_recover_missing_old_fullpath_and_content_path(self):
    """Converter must recover a chunk with old-style xattr, no old
    fullpath, and a removed content-path xattr."""
    for old in self.chunks:
        convert_to_old_chunk(
            self._chunk_path(old), self.account, self.container,
            self.path, self.version, self.content_id)
    victim = random.choice(self.chunks)
    # Strip the content-path xattr to simulate a partially-written chunk.
    remove_xattr(self._chunk_path(victim),
                 chunk_xattr_keys['content_path'])
    self._test_converter_single_chunk(victim)
def test_move_old_chunk(self):
    """Move one chunk of an object whose chunks carry old-style xattr,
    and check the moved chunk matches the original (data, position,
    size, hash, headers) with an upgraded fullpath xattr."""
    for chunk in self.chunks:
        convert_to_old_chunk(self._chunk_path(chunk), self.account,
                             self.container, self.path, self.version,
                             self.content_id)
    orig_chunk = random.choice(self.chunks)
    chunk_volume = orig_chunk['url'].split('/')[2]
    chunk_id = orig_chunk['url'].split('/')[3]
    # Read the victim's data/headers before moving it.
    chunk_headers, chunk_stream = self.blob_client.chunk_get(
        orig_chunk['url'], check_headers=False)
    chunks_kept = list(self.chunks)
    chunks_kept.remove(orig_chunk)
    mover = BlobMoverWorker(self.conf, None,
                            self.rawx_volumes[chunk_volume])
    mover.chunk_move(self._chunk_path(orig_chunk), chunk_id)
    _, new_chunks = self.api.object_locate(self.account, self.container,
                                           self.path)
    # Fixed: the original code assigned list(new_chunks) to 'new_chunk',
    # a dead store immediately overwritten below; the intent was to
    # materialize 'new_chunks' before taking its length.
    new_chunks = list(new_chunks)
    self.assertEqual(len(new_chunks), len(chunks_kept) + 1)
    url_kept = [c['url'] for c in chunks_kept]
    new_chunk = None
    for chunk in new_chunks:
        if chunk['url'] not in url_kept:
            # There must be exactly one chunk we did not keep.
            self.assertIsNone(new_chunk)
            new_chunk = chunk
    self.assertNotEqual(orig_chunk['real_url'], new_chunk['real_url'])
    self.assertNotEqual(orig_chunk['url'], new_chunk['url'])
    self.assertEqual(orig_chunk['pos'], new_chunk['pos'])
    self.assertEqual(orig_chunk['size'], new_chunk['size'])
    self.assertEqual(orig_chunk['hash'], new_chunk['hash'])
    new_chunk_headers, new_chunk_stream = self.blob_client.chunk_get(
        new_chunk['url'])
    chunk_data = b''.join(chunk_stream)
    new_chunk_data = b''.join(new_chunk_stream)
    self.assertEqual(chunk_data, new_chunk_data)
    # The moved chunk carries a new-style fullpath xattr.
    fullpath = encode_fullpath(self.account, self.container, self.path,
                               self.version, self.content_id)
    self.assertEqual(fullpath, new_chunk_headers['full_path'])
    del new_chunk_headers['full_path']
    self.assertNotEqual(chunk_headers['chunk_id'],
                        new_chunk_headers['chunk_id'])
    new_chunk_id = new_chunk['url'].split('/')[3]
    self.assertEqual(new_chunk_id, new_chunk_headers['chunk_id'])
    del chunk_headers['chunk_id']
    del new_chunk_headers['chunk_id']
    self.assertEqual(OIO_VERSION, new_chunk_headers['oio_version'])
    del chunk_headers['oio_version']
    del new_chunk_headers['oio_version']
    # All remaining headers must be identical.
    self.assertEqual(chunk_headers, new_chunk_headers)
def test_audit_old_chunk(self):
    """The blob auditor must accept a chunk whose xattr have been
    converted to the legacy (old-style) format."""
    for old in self.chunks:
        convert_to_old_chunk(
            self._chunk_path(old), self.account, self.container,
            self.path, self.version, self.content_id)
    target = random.choice(self.chunks)
    volume = target['url'].split('/')[2]
    target_id = target['url'].split('/')[3]
    worker = BlobAuditorWorker(self.conf, None,
                               self.rawx_volumes[volume])
    # Must not raise on the old-style chunk.
    worker.chunk_audit(self._chunk_path(target), target_id)
def test_recover_missing_content_path(self):
    """Converter must recover a chunk with old-style xattr (including
    the old fullpath) after its content-path xattr was removed."""
    for old in self.chunks:
        convert_to_old_chunk(
            self._chunk_path(old), self.account, self.container,
            self.path, self.version, self.content_id,
            add_old_fullpath=True)
    victim = random.choice(self.chunks)
    # Strip the content-path xattr to simulate a partially-written chunk.
    remove_xattr(self._chunk_path(victim),
                 CHUNK_XATTR_KEYS['content_path'])
    self._test_converter_single_chunk(victim)
def test_converter_old_chunk_with_old_fullpath(self):
    """Convert a chunk downgraded to old-style xattr that still has
    its old fullpath xattr, and check the rewritten metadata."""
    for old in self.chunks:
        convert_to_old_chunk(
            self._chunk_path(old), self.account, self.container,
            self.path, self.version, self.content_id,
            add_old_fullpath=True)
    target = random.choice(self.chunks)
    parts = target['url'].split('/')
    # parts[2] is the rawx volume, parts[3] the chunk id.
    self._convert_and_check(
        parts[2], self._chunk_path(target),
        {parts[3]: (self.account, self.container, self.path,
                    self.version, self.content_id)})
def test_converter_old_chunk_with_wrong_content_id(self):
    """Convert a chunk whose old-style xattr carry a bogus content id,
    and check the converter restores the real content id."""
    for old in self.chunks:
        convert_to_old_chunk(
            self._chunk_path(old), self.account, self.container,
            self.path, self.version,
            '0123456789ABCDEF0123456789ABCDEF')
    target = random.choice(self.chunks)
    parts = target['url'].split('/')
    # parts[2] is the rawx volume, parts[3] the chunk id; expect the
    # genuine content id back, not the bogus one written above.
    self._convert_and_check(
        parts[2], self._chunk_path(target),
        {parts[3]: (self.account, self.container, self.path,
                    self.version, self.content_id)})
def test_converter_old_chunk_with_versioning(self):
    """With versioning enabled, convert chunks of two versions of the
    same object and check each conversion restores the metadata of
    its own version."""
    for c in self.chunks:
        convert_to_old_chunk(self._chunk_path(c), self.account,
                             self.container, self.path, self.version,
                             self.content_id)
    # Enable versioning (keep up to 2 versions) on the container.
    self.api.container_set_properties(
        self.account, self.container,
        system={'sys.m2.policy.version': '2'})
    # Create a second version of the same object.
    self.api.object_create(self.account, self.container,
                           obj_name=self.path, data='version')
    versioned_meta, versioned_chunks = self.api.object_locate(
        self.account, self.container, self.path)
    self.assertNotEqual(self.content_id, versioned_meta['id'])
    # Downgrade the new version's chunks as well.
    for c in versioned_chunks:
        convert_to_old_chunk(self._chunk_path(c), self.account,
                             self.container, self.path,
                             versioned_meta['version'],
                             versioned_meta['id'])
    chunk = random.choice(self.chunks)
    chunk_volume = chunk['url'].split('/')[2]
    chunk_id = chunk['url'].split('/')[3]
    chunk_path = self._chunk_path(chunk)
    versioned_chunk = random.choice(versioned_chunks)
    versioned_chunk_volume = versioned_chunk['url'].split('/')[2]
    versioned_chunk_id = versioned_chunk['url'].split('/')[3]
    versioned_chunk_path = self._chunk_path(versioned_chunk)
    # Each chunk must be converted back to its own version's metadata.
    self._convert_and_check(
        chunk_volume, chunk_path, {
            chunk_id: (self.account, self.container, self.path,
                       self.version, self.content_id)
        })
    self._convert_and_check(
        versioned_chunk_volume, versioned_chunk_path, {
            versioned_chunk_id: (self.account, self.container, self.path,
                                 versioned_meta['version'],
                                 versioned_meta['id'])
        })
def test_rebuild_old_chunk(self):
    """Remove one chunk of an object whose chunks carry old-style
    xattr, rebuild it with the BlobRebuilder, and check data and
    headers; this variant runs with predictable chunk IDs, so URL and
    chunk-id inequality cannot be asserted.

    NOTE(review): another method of the same name appears earlier in
    this file; if both live in the same class, this later definition
    shadows the earlier one — verify which is intended.
    """
    for c in self.chunks:
        convert_to_old_chunk(self._chunk_path(c), self.account,
                             self.container, self.path, self.version,
                             self.content_id)
    chunk = random.choice(self.chunks)
    chunk_volume = chunk['url'].split('/')[2]
    chunk_id = chunk['url'].split('/')[3]
    # Read the victim's data/headers before destroying it.
    chunk_headers, chunk_stream = self.blob_client.chunk_get(
        chunk['url'], check_headers=False)
    os.remove(self._chunk_path(chunk))
    chunks_kept = list(self.chunks)
    chunks_kept.remove(chunk)
    conf = self.conf.copy()
    # Allow the rebuilt copy to land on the same rawx service.
    conf['allow_same_rawx'] = True
    rebuilder = BlobRebuilder(conf, service_id=chunk_volume)
    rebuilder_worker = rebuilder.create_worker(None, None)
    rebuilder_worker._process_item(
        (self.ns, self.cid, self.content_id, chunk_id))
    _, new_chunks = self.api.object_locate(self.account, self.container,
                                           self.path)
    # Fixed: the original code assigned list(new_chunks) to 'new_chunk',
    # a dead store immediately overwritten below; the intent was to
    # materialize 'new_chunks' before taking its length.
    new_chunks = list(new_chunks)
    self.assertEqual(len(new_chunks), len(chunks_kept) + 1)
    url_kept = [c['url'] for c in chunks_kept]
    new_chunk = None
    for c in new_chunks:
        if c['url'] not in url_kept:
            # There must be exactly one chunk we did not keep.
            self.assertIsNone(new_chunk)
            new_chunk = c
    # Cannot check if the URL is different: it may be the same since we
    # generate predictible chunk IDs.
    # self.assertNotEqual(chunk['real_url'], new_chunk['real_url'])
    # self.assertNotEqual(chunk['url'], new_chunk['url'])
    self.assertEqual(chunk['pos'], new_chunk['pos'])
    self.assertEqual(chunk['size'], new_chunk['size'])
    self.assertEqual(chunk['hash'], new_chunk['hash'])
    new_chunk_headers, new_chunk_stream = self.blob_client.chunk_get(
        new_chunk['url'])
    chunk_data = b''.join(chunk_stream)
    new_chunk_data = b''.join(new_chunk_stream)
    self.assertEqual(chunk_data, new_chunk_data)
    # The rebuilt chunk carries a new-style fullpath xattr.
    fullpath = encode_fullpath(self.account, self.container, self.path,
                               self.version, self.content_id)
    self.assertEqual(fullpath, new_chunk_headers['full_path'])
    del new_chunk_headers['full_path']
    # Since we generate predictible chunk IDs, they can be equal
    # self.assertNotEqual(chunk_headers['chunk_id'],
    #                     new_chunk_headers['chunk_id'])
    # We could compare the modification time of the chunks,
    # but unfortunately they have a 1s resolution...
    # self.assertNotEqual(chunk_headers['chunk_mtime'],
    #                     new_chunk_headers['chunk_mtime'])
    new_chunk_id = new_chunk['url'].split('/')[3]
    self.assertEqual(new_chunk_id, new_chunk_headers['chunk_id'])
    del chunk_headers['chunk_id']
    del new_chunk_headers['chunk_id']
    self.assertEqual(OIO_VERSION, new_chunk_headers['oio_version'])
    del chunk_headers['oio_version']
    del new_chunk_headers['oio_version']
    del chunk_headers['chunk_mtime']
    del new_chunk_headers['chunk_mtime']
    # All remaining headers must be identical.
    self.assertEqual(chunk_headers, new_chunk_headers)
def test_read_old_chunk(self):
    """Upload a chunk, downgrade it to old-style xattr, and check the
    rawx still serves identical data and headers; then COPY (link) the
    old chunk and verify the copy's headers and xattr."""
    metachunk_hash = md5().hexdigest()
    trailers = {'x-oio-chunk-meta-metachunk-size': '1',
                'x-oio-chunk-meta-metachunk-hash': metachunk_hash}
    chunkid = random_chunk_id()
    chunkdata = random_buffer(string.printable, 1).encode('utf-8')
    chunkurl = self._rawx_url(chunkid)
    chunkpath = self._chunk_path(chunkid)
    headers = self._chunk_attr(chunkid, chunkdata)
    self._check_not_present(chunkurl)
    resp, _ = self._http_request(chunkurl, 'PUT', chunkdata, headers,
                                 trailers)
    self.assertEqual(201, resp.status)
    # Snapshot data/headers/metadata while the chunk is new-style.
    resp1, data1 = self._http_request(chunkurl, 'GET', '', {})
    self.assertEqual(200, resp1.status)
    headers1 = HeadersDict(resp1.getheaders())
    with open(chunkpath, 'r') as fd:
        meta1, _ = read_chunk_metadata(fd, chunkid)
    # Downgrade the on-disk xattr to the legacy format.
    convert_to_old_chunk(
        chunkpath, self.account, self.container, self.content_path,
        self.content_version, self.content_id)
    resp2, data2 = self._http_request(chunkurl, 'GET', '', {})
    self.assertEqual(200, resp2.status)
    headers2 = HeadersDict(resp2.getheaders())
    with open(chunkpath, 'r') as fd:
        meta2, _ = read_chunk_metadata(fd, chunkid)
    self.assertEqual(data1, data2)
    # The old-style chunk has no fullpath and a different oio_version;
    # strip those (and 'date') before comparing the rest.
    del headers1[CHUNK_HEADERS['full_path']]
    del headers1[CHUNK_HEADERS['oio_version']]
    del headers2[CHUNK_HEADERS['oio_version']]
    del headers1["date"]
    del headers2["date"]
    self.assertDictEqual(headers1, headers2)
    del meta1['full_path']
    del meta1['oio_version']
    del meta2['oio_version']
    self.assertDictEqual(meta1, meta2)
    # Copy old chunk
    copyid = random_chunk_id()
    # Keep the original id's prefix so the copy lands on the same rawx.
    copyid = chunkid[:-60] + copyid[-60:]
    copyurl = self._rawx_url(copyid)
    copypath = self._chunk_path(copyid)
    copycontentid = random_id(32)
    copyheaders = {}
    copyheaders["Destination"] = copyurl
    copyheaders['x-oio-chunk-meta-full-path'] = encode_fullpath(
        "account-snapshot", "container-snapshot",
        self.content_path + "-snapshot", 1456938361143741,
        copycontentid)
    resp, _ = self._http_request(chunkurl, 'COPY', '', copyheaders)
    self.assertEqual(201, resp.status)
    # Re-read the source: its data/headers are unchanged, but it now
    # records one link pointing at the copy.
    resp2, data2 = self._http_request(chunkurl, 'GET', '', {})
    self.assertEqual(200, resp2.status)
    headers2 = HeadersDict(resp2.getheaders())
    with open(chunkpath, 'r') as fd:
        meta2, _ = read_chunk_metadata(fd, chunkid)
    self.assertEqual(1, len(meta2['links']))
    self.assertEqual(copyheaders['x-oio-chunk-meta-full-path'],
                     meta2['links'][copyid])
    # Blank out the links so the remaining metadata can be compared.
    meta2['links'] = dict()
    self.assertEqual(data1, data2)
    del headers2[CHUNK_HEADERS['oio_version']]
    del headers2["date"]
    self.assertDictEqual(headers1, headers2)
    del meta2['oio_version']
    self.assertDictEqual(meta1, meta2)
    # Read the copy: same data, but the snapshot identity everywhere.
    resp3, data3 = self._http_request(copyurl, 'GET', '', {})
    self.assertEqual(200, resp3.status)
    headers3 = HeadersDict(resp3.getheaders())
    with open(copypath, 'r') as fd:
        meta3, _ = read_chunk_metadata(fd, copyid)
    # Check each snapshot-specific header, then delete it from both
    # dicts so the final dict comparisons only see the common fields.
    self.assertEqual(
        copyheaders['x-oio-chunk-meta-full-path'],
        headers3['x-oio-chunk-meta-full-path'])
    del headers3['x-oio-chunk-meta-full-path']
    self.assertEqual(
        cid_from_name("account-snapshot", "container-snapshot"),
        headers3['x-oio-chunk-meta-container-id'])
    del headers1['x-oio-chunk-meta-container-id']
    del headers3['x-oio-chunk-meta-container-id']
    self.assertEqual(
        self.content_path + "-snapshot",
        unquote(headers3['x-oio-chunk-meta-content-path']))
    del headers1['x-oio-chunk-meta-content-path']
    del headers3['x-oio-chunk-meta-content-path']
    self.assertEqual(
        '1456938361143741',
        headers3['x-oio-chunk-meta-content-version'])
    del headers1['x-oio-chunk-meta-content-version']
    del headers3['x-oio-chunk-meta-content-version']
    self.assertEqual(
        copycontentid, headers3['x-oio-chunk-meta-content-id'])
    del headers1['x-oio-chunk-meta-content-id']
    del headers3['x-oio-chunk-meta-content-id']
    self.assertEqual(copyid, headers3['x-oio-chunk-meta-chunk-id'])
    del headers1['x-oio-chunk-meta-chunk-id']
    del headers3['x-oio-chunk-meta-chunk-id']
    # Same per-field check-and-strip on the xattr metadata.
    self.assertEqual(
        copyheaders['x-oio-chunk-meta-full-path'], meta3['full_path'])
    del meta3['full_path']
    self.assertEqual(
        cid_from_name("account-snapshot", "container-snapshot"),
        meta3['container_id'])
    del meta1['container_id']
    del meta3['container_id']
    self.assertEqual(self.content_path + "-snapshot",
                     meta3['content_path'])
    del meta1['content_path']
    del meta3['content_path']
    self.assertEqual('1456938361143741', meta3['content_version'])
    del meta1['content_version']
    del meta3['content_version']
    self.assertEqual(copycontentid, meta3['content_id'])
    del meta1['content_id']
    del meta3['content_id']
    self.assertEqual(copyid, meta3['chunk_id'])
    del meta1['chunk_id']
    del meta3['chunk_id']
    # FIXME the old chunk is invisible
    self.assertEqual(0, len(meta3['links']))
    self.assertEqual(data1, data3)
    del headers3[CHUNK_HEADERS['oio_version']]
    del headers3["date"]
    self.assertDictEqual(headers1, headers3)
    del meta3['oio_version']
    self.assertDictEqual(meta1, meta3)