def test_sort_chunks(self):
    """Chunks are grouped by metachunk position, gaining an "offset" key
    (and a "num" subposition key for EC layouts)."""
    # Replicated layout: duplicated positions share one offset.
    plain = [chunk(h, p) for h, p in [
        ("AAAA", "0"), ("BBBB", "0"),
        ("CCCC", "1"), ("DDDD", "1"),
        ("EEEE", "2"), ("FFFF", "2"),
    ]]
    expected = {
        0: [extend(chunk("AAAA", "0"), {"offset": 0}),
            extend(chunk("BBBB", "0"), {"offset": 0})],
        1: [extend(chunk("CCCC", "1"), {"offset": 32}),
            extend(chunk("DDDD", "1"), {"offset": 32})],
        2: [extend(chunk("EEEE", "2"), {"offset": 64}),
            extend(chunk("FFFF", "2"), {"offset": 64})],
    }
    self.assertEqual(_sort_chunks(plain, False), expected)

    # EC layout: "pos.sub" positions also gain a "num" subposition field.
    ec_raw = [chunk(h, p) for h, p in [
        ("AAAA", "0.0"), ("BBBB", "0.1"), ("CCCC", "0.2"),
        ("DDDD", "1.0"), ("EEEE", "1.1"), ("FFFF", "1.2"),
    ]]
    expected = {
        0: [extend(chunk("AAAA", "0.0"), {"num": 0, "offset": 0}),
            extend(chunk("BBBB", "0.1"), {"num": 1, "offset": 0}),
            extend(chunk("CCCC", "0.2"), {"num": 2, "offset": 0})],
        1: [extend(chunk("DDDD", "1.0"), {"num": 0, "offset": 32}),
            extend(chunk("EEEE", "1.1"), {"num": 1, "offset": 32}),
            extend(chunk("FFFF", "1.2"), {"num": 2, "offset": 32})],
    }
    self.assertEqual(_sort_chunks(ec_raw, True), expected)
def test_download_with_missing_chunks(self):
    """An EC download must survive losing up to ec_nb_parity chunks per
    metachunk, and must fail once one more chunk is missing."""
    container = random_str(8)
    obj = random_str(8)
    expected_data = random_data(10)
    chunks, _, _, meta = self.storage.object_create_ext(
        self.account, container, obj_name=obj, data=expected_data,
        policy='EC')
    storage_method = STORAGE_METHODS.load(meta['chunk_method'])
    sorted_chunks = _sort_chunks(chunks, storage_method.ec)
    # Remove one chunk per metachunk, once per parity chunk:
    # the object must remain readable every time.
    for _ in range(storage_method.ec_nb_parity):
        pieces = []
        for pos in range(len(sorted_chunks)):
            victim = random.choice(sorted_chunks[pos])
            sorted_chunks[pos].remove(victim)
            resp = self._download_metachunk(meta, sorted_chunks[pos])
            self.assertEqual(200, resp.status)
            pieces.append(resp.data)
        self.assertEqual(expected_data, b''.join(pieces))
    # One more missing chunk exceeds the parity budget: read must fail.
    for pos in range(len(sorted_chunks)):
        victim = random.choice(sorted_chunks[pos])
        sorted_chunks[pos].remove(victim)
        resp = self._download_metachunk(meta, sorted_chunks[pos])
        self.assertEqual(500, resp.status)
def _check_obj_policy(self, target, obj_meta, chunks, recurse=0):
    """
    Check that the list of chunks of an object matches
    the object's storage policy.

    :returns: the list of errors encountered
    """
    stg_met = STORAGE_METHODS.load(obj_meta['chunk_method'])
    chunks_by_pos = _sort_chunks(chunks, stg_met.ec)
    # Spawn one check task per metachunk position.
    tasks = [
        (pos, self._spawn(self._check_metachunk, target.copy(),
                          stg_met, pos, pchunks, recurse=recurse))
        for pos, pchunks in iteritems(chunks_by_pos)]
    errors = []
    for pos, task in tasks:
        # A falsy task means the spawn was refused (checker shutting down).
        if not task and not self.running:
            errors.append("Pos %d skipped: checker is exiting" % pos)
            continue
        try:
            errors.extend(task.wait())
        except Exception as err:
            errors.append("Check failed: pos %d: %s" % (pos, err))
    return errors
def _process_item(self, item):
    """Locate an object and try to repair each of its metachunks,
    then touch the content to refresh its metadata."""
    namespace, account, container, obj_name, version = item
    if namespace != self.tool.namespace:
        raise ValueError('Invalid namespace (actual=%s, expected=%s)'
                         % (namespace, self.tool.namespace))
    obj_meta, chunks = self.container_client.content_locate(
        account=account, reference=container, path=obj_name,
        version=version, properties=False)
    content_id = obj_meta['id']
    stg_met = STORAGE_METHODS.load(obj_meta['chunk_method'])
    exceptions = []
    # Repair metachunk by metachunk, collecting every failure.
    for pos, pos_chunks in iteritems(_sort_chunks(chunks, stg_met.ec)):
        try:
            exceptions += self._repair_metachunk(
                item, content_id, stg_met, pos, pos_chunks)
        except Exception as exc:  # pylint: disable=broad-except
            self.logger.error('ERROR when repair metachunk %s (%d): %s',
                              self.tool.string_from_item(item), pos, exc)
            exceptions.append(exc)
    if exceptions:
        raise Exception(exceptions)
    self.container_client.content_touch(
        account=account, reference=container, path=obj_name,
        version=version)
def test_download_with_stopped_rawx(self):
    """
    An EC download must succeed while up to ec_nb_parity chunks are
    hosted by stopped rawx services, and fail as soon as one more
    service is down.
    """
    container = random_str(8)
    obj = random_str(8)
    expected_data = random_data(10)
    chunks, _, _, meta = self.storage.object_create_ext(
        self.account, container, obj_name=obj, data=expected_data,
        policy='EC')
    storage_method = STORAGE_METHODS.load(meta['chunk_method'])
    sorted_chunks = _sort_chunks(chunks, storage_method.ec)
    # Copy the per-position lists themselves: dict.copy() is shallow,
    # so removing a chunk from the "present" view would also remove it
    # from the list passed to the download. We want the download to
    # receive the FULL chunk list, stopped services included.
    sorted_present_chunks = {
        pos: list(pchunks) for pos, pchunks in sorted_chunks.items()}
    try:
        for _ in range(storage_method.ec_nb_parity):
            data = b''
            for pos in range(len(sorted_chunks)):
                if pos == 0:
                    # Stop the rawx hosting one more chunk of the first
                    # metachunk, and stop tracking it as "present".
                    chunk = random.choice(sorted_present_chunks[pos])
                    sorted_present_chunks[pos].remove(chunk)
                    gridinit_key = self.service_to_gridinit_key(
                        urlparse(chunk['url']).netloc, 'rawx')
                    self._service(gridinit_key, 'stop')
                resp = self._download_metachunk(meta, sorted_chunks[pos])
                self.assertEqual(200, resp.status)
                data += resp.data
            self.assertEqual(expected_data, data)
        # Stop one rawx beyond the parity budget: reading the first
        # metachunk (the only one whose services were stopped) must now
        # fail. The original read sorted_chunks[pos] with the stale loop
        # index; position 0 is the one being degraded.
        chunk = random.choice(sorted_present_chunks[0])
        sorted_present_chunks[0].remove(chunk)
        gridinit_key = self.service_to_gridinit_key(
            urlparse(chunk['url']).netloc, 'rawx')
        self._service(gridinit_key, 'stop')
        resp = self._download_metachunk(meta, sorted_chunks[0])
        self.assertEqual(500, resp.status)
    finally:
        # Restart every rawx so later tests start from a clean state.
        self._service('@rawx', 'start')
def create(self, stream):
    """Upload *stream* as EC-encoded chunks, then register the object."""
    sysmeta = {
        'id': self.content_id,
        'version': self.version,
        'policy': self.stgpol,
        'mime_type': self.mime_type,
        'chunk_method': self.chunk_method,
        'chunk_size': self.metadata['chunk_size'],
        'content_path': self.path,
        'container_id': self.container_id,
    }
    sorted_chunks = _sort_chunks(self.chunks.raw(), self.storage_method.ec)
    handler = ECWriteHandler(stream, sysmeta, sorted_chunks,
                             self.storage_method, headers={})
    final_chunks, bytes_transferred, content_checksum = handler.stream()
    # TODO sanity checks
    self.checksum = content_checksum
    self._create_object()
    return final_chunks, bytes_transferred, content_checksum
def create(self, stream):
    """Upload *stream* as replicated chunks, then register the object."""
    sysmeta = {
        "id": self.content_id,
        "version": self.version,
        "policy": self.stgpol,
        "mime_type": self.mime_type,
        "chunk_method": self.chunk_method,
        "chunk_size": self.metadata["chunk-size"],
        "content_path": self.path,
        "container_id": self.container_id,
    }
    storage_method = STORAGE_METHODS.load(self.chunk_method)
    sorted_chunks = _sort_chunks(self.chunks.raw(), storage_method.ec)
    # TODO deal with headers
    handler = ReplicatedWriteHandler(stream, sysmeta, sorted_chunks,
                                     storage_method, headers={})
    final_chunks, bytes_transferred, content_checksum = handler.stream()
    # TODO sanity checks
    self.checksum = content_checksum.upper()
    self._create_object()
    return final_chunks, bytes_transferred, content_checksum
def create(self, stream):
    """Upload *stream* as replicated chunks, then register the object."""
    sysmeta = {
        'id': self.content_id,
        'version': self.version,
        'policy': self.stgpol,
        'mime_type': self.mime_type,
        'chunk_method': self.chunk_method,
        'content_path': self.path,
        'container_id': self.container_id,
    }
    storage_method = STORAGE_METHODS.load(self.chunk_method)
    sorted_chunks = _sort_chunks(self.chunks.raw(), storage_method.ec)
    # TODO deal with headers
    handler = ReplicatedWriteHandler(stream, sysmeta, sorted_chunks,
                                     storage_method, headers={})
    final_chunks, bytes_transferred, content_checksum = handler.stream()
    # TODO sanity checks
    self.checksum = content_checksum.upper()
    self._create_object()
    return final_chunks, bytes_transferred, content_checksum
def get_object_head_resp(self, req):
    """
    Build the response to a HEAD request on an object.

    When ``check_state`` is enabled, also verify that enough chunks of
    each metachunk are reachable to serve the object; answer 400 if not.
    Retries on the master service when a missing object looks like an
    MPU segment (container name ending in '+segments').
    """
    storage = self.app.storage
    oio_headers = {REQID_HEADER: self.trans_id}
    version = req.environ.get('oio.query', {}).get('version')
    force_master = False
    while True:
        try:
            if self.app.check_state:
                # Chunk locations are needed to probe them below.
                metadata, chunks = storage.object_locate(
                    self.account_name, self.container_name,
                    self.object_name, version=version,
                    headers=oio_headers, force_master=force_master)
            else:
                metadata = storage.object_get_properties(
                    self.account_name, self.container_name,
                    self.object_name, version=version,
                    headers=oio_headers, force_master=force_master)
            break
        except (exceptions.NoSuchObject, exceptions.NoSuchContainer):
            if force_master \
                    or not self.container_name.endswith('+segments'):
                # Either the request failed with the master,
                # or it is not an MPU
                return HTTPNotFound(request=req)
            # This part appears in the manifest, so it should be there.
            # To be sure, we must go check the master
            # in case of desynchronization.
            force_master = True
    if self.app.check_state:
        storage_method = STORAGE_METHODS.load(metadata['chunk_method'])
        # TODO(mbo): use new property of STORAGE_METHODS
        # Minimum number of reachable chunks needed per metachunk.
        min_chunks = storage_method.ec_nb_data if storage_method.ec else 1
        chunks_by_pos = _sort_chunks(chunks, storage_method.ec)
        for idx, entries in enumerate(chunks_by_pos.iteritems()):
            # Positions must be contiguous from 0: a gap means a whole
            # metachunk is missing.
            if idx != entries[0]:
                return HTTPBadRequest(request=req)
            nb_chunks_ok = 0
            for entry in entries[1]:
                try:
                    storage.blob_client.chunk_head(entry['url'])
                    nb_chunks_ok += 1
                except exceptions.OioException:
                    # Unreachable chunk: keep probing the others.
                    pass
                if nb_chunks_ok >= min_chunks:
                    break
            else:
                # for/else: loop finished without reaching min_chunks.
                return HTTPBadRequest(request=req)
    resp = self.make_object_response(req, metadata)
    return resp
def test_sort_chunks(self):
    """_sort_chunks groups chunks by integer position, adding "num"
    (the EC subposition) when the EC flag is set."""
    def _chunk(name, pos, **extra):
        info = {"url": "http://1.2.3.4:6000/" + name,
                "pos": pos, "size": 32}
        info.update(extra)
        return info

    # Replicated layout: copies share the same "pos", no extra key.
    raw = [_chunk(n, p) for n, p in [
        ("AAAA", "0"), ("BBBB", "0"), ("CCCC", "1"),
        ("DDDD", "1"), ("EEEE", "2"), ("FFFF", "2")]]
    expected = {
        0: [_chunk("AAAA", "0"), _chunk("BBBB", "0")],
        1: [_chunk("CCCC", "1"), _chunk("DDDD", "1")],
        2: [_chunk("EEEE", "2"), _chunk("FFFF", "2")],
    }
    self.assertEqual(_sort_chunks(raw, False), expected)

    # EC layout: "pos" strings are "meta.sub"; sub becomes "num".
    raw = [_chunk(n, p) for n, p in [
        ("AAAA", "0.0"), ("BBBB", "0.1"), ("CCCC", "0.2"),
        ("DDDD", "1.0"), ("EEEE", "1.1"), ("FFFF", "1.2")]]
    expected = {
        0: [_chunk("AAAA", "0.0", num=0), _chunk("BBBB", "0.1", num=1),
            _chunk("CCCC", "0.2", num=2)],
        1: [_chunk("DDDD", "1.0", num=0), _chunk("EEEE", "1.1", num=1),
            _chunk("FFFF", "1.2", num=2)],
    }
    self.assertEqual(_sort_chunks(raw, True), expected)
def create(self, stream, **kwargs):
    """Upload *stream* as EC-encoded chunks, then register the object."""
    sysmeta = self._generate_sysmeta()
    sorted_chunks = _sort_chunks(self.chunks.raw(), self.storage_method.ec)
    handler = ECWriteHandler(stream, sysmeta, sorted_chunks,
                             self.storage_method, headers={})
    final_chunks, bytes_transferred, content_checksum = handler.stream()
    # TODO sanity checks
    self.checksum = content_checksum
    self._create_object(**kwargs)
    return final_chunks, bytes_transferred, content_checksum
def _check_obj_policy(self, target, obj_meta, chunks, recurse=False):
    """
    Check that the list of chunks of an object matches
    the object's storage policy.
    """
    stg_met = STORAGE_METHODS.load(obj_meta['chunk_method'])
    # One asynchronous check per metachunk position.
    for pos, pos_chunks in _sort_chunks(chunks, stg_met.ec).iteritems():
        self.pool.spawn_n(self._check_metachunk, target.copy(), obj_meta,
                          stg_met, pos, pos_chunks, recurse=recurse)
def _test_download(self, length):
    """Upload an EC object of *length* bytes, then read it back one
    metachunk at a time and compare with the original data."""
    container = random_str(8)
    obj = random_str(8)
    expected_data = random_data(length)
    chunks, _, _, meta = self.storage.object_create_ext(
        self.account, container, obj_name=obj, data=expected_data,
        policy='EC')
    storage_method = STORAGE_METHODS.load(meta['chunk_method'])
    sorted_chunks = _sort_chunks(chunks, storage_method.ec)
    pieces = []
    for pos in range(len(sorted_chunks)):
        resp = self._download_metachunk(meta, sorted_chunks[pos])
        self.assertEqual(200, resp.status)
        pieces.append(resp.data)
    self.assertEqual(expected_data, ''.join(pieces))
def get_object_head_resp(self, req):
    """
    Build the response to a HEAD request on an object.

    When ``check_state`` is enabled, also verify that enough chunks of
    each metachunk are reachable to serve the object; answer 400 if not.
    """
    storage = self.app.storage
    oio_headers = {REQID_HEADER: self.trans_id}
    version = req.environ.get('oio.query', {}).get('version')
    try:
        if self.app.check_state:
            # Chunk locations are needed to probe them below.
            metadata, chunks = storage.object_locate(self.account_name,
                                                     self.container_name,
                                                     self.object_name,
                                                     version=version,
                                                     headers=oio_headers)
        else:
            metadata = storage.object_get_properties(self.account_name,
                                                     self.container_name,
                                                     self.object_name,
                                                     version=version,
                                                     headers=oio_headers)
    except (exceptions.NoSuchObject, exceptions.NoSuchContainer):
        return HTTPNotFound(request=req)
    if self.app.check_state:
        storage_method = STORAGE_METHODS.load(metadata['chunk_method'])
        # TODO(mbo): use new property of STORAGE_METHODS
        # Minimum number of reachable chunks needed per metachunk.
        min_chunks = storage_method.ec_nb_data if storage_method.ec else 1
        chunks_by_pos = _sort_chunks(chunks, storage_method.ec)
        for idx, entries in enumerate(chunks_by_pos.iteritems()):
            # Positions must be contiguous from 0: a gap means a whole
            # metachunk is missing.
            if idx != entries[0]:
                return HTTPBadRequest(request=req)
            nb_chunks_ok = 0
            for entry in entries[1]:
                try:
                    storage.blob_client.chunk_head(entry['url'])
                    nb_chunks_ok += 1
                except exceptions.OioException:
                    # Unreachable chunk: keep probing the others.
                    pass
                if nb_chunks_ok >= min_chunks:
                    break
            else:
                # for/else: loop finished without reaching min_chunks.
                return HTTPBadRequest(request=req)
    resp = self.make_object_response(req, metadata)
    return resp
def create(self, stream, **kwargs):
    """Upload *stream* as replicated chunks, then register the object."""
    storage_method = STORAGE_METHODS.load(self.chunk_method)
    sysmeta = self._generate_sysmeta()
    sorted_chunks = _sort_chunks(self.chunks.raw(), storage_method.ec)
    # TODO deal with headers
    handler = ReplicatedWriteHandler(stream, sysmeta, sorted_chunks,
                                     storage_method, headers={})
    final_chunks, bytes_transferred, content_checksum = handler.stream()
    # TODO sanity checks
    self.checksum = content_checksum.upper()
    self._create_object(**kwargs)
    return final_chunks, bytes_transferred, content_checksum
def get_object_head_resp(self, req):
    """
    Build the response to a HEAD request on an object.

    When ``check_state`` is enabled, also verify that enough chunks of
    each metachunk are reachable to serve the object; answer 400 if not.
    """
    storage = self.app.storage
    oio_headers = {'X-oio-req-id': self.trans_id}
    version = req.environ.get('oio.query', {}).get('version')
    try:
        if self.app.check_state:
            # Chunk locations are needed to probe them below.
            metadata, chunks = storage.object_locate(
                self.account_name, self.container_name, self.object_name,
                version=version, headers=oio_headers)
        else:
            metadata = storage.object_get_properties(
                self.account_name, self.container_name, self.object_name,
                version=version, headers=oio_headers)
    except (exceptions.NoSuchObject, exceptions.NoSuchContainer):
        return HTTPNotFound(request=req)
    if self.app.check_state:
        storage_method = STORAGE_METHODS.load(metadata['chunk_method'])
        # TODO(mbo): use new property of STORAGE_METHODS
        # Minimum number of reachable chunks needed per metachunk.
        min_chunks = storage_method.ec_nb_data if storage_method.ec else 1
        chunks_by_pos = _sort_chunks(chunks, storage_method.ec)
        for idx, entries in enumerate(chunks_by_pos.iteritems()):
            # Positions must be contiguous from 0: a gap means a whole
            # metachunk is missing.
            if idx != entries[0]:
                return HTTPBadRequest(request=req)
            nb_chunks_ok = 0
            for entry in entries[1]:
                try:
                    storage.blob_client.chunk_head(entry['url'])
                    nb_chunks_ok += 1
                except exceptions.OioException:
                    # Unreachable chunk: keep probing the others.
                    pass
                if nb_chunks_ok >= min_chunks:
                    break
            else:
                # for/else: loop finished without reaching min_chunks.
                return HTTPBadRequest(request=req)
    resp = self.make_object_response(req, metadata)
    return resp
def check_obj_policy(self, target, obj_meta, chunks):
    """
    Check that the list of chunks of an object matches
    the object's storage policy.
    """
    stg_met = STORAGE_METHODS.load(obj_meta['chunk_method'])
    chunks_by_pos = _sort_chunks(chunks, stg_met.ec)
    # Expected number of chunks per metachunk position.
    required = (stg_met.ec_nb_data + stg_met.ec_nb_parity) \
        if stg_met.ec else stg_met.nb_copy
    for pos, clist in chunks_by_pos.iteritems():
        missing = required - len(clist)
        if missing <= 0:
            continue
        print(' Missing %d chunks at position %s of %s' % (
            missing, pos, target))
        if stg_met.ec:
            # Report each absent EC subposition individually.
            present = {x['num'] for x in clist}
            for sub in range(required):
                if sub not in present:
                    self.write_chunk_error(target, obj_meta,
                                           '%d.%d' % (pos, sub))
        else:
            self.write_chunk_error(target, obj_meta, str(pos))
def create(self, stream):
    """Upload *stream* as EC-encoded chunks, then register the object."""
    sysmeta = {
        'id': self.content_id,
        'version': self.version,
        'policy': self.stgpol,
        'mime_type': self.mime_type,
        'chunk_method': self.chunk_method,
        'chunk_size': self.metadata['chunk-size'],
        'content_path': self.path,
        'container_id': self.container_id,
    }
    sorted_chunks = _sort_chunks(self.chunks.raw(), self.storage_method.ec)
    handler = ECWriteHandler(stream, sysmeta, sorted_chunks,
                             self.storage_method, headers={})
    final_chunks, bytes_transferred, content_checksum = handler.stream()
    # TODO sanity checks
    self.checksum = content_checksum
    self._create_object()
    return final_chunks, bytes_transferred, content_checksum
def fetch(self):
    """Return a stream yielding the object's data."""
    storage_method = STORAGE_METHODS.load(self.chunk_method)
    sorted_chunks = _sort_chunks(self.chunks.raw(), storage_method.ec)
    return fetch_stream(sorted_chunks, None, storage_method)
def compute(self, conn, data=None):
    """
    Build the PAX tar header for the current entry and cache the
    size/checksum information needed to stream its data later.

    :param conn: object storage client used to fetch entry metadata
    :param data: pre-fetched data for the virtual entries (container
        properties, or the backup manifest), used instead of querying
        ``conn``
    """
    tarinfo = TarInfo()
    tarinfo.name = self.name
    # Was `tarinfo.mod = 0o700`: TarInfo has no `mod` attribute (it uses
    # __slots__); the permission bits attribute is `mode`.
    tarinfo.mode = 0o700
    tarinfo.uid = 0
    tarinfo.gid = 0
    tarinfo.type = REGTYPE
    tarinfo.linkname = ""
    if self.name == CONTAINER_PROPERTIES:
        # Virtual entry: JSON-serialized container properties.
        meta = data or conn.container_get_properties(self.acct, self.ref)
        tarinfo.size = len(json.dumps(meta['properties'], sort_keys=True))
        self._filesize = tarinfo.size
        self._buf = tarinfo.tobuf(format=PAX_FORMAT)
        return
    elif self.name == CONTAINER_MANIFEST:
        # Virtual entry: JSON-serialized backup manifest.
        tarinfo.size = len(json.dumps(data, sort_keys=True))
        self._filesize = tarinfo.size
        self._buf = tarinfo.tobuf(format=PAX_FORMAT)
        return

    entry = conn.object_get_properties(self.acct, self.ref, self.name)
    properties = entry['properties']

    # x-static-large-object
    if properties.get(SLO, False):
        tarinfo.size = int(properties.get(SLO_SIZE))
        _, slo = conn.object_fetch(
            self.acct, self.ref, self.name, properties=False)
        self._slo = json.loads("".join(slo),
                               object_pairs_hook=OrderedDict)
        self._checksums = {}
        # format MD5 to share same format as multi chunks object
        offset = 0
        for idx, ck in enumerate(self._slo):
            self._checksums[idx] = {
                'hash': ck['hash'].upper(),
                'size': ck['bytes'],
                'offset': offset
            }
            offset += ck['bytes']
    else:
        tarinfo.size = int(entry['length'])
        meta, chunks = conn.object_locate(
            self.acct, self.ref, self.name, properties=False)
        storage_method = STORAGE_METHODS.load(meta['chunk_method'])
        chunks = _sort_chunks(chunks, storage_method.ec)
        # Keep a single chunk per position, stripped of location info:
        # only hash/size are needed for checksum bookkeeping.
        for idx in chunks:
            chunks[idx] = chunks[idx][0]
            del chunks[idx]['url']
            del chunks[idx]['score']
            del chunks[idx]['pos']
        self._checksums = chunks
    self._filesize = tarinfo.size

    # XATTR
    # do we have to store basic properties like policy, ... ?
    for key, val in properties.items():
        assert isinstance(val, basestring), \
            "Invalid type for %s:%s:%s" % (self.acct, self.name, key)
        # NOTE(review): this reads `self.slo` while the parsed manifest
        # is stored in `self._slo` — presumably `slo` is a property on
        # the class; verify against the class definition.
        if self.slo and key in SLO_HEADERS:
            continue
        tarinfo.pax_headers[SCHILY + key] = val
    tarinfo.pax_headers['mime_type'] = entry['mime_type']
    self._buf = tarinfo.tobuf(format=PAX_FORMAT)
def fetch(self):
    """Return a stream yielding the object's data."""
    sorted_chunks = _sort_chunks(self.chunks.raw(), self.storage_method.ec)
    return self._fetch_stream(sorted_chunks, self.storage_method, {})
def fetch(self):
    """Return a stream yielding the object's data (EC decoding)."""
    sorted_chunks = _sort_chunks(self.chunks.raw(), self.storage_method.ec)
    return fetch_stream_ec(sorted_chunks, None, self.storage_method)
def fetch(self):
    """Return a stream yielding the object's data."""
    storage_method = STORAGE_METHODS.load(self.chunk_method)
    sorted_chunks = _sort_chunks(self.chunks.raw(), storage_method.ec)
    return self._fetch_stream(sorted_chunks, storage_method, {})
def test_sort_chunks(self):
    """_sort_chunks groups chunks by integer position; for EC layouts
    the subposition of "meta.sub" is exposed as "num"."""
    def make(name, pos, num=None):
        res = {"url": "http://1.2.3.4:6000/" + name,
               "pos": pos, "size": 32}
        if num is not None:
            res["num"] = num
        return res

    # Replicated: two copies per position, no subposition.
    raw = [make("AAAA", "0"), make("BBBB", "0"),
           make("CCCC", "1"), make("DDDD", "1"),
           make("EEEE", "2"), make("FFFF", "2")]
    expected = {
        0: [make("AAAA", "0"), make("BBBB", "0")],
        1: [make("CCCC", "1"), make("DDDD", "1")],
        2: [make("EEEE", "2"), make("FFFF", "2")],
    }
    self.assertEqual(_sort_chunks(raw, False), expected)

    # EC: "pos" strings carry a subposition after the dot.
    raw = [make("AAAA", "0.0"), make("BBBB", "0.1"), make("CCCC", "0.2"),
           make("DDDD", "1.0"), make("EEEE", "1.1"), make("FFFF", "1.2")]
    expected = {
        0: [make("AAAA", "0.0", 0), make("BBBB", "0.1", 1),
            make("CCCC", "0.2", 2)],
        1: [make("DDDD", "1.0", 0), make("EEEE", "1.1", 1),
            make("FFFF", "1.2", 2)],
    }
    self.assertEqual(_sort_chunks(raw, True), expected)