def dispatch_request(self, req):
    """Dispatch a metachunk request to the EC, backblaze or replication
    handler, depending on the HTTP method and the loaded storage method.

    :param req: the incoming request object
    :returns: the selected handler's response; a 403 response for any
        method other than PUT or GET
    """
    if req.method == 'PUT':
        source = req.input_stream
        size = req.content_length
        sysmeta = load_sysmeta(req)
        storage_method = STORAGE_METHODS.load(sysmeta['chunk_method'])
        if storage_method.ec:
            if not size:
                # FIXME: get chunk size from proxy
                size = (storage_method.ec_nb_data * 10 *
                        storage_method.ec_segment_size)
            # One chunk per EC fragment (data + parity)
            nb_chunks = (storage_method.ec_nb_data +
                         storage_method.ec_nb_parity)
            pos = req.headers[sys_headers['chunk_pos']]
            meta_chunk = load_meta_chunk(req, nb_chunks, pos)
            return self.write_ec_meta_chunk(source, size, storage_method,
                                            sysmeta, meta_chunk)
        elif storage_method.backblaze:
            nb_chunks = int(sysmeta['content_chunksnb'])
            meta_chunk = load_meta_chunk(req, nb_chunks)
            return self.write_backblaze_meta_chunk(source, size,
                                                   storage_method, sysmeta,
                                                   meta_chunk)
        else:
            # FIXME: check and fix size
            nb_chunks = int(sysmeta['content_chunksnb'])
            meta_chunk = load_meta_chunk(req, nb_chunks)
            return self.write_repli_meta_chunk(source, size, storage_method,
                                               sysmeta, meta_chunk)
    elif req.method == 'GET':
        chunk_method = req.headers[sys_headers['content_chunkmethod']]
        storage_method = STORAGE_METHODS.load(chunk_method)
        if storage_method.ec:
            nb_chunks = storage_method.ec_nb_data + \
                storage_method.ec_nb_parity
            meta_chunk = load_meta_chunk(req, nb_chunks)
            # The metachunk size is only available from a request header
            meta_chunk[0]['size'] = \
                int(req.headers[sys_headers['chunk_size']])
            return self.read_ec_meta_chunk(storage_method, meta_chunk)
        elif storage_method.backblaze:
            # Backblaze content is reached through a single entry point
            meta_chunk = load_meta_chunk(req, 1)
            return self.read_backblaze_meta_chunk(req, storage_method,
                                                  meta_chunk)
        else:
            nb_chunks = int(req.headers[sys_headers['content_chunksnb']])
            meta_chunk = load_meta_chunk(req, nb_chunks)
            return self.read_meta_chunk(storage_method, meta_chunk)
    else:
        # NOTE(review): 405 Method Not Allowed may be more accurate here,
        # but callers may depend on 403 -- left unchanged.
        return Response(status=403)
def dispatch_request(self, req):
    """Route the request to the write (PUT) or read (GET) code path.

    The storage method (EC / backblaze / plain replication) selects which
    concrete handler processes the metachunk. Unsupported HTTP methods
    get a 403 response.
    """
    if req.method == 'PUT':
        source = req.input_stream
        size = req.content_length
        sysmeta = load_sysmeta(req)
        storage_method = STORAGE_METHODS.load(sysmeta['chunk_method'])
        if storage_method.ec:
            if not size:
                # FIXME: get chunk size from proxy
                size = (storage_method.ec_nb_data * 10 *
                        storage_method.ec_segment_size)
            # data + parity fragments
            nb_chunks = (storage_method.ec_nb_data +
                         storage_method.ec_nb_parity)
            pos = req.headers[sys_headers['chunk_pos']]
            meta_chunk = load_meta_chunk(req, nb_chunks, pos)
            return self.write_ec_meta_chunk(source, size, storage_method,
                                            sysmeta, meta_chunk)
        elif storage_method.backblaze:
            nb_chunks = int(sysmeta['content_chunksnb'])
            meta_chunk = load_meta_chunk(req, nb_chunks)
            return self.write_backblaze_meta_chunk(source, size,
                                                   storage_method, sysmeta,
                                                   meta_chunk)
        else:
            # FIXME: check and fix size
            nb_chunks = int(sysmeta['content_chunksnb'])
            meta_chunk = load_meta_chunk(req, nb_chunks)
            return self.write_repli_meta_chunk(source, size, storage_method,
                                               sysmeta, meta_chunk)
    elif req.method == 'GET':
        chunk_method = req.headers[sys_headers['content_chunkmethod']]
        storage_method = STORAGE_METHODS.load(chunk_method)
        if storage_method.ec:
            nb_chunks = storage_method.ec_nb_data + \
                storage_method.ec_nb_parity
            meta_chunk = load_meta_chunk(req, nb_chunks)
            # Size is carried by a header, not by the chunk description
            meta_chunk[0]['size'] = \
                int(req.headers[sys_headers['chunk_size']])
            return self.read_ec_meta_chunk(storage_method, meta_chunk)
        elif storage_method.backblaze:
            meta_chunk = load_meta_chunk(req, 1)
            return self.read_backblaze_meta_chunk(req, storage_method,
                                                  meta_chunk)
        else:
            nb_chunks = int(req.headers[sys_headers['content_chunksnb']])
            meta_chunk = load_meta_chunk(req, nb_chunks)
            return self.read_meta_chunk(storage_method, meta_chunk)
    else:
        # Only PUT and GET are supported by this dispatcher
        return Response(status=403)
def setUp(self):
    """Prepare fixtures for a 3-copy plain-replication storage method."""
    self.chunk_method = 'plain/nb_copy=3'
    self.storage_method = STORAGE_METHODS.load(self.chunk_method)
    self.cid = \
        '3E32B63E6039FD3104F63BFAE034FADAA823371DD64599A8779BA02B3439A268'
    self.sysmeta = {
        'id': '705229BB7F330500A65C3A49A3116B83',
        'version': '1463998577463950',
        'chunk_method': self.chunk_method,
        'container_id': self.cid,
        'policy': 'REPLI3',
        'content_path': 'test',
        'full_path': ['account/container/test'],
        'oio_version': OIO_VERSION,
    }
    # Three replicas of the same metachunk, one per fake rawx service,
    # all at position 0.
    self._meta_chunk = [
        {'url': 'http://127.0.0.1:%d/%d' % (7000 + copy, copy), 'pos': '0'}
        for copy in range(3)
    ]
def _get(self, container_id, meta, chunks, account=None,
         container_name=None, **kwargs):
    """Build the Content subclass matching the object's chunk method.

    Missing account/container names are resolved from the container's
    system properties.
    """
    storage_method = STORAGE_METHODS.load(meta['chunk_method'])
    if not (account and container_name):
        sys_props = self.container_client.container_get_properties(
            cid=container_id, **kwargs)['system']
        account = account or sys_props['sys.account']
        container_name = container_name or sys_props['sys.user.name']
    if storage_method.ec:
        content_class = ECContent
    else:
        content_class = PlainContent
    return content_class(self.conf, container_id, meta, chunks,
                         storage_method, account, container_name,
                         container_client=self.container_client,
                         blob_client=self.blob_client, logger=self.logger)
def _content_preparer(self, account, container, obj_name,
                      policy=None, **kwargs):
    """Return the object's metadata and a metachunk-preparing generator.

    The generator yields one prepared chunk list per metachunk position,
    asking meta2 for a new placement on each iteration.
    """
    # TODO: optimize by asking more than one metachunk at a time
    obj_meta, first_body = self.container.content_prepare(
        account, container, obj_name, size=1, stgpol=policy,
        autocreate=True, **kwargs)
    storage_method = STORAGE_METHODS.load(obj_meta['chunk_method'])

    def _fix_mc_pos(chunks, mc_pos):
        # Rewrite chunk positions so they carry the metachunk index:
        # "pos.num" for EC, plain "pos" otherwise.
        for chunk in chunks:
            raw_pos = chunk["pos"].split(".")
            if storage_method.ec:
                chunk['num'] = int(raw_pos[1])
                chunk["pos"] = "%d.%d" % (mc_pos, chunk['num'])
            else:
                chunk["pos"] = str(mc_pos)

    def _metachunk_preparer():
        # 'meta_pos' allows resuming an upload at a given metachunk
        mc_pos = kwargs.get('meta_pos', 0)
        _fix_mc_pos(first_body, mc_pos)
        yield first_body
        while True:
            mc_pos += 1
            _, next_body = self.container.content_prepare(
                account, container, obj_name, 1, stgpol=policy,
                autocreate=True, **kwargs)
            _fix_mc_pos(next_body, mc_pos)
            yield next_body

    # Return the generator function itself, not a started generator
    return obj_meta, _metachunk_preparer
def _load_handler(self, chunk_method):
    """Return the (handler, storage_method) pair for *chunk_method*.

    :raises OioException: when no handler is registered for the
        storage method's type.
    """
    method = STORAGE_METHODS.load(chunk_method)
    matching_handler = self.handlers.get(method.type)
    if not matching_handler:
        raise OioException("No handler found for chunk method [%s]"
                           % chunk_method)
    return matching_handler, method
def copy(self, origin, policy=None):
    """Create a new Content by re-preparing *origin* under *policy*.

    The data is rewritten, so the freshly prepared chunk layout and a
    new content id replace the original ones.
    """
    policy = policy or origin.policy
    metadata = origin.metadata.copy()
    new_metadata, chunks = self.container_client.content_prepare(
        cid=origin.container_id, path=metadata['name'],
        size=metadata['length'], stgpol=policy)
    # Take the prepared layout and identity over from the new metadata
    for key in ('chunk_method', 'chunk_size', 'id', 'policy'):
        metadata[key] = new_metadata[key]
    # We may want to keep the same version, but it is denied by meta2
    metadata['version'] = int(metadata['version']) + 1
    # FIXME: meta2 does not allow us to set ctime
    # and thus the object will appear as new.
    storage_method = STORAGE_METHODS.load(metadata['chunk_method'])
    content_class = ECContent if storage_method.ec else PlainContent
    return content_class(self.conf, origin.container_id, metadata, chunks,
                         storage_method, origin.account,
                         origin.container_name)
def object_fetch(self, account, container, obj, version=None, ranges=None,
                 key_file=None, **kwargs):
    """Locate *obj* and return (metadata, data stream).

    The stream implementation is selected from the object's storage
    method (EC, backblaze, or plain replication).
    """
    meta, raw_chunks = self.object_locate(
        account, container, obj, version=version, **kwargs)
    storage_method = STORAGE_METHODS.load(meta['chunk_method'])
    chunks = _sort_chunks(raw_chunks, storage_method.ec)
    meta['container_id'] = name2cid(account, container).upper()
    meta['ns'] = self.namespace
    self._patch_timeouts(kwargs)
    if storage_method.ec:
        stream = fetch_stream_ec(chunks, ranges, storage_method, **kwargs)
    elif storage_method.backblaze:
        stream = self._fetch_stream_backblaze(meta, chunks, ranges,
                                              storage_method, key_file,
                                              **kwargs)
    else:
        stream = fetch_stream(chunks, ranges, storage_method, **kwargs)
    return meta, stream
def _on_metachunk_PUT(self, req):
    """Write a metachunk, choosing the handler from the storage method.

    :param req: the incoming PUT request
    :returns: the response of the selected write handler
    """
    source = req.input_stream
    size = req.content_length
    sysmeta = load_sysmeta(req)
    storage_method = STORAGE_METHODS.load(sysmeta['chunk_method'])
    if storage_method.ec:
        # One chunk per EC fragment (data + parity)
        nb_chunks = (storage_method.ec_nb_data +
                     storage_method.ec_nb_parity)
        pos = safe_get_header(req, 'chunk_pos')
        meta_chunk = load_meta_chunk(req, nb_chunks, pos)
        return self.write_ec_meta_chunk(source, size, storage_method,
                                        sysmeta, meta_chunk)
    elif storage_method.backblaze:
        nb_chunks = int(sysmeta['content_chunksnb'])
        meta_chunk = load_meta_chunk(req, nb_chunks)
        return self.write_backblaze_meta_chunk(source, size,
                                               storage_method, sysmeta,
                                               meta_chunk)
    else:
        # FIXME: check and fix size
        nb_chunks = int(sysmeta['content_chunksnb'])
        meta_chunk = load_meta_chunk(req, nb_chunks)
        return self.write_repli_meta_chunk(source, size, storage_method,
                                           sysmeta, meta_chunk)
def create(self, stream):
    """Upload *stream* as replicated chunks and register the object.

    :returns: (final_chunks, bytes_transferred, content_checksum)
    """
    storage_method = STORAGE_METHODS.load(self.chunk_method)
    chunks = _sort_chunks(self.chunks.raw(), storage_method.ec)
    sysmeta = {
        "id": self.content_id,
        "version": self.version,
        "policy": self.stgpol,
        "mime_type": self.mime_type,
        "chunk_method": self.chunk_method,
        "chunk_size": self.metadata["chunk-size"],
        "content_path": self.path,
        "container_id": self.container_id,
    }
    # TODO deal with headers
    handler = ReplicatedWriteHandler(stream, sysmeta, chunks,
                                     storage_method, headers={})
    final_chunks, bytes_transferred, content_checksum = handler.stream()
    # TODO sanity checks
    self.checksum = content_checksum.upper()
    self._create_object()
    return final_chunks, bytes_transferred, content_checksum
def _load_handler(self, chunk_method):
    """Look up the content handler registered for *chunk_method*.

    :returns: a (handler, storage_method) tuple
    :raises OioException: if the storage method type has no handler
    """
    storage_method = STORAGE_METHODS.load(chunk_method)
    handler = self.handlers.get(storage_method.type)
    if handler:
        return handler, storage_method
    raise OioException("No handler found for chunk method [%s]"
                       % chunk_method)
def fetch(self):
    """Return a stream reading this content's data from its chunks."""
    method = STORAGE_METHODS.load(self.chunk_method)
    sorted_chunks = _sort_chunks(self.chunks.raw(), method.ec,
                                 logger=self.logger)
    return fetch_stream(sorted_chunks, None, method)
def object_fetch(self, account, container, obj, ranges=None, headers=None,
                 key_file=None):
    """Locate *obj* and return (metadata, data stream).

    A request id header is generated when the caller did not supply one,
    so the whole fetch can be traced end to end.
    """
    if not headers:
        headers = dict()
    if 'X-oio-req-id' not in headers:
        headers['X-oio-req-id'] = utils.request_id()
    meta, raw_chunks = self.object_locate(account, container, obj,
                                          headers=headers)
    chunk_method = meta['chunk_method']
    storage_method = STORAGE_METHODS.load(chunk_method)
    chunks = _sort_chunks(raw_chunks, storage_method.ec)
    meta['container_id'] = utils.name2cid(account, container).upper()
    meta['ns'] = self.namespace
    # Select the stream implementation matching the storage method
    if storage_method.ec:
        stream = self._fetch_stream_ec(meta, chunks, ranges,
                                       storage_method, headers)
    elif storage_method.backblaze:
        stream = self._fetch_stream_backblaze(meta, chunks, ranges,
                                              storage_method, key_file)
    else:
        stream = self._fetch_stream(meta, chunks, ranges, storage_method,
                                    headers)
    return meta, stream
def _on_metachunk_GET(self, req):
    """Read a metachunk, honouring an optional single Range header.

    :param req: the incoming GET request
    :returns: the response of the selected read handler
    """
    chunk_method = safe_get_header(req, 'content_chunkmethod')
    storage_method = STORAGE_METHODS.load(chunk_method)
    if req.range and req.range.ranges:
        # Werkzeug give us non-inclusive ranges, but we use inclusive
        start = req.range.ranges[0][0]
        if req.range.ranges[0][1] is not None:
            end = req.range.ranges[0][1] - 1
        else:
            # Open-ended range ("bytes=N-")
            end = None
        my_range = (start, end)
    else:
        my_range = (None, None)
    if storage_method.ec:
        nb_chunks = storage_method.ec_nb_data + \
            storage_method.ec_nb_parity
        meta_chunk = load_meta_chunk(req, nb_chunks)
        # The metachunk size is only carried by a request header
        meta_chunk[0]['size'] = \
            int(safe_get_header(req, 'chunk_size'))
        return self.read_ec_meta_chunk(storage_method, meta_chunk,
                                       my_range[0], my_range[1])
    elif storage_method.backblaze:
        meta_chunk = load_meta_chunk(req, 1)
        return self.read_backblaze_meta_chunk(req, storage_method,
                                              meta_chunk, my_range[0],
                                              my_range[1])
    else:
        nb_chunks = int(safe_get_header(req, 'content_chunksnb'))
        meta_chunk = load_meta_chunk(req, nb_chunks)
        headers = dict()
        if req.range and req.range.ranges:
            # Plain replication forwards the raw Range header downstream
            headers['Range'] = req.range.to_header()
        return self.read_meta_chunk(storage_method, meta_chunk, headers)
def test_object_fetch_perfdata(self):
    """Check that object_fetch fills the caller-supplied perfdata dict
    with proxy and rawx timing entries."""
    perfdata = dict()
    container = random_str(8)
    obj = random_str(8)
    self.api.object_create(self.account, container, obj_name=obj, data=obj)
    meta, chunks = self.api.object_locate(self.account, container, obj)
    stg_method = STORAGE_METHODS.load(meta['chunk_method'])
    _, stream = self.api.object_fetch(self.account, container, obj,
                                      perfdata=perfdata)
    self.assertIn('proxy', perfdata)
    self.assertIn('resolve', perfdata['proxy'])
    self.assertIn('meta2', perfdata['proxy'])
    self.assertIn('overall', perfdata['proxy'])
    # rawx timings only appear after the stream has been consumed
    self.assertNotIn('ttfb', perfdata)
    self.assertNotIn('ttlb', perfdata)
    buf = b''.join(stream)
    # NOTE(review): `obj` is a str and `buf` is bytes -- this comparison
    # presumably only holds on Python 2; verify before porting.
    self.assertEqual(obj, buf)
    self.assertIn('rawx', perfdata)
    if stg_method.ec:
        self.assertIn('ec', perfdata['rawx'])
    # At least min_chunks_to_read chunks must have been contacted
    nb_chunks_to_read = 0
    for chunk in chunks:
        if chunk['url'] in perfdata['rawx']:
            nb_chunks_to_read += 1
    self.assertLessEqual(stg_method.min_chunks_to_read,
                         nb_chunks_to_read)
    self.assertIn('overall', perfdata['rawx'])
    self.assertIn('ttfb', perfdata)
    self.assertIn('ttlb', perfdata)
    self.api.object_delete(self.account, container, obj)
def _process_item(self, item):
    """Locate the object described by *item* and repair each metachunk.

    :param item: a (namespace, account, container, obj_name, version)
        tuple identifying the object to repair
    :raises ValueError: when the item's namespace is not ours
    :raises Exception: the aggregated list of per-metachunk errors
    """
    namespace, account, container, obj_name, version = item
    if namespace != self.tool.namespace:
        raise ValueError('Invalid namespace (actual=%s, expected=%s)'
                         % (namespace, self.tool.namespace))

    obj_meta, chunks = self.container_client.content_locate(
        account=account, reference=container, path=obj_name,
        version=version, properties=False)
    content_id = obj_meta['id']

    exceptions = list()
    stg_met = STORAGE_METHODS.load(obj_meta['chunk_method'])
    chunks_by_pos = _sort_chunks(chunks, stg_met.ec)
    # Loop variable renamed: it previously shadowed the full `chunks`
    # list returned by content_locate.
    for pos, pos_chunks in iteritems(chunks_by_pos):
        try:
            exceptions += self._repair_metachunk(
                item, content_id, stg_met, pos, pos_chunks)
        except Exception as exc:  # pylint: disable=broad-except
            self.logger.error('ERROR when repair metachunk %s (%d): %s',
                              self.tool.string_from_item(item), pos, exc)
            exceptions.append(exc)

    if exceptions:
        raise Exception(exceptions)

    # Refresh the object's mtime so listeners see the repair
    self.container_client.content_touch(account=account,
                                        reference=container,
                                        path=obj_name, version=version)
def test_download_with_missing_chunks(self):
    """Check an EC download survives losing up to ec_nb_parity chunks per
    metachunk, and fails (HTTP 500) when one more chunk is missing."""
    container = random_str(8)
    obj = random_str(8)
    expected_data = random_data(10)
    chunks, _, _, meta = self.storage.object_create_ext(
        self.account, container, obj_name=obj, data=expected_data,
        policy='EC')
    storage_method = STORAGE_METHODS.load(meta['chunk_method'])
    sorted_chunks = _sort_chunks(chunks, storage_method.ec)
    # Remove one random chunk per metachunk, up to the parity count:
    # each download must still succeed and return the full data.
    for i in range(storage_method.ec_nb_parity):
        data = b''
        for pos in range(len(sorted_chunks)):
            chunk = random.choice(sorted_chunks[pos])
            sorted_chunks[pos].remove(chunk)
            resp = self._download_metachunk(meta, sorted_chunks[pos])
            self.assertEqual(200, resp.status)
            data += resp.data
        self.assertEqual(expected_data, data)
    # One more missing chunk than parity: download must fail
    for pos in range(len(sorted_chunks)):
        chunk = random.choice(sorted_chunks[pos])
        sorted_chunks[pos].remove(chunk)
        resp = self._download_metachunk(meta, sorted_chunks[pos])
        self.assertEqual(500, resp.status)
def _content_preparer(self, account, container, obj_name,
                      policy=None, headers=None):
    """Return the prepare response headers and a metachunk-preparing
    generator that asks meta2 for a new placement per metachunk."""
    # TODO: optimize by asking more than one metachunk at a time
    resp_headers, first_body = self._content_prepare(account, container,
                                                     obj_name, 1, policy,
                                                     headers)
    storage_method = STORAGE_METHODS.load(
        resp_headers[object_headers["chunk_method"]])

    def _fix_mc_pos(chunks, mc_pos):
        # Rewrite chunk positions with the metachunk index:
        # "pos.num" for EC, plain "pos" otherwise.
        for chunk in chunks:
            raw_pos = chunk["pos"].split(".")
            if storage_method.ec:
                chunk["num"] = int(raw_pos[1])
                chunk["pos"] = "%d.%d" % (mc_pos, chunk["num"])
            else:
                chunk["pos"] = str(mc_pos)

    def _metachunk_preparer():
        mc_pos = 0
        _fix_mc_pos(first_body, mc_pos)
        yield first_body
        while True:
            mc_pos += 1
            _, next_body = self._content_prepare(account, container,
                                                 obj_name, 1, policy,
                                                 headers)
            _fix_mc_pos(next_body, mc_pos)
            yield next_body

    # Return the generator function itself, not a started generator
    return resp_headers, _metachunk_preparer
def test_download_with_stopped_rawx(self):
    """Check an EC download survives up to ec_nb_parity stopped rawx
    services, and fails (HTTP 500) when one more is stopped."""
    container = random_str(8)
    obj = random_str(8)
    expected_data = random_data(10)
    chunks, _, _, meta = self.storage.object_create_ext(
        self.account, container, obj_name=obj, data=expected_data,
        policy='EC')
    storage_method = STORAGE_METHODS.load(meta['chunk_method'])
    sorted_chunks = _sort_chunks(chunks, storage_method.ec)
    # Copy each per-position list: dict.copy() is shallow, so removing
    # entries from the copy would also have mutated sorted_chunks.
    sorted_present_chunks = dict(
        (pos, list(pchunks)) for pos, pchunks in sorted_chunks.items())
    try:
        for i in range(storage_method.ec_nb_parity):
            # resp.data is bytes, so accumulate into a bytes buffer
            data = b''
            for pos in range(len(sorted_chunks)):
                if pos == 0:
                    # Stop the rawx hosting one still-running chunk
                    chunk = random.choice(sorted_present_chunks[pos])
                    sorted_present_chunks[pos].remove(chunk)
                    gridinit_key = self.service_to_gridinit_key(
                        urlparse(chunk['url']).netloc, 'rawx')
                    self._service(gridinit_key, 'stop')
                resp = self._download_metachunk(meta, sorted_chunks[pos])
                self.assertEqual(200, resp.status)
                data += resp.data
            self.assertEqual(expected_data, data)
        # One more stopped rawx than parity chunks: download must fail
        chunk = random.choice(sorted_present_chunks[0])
        sorted_present_chunks[0].remove(chunk)
        gridinit_key = self.service_to_gridinit_key(
            urlparse(chunk['url']).netloc, 'rawx')
        self._service(gridinit_key, 'stop')
        resp = self._download_metachunk(meta, sorted_chunks[pos])
        self.assertEqual(500, resp.status)
    finally:
        # Restart every rawx so other tests are not affected
        self._service('@rawx', 'start')
def get(self, container_id, content_id, account=None,
        container_name=None):
    """Locate an existing content and wrap it in a Content instance.

    :raises ContentNotFound: when meta2 does not know the content
    """
    try:
        meta, chunks = self.container_client.content_locate(
            cid=container_id, content=content_id)
    except NotFound:
        raise ContentNotFound("Content %s/%s not found"
                              % (container_id, content_id))
    storage_method = STORAGE_METHODS.load(meta['chunk_method'])
    # Resolve missing account/container names from container properties
    if not (account and container_name):
        sys_props = self.container_client.container_get_properties(
            cid=container_id)['system']
        account = account or sys_props['sys.account']
        container_name = container_name or sys_props['sys.user.name']
    content_class = ECContent if storage_method.ec else PlainContent
    return content_class(self.conf, container_id, meta, chunks,
                         storage_method, account, container_name,
                         container_client=self.container_client)
def create(self, stream):
    """Write *stream* through the replication handler and register
    the resulting object.

    :returns: (final_chunks, bytes_transferred, content_checksum)
    """
    storage_method = STORAGE_METHODS.load(self.chunk_method)
    chunks = _sort_chunks(self.chunks.raw(), storage_method.ec)
    sysmeta = {
        'id': self.content_id,
        'version': self.version,
        'policy': self.stgpol,
        'mime_type': self.mime_type,
        'chunk_method': self.chunk_method,
        'content_path': self.path,
        'container_id': self.container_id,
    }
    # TODO deal with headers
    headers = {}
    handler = ReplicatedWriteHandler(stream, sysmeta, chunks,
                                     storage_method, headers=headers)
    final_chunks, bytes_transferred, content_checksum = handler.stream()
    # TODO sanity checks
    self.checksum = content_checksum.upper()
    self._create_object()
    return final_chunks, bytes_transferred, content_checksum
def new(self, container_id, path, size, policy, account=None,
        container_name=None, **kwargs):
    """Prepare a new content and return the matching Content instance."""
    meta, chunks = self.container_client.content_prepare(
        cid=container_id, path=path, size=size, stgpol=policy, **kwargs)
    storage_method = STORAGE_METHODS.load(meta['chunk_method'])
    # Resolve missing account/container names from container properties
    if not (account and container_name):
        sys_props = self.container_client.container_get_properties(
            cid=container_id)['system']
        account = account or sys_props['sys.account']
        container_name = container_name or sys_props['sys.user.name']
    content_class = ECContent if storage_method.ec else PlainContent
    return content_class(self.conf, container_id, meta, chunks,
                         storage_method, account, container_name)
def _check_obj_policy(self, target, obj_meta, chunks, recurse=0):
    """
    Check that the list of chunks of an object matches
    the object's storage policy.

    :returns: the list of errors encountered
    """
    stg_met = STORAGE_METHODS.load(obj_meta['chunk_method'])
    chunks_by_pos = _sort_chunks(chunks, stg_met.ec)
    tasks = list()
    # One check task per metachunk position
    for pos, pchunks in iteritems(chunks_by_pos):
        tasks.append((pos, self._spawn(self._check_metachunk,
                                       target.copy(), stg_met, pos,
                                       pchunks, recurse=recurse)))
    errors = list()
    for pos, task in tasks:
        # _spawn may have returned a falsy task if we are shutting down
        if not task and not self.running:
            errors.append("Pos %d skipped: checker is exiting" % pos)
            continue
        try:
            errors.extend(task.wait())
        except Exception as err:
            errors.append("Check failed: pos %d: %s" % (pos, err))
    return errors
def __init__(self, container_client, account, container, obj_name,
             policy=None, **kwargs):
    """Prepare chunk placement for one object, one metachunk at a time.

    :param container_client: client used to talk to meta2
    :param policy: storage policy to prepare for (None = default)
    """
    self.account = account
    self.container = container
    self.obj_name = obj_name
    self.policy = policy
    self.container_client = container_client
    # Extra kwargs are replayed on each subsequent prepare call
    self.extra_kwargs = kwargs
    # TODO: optimize by asking more than one metachunk at a time
    self.obj_meta, self.first_body = self.container_client.content_prepare(
        account, container, obj_name, size=1, stgpol=policy,
        autocreate=True, **kwargs)
    self.stg_method = STORAGE_METHODS.load(self.obj_meta['chunk_method'])
    # Accumulates every chunk handed out by this preparer
    self._all_chunks = list()
def get_object_head_resp(self, req):
    """Build the HEAD response for an object, optionally verifying that
    enough chunks of each metachunk are reachable.

    Falls back to the master meta2 when a '+segments' container lookup
    fails, since MPU manifests may reference not-yet-synchronized parts.
    """
    storage = self.app.storage
    oio_headers = {REQID_HEADER: self.trans_id}
    version = req.environ.get('oio.query', {}).get('version')
    force_master = False
    while True:
        try:
            if self.app.check_state:
                metadata, chunks = storage.object_locate(
                    self.account_name, self.container_name,
                    self.object_name, version=version,
                    headers=oio_headers, force_master=force_master)
            else:
                metadata = storage.object_get_properties(
                    self.account_name, self.container_name,
                    self.object_name, version=version,
                    headers=oio_headers, force_master=force_master)
            break
        except (exceptions.NoSuchObject, exceptions.NoSuchContainer):
            if force_master \
                    or not self.container_name.endswith('+segments'):
                # Either the request failed with the master,
                # or it is not an MPU
                return HTTPNotFound(request=req)

            # This part appears in the manifest, so it should be there.
            # To be sure, we must go check the master
            # in case of desynchronization.
            force_master = True

    if self.app.check_state:
        storage_method = STORAGE_METHODS.load(metadata['chunk_method'])
        # TODO(mbo): use new property of STORAGE_METHODS
        min_chunks = storage_method.ec_nb_data if storage_method.ec else 1
        chunks_by_pos = _sort_chunks(chunks, storage_method.ec)
        # items() instead of the Python-2-only iteritems()
        for idx, entries in enumerate(chunks_by_pos.items()):
            # Positions must be contiguous, starting at 0
            if idx != entries[0]:
                return HTTPBadRequest(request=req)
            nb_chunks_ok = 0
            for entry in entries[1]:
                try:
                    storage.blob_client.chunk_head(entry['url'])
                    nb_chunks_ok += 1
                except exceptions.OioException:
                    pass
                # Stop probing once enough chunks are confirmed readable
                if nb_chunks_ok >= min_chunks:
                    break
            else:
                return HTTPBadRequest(request=req)

    resp = self.make_object_response(req, metadata)
    return resp
def process(self, env, cb):
    """Handle a CONTENT_DELETED event by deleting the chunks it lists.

    Backblaze chunks are deleted through the backblaze API; every other
    chunk gets a parallel HTTP DELETE.
    """
    event = Event(env)
    if event.event_type == EventTypes.CONTENT_DELETED:
        pile = GreenPile(PARALLEL_CHUNKS_DELETE)
        url = event.env.get('url')
        chunks = []
        content_headers = None
        for item in event.data:
            if item.get('type') == 'chunks':
                chunks.append(item)
            if item.get("type") == 'contents_headers':
                content_headers = item
        if chunks:
            def delete_chunk(chunk):
                resp = None
                p = urlparse(chunk['id'])
                try:
                    with Timeout(CHUNK_TIMEOUT):
                        conn = http_connect(p.netloc, 'DELETE', p.path)
                        resp = conn.getresponse()
                        resp.chunk = chunk
                except (Exception, Timeout) as e:
                    # str(e) instead of e.message: BaseException.message
                    # does not exist on Python 3
                    self.logger.warn('error while deleting chunk %s "%s"',
                                     chunk['id'], str(e))
                return resp

            def delete_chunk_backblaze(chunks, url, storage_method):
                meta = {'container_id': url['id']}
                chunk_list = []
                for chunk in chunks:
                    chunk['url'] = chunk['id']
                    chunk_list.append(chunk)
                key_file = self.conf.get('key_file')
                backblaze_info = BackblazeUtils.get_credentials(
                    storage_method, key_file)
                try:
                    BackblazeDeleteHandler(meta, chunk_list,
                                           backblaze_info).delete()
                except OioException as e:
                    self.logger.warn('delete failed: %s' % str(e))

            # Guard against events missing the contents_headers item:
            # fall back to plain HTTP deletion in that case.
            chunk_method = (content_headers['chunk-method']
                            if content_headers else '')
            # don't load storage method other than backblaze
            if chunk_method.startswith('backblaze'):
                storage_method = STORAGE_METHODS.load(chunk_method)
                delete_chunk_backblaze(chunks, url, storage_method)
                return self.app(env, cb)

            for chunk in chunks:
                pile.spawn(delete_chunk, chunk)
            resps = [resp for resp in pile if resp]
            for resp in resps:
                if resp.status != 204:
                    self.logger.warn('failed to delete chunk %s (HTTP %s)',
                                     resp.chunk['id'], resp.status)
    return self.app(env, cb)
def setUp(self):
    """Prepare fixtures for a liberasurecode 6+2 EC storage method."""
    self.chunk_method = 'ec/algo=liberasurecode_rs_vand,k=6,m=2'
    self.storage_method = STORAGE_METHODS.load(self.chunk_method)
    self.cid = \
        '3E32B63E6039FD3104F63BFAE034FADAA823371DD64599A8779BA02B3439A268'
    self.sysmeta = {
        'id': '705229BB7F330500A65C3A49A3116B83',
        'version': '1463998577463950',
        'chunk_method': self.chunk_method,
        'container_id': self.cid,
        'policy': 'EC',
        'content_path': 'test',
        'full_path': ['account/container/test'],
        'oio_version': OIO_VERSION,
    }
    # k+m = 8 fragments of metachunk 0, one per fake rawx service
    self._meta_chunk = [
        {'url': 'http://127.0.0.1:%d/%d' % (7000 + num, num),
         'pos': '0.%d' % num,
         'num': num}
        for num in range(8)
    ]
def object_truncate(self, account, container, obj, version=None, size=None, **kwargs): """ Truncate object at specified size. Only shrink is supported. A download may occur if size is not on chunk boundaries. :param account: name of the account in which the object is stored :param container: name of the container in which the object is stored :param obj: name of the object to query :param version: version of the object to query :param size: new size of object """ # code copied from object_fetch (should be factorized !) meta, raw_chunks = self.object_locate(account, container, obj, version=version, **kwargs) chunk_method = meta['chunk_method'] storage_method = STORAGE_METHODS.load(chunk_method) chunks = _sort_chunks(raw_chunks, storage_method.ec) for pos in sorted(chunks.keys()): chunk = chunks[pos][0] if (size >= chunk['offset'] and size <= chunk['offset'] + chunk['size']): break else: raise exc.OioException("No chunk found at position %d" % size) if chunk['offset'] != size: # retrieve partial chunk ret = self.object_fetch(account, container, obj, version=version, ranges=[(chunk['offset'], size - 1)]) # TODO implement a proper object_update pos = int(chunk['pos'].split('.')[0]) self.object_create(account, container, obj_name=obj, data=ret[1], meta_pos=pos, content_id=meta['id']) return self.container.content_truncate(account, container, obj, version=version, size=size, **kwargs)
def _object_create(self, account, container, obj_name, source, sysmeta,
                   metadata=None, policy=None, headers=None,
                   application_key=None):
    """Prepare chunk placement, upload *source* and register the object.

    :returns: (final_chunks, bytes_transferred, content_checksum)
    :raises exc.EtagMismatch: when the caller-provided etag differs from
        the computed checksum
    """
    meta, raw_chunks = self._content_prepare(
        account, container, obj_name, sysmeta['content_length'],
        policy=policy, headers=headers)
    sysmeta['chunk_size'] = int(meta['X-oio-ns-chunk-size'])
    sysmeta['id'] = meta[object_headers['id']]
    sysmeta['version'] = meta[object_headers['version']]
    sysmeta['policy'] = meta[object_headers['policy']]
    sysmeta['mime_type'] = meta[object_headers['mime_type']]
    sysmeta['chunk_method'] = meta[object_headers['chunk_method']]

    storage_method = STORAGE_METHODS.load(sysmeta['chunk_method'])
    chunks = _sort_chunks(raw_chunks, storage_method.ec)
    sysmeta['content_path'] = obj_name
    sysmeta['container_id'] = utils.name2cid(account, container)
    sysmeta['ns'] = self.namespace

    # Select the write handler matching the storage method
    if storage_method.ec:
        handler = ECWriteHandler(source, sysmeta, chunks, storage_method,
                                 headers=headers)
    elif storage_method.backblaze:
        backblaze_info = self._put_meta_backblaze(storage_method,
                                                  application_key)
        handler = BackblazeWriteHandler(source, sysmeta, chunks,
                                        storage_method, headers,
                                        backblaze_info)
    else:
        handler = ReplicatedWriteHandler(source, sysmeta, chunks,
                                         storage_method, headers=headers)

    final_chunks, bytes_transferred, content_checksum = handler.stream()

    etag = sysmeta['etag']
    if etag and etag.lower() != content_checksum.lower():
        raise exc.EtagMismatch(
            "given etag %s != computed %s" % (etag, content_checksum))
    sysmeta['etag'] = content_checksum

    h = {}
    h[object_headers['size']] = bytes_transferred
    h[object_headers['hash']] = sysmeta['etag']
    h[object_headers['version']] = sysmeta['version']
    h[object_headers['id']] = sysmeta['id']
    h[object_headers['policy']] = sysmeta['policy']
    h[object_headers['mime_type']] = sysmeta['mime_type']
    h[object_headers['chunk_method']] = sysmeta['chunk_method']

    if metadata:
        # items() instead of iteritems() for Python 3 compatibility
        for k, v in metadata.items():
            h['%sx-%s' % (constants.OBJECT_METADATA_PREFIX, k)] = v

    m, body = self._content_create(account, container, obj_name,
                                   final_chunks, headers=h)
    return final_chunks, bytes_transferred, content_checksum
def new(self, container_id, path, size, policy):
    """Prepare chunks for a new content and wrap them in a Content."""
    meta, chunks = self.container_client.content_prepare(
        cid=container_id, path=path, size=size, stgpol=policy)
    storage_method = STORAGE_METHODS.load(meta['chunk_method'])
    if storage_method.ec:
        content_class = ECContent
    else:
        content_class = PlainContent
    return content_class(self.conf, container_id, meta, chunks,
                         storage_method)
def new(self, container_id, path, size, policy):
    """Create a Content object for a not-yet-uploaded content."""
    meta, chunks = self.container_client.content_prepare(
        cid=container_id, path=path, size=size, stgpol=policy)
    method = STORAGE_METHODS.load(meta['chunk_method'])
    cls = ECContent if method.ec else PlainContent
    return cls(self.conf, container_id, meta, chunks, method)
def get(self, container_id, content_id):
    """Locate an existing content and wrap it in a Content instance.

    :raises ContentNotFound: when meta2 does not know the content
    """
    try:
        meta, chunks = self.container_client.content_locate(
            cid=container_id, content=content_id)
    except NotFound:
        raise ContentNotFound("Content %s/%s not found"
                              % (container_id, content_id))
    method = STORAGE_METHODS.load(meta['chunk_method'])
    content_class = ECContent if method.ec else PlainContent
    return content_class(self.conf, container_id, meta, chunks, method)
def get(self, container_id, content_id):
    """Load an existing content's description into a Content instance.

    :raises ContentNotFound: when the content does not exist
    """
    try:
        meta, chunks = self.container_client.content_show(
            cid=container_id, content=content_id)
    except NotFound:
        raise ContentNotFound("Content %s/%s not found"
                              % (container_id, content_id))
    storage_method = STORAGE_METHODS.load(meta['chunk_method'])
    if storage_method.ec:
        cls = ECContent
    else:
        cls = PlainContent
    return cls(self.conf, container_id, meta, chunks, storage_method)
def _object_create(self, account, container, obj_name, source, sysmeta,
                   properties=None, policy=None, key_file=None, **kwargs):
    """Upload *source* metachunk by metachunk and register the object.

    :returns: (final_chunks, bytes_transferred, content_checksum)
    :raises exc.EtagMismatch: when the caller-provided etag differs from
        the computed checksum
    """
    self._patch_timeouts(kwargs)
    obj_meta, chunk_prep = self._content_preparer(
        account, container, obj_name, policy=policy, **kwargs)
    obj_meta.update(sysmeta)
    obj_meta['content_path'] = obj_name
    obj_meta['container_id'] = cid_from_name(account, container).upper()
    obj_meta['ns'] = self.namespace
    obj_meta['full_path'] = self._generate_fullpath(
        account, container, obj_name, obj_meta['version'])
    obj_meta['oio_version'] = (obj_meta.get('oio_version') or
                               OIO_VERSION)

    # XXX content_id is necessary to update an existing object
    kwargs['content_id'] = kwargs.get('content_id', obj_meta['id'])

    storage_method = STORAGE_METHODS.load(obj_meta['chunk_method'])
    # Select the write handler matching the storage method
    if storage_method.ec:
        handler = ECWriteHandler(
            source, obj_meta, chunk_prep, storage_method, **kwargs)
    elif storage_method.backblaze:
        backblaze_info = self._b2_credentials(storage_method, key_file)
        handler = BackblazeWriteHandler(
            source, obj_meta, chunk_prep, storage_method,
            backblaze_info, **kwargs)
    else:
        handler = ReplicatedWriteHandler(
            source, obj_meta, chunk_prep, storage_method, **kwargs)

    final_chunks, bytes_transferred, content_checksum = handler.stream()

    etag = obj_meta.get('etag')
    if etag and etag.lower() != content_checksum.lower():
        raise exc.EtagMismatch(
            "given etag %s != computed %s" % (etag, content_checksum))
    obj_meta['etag'] = content_checksum

    data = {'chunks': final_chunks, 'properties': properties or {}}
    # FIXME: we may just pass **obj_meta
    self.container.content_create(
        account, container, obj_name, size=bytes_transferred,
        checksum=content_checksum, data=data,
        stgpol=obj_meta['policy'],
        version=obj_meta['version'], mime_type=obj_meta['mime_type'],
        chunk_method=obj_meta['chunk_method'],
        **kwargs)
    return final_chunks, bytes_transferred, content_checksum
def chunk_put(self, url, meta, data, **kwargs):
    """Upload one chunk worth of data to *url*."""
    if not hasattr(data, 'read'):
        data = utils.GeneratorReader(data)
    chunk = {'url': url, 'pos': meta['chunk_pos']}
    # FIXME: ugly
    chunk_method = meta.get('chunk_method',
                            meta.get('content_chunkmethod'))
    storage_method = STORAGE_METHODS.load(chunk_method)
    # EC checks the whole metachunk hash, replication the chunk hash
    if storage_method.ec:
        checksum = meta['metachunk_hash']
    else:
        checksum = meta['chunk_hash']
    writer = ReplicatedChunkWriteHandler(
        meta, [chunk], FakeChecksum(checksum), storage_method, quorum=1)
    writer.stream(data, None)
def object_fetch(self, account, container, obj, ranges=None, headers=None):
    """
    Locate an object and return its metadata together with a stream of
    its data, decoded through the appropriate storage method.
    """
    obj_meta, raw_chunks = self.object_analyze(
        account, container, obj, headers=headers)
    stg_method = STORAGE_METHODS.load(obj_meta['chunk-method'])
    ordered_chunks = _sort_chunks(raw_chunks, stg_method.ec)
    # EC objects need the rebuilding fetcher; everything else streams
    # replicated chunks directly.
    fetcher = self._fetch_stream_ec if stg_method.ec else self._fetch_stream
    data_stream = fetcher(obj_meta, ordered_chunks, ranges,
                          stg_method, headers)
    return obj_meta, data_stream
def chunk_put(self, url, meta, data, **kwargs):
    """
    Upload a single chunk to the service behind *url*, verifying the
    hash recorded in *meta*.
    """
    if not hasattr(data, 'read'):
        data = utils.GeneratorIO(data)
    # FIXME: ugly
    method_descr = meta.get('chunk_method', meta.get('content_chunkmethod'))
    stg_method = STORAGE_METHODS.load(method_descr)
    # EC metachunks carry a metachunk hash, replicated chunks a chunk hash.
    hash_key = 'metachunk_hash' if stg_method.ec else 'chunk_hash'
    target = {'url': self.resolve_url(url), 'pos': meta['chunk_pos']}
    handler = ReplicatedMetachunkWriter(
        meta, [target], FakeChecksum(meta[hash_key]),
        stg_method, quorum=1, perfdata=self.perfdata)
    handler.stream(data, None)
def _check_obj_policy(self, target, obj_meta, chunks, recurse=False):
    """
    Check that the list of chunks of an object matches
    the object's storage policy.

    :param target: audit target; a copy is handed to each worker
    :param obj_meta: object metadata, must contain 'chunk_method'
    :param chunks: flat list of the object's chunks
    :param recurse: forwarded to each metachunk check
    """
    stg_met = STORAGE_METHODS.load(obj_meta['chunk_method'])
    chunks_by_pos = _sort_chunks(chunks, stg_met.ec)
    # Use a distinct loop variable: the original code shadowed the
    # 'chunks' parameter here, which is confusing and error-prone.
    for pos, pos_chunks in chunks_by_pos.iteritems():
        self.pool.spawn_n(self._check_metachunk,
                          target.copy(), obj_meta, stg_met, pos,
                          pos_chunks, recurse=recurse)
def object_delete(self, account, container, obj, headers=None,
                  application_key=None):
    """
    Delete an object: remove its chunks from Backblaze when relevant,
    then ask the proxy to delete the content record.

    :param account: account the object belongs to
    :param container: container holding the object
    :param obj: object name
    :param headers: optional dict of extra request headers
    :param application_key: Backblaze application key, used only when
        the storage method is backblaze
    """
    # Bug fix: the original signature used a mutable default
    # argument (headers={}), shared across all calls.
    if headers is None:
        headers = {}
    uri = self._make_uri('content/delete')
    params = self._make_params(account, container, obj)
    meta, raw_chunks = self.object_analyze(
        account, container, obj, headers=headers)
    if meta:
        chunk_method = meta['chunk-method']
        storage_method = STORAGE_METHODS.load(chunk_method)
        meta['ns'] = self.namespace
        meta['container_id'] = utils.name2cid(account, container)
        chunks = _sort_chunks(raw_chunks, storage_method.ec)
        # Only Backblaze chunks must be deleted explicitly here;
        # regular chunks are handled by the deletion event pipeline.
        if storage_method.backblaze:
            backblaze_info = self._put_meta_backblaze(storage_method,
                                                     application_key)
            BackblazeDeleteHandler(meta, chunks, backblaze_info).delete()
    resp, resp_body = self._request(
        'POST', uri, params=params, headers=headers)
def object_fetch(self, account, container, obj, ranges=None, headers=None,
                 key_file=None):
    """
    Locate an object and return its metadata plus a data stream decoded
    through the appropriate storage method (EC, Backblaze or replication).
    """
    headers = headers or dict()
    # Tag the whole operation with a request id if the caller did not.
    if "X-oio-req-id" not in headers:
        headers["X-oio-req-id"] = utils.request_id()
    obj_meta, chunk_list = self.object_analyze(account, container, obj,
                                               headers=headers)
    stg_method = STORAGE_METHODS.load(obj_meta["chunk-method"])
    sorted_chunks = _sort_chunks(chunk_list, stg_method.ec)
    obj_meta["container_id"] = utils.name2cid(account, container).upper()
    obj_meta["ns"] = self.namespace
    if stg_method.ec:
        data_stream = self._fetch_stream_ec(obj_meta, sorted_chunks,
                                            ranges, stg_method, headers)
    elif stg_method.backblaze:
        data_stream = self._fetch_stream_backblaze(obj_meta, sorted_chunks,
                                                   ranges, stg_method,
                                                   key_file)
    else:
        data_stream = self._fetch_stream(obj_meta, sorted_chunks, ranges,
                                         stg_method, headers)
    return obj_meta, data_stream
def object_fetch(self, account, container, obj, ranges=None,
                 headers=None, key_file=None):
    """
    Return the object's metadata and a stream over its data, picking
    the fetcher that matches the object's storage method.
    """
    obj_meta, chunk_list = self.object_analyze(
        account, container, obj, headers=headers)
    stg_method = STORAGE_METHODS.load(obj_meta['chunk-method'])
    sorted_chunks = _sort_chunks(chunk_list, stg_method.ec)
    obj_meta['container_id'] = utils.name2cid(account, container).upper()
    obj_meta['ns'] = self.namespace
    if stg_method.ec:
        data_stream = self._fetch_stream_ec(
            obj_meta, sorted_chunks, ranges, stg_method, headers)
    elif stg_method.backblaze:
        data_stream = self._fetch_stream_backblaze(
            obj_meta, sorted_chunks, ranges, stg_method, key_file)
    else:
        data_stream = self._fetch_stream(
            obj_meta, sorted_chunks, ranges, stg_method, headers)
    return obj_meta, data_stream
def _object_create(
    self, account, container, obj_name, source, sysmeta,
    metadata=None, policy=None, headers=None, key_file=None
):
    """
    Prepare an upload, fill *sysmeta* from the prepared headers,
    stream the object's data and commit the chunk list.

    :param source: readable source of the object's data
    :param sysmeta: dict of system metadata; mutated in place with the
        values returned by the preparer (id, version, policy, ...)
    :param metadata: optional user properties for the content record
    :param headers: extra headers forwarded to the proxy requests
    :param key_file: path to Backblaze credentials, used only when the
        storage method is backblaze
    :returns: a tuple (chunk list, bytes transferred, content checksum)
    :raises exc.EtagMismatch: when the caller-supplied etag does not
        match the checksum computed while streaming
    """
    # Ask the proxy for a chunk layout matching the policy; 'meta'
    # holds the response headers of the prepare call.
    meta, chunk_prep = self._content_preparer(account, container,
                                              obj_name, policy=policy,
                                              headers=headers)
    sysmeta["chunk_size"] = int(meta["X-oio-ns-chunk-size"])
    sysmeta["id"] = meta[object_headers["id"]]
    sysmeta["version"] = meta[object_headers["version"]]
    sysmeta["policy"] = meta[object_headers["policy"]]
    # Keep a caller-supplied mime type, fall back on the prepared one.
    if not sysmeta.get("mime_type"):
        sysmeta["mime_type"] = meta[object_headers["mime_type"]]
    sysmeta["chunk_method"] = meta[object_headers["chunk_method"]]
    sysmeta["content_path"] = obj_name
    sysmeta["container_id"] = utils.name2cid(account, container).upper()
    sysmeta["ns"] = self.namespace

    # Pick the write handler matching the storage method.
    storage_method = STORAGE_METHODS.load(sysmeta["chunk_method"])
    if storage_method.ec:
        handler = ECWriteHandler(source, sysmeta, chunk_prep,
                                 storage_method, headers=headers)
    elif storage_method.backblaze:
        backblaze_info = self._b2_credentials(storage_method, key_file)
        handler = BackblazeWriteHandler(source, sysmeta, chunk_prep,
                                        storage_method, headers,
                                        backblaze_info)
    else:
        handler = ReplicatedWriteHandler(source, sysmeta, chunk_prep,
                                         storage_method, headers=headers)

    final_chunks, bytes_transferred, content_checksum = handler.stream()

    # If the caller supplied an etag, it must match what we computed.
    etag = sysmeta["etag"]
    if etag and etag.lower() != content_checksum.lower():
        raise exc.EtagMismatch("given etag %s != computed %s" %
                               (etag, content_checksum))
    sysmeta["etag"] = content_checksum

    # Build the headers of the final content-create request from the
    # caller headers plus the computed system metadata.
    h = dict()
    h.update(headers)
    h[object_headers["size"]] = bytes_transferred
    h[object_headers["hash"]] = sysmeta["etag"]
    h[object_headers["version"]] = sysmeta["version"]
    h[object_headers["id"]] = sysmeta["id"]
    h[object_headers["policy"]] = sysmeta["policy"]
    h[object_headers["mime_type"]] = sysmeta["mime_type"]
    h[object_headers["chunk_method"]] = sysmeta["chunk_method"]
    m, body = self._content_create(account, container, obj_name,
                                   final_chunks, metadata=metadata,
                                   headers=h)
    return final_chunks, bytes_transferred, content_checksum
def setUp(self):
    """Build a 3-copy replication fixture: sysmeta plus one metachunk."""
    self.chunk_method = 'plain/nb_copy=3'
    self.storage_method = STORAGE_METHODS.load(self.chunk_method)
    self.cid = \
        '3E32B63E6039FD3104F63BFAE034FADAA823371DD64599A8779BA02B3439A268'
    self.sysmeta = {
        'id': '705229BB7F330500A65C3A49A3116B83',
        'version': '1463998577463950',
        'chunk_method': self.chunk_method,
        'container_id': self.cid,
        'policy': 'REPLI3',
        'content_path': 'test',
    }
    # Three replicas of the same metachunk (position 0), one per service.
    self._meta_chunk = [
        {'url': 'http://127.0.0.1:%d/%d' % (7000 + i, i), 'pos': '0'}
        for i in range(3)
    ]
def get_object_head_resp(self, req):
    """
    Build the response to a HEAD request on an object.

    When the application is configured with check_state, also locate
    the object's chunks and verify that enough of them are reachable
    for each metachunk position; otherwise only fetch the properties.

    :param req: the incoming HEAD request
    :returns: an object response, or HTTPNotFound / HTTPBadRequest
    """
    storage = self.app.storage
    oio_headers = {'X-oio-req-id': self.trans_id}
    version = req.environ.get('oio.query', {}).get('version')
    try:
        if self.app.check_state:
            # We need the chunk locations to check their state below.
            metadata, chunks = storage.object_locate(
                self.account_name, self.container_name,
                self.object_name, version=version,
                headers=oio_headers)
        else:
            metadata = storage.object_get_properties(
                self.account_name, self.container_name,
                self.object_name, version=version,
                headers=oio_headers)
    except (exceptions.NoSuchObject, exceptions.NoSuchContainer):
        return HTTPNotFound(request=req)
    if self.app.check_state:
        storage_method = STORAGE_METHODS.load(metadata['chunk_method'])
        # TODO(mbo): use new property of STORAGE_METHODS
        # EC needs at least ec_nb_data chunks per position to rebuild;
        # replication needs a single reachable copy.
        min_chunks = storage_method.ec_nb_data if storage_method.ec else 1
        chunks_by_pos = _sort_chunks(chunks, storage_method.ec)
        # NOTE(review): comparing the enumeration index with the
        # position key assumes _sort_chunks yields positions in order
        # 0, 1, 2, ... — a gap means a missing metachunk. Presumably
        # chunks_by_pos preserves sorted position order; verify.
        for idx, entries in enumerate(chunks_by_pos.iteritems()):
            if idx != entries[0]:
                return HTTPBadRequest(request=req)
            nb_chunks_ok = 0
            for entry in entries[1]:
                try:
                    # A successful HEAD on the chunk service means the
                    # chunk is reachable.
                    storage.blob_client.chunk_head(entry['url'])
                    nb_chunks_ok += 1
                except exceptions.OioException:
                    # Unreachable chunk: keep probing the others.
                    pass
                if nb_chunks_ok >= min_chunks:
                    break
            else:
                # Loop exhausted without reaching min_chunks: this
                # metachunk cannot be read.
                return HTTPBadRequest(request=req)
    resp = self.make_object_response(req, metadata)
    return resp
def setUp(self):
    """Build an EC (k=6, m=2) fixture: sysmeta plus one 8-chunk metachunk."""
    self.chunk_method = 'ec/algo=liberasurecode_rs_vand,k=6,m=2'
    self.storage_method = STORAGE_METHODS.load(self.chunk_method)
    self.cid = \
        '3E32B63E6039FD3104F63BFAE034FADAA823371DD64599A8779BA02B3439A268'
    self.sysmeta = {
        'id': '705229BB7F330500A65C3A49A3116B83',
        'version': '1463998577463950',
        'chunk_method': self.chunk_method,
        'container_id': self.cid,
        'policy': 'EC',
        'content_path': 'test',
    }
    # One EC fragment per service: positions 0.0 through 0.7.
    self._meta_chunk = [
        {'url': 'http://127.0.0.1:%d/%d' % (7000 + i, i),
         'pos': '0.%d' % i, 'num': i}
        for i in range(8)
    ]
def fetch(self):
    """Return a data stream over this content's sorted chunks."""
    stg_method = STORAGE_METHODS.load(self.chunk_method)
    ordered_chunks = _sort_chunks(self.chunks.raw(), stg_method.ec)
    return self._fetch_stream(ordered_chunks, stg_method, {})
def process(self, env, cb):
    """
    Handle a beanstalkd event: on CONTENT_DELETED, delete the
    content's chunks (in parallel over HTTP, or through the Backblaze
    API when the chunk method is backblaze), then pass the event on.

    :param env: the event environment dict
    :param cb: continuation callback forwarded to the next app
    """
    event = Event(env)
    if event.event_type == EventTypes.CONTENT_DELETED:
        pile = GreenPile(PARALLEL_CHUNKS_DELETE)
        url = event.env.get('url')
        # Split the event payload into chunk records and the
        # content-headers record.
        chunks = []
        content_headers = None
        for item in event.data:
            if item.get('type') == 'chunks':
                chunks.append(item)
            if item.get("type") == 'contents_headers':
                content_headers = item
        if len(chunks):

            def delete_chunk(chunk):
                # Issue a DELETE directly to the chunk's service.
                # Returns the HTTP response (with the chunk attached),
                # or None on error/timeout.
                resp = None
                p = urlparse(chunk['id'])
                try:
                    with Timeout(CHUNK_TIMEOUT):
                        conn = http_connect(p.netloc, 'DELETE', p.path)
                        resp = conn.getresponse()
                        resp.chunk = chunk
                except (Exception, Timeout) as e:
                    # Best-effort: log and keep deleting the others.
                    self.logger.warn(
                        'error while deleting chunk %s "%s"',
                        chunk['id'], str(e.message))
                return resp

            def delete_chunk_backblaze(chunks, url, content_headers,
                                       storage_method):
                # Delete all chunks through the Backblaze API in one
                # handler call.
                meta = {}
                meta['container_id'] = url['id']
                chunk_list = []
                for chunk in chunks:
                    chunk['url'] = chunk['id']
                    # NOTE(review): each chunk is wrapped in its own
                    # one-element list — presumably the handler expects
                    # a list of metachunks; verify.
                    chunk_list.append([chunk])
                key_file = self.conf.get('key_file')
                backblaze_info = BackblazeUtils.put_meta_backblaze(
                    storage_method, key_file)
                try:
                    BackblazeDeleteHandler(meta, chunk_list,
                                           backblaze_info).delete()
                except OioException as e:
                    # Best-effort: log the failure and move on.
                    self.logger.warn('delete failed: %s' % str(e))

            chunk_method = content_headers['chunk-method']
            # don't load storage method else than with b2
            if chunk_method.find('backblaze') != -1:
                storage_method = STORAGE_METHODS.load(chunk_method)
                delete_chunk_backblaze(chunks, url, content_headers,
                                       storage_method)
                # Backblaze path handled everything; short-circuit.
                return self.app(env, cb)
            # Regular chunks: delete them all in parallel.
            for chunk in chunks:
                pile.spawn(delete_chunk, chunk)
            resps = [resp for resp in pile if resp]
            for resp in resps:
                if resp.status != 204:
                    self.logger.warn(
                        'failed to delete chunk %s (HTTP %s)',
                        resp.chunk['id'], resp.status)
    return self.app(env, cb)
def dispatch_request(self, req):
    """
    Route an incoming HTTP request to the matching metachunk handler.

    PUT writes a metachunk (EC, Backblaze or replicated, depending on
    the chunk method carried in the request's system metadata); GET
    reads one back, honoring a single Range header. Any other method
    gets a 403 response.

    :param req: the incoming werkzeug request
    :returns: a werkzeug response
    """
    if req.method == 'PUT':
        source = req.input_stream
        size = req.content_length
        sysmeta = load_sysmeta(req)
        storage_method = STORAGE_METHODS.load(sysmeta['chunk_method'])

        if storage_method.ec:
            if not size:
                # FIXME: get chunk size from proxy
                size = (storage_method.ec_nb_data * 10 *
                        storage_method.ec_segment_size)
            # One target chunk per EC fragment (data + parity).
            nb_chunks = (storage_method.ec_nb_data +
                         storage_method.ec_nb_parity)
            pos = req.headers[sys_headers['chunk_pos']]
            meta_chunk = load_meta_chunk(req, nb_chunks, pos)
            return self.write_ec_meta_chunk(source, size,
                                            storage_method, sysmeta,
                                            meta_chunk)
        elif storage_method.backblaze:
            nb_chunks = int(sysmeta['content_chunksnb'])
            meta_chunk = load_meta_chunk(req, nb_chunks)
            return self.write_backblaze_meta_chunk(source, size,
                                                   storage_method,
                                                   sysmeta, meta_chunk)
        else:
            # FIXME: check and fix size
            nb_chunks = int(sysmeta['content_chunksnb'])
            meta_chunk = load_meta_chunk(req, nb_chunks)
            return self.write_repli_meta_chunk(source, size,
                                               storage_method,
                                               sysmeta, meta_chunk)

    elif req.method == 'GET':
        chunk_method = req.headers[sys_headers['content_chunkmethod']]
        storage_method = STORAGE_METHODS.load(chunk_method)
        # Parse the first requested range, if any.
        if req.range and req.range.ranges:
            # Werkzeug give us non-inclusive ranges, but we use inclusive
            start = req.range.ranges[0][0]
            if req.range.ranges[0][1] is not None:
                end = req.range.ranges[0][1] - 1
            else:
                end = None
            my_range = (start, end)
        else:
            my_range = (None, None)

        if storage_method.ec:
            nb_chunks = storage_method.ec_nb_data + \
                storage_method.ec_nb_parity
            meta_chunk = load_meta_chunk(req, nb_chunks)
            # The EC reader needs the metachunk size up front.
            meta_chunk[0]['size'] = \
                int(req.headers[sys_headers['chunk_size']])
            return self.read_ec_meta_chunk(storage_method, meta_chunk,
                                           my_range[0], my_range[1])
        elif storage_method.backblaze:
            meta_chunk = load_meta_chunk(req, 1)
            return self.read_backblaze_meta_chunk(req, storage_method,
                                                  meta_chunk,
                                                  my_range[0],
                                                  my_range[1])
        else:
            nb_chunks = int(req.headers[sys_headers['content_chunksnb']])
            meta_chunk = load_meta_chunk(req, nb_chunks)
            # Replicated reads forward the original Range header as-is.
            headers = dict()
            if req.range and req.range.ranges:
                headers['Range'] = req.range.to_header()
            return self.read_meta_chunk(storage_method, meta_chunk,
                                        headers)
    else:
        return Response(status=403)