class ObjectStorageApi(object):
    """
    The Object Storage API.

    High level API that wraps `AccountClient`, `ContainerClient` and
    `DirectoryClient` classes.

    Every method that takes a `kwargs` argument accepts at least the
    following keywords:

        - `headers`: `dict` of extra headers to pass to the proxy
        - `connection_timeout`: `float`
        - `read_timeout`: `float`
        - `write_timeout`: `float`
    """

    TIMEOUT_KEYS = ('connection_timeout', 'read_timeout', 'write_timeout')

    def __init__(self, namespace, logger=None, **kwargs):
        """
        Initialize the object storage API.

        :param namespace: name of the namespace to interact with
        :type namespace: `str`

        :keyword connection_timeout: connection timeout towards rawx services
        :type connection_timeout: `float` seconds
        :keyword read_timeout: timeout for rawx responses and data reads from
            the caller (when uploading)
        :type read_timeout: `float` seconds
        :keyword write_timeout: timeout for rawx write requests
        :type write_timeout: `float` seconds
        :keyword pool_manager: a pooled connection manager that will be used
            for all HTTP based APIs (except rawx)
        :type pool_manager: `urllib3.PoolManager`
        """
        self.namespace = namespace
        conf = {"namespace": self.namespace}
        self.logger = logger or get_logger(conf)
        self.timeouts = {tok: float_value(tov, None)
                         for tok, tov in kwargs.items()
                         if tok in self.__class__.TIMEOUT_KEYS}

        from oio.account.client import AccountClient
        from oio.container.client import ContainerClient
        from oio.directory.client import DirectoryClient
        self.directory = DirectoryClient(conf, logger=self.logger, **kwargs)
        self.container = ContainerClient(conf, logger=self.logger, **kwargs)

        # In AccountClient, "endpoint" is the account service, not the proxy
        acct_kwargs = kwargs.copy()
        acct_kwargs["proxy_endpoint"] = acct_kwargs.pop("endpoint", None)
        self.account = AccountClient(conf, logger=self.logger, **acct_kwargs)

    def _patch_timeouts(self, kwargs):
        """
        Insert timeout settings from this class's constructor into `kwargs`,
        if they are not already there.
        """
        for tok, tov in self.timeouts.items():
            if tok not in kwargs:
                kwargs[tok] = tov

    def account_create(self, account, **kwargs):
        """
        Create an account.

        :param account: name of the account to create
        :type account: `str`
        :returns: `True` if the account has been created
        """
        return self.account.account_create(account, **kwargs)

    @handle_account_not_found
    def account_delete(self, account, **kwargs):
        """
        Delete an account.

        :param account: name of the account to delete
        :type account: `str`
        """
        self.account.account_delete(account, **kwargs)

    @handle_account_not_found
    def account_show(self, account, **kwargs):
        """
        Get information about an account.
        """
        return self.account.account_show(account, **kwargs)

    def account_list(self, **kwargs):
        """
        List known accounts.

        Notice that account creation is asynchronous, and an autocreated
        account may appear in the listing only after several seconds.
        """
        return self.account.account_list(**kwargs)

    @handle_account_not_found
    def account_update(self, account, metadata, to_delete=None, **kwargs):
        warnings.warn("You'd better use account_set_properties()",
                      DeprecationWarning, stacklevel=2)
        self.account.account_update(account, metadata, to_delete, **kwargs)

    @handle_account_not_found
    def account_set_properties(self, account, properties, **kwargs):
        self.account.account_update(account, properties, None, **kwargs)

    @handle_account_not_found
    def account_del_properties(self, account, properties, **kwargs):
        self.account.account_update(account, None, properties, **kwargs)

    def container_create(self, account, container, properties=None,
                         **kwargs):
        """
        Create a container.

        :param account: account in which to create the container
        :type account: `str`
        :param container: name of the container
        :type container: `str`
        :param properties: properties to set on the container
        :type properties: `dict`
        :returns: True if the container has been created,
                  False if it already exists
        """
        return self.container.container_create(account, container,
                                               properties=properties,
                                               **kwargs)

    @handle_container_not_found
    @ensure_headers
    @ensure_request_id
    def container_touch(self, account, container, **kwargs):
        """
        Trigger a notification about the container state.

        :param account: account the container belongs to
        :type account: `str`
        :param container: name of the container
        :type container: `str`
        """
        self.container.container_touch(account, container, **kwargs)

    def container_create_many(self, account, containers, properties=None,
                              **kwargs):
        """
        Create many containers.

        :param account: account in which to create the containers
        :type account: `str`
        :param containers: names of the containers
        :type containers: `list`
        :param properties: properties to set on the containers
        :type properties: `dict`
        """
        return self.container.container_create_many(account,
                                                    containers,
                                                    properties=properties,
                                                    **kwargs)

    @handle_container_not_found
    def container_delete(self, account, container, **kwargs):
        """
        Delete a container.

        :param account: account from which to delete the container
        :type account: `str`
        :param container: name of the container
        :type container: `str`
        """
        self.container.container_delete(account, container, **kwargs)

    @handle_account_not_found
    def container_list(self, account, limit=None, marker=None,
                       end_marker=None, prefix=None, delimiter=None,
                       **kwargs):
        """
        Get the list of containers of an account.

        :param account: account from which to get the container list
        :type account: `str`
        :keyword limit: maximum number of results to return
        :type limit: `int`
        :keyword marker: name of the container from where to start the
            listing
        :type marker: `str`
        :keyword end_marker:
        :keyword prefix:
        :keyword delimiter:
        :return: the list of containers of an account
        :rtype: `list` of items (`list`) with 4 fields: name, number of
            objects, number of bytes, and 1 if the item is a prefix or 0
            if the item is actually a container
        """
        resp = self.account.container_list(account, limit=limit,
                                           marker=marker,
                                           end_marker=end_marker,
                                           prefix=prefix,
                                           delimiter=delimiter,
                                           **kwargs)
        return resp["listing"]

    @handle_container_not_found
    def container_show(self, account, container, **kwargs):
        """
        Get information about a container (user properties).

        :param account: account in which the container is
        :type account: `str`
        :param container: name of the container
        :type container: `str`
        :returns: a `dict` with "properties" containing a `dict`
            of user properties.
        """
        return self.container.container_show(account, container, **kwargs)

    @handle_container_not_found
    def container_snapshot(self, account, container, dst_account,
                           dst_container, batch=100, **kwargs):
        """
        Create a copy of the container (only the content of the database).

        :param account: account in which the target is
        :type account: `str`
        :param container: name of the target
        :type container: `str`
        :param dst_account: account in which the snapshot will be
        :type dst_account: `str`
        :param dst_container: name of the snapshot
        :type dst_container: `str`
        """
        try:
            self.container.container_freeze(account, container)
            self.container.container_snapshot(
                account, container, dst_account, dst_container)
            resp = self.object_list(dst_account, dst_container)
            obj_gen = resp['objects']
            target_beans = []
            copy_beans = []
            for obj in obj_gen:
                data = self.object_locate(
                    account, container, obj["name"])
                chunks = [chunk['url'] for chunk in data[1]]
                copies = self._generate_copy(chunks)
                fullpath = self._generate_fullpath(
                    dst_account, dst_container, obj['name'], obj['version'])
                self._send_copy(chunks, copies, fullpath[0])
                t_beans, c_beans = self._prepare_update_meta2(
                    data[1], copies, dst_account, dst_container,
                    obj['content'])
                target_beans.extend(t_beans)
                copy_beans.extend(c_beans)
                if len(target_beans) > batch:
                    self.container.container_raw_update(
                        target_beans, copy_beans,
                        dst_account, dst_container,
                        frozen=True)
                    target_beans = []
                    copy_beans = []
            if target_beans:
                self.container.container_raw_update(
                    target_beans, copy_beans,
                    dst_account, dst_container,
                    frozen=True)
        finally:
            self.container.container_enable(account, container)

    @handle_container_not_found
    def container_get_properties(self, account, container, properties=None,
                                 **kwargs):
        """
        Get information about a container (user and system properties).

        :param account: account in which the container is
        :type account: `str`
        :param container: name of the container
        :type container: `str`
        :param properties: *ignored*
        :returns: a `dict` with "properties" and "system" entries,
            containing respectively a `dict` of user properties and
            a `dict` of system properties.
        """
        return self.container.container_get_properties(account, container,
                                                       properties=properties,
                                                       **kwargs)

    @handle_container_not_found
    def container_set_properties(self, account, container, properties=None,
                                 clear=False, **kwargs):
        """
        Set properties on a container.

        :param account: name of the account
        :type account: `str`
        :param container: name of the container where to set properties
        :type container: `str`
        :param properties: a dictionary of properties
        :type properties: `dict`
        :param clear:
        :type clear: `bool`
        :keyword system: dictionary of system properties to set
        """
        return self.container.container_set_properties(
            account, container, properties,
            clear=clear, **kwargs)

    @handle_container_not_found
    def container_del_properties(self, account, container, properties,
                                 **kwargs):
        """
        Delete properties of a container.

        :param account: name of the account
        :type account: `str`
        :param container: name of the container to deal with
        :type container: `str`
        :param properties: a list of property keys
        :type properties: `list`
        """
        return self.container.container_del_properties(
            account, container, properties, **kwargs)

    def container_update(self, account, container, metadata, clear=False,
                         **kwargs):
        warnings.warn("You'd better use container_set_properties()",
                      DeprecationWarning)
        if not metadata:
            self.container_del_properties(
                account, container, [], **kwargs)
        else:
            self.container_set_properties(
                account, container, metadata, clear, **kwargs)

    @handle_container_not_found
    @ensure_headers
    @ensure_request_id
    def object_create(self, account, container, file_or_path=None, data=None,
                      etag=None, obj_name=None, mime_type=None,
                      metadata=None, policy=None, key_file=None,
                      append=False, properties=None, **kwargs):
        """
        Create an object or append data to an object in *container* of
        *account* with data taken from either *data* (`str` or `generator`)
        or *file_or_path* (path to a file or file-like object).
        The object will be named after *obj_name* if specified, or after
        the base name of *file_or_path*.

        :param account: name of the account where to create the object
        :type account: `str`
        :param container: name of the container where to create the object
        :type container: `str`
        :param file_or_path: file-like object or path to a file from which
            to read object data
        :type file_or_path: `str` or file-like object
        :param data: object data (if `file_or_path` is not set)
        :type data: `str` or `generator`
        :keyword etag: entity tag of the object
        :type etag: `str`
        :keyword obj_name: name of the object to create. If not set, will use
            the base name of `file_or_path`.
        :keyword mime_type: MIME type of the object
        :type mime_type: `str`
        :keyword properties: a dictionary of properties
        :type properties: `dict`
        :keyword policy: name of the storage policy
        :type policy: `str`
        :keyword key_file:
        :param append: if set, data will be appended to the existing object
            (or the object will be created if it does not exist)
        :type append: `bool`
        :returns: `list` of chunks, size and hash of what has been uploaded
        """
        if (data, file_or_path) == (None, None):
            raise exc.MissingData()
        src = data if data is not None else file_or_path
        if src is file_or_path:
            if isinstance(file_or_path, basestring):
                if not os.path.exists(file_or_path):
                    raise exc.FileNotFound("File '%s' not found."
                                           % file_or_path)
                file_name = os.path.basename(file_or_path)
            else:
                try:
                    file_name = os.path.basename(file_or_path.name)
                except AttributeError:
                    file_name = None
            obj_name = obj_name or file_name
        elif isgenerator(src):
            file_or_path = GeneratorIO(src)
            src = file_or_path
        if not obj_name:
            raise exc.MissingName(
                "No name for the object has been specified"
            )

        sysmeta = {'mime_type': mime_type,
                   'etag': etag}
        if metadata:
            warnings.warn(
                "You'd better use 'properties' instead of 'metadata'",
                DeprecationWarning, stacklevel=4)
            if not properties:
                properties = metadata
            else:
                properties.update(metadata)

        if src is data:
            return self._object_create(
                account, container, obj_name, BytesIO(data), sysmeta,
                properties=properties, policy=policy,
                key_file=key_file, append=append, **kwargs)
        elif hasattr(file_or_path, "read"):
            return self._object_create(
                account, container, obj_name, src, sysmeta,
                properties=properties, policy=policy, key_file=key_file,
                append=append, **kwargs)
        else:
            with open(file_or_path, "rb") as f:
                return self._object_create(
                    account, container, obj_name, f, sysmeta,
                    properties=properties, policy=policy,
                    key_file=key_file, append=append, **kwargs)

    @ensure_headers
    @ensure_request_id
    def object_touch(self, account, container, obj,
                     version=None, **kwargs):
        """
        Trigger a notification about an object
        (as if it just had been created).

        :param account: name of the account where to touch the object
        :type account: `str`
        :param container: name of the container where to touch the object
        :type container: `str`
        :param obj: name of the object to touch
        """
        self.container.content_touch(account, container, obj,
                                     version=version, **kwargs)

    def object_drain(self, account, container, obj,
                     version=None, **kwargs):
        """
        Remove all the chunks of a content, but keep all the metadata.

        :param account: name of the account where the object is present
        :type account: `str`
        :param container: name of the container where the object is present
        :type container: `str`
        :param obj: name of the object to drain
        """
        self.container.content_drain(account, container, obj,
                                     version=version, **kwargs)

    @handle_object_not_found
    @ensure_headers
    @ensure_request_id
    def object_delete(self, account, container, obj,
                      version=None, **kwargs):
        """
        Delete an object from a container. If versioning is enabled and no
        version is specified, the object will be marked as deleted but not
        actually deleted.

        :param account: name of the account the object belongs to
        :type account: `str`
        :param container: name of the container the object belongs to
        :type container: `str`
        :param obj: name of the object to delete
        :param version: version of the object to delete
        :returns: True on success
        """
        return self.container.content_delete(account, container, obj,
                                             version=version, **kwargs)

    @ensure_headers
    @ensure_request_id
    def object_delete_many(self, account, container, objs, **kwargs):
        return self.container.content_delete_many(
            account, container, objs, **kwargs)

    @handle_object_not_found
    @ensure_headers
    @ensure_request_id
    def object_truncate(self, account, container, obj,
                        version=None, size=None, **kwargs):
        """
        Truncate object at specified size. Only shrink is supported.
        A download may occur if size is not on chunk boundaries.

        :param account: name of the account in which the object is stored
        :param container: name of the container in which the object is stored
        :param obj: name of the object to query
        :param version: version of the object to query
        :param size: new size of object
        """
        # code copied from object_fetch (should be factorized!)
        meta, raw_chunks = self.object_locate(
            account, container, obj, version=version, **kwargs)
        chunk_method = meta['chunk_method']
        storage_method = STORAGE_METHODS.load(chunk_method)
        chunks = _sort_chunks(raw_chunks, storage_method.ec)

        for pos in sorted(chunks.keys()):
            chunk = chunks[pos][0]
            if (size >= chunk['offset']
                    and size <= chunk['offset'] + chunk['size']):
                break
        else:
            raise exc.OioException("No chunk found at position %d" % size)

        if chunk['offset'] != size:
            # retrieve partial chunk
            ret = self.object_fetch(account, container, obj,
                                    version=version,
                                    ranges=[(chunk['offset'], size-1)])
            # TODO implement a proper object_update
            pos = int(chunk['pos'].split('.')[0])
            self.object_create(account, container, obj_name=obj,
                               data=ret[1], meta_pos=pos,
                               content_id=meta['id'])

        return self.container.content_truncate(account, container, obj,
                                               version=version, size=size,
                                               **kwargs)

    @handle_container_not_found
    def object_list(self, account, container, limit=None, marker=None,
                    delimiter=None, prefix=None, end_marker=None,
                    properties=False, versions=False, deleted=False,
                    **kwargs):
        """
        List objects inside a container.

        :param properties: if True, list object properties along with objects
        :param versions: if True, list all versions of objects
        :param deleted: if True, list also the deleted objects
        :returns: a dict which contains
           * 'objects': the list of objects
           * 'prefixes': common prefixes (only if delimiter and prefix
             are set)
           * 'properties': a dict of container properties
           * 'system': a dict of system metadata
        """
        _, resp_body = self.container.content_list(
            account, container, limit=limit, marker=marker,
            end_marker=end_marker, prefix=prefix, delimiter=delimiter,
            properties=properties, versions=versions, deleted=deleted,
            **kwargs)

        for obj in resp_body['objects']:
            mtype = obj.get('mime-type')
            if mtype is not None:
                obj['mime_type'] = mtype
                del obj['mime-type']
            version = obj.get('ver')
            if version is not None:
                obj['version'] = version
                del obj['ver']

        return resp_body

    @handle_object_not_found
    def object_locate(self, account, container, obj,
                      version=None, **kwargs):
        """
        Get a description of the object along with the list of its chunks.

        :param account: name of the account in which the object is stored
        :param container: name of the container in which the object is stored
        :param obj: name of the object to query
        :param version: version of the object to query
        :returns: a tuple with object metadata `dict` as first element
            and chunk `list` as second element
        """
        obj_meta, chunks = self.container.content_locate(
            account, container, obj, version=version, **kwargs)
        return obj_meta, chunks

    def object_analyze(self, *args, **kwargs):
        """
        :deprecated: use `object_locate`
        """
        warnings.warn("You'd better use object_locate()",
                      DeprecationWarning)
        return self.object_locate(*args, **kwargs)

    @ensure_headers
    @ensure_request_id
    def object_fetch(self, account, container, obj, version=None,
                     ranges=None, key_file=None, **kwargs):
        meta, raw_chunks = self.object_locate(
            account, container, obj, version=version, **kwargs)
        chunk_method = meta['chunk_method']
        storage_method = STORAGE_METHODS.load(chunk_method)
        chunks = _sort_chunks(raw_chunks, storage_method.ec)
        meta['container_id'] = cid_from_name(account, container).upper()
        meta['ns'] = self.namespace
        self._patch_timeouts(kwargs)
        if storage_method.ec:
            stream = fetch_stream_ec(chunks, ranges, storage_method, **kwargs)
        elif storage_method.backblaze:
            stream = self._fetch_stream_backblaze(meta, chunks, ranges,
                                                  storage_method, key_file,
                                                  **kwargs)
        else:
            stream = fetch_stream(chunks, ranges, storage_method, **kwargs)
        return meta, stream

    @handle_object_not_found
    def object_get_properties(self, account, container, obj, **kwargs):
        return self.container.content_get_properties(account, container, obj,
                                                     **kwargs)

    @handle_object_not_found
    def object_show(self, account, container, obj, version=None, **kwargs):
        """
        Get a description of the content along with its user properties.

        :param account: name of the account in which the object is stored
        :param container: name of the container in which the object is stored
        :param obj: name of the object to query
        :returns: a `dict` describing the object

        .. python::

            {'hash': '6BF60C17CC15EEA108024903B481738F',
             'ctime': '1481031763',
             'deleted': 'False',
             'properties': {
                 u'projet': u'OpenIO-SDS'},
             'length': '43518',
             'hash_method': 'md5',
             'chunk_method': 'ec/algo=liberasurecode_rs_vand,k=6,m=3',
             'version': '1481031762951972',
             'policy': 'EC',
             'id': '20BF2194FD420500CD4729AE0B5CBC07',
             'mime_type': 'application/octet-stream',
             'name': 'Makefile'}
        """
        return self.container.content_show(account, container, obj,
                                           version=version, **kwargs)

    def object_update(self, account, container, obj, metadata,
                      version=None, clear=False, **kwargs):
        warnings.warn("You'd better use object_set_properties()",
                      DeprecationWarning, stacklevel=2)
        if clear:
            self.object_del_properties(
                account, container, obj, [], version=version, **kwargs)
        if metadata:
            self.object_set_properties(
                account, container, obj, metadata, version=version, **kwargs)

    @handle_object_not_found
    def object_set_properties(self, account, container, obj, properties,
                              version=None, **kwargs):
        return self.container.content_set_properties(
            account, container, obj, properties={'properties': properties},
            version=version, **kwargs)

    @handle_object_not_found
    def object_del_properties(self, account, container, obj, properties,
                              version=None, **kwargs):
        return self.container.content_del_properties(
            account, container, obj, properties=properties,
            version=version, **kwargs)

    def _content_preparer(self, account, container, obj_name,
                          policy=None, **kwargs):
        # TODO: optimize by asking more than one metachunk at a time
        obj_meta, first_body = self.container.content_prepare(
            account, container, obj_name, size=1, stgpol=policy,
            autocreate=True, **kwargs)
        storage_method = STORAGE_METHODS.load(obj_meta['chunk_method'])

        def _fix_mc_pos(chunks, mc_pos):
            for chunk in chunks:
                raw_pos = chunk["pos"].split(".")
                if storage_method.ec:
                    chunk['num'] = int(raw_pos[1])
                    chunk["pos"] = "%d.%d" % (mc_pos, chunk['num'])
                else:
                    chunk["pos"] = str(mc_pos)

        def _metachunk_preparer():
            mc_pos = kwargs.get('meta_pos', 0)
            _fix_mc_pos(first_body, mc_pos)
            yield first_body
            while True:
                mc_pos += 1
                _, next_body = self.container.content_prepare(
                    account, container, obj_name, 1, stgpol=policy,
                    autocreate=True, **kwargs)
                _fix_mc_pos(next_body, mc_pos)
                yield next_body

        return obj_meta, _metachunk_preparer

    def _generate_fullpath(self, account, container_name, path, version):
        return ['{0}/{1}/{2}/{3}'.format(quote_plus(account),
                                         quote_plus(container_name),
                                         quote_plus(path),
                                         version)]

    def _object_create(self, account, container, obj_name, source,
                       sysmeta, properties=None, policy=None,
                       key_file=None, **kwargs):
        self._patch_timeouts(kwargs)
        obj_meta, chunk_prep = self._content_preparer(
            account, container, obj_name,
            policy=policy, **kwargs)
        obj_meta.update(sysmeta)
        obj_meta['content_path'] = obj_name
        obj_meta['container_id'] = cid_from_name(account, container).upper()
        obj_meta['ns'] = self.namespace
        obj_meta['full_path'] = self._generate_fullpath(
            account, container, obj_name, obj_meta['version'])
        obj_meta['oio_version'] = (obj_meta.get('oio_version')
                                   or OIO_VERSION)

        # XXX content_id is necessary to update an existing object
        kwargs['content_id'] = kwargs.get('content_id', obj_meta['id'])

        storage_method = STORAGE_METHODS.load(obj_meta['chunk_method'])
        if storage_method.ec:
            handler = ECWriteHandler(
                source, obj_meta, chunk_prep, storage_method, **kwargs)
        elif storage_method.backblaze:
            backblaze_info = self._b2_credentials(storage_method, key_file)
            handler = BackblazeWriteHandler(
                source, obj_meta, chunk_prep, storage_method,
                backblaze_info, **kwargs)
        else:
            handler = ReplicatedWriteHandler(
                source, obj_meta, chunk_prep, storage_method, **kwargs)

        final_chunks, bytes_transferred, content_checksum = handler.stream()

        etag = obj_meta.get('etag')
        if etag and etag.lower() != content_checksum.lower():
            raise exc.EtagMismatch(
                "given etag %s != computed %s" % (etag, content_checksum))
        obj_meta['etag'] = content_checksum

        data = {'chunks': final_chunks,
                'properties': properties or {}}
        # FIXME: we may just pass **obj_meta
        self.container.content_create(
            account, container, obj_name, size=bytes_transferred,
            checksum=content_checksum, data=data,
            stgpol=obj_meta['policy'],
            version=obj_meta['version'], mime_type=obj_meta['mime_type'],
            chunk_method=obj_meta['chunk_method'],
            **kwargs)
        return final_chunks, bytes_transferred, content_checksum

    def _b2_credentials(self, storage_method, key_file):
        key_file = key_file or '/etc/oio/sds/b2-appkey.conf'
        try:
            return BackblazeUtils.get_credentials(storage_method, key_file)
        except BackblazeUtilsException as err:
            raise exc.ConfigurationException(str(err))

    def _fetch_stream_backblaze(self, meta, chunks, ranges,
                                storage_method, key_file,
                                **kwargs):
        backblaze_info = self._b2_credentials(storage_method, key_file)
        total_bytes = 0
        current_offset = 0
        size = None
        offset = 0
        for pos in range(len(chunks)):
            if ranges:
                offset = ranges[pos][0]
                size = ranges[pos][1]

            if size is None:
                size = int(meta["length"])
            chunk_size = int(chunks[pos][0]["size"])
            if total_bytes >= size:
                break
            if current_offset + chunk_size > offset:
                if current_offset < offset:
                    _offset = offset - current_offset
                else:
                    _offset = 0
                if chunk_size + total_bytes > size:
                    _size = size - total_bytes
                else:
                    _size = chunk_size
                handler = BackblazeChunkDownloadHandler(
                    meta, chunks[pos], _offset, _size,
                    backblaze_info=backblaze_info)
                stream = handler.get_stream()
                if not stream:
                    raise exc.OioException("Error while downloading")
                total_bytes += len(stream)
                yield stream
            current_offset += chunk_size

    @handle_container_not_found
    def container_refresh(self, account, container, attempts=3, **kwargs):
        for i in range(attempts):
            try:
                self.account.container_reset(account, container, time.time())
            except exc.Conflict:
                if i >= attempts - 1:
                    raise
        try:
            self.container.container_touch(account, container)
        except exc.ClientException as e:
            if e.status != 406 and e.status != 431:
                raise
            # CODE_USER_NOTFOUND or CODE_CONTAINER_NOTFOUND
            metadata = dict()
            metadata["dtime"] = time.time()
            self.account.container_update(account, container, metadata)

    @handle_account_not_found
    def account_refresh(self, account, **kwargs):
        self.account.account_refresh(account)

        containers = self.container_list(account)
        for container in containers:
            try:
                self.container_refresh(account, container[0])
            except exc.NoSuchContainer:
                # container removed in the meantime
                pass

        while containers:
            marker = containers[-1][0]
            containers = self.container_list(account, marker=marker)
            if containers:
                for container in containers:
                    try:
                        self.container_refresh(account, container[0])
                    except exc.NoSuchContainer:
                        # container removed in the meantime
                        pass

    def all_accounts_refresh(self, **kwargs):
        accounts = self.account_list()
        for account in accounts:
            try:
                self.account_refresh(account)
            except exc.NoSuchAccount:
                # account removed in the meantime
                pass

    @handle_account_not_found
    def account_flush(self, account):
        self.account.account_flush(account)

    def _random_buffer(self, dictionary, n):
        return ''.join(random.choice(dictionary) for _ in range(n))

    def _generate_copy(self, chunks, random_hex=60):
        # random_hex is the number of hexadecimal characters to generate
        # for the copy path
        copies = []
        for c in chunks:
            tmp = ''.join([c[:-random_hex],
                           self._random_buffer('0123456789ABCDEF',
                                               random_hex)])
            copies.append(tmp)
        return copies

    def _send_copy(self, targets, copies, fullpath):
        headers = {"x-oio-chunk-meta-full-path": fullpath}
        if not hasattr(self, "blob_client"):
            from oio.blob.client import BlobClient
            self.blob_client = BlobClient()

        for t, c in zip(targets, copies):
            self.blob_client.chunk_link(t, c, headers=headers).status

    def _prepare_update_meta2(self, targets, copies, account,
                              container, content):
        targets_beans = []
        copies_beans = []
        for t, c in zip(targets, copies):
            targets_beans.append(self._meta2bean(t['url'], t, content))
            copies_beans.append(self._meta2bean(c, t, content))
        return targets_beans, copies_beans

    def _meta2bean(self, url, meta, content):
        return {"type": "chunk",
                "id": url,
                "hash": meta['hash'],
                "size": int(meta["size"]),
                "pos": meta["pos"],
                "content": content}
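
# Minimal usage sketch of ObjectStorageApi, assuming a reachable cluster.
# The namespace "OPENIO" and the account/container/object names below are
# illustrative placeholders; only the method signatures defined above are
# relied upon.
def _object_storage_api_demo():
    api = ObjectStorageApi('OPENIO',
                           connection_timeout=2.0, read_timeout=10.0)
    # Creating the container explicitly lets us attach properties to it.
    api.container_create('myaccount', 'mycontainer',
                         properties={'color': 'blue'})
    # Upload from a memory buffer; a file path or a file-like object
    # would work as well (see object_create()).
    chunks, size, checksum = api.object_create(
        'myaccount', 'mycontainer', obj_name='hello.txt',
        data='Hello, OIO!')
    # object_fetch() returns the object metadata and a data stream.
    meta, stream = api.object_fetch('myaccount', 'mycontainer', 'hello.txt')
    print(meta['mime_type'], ''.join(stream))
    api.object_delete('myaccount', 'mycontainer', 'hello.txt')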
class BlobConverter(object):

    def __init__(self, conf, logger=None, **kwargs):
        self.conf = conf
        self.logger = logger or get_logger(conf)
        volume = conf.get('volume')
        if not volume:
            raise ConfigurationException('No volume specified for converter')
        self.volume = volume
        self.namespace, self.volume_id = check_volume(self.volume)
        # cache
        self.name_by_cid = CacheDict()
        self.content_id_by_name = CacheDict()
        # client
        self.container_client = ContainerClient(conf, **kwargs)
        self.content_factory = ContentFactory(conf, self.container_client,
                                              logger=self.logger)
        self._rdir = None  # we may never need it
        # stats/logs
        self.errors = 0
        self.passes = 0
        self.total_chunks_processed = 0
        self.start_time = 0
        self.last_reported = 0
        self.report_interval = int_value(conf.get('report_interval'), 3600)
        # speed
        self.chunks_run_time = 0
        self.max_chunks_per_second = int_value(conf.get('chunks_per_second'),
                                               30)
        # backup
        self.no_backup = true_value(conf.get('no_backup', False))
        self.backup_dir = conf.get('backup_dir') or tempfile.gettempdir()
        self.backup_name = 'backup_%s_%f' \
            % (self.volume_id, time.time())
        # dry run
        self.dry_run = true_value(conf.get('dry_run', False))

    @property
    def rdir(self):
        """Get an instance of `RdirClient`."""
        if self._rdir is None:
            self._rdir = RdirClient(
                self.conf, pool_manager=self.container_client.pool_manager)
        return self._rdir

    def save_xattr(self, fd, chunk_id, xattr):
        if self.no_backup:
            return
        dirname = self.backup_dir + '/' + self.backup_name + '/' \
            + chunk_id[:3]
        try:
            os.makedirs(dirname)
        except OSError:
            if not os.path.isdir(dirname):
                raise
        with open(dirname + '/' + chunk_id, 'w') as backup_fd:
            # same format as getfattr
            backup_fd.write('# file: ' + self._get_path(fd, chunk_id) + '\n')
            for k, v in xattr.items():
                backup_fd.write('user.' + k + '="' + v + '"\n')

    def _save_container(self, cid, account, container):
        cid = cid.upper()
        self.name_by_cid[cid] = (account, container)
        return cid, account, container

    def _save_content(self, cid, path, version, content_id):
        cid = cid.upper()
        content_id = content_id.upper()
        self.content_id_by_name[(cid, path, version)] = content_id
        return cid, path, version, content_id

    def _get_path(self, fd, chunk_id):
        chunk_path = self.volume
        chunk_path_split = fd.name[len(self.volume):].split('/')
        start = 0
        for chunk_part in chunk_path_split[:-1]:
            end = start + len(chunk_part)
            chunk_path += '/' + chunk_id[start:end]
            start = end
        chunk_path += '/' + chunk_path_split[-1]
        return chunk_path

    def cid_from_name(self, account, container):
        cid = cid_from_name(account, container)
        cid, account, container = self._save_container(
            cid, account, container)
        return cid

    def name_from_cid(self, cid):
        name = self.name_by_cid.get(cid)
        if name:
            return name

        properties = self.container_client.container_get_properties(cid=cid)
        account = properties['system']['sys.account']
        container = properties['system']['sys.user.name']
        cid, account, container = self._save_container(
            cid, account, container)
        return account, container

    def content_id_from_name(self, cid, path, version, search=False):
        content_id = self.content_id_by_name.get((cid, path, version))
        if content_id or not search:
            return content_id

        properties = self.container_client.content_get_properties(
            cid=cid, path=path, version=version)
        content_id = properties['id']
        cid, path, version, content_id = self._save_content(
            cid, path, version, content_id)
        return content_id

    def decode_fullpath(self, fullpath):
        # pylint: disable=unbalanced-tuple-unpacking
        account, container, path, version, content_id = decode_fullpath(
            fullpath)
        cid = self.cid_from_name(account, container)
        cid, path, version, content_id = self._save_content(
            cid, path, version, content_id)
        return account, container, cid, path, version, content_id

    def decode_old_fullpath(self, old_fullpath):
        # pylint: disable=unbalanced-tuple-unpacking
        try:
            account, container, path, version = decode_old_fullpath(
                old_fullpath)
            cid = self.cid_from_name(account, container)
            content_id = self.content_id_from_name(cid, path, version)
        except ValueError:
            # We never know, let's try to decode the fullpath as if it was new
            account, container, path, version, content_id = decode_fullpath(
                old_fullpath)
            cid = self.cid_from_name(account, container)
        return account, container, cid, path, version, content_id

    def encode_fullpath(self, fd, chunk_id, account, container, path,
                        version, content_id):
        # check if chunk exists and has the same inode
        if not is_hexa(chunk_id) or len(chunk_id) != STRLEN_CHUNKID:
            raise ValueError('chunk ID must be hexadecimal (%s)'
                             % STRLEN_CHUNKID)
        try:
            chunk_inode = os.fstat(fd.fileno()).st_ino
            chunk_inode2 = os.stat(self._get_path(fd, chunk_id)).st_ino
            if chunk_inode2 != chunk_inode:
                raise OrphanChunk('Not the same inode: possible orphan chunk')
        except OSError:
            raise OrphanChunk('No such chunk: possible orphan chunk')
        # check fullpath and chunk ID
        if isinstance(version, string_types):
            try:
                version = int(version)
            except ValueError:
                raise ValueError('version must be a number')
        if version <= 0:
            raise ValueError('version must be positive')
        if not is_hexa(content_id):
            raise ValueError('content ID must be hexadecimal')

        fullpath = encode_fullpath(
            account, container, path, version, content_id.upper())
        return chunk_id.upper(), fullpath

    def _get_chunk_id_and_fullpath(self, fd, chunk_pos, content,
                                   chunk_id=None):
        content.container_id, content.account, content.container_name = \
            self._save_container(content.container_id, content.account,
                                 content.container_name)
        content.container_id, content.path, content.version, \
            content.content_id = self._save_content(
                content.container_id, content.path, content.version,
                content.content_id)

        chunks = content.chunks.filter(host=self.volume_id)
        if chunk_id:
            chunks = chunks.filter(id=chunk_id)
        chunk = chunks.filter(pos=chunk_pos).one()
        if chunk is None:
            raise OrphanChunk('Chunk not found in content: '
                              'possible orphan chunk')

        chunk_id, new_fullpath = self.encode_fullpath(
            fd, chunk.id, content.account, content.container_name,
            content.path, content.version, content.content_id)
        return chunk_id, new_fullpath

    def get_chunk_id_and_fullpath(self, fd, chunk_pos, container_id,
                                  path, version, chunk_id=None,
                                  account=None, container=None,
                                  content_id=None):
        if account is None or container is None:
            account, container = self.name_from_cid(container_id)

        if content_id:
            try:
                content = self.content_factory.get(
                    container_id, content_id,
                    account=account, container_name=container)
                return self._get_chunk_id_and_fullpath(
                    fd, chunk_pos, content, chunk_id=chunk_id)
            except Exception as exc:
                self.logger.warn(
                    'chunk_id=%s chunk_pos=%s object=%s/%s/%s/%s/%s/%s: %s',
                    chunk_id, chunk_pos, str(account), str(container),
                    container_id, path, str(version), str(content_id), exc)

        # version must be integer
        try:
            version = str(int(version))
        except Exception:
            version = None

        try:
            content = self.content_factory.get_by_path_and_version(
                container_id, path, version,
                account=account, container_name=container)
        except ContentNotFound:
            raise OrphanChunk('Content not found: possible orphan chunk')
        return self._get_chunk_id_and_fullpath(
            fd, chunk_pos, content, chunk_id=chunk_id)

    def convert_chunk(self, fd, chunk_id):
        meta, raw_meta = read_chunk_metadata(fd, chunk_id,
                                             for_conversion=True)

        links = meta.get('links', dict())
        for chunk_id2, fullpath2 in links.items():
            self.decode_fullpath(fullpath2)

        fullpath = meta.get('full_path')
        if fullpath is not None:
            self.decode_fullpath(fullpath)
            if meta.get('oio_version') == OIO_VERSION:
                return True, meta

        raw_chunk_id = None
        chunk_id = chunk_id.upper()
        chunk_pos = meta['chunk_pos']
        container_id = meta['container_id'].upper()
        path = meta['content_path']
        version = meta['content_version']
        content_id = meta['content_id'].upper()

        new_fullpaths = dict()
        xattr_to_remove = list()
        success = True
        for k, v in raw_meta.items():
            # fetch raw chunk ID
            if k == XATTR_CHUNK_ID:
                raw_chunk_id = v.upper()

            # search old fullpaths
            if not k.startswith(XATTR_OLD_FULLPATH) \
                    or not is_hexa(k[4:], size=64):
                continue

            try:
                account2, container2, container_id2, path2, version2, \
                    content_id2 = self.decode_old_fullpath(v)

                if meta['chunk_id'] == chunk_id \
                        and container_id == container_id2 \
                        and path == path2 \
                        and version == version2:
                    if content_id2 is None:
                        content_id2 = self.content_id_from_name(
                            container_id2, path2, version2, search=True)
                    chunk_id2, new_fullpath = self.encode_fullpath(
                        fd, chunk_id, account2, container2, path2,
                        version2, content_id2)
                    new_fullpaths[chunk_id2] = new_fullpath
                else:
                    chunk_id2, new_fullpath = self.get_chunk_id_and_fullpath(
                        fd, chunk_pos, container_id2, path2, version2,
                        account=account2, container=container2,
                        content_id=content_id2)
                    new_fullpaths[chunk_id2] = new_fullpath

                xattr_to_remove.append(k)
            except Exception as exc:
                success = False
                self.logger.warn('chunk_id=%s old_fullpath=%s: %s',
                                 chunk_id, k, exc)

        # old xattr
        if raw_chunk_id is not None:
            try:
                if raw_chunk_id != chunk_id and raw_chunk_id not in links:
                    if raw_chunk_id not in new_fullpaths:
                        meta2, _ = read_chunk_metadata(fd, raw_chunk_id)
                        container_id2 = meta2['container_id'].upper()
                        path2 = meta2['content_path']
                        version2 = meta2['content_version']
                        content_id2 = meta2['content_id'].upper()
                        raw_chunk_id2, new_fullpath = \
                            self.get_chunk_id_and_fullpath(
                                fd, chunk_pos, container_id2, path2,
                                version2, chunk_id=raw_chunk_id,
                                content_id=content_id2)
                        new_fullpaths[raw_chunk_id2] = new_fullpath
                elif raw_chunk_id == chunk_id and fullpath is None:
                    if raw_chunk_id not in new_fullpaths:
                        raw_chunk_id2, new_fullpath = \
                            self.get_chunk_id_and_fullpath(
                                fd, chunk_pos, container_id, path,
                                version, chunk_id=raw_chunk_id,
                                content_id=content_id)
                        new_fullpaths[raw_chunk_id2] = new_fullpath
            except Exception as exc:
                success = False
                self.logger.warn('chunk_id=%s (old xattr): %s',
                                 raw_chunk_id, exc)

        self.save_xattr(fd, chunk_id, raw_meta)

        if self.dry_run:
            self.logger.info(
                "[dryrun] Converting chunk %s: success=%s "
                "new_fullpaths=%s xattr_to_remove=%s",
                chunk_id, str(success), str(new_fullpaths),
                str(xattr_to_remove))
        else:
            # for security, if there is an error, we don't delete old xattr
            set_fullpath_xattr(fd, new_fullpaths, success, xattr_to_remove)

        return success, None

    def is_fullpath_error(self, err):
        if (isinstance(err, MissingAttribute)
                and (err.attribute.startswith(
                        CHUNK_XATTR_CONTENT_FULLPATH_PREFIX)
                     or err.attribute == CHUNK_XATTR_KEYS['content_path']
                     or err.attribute.startswith(XATTR_OLD_FULLPATH))):
            return True
        elif isinstance(err, FaultyChunk):
            return any(self.is_fullpath_error(x) for x in err.args)
        return False

    def safe_convert_chunk(self, path, chunk_id=None):
        if chunk_id is None:
            chunk_id = path.rsplit('/', 1)[-1]
            if len(chunk_id) != STRLEN_CHUNKID:
                self.logger.warn('Not a chunk %s' % path)
                return
            for char in chunk_id:
                if char not in hexdigits:
                    self.logger.warn('Not a chunk %s' % path)
                    return

        success = False
        self.total_chunks_processed += 1
        try:
            with open(path) as fd:
                success, _ = self.convert_chunk(fd, chunk_id)
        except (FaultyChunk, MissingAttribute) as err:
            if self.is_fullpath_error(err):
                self.logger.warn(
                    "Cannot convert %s: %s, will try to recover 'fullpath'",
                    path, err)
                try:
                    success = self.recover_chunk_fullpath(path, chunk_id)
                except Exception as err2:
                    self.logger.error('Could not recover fullpath: %s', err2)
            else:
                self.logger.exception('ERROR while converting %s', path)
        except Exception:
            self.logger.exception('ERROR while converting %s', path)

        if not success:
            self.errors += 1
        else:
            self.logger.debug('Converted %s', path)
            self.passes += 1

    def recover_chunk_fullpath(self, path, chunk_id=None):
        if not chunk_id:
            chunk_id = path.rsplit('/', 1)[-1]
        # 1. Fetch chunk list from rdir (could be cached).
        #    Unfortunately we cannot seek for a chunk ID.
        entries = [x for x in self.rdir.chunk_fetch(self.volume_id, limit=-1)
                   if x[2] == chunk_id]
        if not entries:
            raise KeyError('Chunk %s not found in rdir' % chunk_id)
        elif len(entries) > 1:
            self.logger.info('Chunk %s appears in %d objects',
                             chunk_id, len(entries))
        # 2. Find content and container IDs
        cid, content_id = entries[0][0:2]
        # 3a. Call ContainerClient.content_locate()
        #     with the container ID and content ID
        try:
            meta, chunks = self.container_client.content_locate(
                cid=cid, content=content_id)
        except NotFound as err:
            raise OrphanChunk('Cannot check %s is valid: %s' % (path, err))
        # 3b. Resolve container ID into account and container names.
        # FIXME(FVE): get account and container names from meta1
        cmeta = self.container_client.container_get_properties(cid=cid)
        aname = cmeta['system']['sys.account']
        cname = cmeta['system']['sys.user.name']
        fullpath = encode_fullpath(aname, cname, meta['name'],
                                   meta['version'], content_id)
        # 4. Check if the chunk actually belongs to the object
        chunk_url = 'http://%s/%s' % (self.volume_id, chunk_id)
        if chunk_url not in [x['url'] for x in chunks]:
            raise OrphanChunk('Chunk %s not found in object %s'
                              % (chunk_url, fullpath))
        # 5. Regenerate the fullpath
        with open(path, 'w') as fd:
            set_fullpath_xattr(fd, {chunk_id: fullpath})
        return True

    def _fetch_chunks_from_file(self, input_file):
        with open(input_file, 'r') as ifile:
            for line in ifile:
                chunk_path = line.strip()
                if chunk_path and not chunk_path.startswith('#'):
                    yield self.volume + '/' + chunk_path

    def paths_gen(self, input_file=None):
        if input_file:
            return self._fetch_chunks_from_file(input_file)
        else:
            return paths_gen(self.volume)

    def converter_pass(self, input_file=None):

        def report(tag, now=None):
            if now is None:
                now = time.time()
            total_time = now - self.start_time
            self.logger.info(
                '%(tag)s %(volume)s '
                'started=%(start_time)s '
                'passes=%(passes)d '
                'errors=%(errors)d '
                'chunks=%(nb_chunks)d %(c_rate).2f/s '
                'total_time=%(total_time).2f '
                '(converter: %(success_rate).2f%%)' % {
                    'tag': tag,
                    'volume': self.volume_id,
                    'start_time': datetime.fromtimestamp(
                        int(self.start_time)).isoformat(),
                    'passes': self.passes,
                    'errors': self.errors,
                    'nb_chunks': self.total_chunks_processed,
                    'c_rate': self.total_chunks_processed / total_time,
                    'total_time': total_time,
                    'success_rate':
                        100 * ((self.total_chunks_processed - self.errors)
                               / (float(self.total_chunks_processed) or 1.0))
                })
            self.passes = 0
            self.last_reported = now

        self.start_time = time.time()
        self.errors = 0
        self.passes = 0
        self.backup_name = 'backup_%s_%f' % (self.volume_id, self.start_time)

        paths = self.paths_gen(input_file=input_file)
        for path in paths:
            self.safe_convert_chunk(path)

            now = time.time()
            if now - self.last_reported >= self.report_interval:
                report('RUN', now=now)

            self.chunks_run_time = ratelimit(self.chunks_run_time,
                                             self.max_chunks_per_second)
        report('DONE')
        return self.errors == 0
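
# Minimal sketch of how a conversion pass is typically driven. The
# namespace and volume path below are illustrative placeholders:
# check_volume() expects a real rawx volume. With dry_run enabled, the
# converter only logs what it would change.
def _blob_converter_demo():
    conf = {
        'namespace': 'OPENIO',
        'volume': '/var/lib/oio/sds/OPENIO/rawx-1',
        'report_interval': 60,    # log progress every minute
        'chunks_per_second': 30,  # rate limit for the pass
        'dry_run': True,          # do not modify any xattr
    }
    converter = BlobConverter(conf)
    # Walk the whole volume; a file listing chunk paths can be passed
    # through input_file= instead.
    ok = converter.converter_pass()
    print('conversion pass %s' % ('succeeded' if ok else 'had errors'))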
class ObjectStorageApi(object): """ The Object Storage API. High level API that wraps `AccountClient`, `ContainerClient` and `DirectoryClient` classes. """ def __init__(self, namespace, **kwargs): """ Initialize the object storage API. :param namespace: name of the namespace to interract with :type namespace: `str` :keyword connection_timeout: connection timeout towards rawx services :type connection_timeout: `float` seconds :keyword read_timeout: timeout for rawx responses and data reads from the caller (when uploading) :type read_timeout: `float` seconds :keyword write_timeout: timeout for rawx write requests :type write_timeout: `float` seconds """ self.namespace = namespace self.connection_timeout = utils.float_value( kwargs.get("connection_timeout"), None) self.read_timeout = utils.float_value(kwargs.get("read_timeout"), None) self.write_timeout = utils.float_value(kwargs.get("write_timeout"), None) # FIXME: share session between all the clients self.directory = DirectoryClient({"namespace": self.namespace}, **kwargs) self.account = AccountClient({"namespace": self.namespace}, **kwargs) self.container = ContainerClient({"namespace": self.namespace}, **kwargs) def account_create(self, account, headers=None): """ Create an account. :param account: name of the account to create :type account: `str` :returns: `True` if the account has been created """ return self.account.account_create(account, headers=headers) @handle_account_not_found def account_delete(self, account, headers=None): """ Delete an account. :param account: name of the account to delete :type account: `str` """ self.account.account_delete(account, headers=headers) @handle_account_not_found def account_show(self, account, headers=None): """ Get information about an account. """ return self.account.account_show(account, headers=headers) def account_list(self, headers=None): """ List accounts """ return self.account.account_list(headers=headers) # FIXME: @handle_account_not_found def account_update(self, account, metadata, to_delete=None, headers=None): self.account.account_update(account, metadata, to_delete, headers=headers) @handle_account_not_found def account_set_properties(self, account, properties, headers=None): self.account_update(account, properties, headers=headers) @handle_account_not_found def account_del_properties(self, account, properties, headers=None): self.account_update(account, None, properties, headers=headers) def container_create(self, account, container, properties=None, headers=None, **kwargs): """ Create a container. :param account: account in which to create the container :type account: `str` :param container: name of the container :type container: `str` :param properties: properties to set on the container :type properties: `dict` :keyword headers: extra headers to send to the proxy :type headers: `dict` :returns: True if the container has been created, False if it already exists """ return self.container.container_create(account, container, properties=properties, headers=headers, autocreate=True, **kwargs) @handle_container_not_found def container_touch(self, account, container, headers=None, **kwargs): """ Trigger a notification about the container state. 
:param account: account from which to delete the container :type account: `str` :param container: name of the container :type container: `str` :keyword headers: extra headers to send to the proxy :type headers: `dict` """ if not headers: headers = dict() if 'X-oio-req-id' not in headers: headers['X-oio-req-id'] = utils.request_id() self.container.container_touch(account, container, headers=headers, **kwargs) def container_create_many(self, account, containers, properties=None, headers=None, **kwargs): """ Create Many containers :param account: account in which to create the containers :type account: `str` :param containers: names of the containers :type containers: `list` :param properties: properties to set on the containers :type properties: `dict` :keyword headers: extra headers to send to the proxy :type headers: `dict` """ return self.container.container_create_many(account, containers, properties=properties, headers=headers, autocreate=True, **kwargs) @handle_container_not_found def container_delete(self, account, container, headers=None, **kwargs): """ Delete a container. :param account: account from which to delete the container :type account: `str` :param container: name of the container :type container: `str` :keyword headers: extra headers to send to the proxy :type headers: `dict` """ self.container.container_delete(account, container, headers=headers, **kwargs) @handle_account_not_found def container_list(self, account, limit=None, marker=None, end_marker=None, prefix=None, delimiter=None, headers=None): """ Get the list of containers of an account. :param account: account from which to get the container list :type account: `str` :keyword limit: maximum number of results to return :type limit: `int` :keyword marker: name of the container from where to start the listing :type marker: `str` :keyword end_marker: :keyword prefix: :keyword delimiter: :keyword headers: extra headers to send to the proxy :type headers: `dict` """ resp = self.account.container_list(account, limit=limit, marker=marker, end_marker=end_marker, prefix=prefix, delimiter=delimiter, headers=headers) return resp["listing"] @handle_container_not_found def container_show(self, account, container, headers=None): """ Get information about a container (user properties). :param account: account in which the container is :type account: `str` :param container: name of the container :type container: `str` :keyword headers: extra headers to send to the proxy :type headers: `dict` :returns: a `dict` with "properties" containing a `dict` of user properties. """ return self.container.container_show(account, container, headers=headers) @handle_container_not_found def container_get_properties(self, account, container, properties=None, headers=None): """ Get information about a container (user and system properties). :param account: account in which the container is :type account: `str` :param container: name of the container :type container: `str` :param properties: *ignored* :keyword headers: extra headers to send to the proxy :type headers: `dict` :returns: a `dict` with "properties" and "system" entries, containing respectively a `dict` of user properties and a `dict` of system properties. """ return self.container.container_get_properties(account, container, properties=properties, headers=headers) @handle_container_not_found def container_set_properties(self, account, container, properties=None, clear=False, headers=None, **kwargs): """ Set properties on a container. 
        :param account: name of the account
        :type account: `str`
        :param container: name of the container where to set properties
        :type container: `str`
        :param properties: a dictionary of properties
        :type properties: `dict`
        :param clear: whether to clear the previous properties
        :type clear: `bool`
        :param headers: extra headers to pass to the proxy
        :type headers: `dict`
        :keyword system: dictionary of system properties to set
        """
        return self.container.container_set_properties(account, container,
                                                       properties,
                                                       clear=clear,
                                                       headers=headers,
                                                       **kwargs)

    @handle_container_not_found
    def container_del_properties(self, account, container, properties,
                                 headers=None, **kwargs):
        return self.container.container_del_properties(account, container,
                                                       properties,
                                                       headers=headers,
                                                       **kwargs)

    def container_update(self, account, container, metadata, clear=False,
                         headers=None):
        if not metadata:
            self.container_del_properties(account, container, [],
                                          headers=headers)
        else:
            self.container_set_properties(account, container, metadata,
                                          clear, headers=headers)

    @handle_container_not_found
    def object_create(self, account, container, file_or_path=None, data=None,
                      etag=None, obj_name=None, mime_type=None,
                      metadata=None, policy=None, headers=None,
                      key_file=None, **_kwargs):
        """
        Create an object in *container* of *account* with data taken from
        either *data* (`str` or `generator`) or *file_or_path* (path to a
        file or file-like object). The object will be named after *obj_name*
        if specified, or after the base name of *file_or_path*.

        :param account: name of the account where to create the object
        :type account: `str`
        :param container: name of the container where to create the object
        :type container: `str`
        :param file_or_path: file-like object or path to a file from which
            to read object data
        :type file_or_path: `str` or file-like object
        :param data: object data (if `file_or_path` is not set)
        :type data: `str` or `generator`
        :keyword etag: entity tag of the object
        :type etag: `str`
        :keyword obj_name: name of the object to create. If not set, will
            use the base name of `file_or_path`.
        :keyword mime_type: MIME type of the object
        :type mime_type: `str`
        :keyword metadata: a dictionary of user properties to set on the
            object
        :type metadata: `dict`
        :keyword policy: name of the storage policy
        :type policy: `str`
        :param headers: extra headers to pass to the proxy
        :type headers: `dict`
        :keyword key_file:
        """
        if (data, file_or_path) == (None, None):
            raise exc.MissingData()
        src = data if data is not None else file_or_path
        if src is file_or_path:
            if isinstance(file_or_path, basestring):
                if not os.path.exists(file_or_path):
                    raise exc.FileNotFound("File '%s' not found."
% file_or_path) file_name = os.path.basename(file_or_path) else: try: file_name = os.path.basename(file_or_path.name) except AttributeError: file_name = None obj_name = obj_name or file_name elif isgenerator(src): file_or_path = utils.GeneratorIO(src) src = file_or_path if not obj_name: raise exc.MissingName("No name for the object has been specified") sysmeta = {'mime_type': mime_type, 'etag': etag} if not headers: headers = dict() if 'X-oio-req-id' not in headers: headers['X-oio-req-id'] = utils.request_id() if src is data: return self._object_create(account, container, obj_name, BytesIO(data), sysmeta, properties=metadata, policy=policy, headers=headers, key_file=key_file) elif hasattr(file_or_path, "read"): return self._object_create(account, container, obj_name, src, sysmeta, properties=metadata, policy=policy, headers=headers, key_file=key_file) else: with open(file_or_path, "rb") as f: return self._object_create(account, container, obj_name, f, sysmeta, properties=metadata, policy=policy, headers=headers, key_file=key_file) def object_touch(self, account, container, obj, headers=None, **kwargs): """ Trigger a notification about an object (as if it just had been created). :param account: name of the account where to create the object :type account: `str` :param container: name of the container where to create the object :type container: `str` :param obj: name of the object to touch :param headers: extra headers to pass to the proxy """ if not headers: headers = dict() if 'X-oio-req-id' not in headers: headers['X-oio-req-id'] = utils.request_id() self.container.content_touch(account, container, obj, headers=headers, **kwargs) @handle_object_not_found def object_delete(self, account, container, obj, headers=None, **kwargs): if not headers: headers = dict() if 'X-oio-req-id' not in headers: headers['X-oio-req-id'] = utils.request_id() return self.container.content_delete(account, container, obj, headers=headers, **kwargs) def object_delete_many(self, account, container, objs, headers=None, **kwargs): if not headers: headers = dict() if 'X-oio-req-id' not in headers: headers['X-oio-req-id'] = utils.request_id() return self.container.content_delete_many(account, container, objs, headers=headers, **kwargs) @handle_container_not_found def object_list(self, account, container, limit=None, marker=None, delimiter=None, prefix=None, end_marker=None, headers=None, properties=False, **kwargs): """ Lists objects inside a container. 
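
        A minimal listing sketch (the account and container names are
        illustrative)::

            res = api.object_list("myaccount", "mycontainer", limit=100)
            for obj in res["objects"]:
                print(obj["name"])
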
:returns: a dict which contains * 'objects': the list of objects * 'prefixes': common prefixes (only if delimiter and prefix are set) * 'properties': a dict of container properties * 'system': system metadata """ _, resp_body = self.container.content_list(account, container, limit=limit, marker=marker, end_marker=end_marker, prefix=prefix, delimiter=delimiter, properties=properties, headers=headers, **kwargs) for obj in resp_body['objects']: mtype = obj.get('mime-type') if mtype: obj['mime_type'] = mtype del obj['mime-type'] return resp_body # FIXME: @handle_object_not_found def object_locate(self, account, container, obj, headers=None): obj_meta, body = self.container.content_locate(account, container, obj) return obj_meta, body def object_analyze(self, *args, **kwargs): """ :deprecated: use `object_locate` """ return self.object_locate(*args, **kwargs) def object_fetch(self, account, container, obj, ranges=None, headers=None, key_file=None): if not headers: headers = dict() if 'X-oio-req-id' not in headers: headers['X-oio-req-id'] = utils.request_id() meta, raw_chunks = self.object_locate(account, container, obj, headers=headers) chunk_method = meta['chunk_method'] storage_method = STORAGE_METHODS.load(chunk_method) chunks = _sort_chunks(raw_chunks, storage_method.ec) meta['container_id'] = utils.name2cid(account, container).upper() meta['ns'] = self.namespace if storage_method.ec: stream = self._fetch_stream_ec(meta, chunks, ranges, storage_method, headers) elif storage_method.backblaze: stream = self._fetch_stream_backblaze(meta, chunks, ranges, storage_method, key_file) else: stream = self._fetch_stream(meta, chunks, ranges, storage_method, headers) return meta, stream @handle_object_not_found def object_get_properties(self, account, container, obj, headers=None): return self.container.content_get_properties(account, container, obj) def object_show(self, account, container, obj, headers=None): """ Get a description of the content along with its user properties. :param account: name of the account in which the object is stored :param container: name of the container in which the object is stored :param obj: name of the object to query :returns: a `dict` describing the object .. 
code-block:: python

            {'hash': '6BF60C17CC15EEA108024903B481738F',
             'ctime': '1481031763',
             'deleted': 'False',
             'properties': {
                 u'projet': u'OpenIO-SDS'},
             'length': '43518',
             'hash_method': 'md5',
             'chunk_method': 'ec/algo=liberasurecode_rs_vand,k=6,m=3',
             'version': '1481031762951972',
             'policy': 'EC',
             'id': '20BF2194FD420500CD4729AE0B5CBC07',
             'mime_type': 'application/octet-stream',
             'name': 'Makefile'}
        """
        return self.container.content_show(account, container, obj,
                                           headers=headers)

    def object_update(self, account, container, obj, metadata, clear=False,
                      headers=None):
        if clear:
            self.object_del_properties(account, container, obj, [],
                                       headers=headers)
        if metadata:
            self.object_set_properties(account, container, obj, metadata,
                                       headers=headers)

    @handle_object_not_found
    def object_set_properties(self, account, container, obj, properties,
                              clear=False, headers=None, **kwargs):
        return self.container.content_set_properties(
            account, container, obj,
            properties={'properties': properties},
            headers=headers, **kwargs)

    @handle_object_not_found
    def object_del_properties(self, account, container, obj, properties,
                              headers=None, **kwargs):
        return self.container.content_del_properties(account, container,
                                                     obj,
                                                     properties=properties,
                                                     headers=headers,
                                                     **kwargs)

    # FIXME: remove and call self.container.content_prepare() directly
    def _content_prepare(self, account, container, obj_name, size,
                         policy=None, headers=None):
        return self.container.content_prepare(account, container, obj_name,
                                              size, stgpol=policy,
                                              autocreate=True,
                                              headers=headers)

    def _content_preparer(self, account, container, obj_name,
                          policy=None, headers=None):
        # TODO: optimize by asking more than one metachunk at a time
        obj_meta, first_body = self.container.content_prepare(
            account, container, obj_name, size=1, stgpol=policy,
            autocreate=True, headers=headers)
        storage_method = STORAGE_METHODS.load(obj_meta['chunk_method'])

        def _fix_mc_pos(chunks, mc_pos):
            for chunk in chunks:
                raw_pos = chunk["pos"].split(".")
                if storage_method.ec:
                    chunk['num'] = int(raw_pos[1])
                    chunk["pos"] = "%d.%d" % (mc_pos, chunk['num'])
                else:
                    chunk["pos"] = str(mc_pos)

        def _metachunk_preparer():
            mc_pos = 0
            _fix_mc_pos(first_body, mc_pos)
            yield first_body
            while True:
                mc_pos += 1
                _, next_body = self._content_prepare(account, container,
                                                     obj_name, 1, policy,
                                                     headers)
                _fix_mc_pos(next_body, mc_pos)
                yield next_body

        return obj_meta, _metachunk_preparer

    def _object_create(self, account, container, obj_name, source,
                       sysmeta, properties=None, policy=None, headers=None,
                       key_file=None):
        obj_meta, chunk_prep = self._content_preparer(account, container,
                                                      obj_name,
                                                      policy=policy,
                                                      headers=headers)
        obj_meta.update(sysmeta)
        obj_meta['content_path'] = obj_name
        obj_meta['container_id'] = utils.name2cid(account, container).upper()
        obj_meta['ns'] = self.namespace
        storage_method = STORAGE_METHODS.load(obj_meta['chunk_method'])
        if storage_method.ec:
            handler = ECWriteHandler(
                source, obj_meta, chunk_prep, storage_method,
                headers=headers, write_timeout=self.write_timeout,
                read_timeout=self.read_timeout,
                connection_timeout=self.connection_timeout)
        elif storage_method.backblaze:
            backblaze_info = self._b2_credentials(storage_method, key_file)
            handler = BackblazeWriteHandler(source, obj_meta, chunk_prep,
                                            storage_method, headers,
                                            backblaze_info)
        else:
            handler = ReplicatedWriteHandler(
                source, obj_meta, chunk_prep, storage_method,
                headers=headers, write_timeout=self.write_timeout,
                read_timeout=self.read_timeout,
                connection_timeout=self.connection_timeout)
        final_chunks, bytes_transferred, content_checksum = handler.stream()
        etag =
obj_meta.get('etag') if etag and etag.lower() != content_checksum.lower(): raise exc.EtagMismatch("given etag %s != computed %s" % (etag, content_checksum)) obj_meta['etag'] = content_checksum data = {'chunks': final_chunks, 'properties': properties or {}} # FIXME: we may just pass **obj_meta self.container.content_create(account, container, obj_name, size=bytes_transferred, checksum=content_checksum, data=data, content_id=obj_meta['id'], stgpol=obj_meta['policy'], version=obj_meta['version'], mime_type=obj_meta['mime_type'], chunk_method=obj_meta['chunk_method'], headers=headers) return final_chunks, bytes_transferred, content_checksum def _fetch_stream(self, meta, chunks, ranges, storage_method, headers): total_bytes = 0 headers = headers or {} ranges = ranges or [(None, None)] meta_range_list = get_meta_ranges(ranges, chunks) for meta_range_dict in meta_range_list: for pos, meta_range in meta_range_dict.iteritems(): meta_start, meta_end = meta_range if meta_start is not None and meta_end is not None: headers['Range'] = http_header_from_ranges([meta_range]) reader = io.ChunkReader( iter(chunks[pos]), io.READ_CHUNK_SIZE, headers, connection_timeout=self.connection_timeout, response_timeout=self.read_timeout, read_timeout=self.read_timeout) try: it = reader.get_iter() except Exception as err: raise exc.OioException( "Error while downloading position %d: %s" % (pos, err)) for part in it: for d in part['iter']: total_bytes += len(d) yield d def _fetch_stream_ec(self, meta, chunks, ranges, storage_method, headers): ranges = ranges or [(None, None)] meta_range_list = get_meta_ranges(ranges, chunks) for meta_range_dict in meta_range_list: for pos, meta_range in meta_range_dict.iteritems(): meta_start, meta_end = meta_range handler = ECChunkDownloadHandler( storage_method, chunks[pos], meta_start, meta_end, headers, connection_timeout=self.connection_timeout, response_timeout=self.read_timeout, read_timeout=self.read_timeout) stream = handler.get_stream() for part_info in stream: for d in part_info['iter']: yield d stream.close() def _b2_credentials(self, storage_method, key_file): key_file = key_file or '/etc/oio/sds/b2-appkey.conf' try: return BackblazeUtils.get_credentials(storage_method, key_file) except BackblazeUtilsException as err: raise exc.ConfigurationException(str(err)) def _fetch_stream_backblaze(self, meta, chunks, ranges, storage_method, key_file): backblaze_info = self._b2_credentials(storage_method, key_file) total_bytes = 0 current_offset = 0 size = None offset = 0 for pos in range(len(chunks)): if ranges: offset = ranges[pos][0] size = ranges[pos][1] if size is None: size = int(meta["length"]) chunk_size = int(chunks[pos][0]["size"]) if total_bytes >= size: break if current_offset + chunk_size > offset: if current_offset < offset: _offset = offset - current_offset else: _offset = 0 if chunk_size + total_bytes > size: _size = size - total_bytes else: _size = chunk_size handler = BackblazeChunkDownloadHandler( meta, chunks[pos], _offset, _size, backblaze_info=backblaze_info) stream = handler.get_stream() if not stream: raise exc.OioException("Error while downloading") total_bytes += len(stream) yield stream current_offset += chunk_size
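
# A minimal end-to-end sketch of the write/read path implemented above. The
# namespace, account and container names are illustrative, not defaults:
#
#     api = ObjectStorageApi("OPENIO")
#     api.object_create("myaccount", "mycontainer",
#                       data="hello world", obj_name="hello.txt")
#     meta, stream = api.object_fetch("myaccount", "mycontainer",
#                                     "hello.txt")
#     body = "".join(stream)  # object_fetch yields the data in chunks
#     assert meta["length"] == str(len(body))
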
class TestContentRebuildFilter(BaseTestCase):

    def setUp(self):
        super(TestContentRebuildFilter, self).setUp()
        self.namespace = self.conf['namespace']
        self.gridconf = {"namespace": self.namespace}
        self.container = "TestContentRebuildFilter%f" % time.time()
        self.ref = self.container
        self.container_client = ContainerClient(self.conf)
        self.container_client.container_create(self.account, self.container)
        syst = self.container_client.container_get_properties(
            self.account, self.container)['system']
        self.container_id = syst['sys.name'].split('.', 1)[0]
        self.object_storage_api = ObjectStorageApi(namespace=self.namespace)
        queue_addr = choice(self.conf['services']['beanstalkd'])['addr']
        self.queue_url = queue_addr
        self.conf['queue_url'] = 'beanstalk://' + self.queue_url
        self.conf['tube'] = DEFAULT_REBUILDER_TUBE
        self.notify_filter = NotifyFilter(app=_App, conf=self.conf)
        bt = Beanstalk.from_url(self.conf['queue_url'])
        bt.drain_tube(DEFAULT_REBUILDER_TUBE)
        bt.close()

    def _create_event(self, content_name, present_chunks, missing_chunks,
                      content_id):
        event = {}
        event["when"] = time.time()
        event["event"] = "storage.content.broken"
        event["data"] = {"present_chunks": present_chunks,
                         "missing_chunks": missing_chunks}
        event["url"] = {"ns": self.namespace,
                        "account": self.account,
                        "user": self.container,
                        "path": content_name,
                        "id": self.container_id,
                        "content": content_id}
        return event

    def _is_chunks_created(self, previous, after, pos_created):
        # Keep only the chunks of `after` that were not already in `previous`
        remain = list(after)
        for p in previous:
            for r in remain:
                if p["url"] == r["url"]:
                    remain.remove(r)
                    break
        if len(remain) != len(pos_created):
            return False
        # Every newly created chunk must be at one of the expected positions.
        # Do not remove items from `remain` while iterating over it: that
        # would skip elements and let unexpected positions slip through.
        for r in remain:
            if r["pos"] not in pos_created:
                return False
        return True

    def _rebuild(self, event, job_id=0):
        self.blob_rebuilder = subprocess.Popen(
            ['oio-blob-rebuilder', self.namespace,
             '--beanstalkd=' + self.queue_url])
        time.sleep(3)
        self.blob_rebuilder.kill()

    def _remove_chunks(self, chunks, content_id):
        if not chunks:
            return
        for chunk in chunks:
            chunk['id'] = chunk['url']
            chunk['content'] = content_id
            chunk['type'] = 'chunk'
        self.container_client.container_raw_delete(self.account,
                                                   self.container,
                                                   data=chunks)

    def _check_rebuild(self, content_name, chunks, missing_pos, meta,
                       chunks_to_remove, chunk_created=True):
        self._remove_chunks(chunks_to_remove, meta['id'])
        event = self._create_event(content_name, chunks, missing_pos,
                                   meta['id'])
        self.notify_filter.process(env=event, cb=None)
        self._rebuild(event)
        _, after = self.object_storage_api.object_locate(
            container=self.container, obj=content_name,
            account=self.account)
        self.assertIs(chunk_created,
                      self._is_chunks_created(chunks, after, missing_pos))

    def test_nothing_missing(self):
        content_name = "test_nothing_missing"
        self.object_storage_api.object_create(account=self.account,
                                              container=self.container,
                                              data="test",
                                              policy="THREECOPIES",
                                              obj_name=content_name)
        meta, chunks = self.object_storage_api.object_locate(
            container=self.container, obj=content_name,
            account=self.account)
        chunks_to_remove = []
        for chunk in chunks:
            chunk.pop('score', None)
        missing_pos = []
        self._check_rebuild(content_name, chunks, missing_pos, meta,
                            chunks_to_remove, chunk_created=True)

    def test_missing_1_chunk(self):
        content_name = "test_missing_1_chunk"
        self.object_storage_api.object_create(account=self.account,
                                              container=self.container,
                                              data="test",
                                              policy="THREECOPIES",
                                              obj_name=content_name)
        meta, chunks = self.object_storage_api.object_locate(
            container=self.container, obj=content_name,
            account=self.account)
        chunks_to_remove = []
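        # Drop one of the three copies at position 0; the rebuilder is
        # expected to recreate a chunk at that position.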
        chunks_to_remove.append(chunks.pop(0))
        for chunk in chunks:
            chunk.pop('score', None)
        missing_pos = ["0"]
        self._check_rebuild(content_name, chunks, missing_pos, meta,
                            chunks_to_remove)

    def test_missing_last_chunk(self):
        content_name = "test_missing_last_chunk"
        data = random_str(1024 * 1024 * 4)
        self.object_storage_api.object_create(account=self.account,
                                              container=self.container,
                                              data=data,
                                              policy="THREECOPIES",
                                              obj_name=content_name)
        meta, chunks = self.object_storage_api.object_locate(
            container=self.container, obj=content_name,
            account=self.account)
        chunks_to_remove = []
        chunks_to_remove.append(chunks.pop(0))
        for chunk in chunks:
            chunk.pop('score', None)
        missing_pos = ["3"]
        self._check_rebuild(content_name, chunks, missing_pos, meta,
                            chunks_to_remove)

    def test_missing_2_chunks(self):
        content_name = "test_missing_2_chunks"
        self.object_storage_api.object_create(account=self.account,
                                              container=self.container,
                                              data="test",
                                              policy="THREECOPIES",
                                              obj_name=content_name)
        meta, chunks = self.object_storage_api.object_locate(
            container=self.container, obj=content_name,
            account=self.account)
        chunks_to_remove = []
        for i in range(0, 2):
            chunks_to_remove.append(chunks.pop(0))
        for chunk in chunks:
            chunk.pop('score', None)
        missing_pos = ["0", "0"]
        self._check_rebuild(content_name, chunks, missing_pos, meta,
                            chunks_to_remove)

    def test_missing_all_chunks(self):
        content_name = "test_missing_all_chunks"
        self.object_storage_api.object_create(account=self.account,
                                              container=self.container,
                                              data="test",
                                              policy="SINGLE",
                                              obj_name=content_name)
        meta, chunks = self.object_storage_api.object_locate(
            container=self.container, obj=content_name,
            account=self.account)
        chunks_to_remove = []
        chunks_to_remove.append(chunks.pop(0))
        for chunk in chunks:
            chunk.pop('score', None)
        missing_pos = ["0"]
        self._check_rebuild(content_name, chunks, missing_pos, meta,
                            chunks_to_remove, chunk_created=False)

    def test_missing_all_chunks_of_a_pos(self):
        content_name = "test_missing_all_chunks_of_a_pos"
        self.object_storage_api.object_create(account=self.account,
                                              container=self.container,
                                              data="test",
                                              policy="THREECOPIES",
                                              obj_name=content_name)
        meta, chunks = self.object_storage_api.object_locate(
            container=self.container, obj=content_name,
            account=self.account)
        chunks_to_remove = []
        for i in range(0, 3):
            chunks_to_remove.append(chunks.pop(0))
        for chunk in chunks:
            chunk.pop('score', None)
        missing_pos = ["0"]
        self._check_rebuild(content_name, chunks, missing_pos, meta,
                            chunks_to_remove, chunk_created=False)

    def test_missing_multiple_chunks(self):
        content_name = "test_missing_multiple_chunks"
        data = random_str(1024 * 1024 * 4)
        self.object_storage_api.object_create(account=self.account,
                                              container=self.container,
                                              data=data,
                                              policy="THREECOPIES",
                                              obj_name=content_name)
        meta, chunks = self.object_storage_api.object_locate(
            container=self.container, obj=content_name,
            account=self.account)
        chunks_to_remove = []
        chunks_to_remove.append(chunks.pop(9))
        chunks_to_remove.append(chunks.pop(6))
        chunks_to_remove.append(chunks.pop(4))
        chunks_to_remove.append(chunks.pop(0))
        for chunk in chunks:
            chunk.pop('score', None)
        missing_pos = ["0", "1", "2", "3"]
        self._check_rebuild(content_name, chunks, missing_pos, meta,
                            chunks_to_remove)

    def test_missing_1_chunk_ec(self):
        if len(self.conf['services']['rawx']) < 9:
            self.skipTest("Not enough rawx. "
" "EC tests needs at least 9 rawx to run") content_name = "test_missing_1_chunk_ec" self.object_storage_api.object_create(account=self.account, container=self.container, data="test", policy="EC", obj_name=content_name) meta, chunks = self.object_storage_api.object_locate( container=self.container, obj=content_name, account=self.account) chunks_to_remove = [] chunks_to_remove.append(chunks.pop(0)) for chunk in chunks: chunk.pop('score', None) missing_pos = ["0.1"] self._check_rebuild(content_name, chunks, missing_pos, meta, chunks_to_remove) def test_missing_m_chunk_ec(self): if len(self.conf['services']['rawx']) < 9: self.skipTest("Not enough rawx. " "EC tests needs at least 9 rawx to run") content_name = "test_missing_m_chunk_ec" self.object_storage_api.object_create(account=self.account, container=self.container, data="test", policy="EC", obj_name=content_name) meta, chunks = self.object_storage_api.object_locate( container=self.container, obj=content_name, account=self.account) chunks_to_remove = [] for i in range(0, 3): chunks_to_remove.append(chunks.pop(0)) for chunk in chunks: chunk.pop('score', None) missing_pos = ["0.1", "0.2", "0.3"] self._check_rebuild(content_name, chunks, missing_pos, meta, chunks_to_remove) def test_missing_m_chunk_ec_2(self): if len(self.conf['services']['rawx']) < 9: self.skipTest("Not enough rawx. " "EC tests needs at least 9 rawx to run") content_name = "test_missing_m_chunk_ec" self.object_storage_api.object_create(account=self.account, container=self.container, data="test", policy="EC", obj_name=content_name) meta, chunks = self.object_storage_api.object_locate( container=self.container, obj=content_name, account=self.account) chunks_to_remove = [] chunks_to_remove.append(chunks.pop(0)) chunks_to_remove.append(chunks.pop(3)) chunks_to_remove.append(chunks.pop(5)) for chunk in chunks: chunk.pop('score', None) missing_pos = ["0.1", "0.5", "0.8"] self._check_rebuild(content_name, chunks, missing_pos, meta, chunks_to_remove) def test_missing_m1_chunk_ec(self): if len(self.conf['services']['rawx']) < 9: self.skipTest("Not enough rawx. " "EC tests needs at least 9 rawx to run") content_name = "test_missing_m1_chunk_ec" self.object_storage_api.object_create(account=self.account, container=self.container, data="test", policy="EC", obj_name=content_name) meta, chunks = self.object_storage_api.object_locate( container=self.container, obj=content_name, account=self.account) chunks_to_remove = [] chunks_to_remove.append(chunks.pop(0)) chunks_to_remove.append(chunks.pop(0)) chunks_to_remove.append(chunks.pop(0)) chunks_to_remove.append(chunks.pop(0)) for chunk in chunks: chunk.pop('score', None) missing_pos = ["0.1", "0.2", "0.3", "0.4"] self._check_rebuild(content_name, chunks, missing_pos, meta, chunks_to_remove, chunk_created=False)
class ContentFactory(object): DEFAULT_DATASEC = "plain", {"nb_copy": "1", "distance": "0"} def __init__(self, conf, **kwargs): self.conf = conf self.logger = get_logger(conf) self.container_client = ContainerClient(conf, logger=self.logger, **kwargs) def get(self, container_id, content_id, account=None, container_name=None): try: meta, chunks = self.container_client.content_locate( cid=container_id, content=content_id) except NotFound: raise ContentNotFound("Content %s/%s not found" % (container_id, content_id)) chunk_method = meta['chunk_method'] storage_method = STORAGE_METHODS.load(chunk_method) if not account or not container_name: container_info = self.container_client.container_get_properties( cid=container_id)['system'] if not account: account = container_info['sys.account'] if not container_name: container_name = container_info['sys.user.name'] cls = ECContent if storage_method.ec else PlainContent return cls(self.conf, container_id, meta, chunks, storage_method, account, container_name, container_client=self.container_client) def new(self, container_id, path, size, policy, account=None, container_name=None): meta, chunks = self.container_client.content_prepare(cid=container_id, path=path, size=size, stgpol=policy) chunk_method = meta['chunk_method'] storage_method = STORAGE_METHODS.load(chunk_method) if not account or not container_name: container_info = self.container_client.container_get_properties( cid=container_id)['system'] if not account: account = container_info['sys.account'] if not container_name: container_name = container_info['sys.user.name'] cls = ECContent if storage_method.ec else PlainContent return cls(self.conf, container_id, meta, chunks, storage_method, account, container_name) def copy(self, origin, policy=None): if not policy: policy = origin.policy metadata = origin.metadata.copy() new_metadata, chunks = self.container_client.content_prepare( cid=origin.container_id, path=metadata['name'], size=metadata['length'], stgpol=policy) metadata['chunk_method'] = new_metadata['chunk_method'] metadata['chunk_size'] = new_metadata['chunk_size'] # We must use a new content_id since we change the data metadata['id'] = new_metadata['id'] # We may want to keep the same version, but it is denied by meta2 metadata['version'] = int(metadata['version']) + 1 metadata['policy'] = new_metadata['policy'] # FIXME: meta2 does not allow us to set ctime # and thus the object will appear as new. storage_method = STORAGE_METHODS.load(metadata['chunk_method']) cls = ECContent if storage_method.ec else PlainContent return cls(self.conf, origin.container_id, metadata, chunks, storage_method, origin.account, origin.container_name) def change_policy(self, container_id, content_id, new_policy): old_content = self.get(container_id, content_id) if old_content.policy == new_policy: return old_content new_content = self.copy(old_content, policy=new_policy) stream = old_content.fetch() new_content.create(GeneratorIO(stream)) # the old content is automatically deleted because the new content has # the same name (but not the same id) return new_content
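
# A minimal policy-migration sketch with ContentFactory. The identifiers
# are illustrative; as noted in change_policy(), the new content replaces
# the old one because it keeps the same name (but not the same id):
#
#     factory = ContentFactory({"namespace": "OPENIO"})
#     content = factory.change_policy(container_id, content_id,
#                                     "THREECOPIES")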