def check_volume_for_service_type(volume_path, required_type):
    """
    Validate that `volume_path` hosts a service of type `required_type`.

    The service type, the namespace and the service ID are all read from
    the user extended attributes of `volume_path`.

    :param volume_path: path of the directory to inspect
    :param required_type: service type the directory is expected to host
    :returns: the namespace name and the service ID
    :raises oio.common.exceptions.OioException: when the specified path
        does not belong to a service from the specified type
        or is missing some attributes.
    """
    prefix = 'Invalid volume path [%s]: ' % volume_path
    xattrs = read_user_xattr(volume_path)

    type_key = volume_xattr_keys['type']
    found_type = xattrs.get(type_key)
    if found_type is None:
        raise exc.OioException(prefix + 'missing %s xattr' % type_key)
    if found_type != required_type:
        detail = 'service is a {0}, not a {1}'.format(
            found_type, required_type)
        raise exc.OioException(prefix + detail)

    namespace_key = volume_xattr_keys['namespace']
    id_key = volume_xattr_keys['id']
    namespace = xattrs.get(namespace_key)
    service_id = xattrs.get(id_key)
    # The service ID is checked first: it is the attribute reported
    # when both the ID and the namespace are missing.
    for value, key in ((service_id, id_key), (namespace, namespace_key)):
        if value is None:
            raise exc.OioException(prefix + 'missing %s xattr' % key)
    return namespace, service_id
def check_volume(volume_path):
    """
    Check that `volume_path` is the volume of a rawx service.

    :param volume_path: path of the directory to inspect
    :returns: the namespace name and the service ID read from the
        user extended attributes of the directory
    :raises exc.OioException: when the type attribute is not 'rawx',
        or when the namespace or the service ID attribute is missing
    """
    xattrs = read_user_xattr(volume_path)
    if xattrs.get(volume_xattr_keys['type']) != 'rawx':
        raise exc.OioException('Invalid volume path')

    namespace = xattrs.get(volume_xattr_keys['namespace'])
    service_id = xattrs.get(volume_xattr_keys['id'])
    if not (namespace is not None and service_id is not None):
        raise exc.OioException('Invalid rawx volume path')
    return namespace, service_id
def stream(self, source, size):
    """
    Send the data read from `source` to all the available chunk writers.

    :param source: object the data is read from (handed to `self._stream`)
    :param size: amount of data to transfer (handed to `self._stream`)
    :returns: a tuple with the number of bytes transferred, the
        hexadecimal digest of `self.checksum`, and the list of chunks
        reported by the writers followed by the chunks which had no writer
    :raises exc.OioException: when the quorum is not reached
    """
    usable_writers = []
    chunks_without_writer = []
    for writer, chunk in self._get_writers():
        if writer:
            usable_writers.append(writer)
            continue
        # a falsy writer means nothing can be sent for this chunk
        chunks_without_writer.append(chunk)

    # write the data
    sent = self._stream(source, size, usable_writers)

    # get the chunks from writers
    written_chunks, quorum_reached = self._get_results(usable_writers)
    if quorum_reached:
        return (sent, self.checksum.hexdigest(),
                written_chunks + chunks_without_writer)

    logger.error('Quorum not reached during write')
    raise exc.OioException('Write failure')
def _fetch_stream(self, meta, chunks, ranges, storage_method, headers):
    """
    Generate the data of an object, one chunk position after the other.

    :param meta: not used by this method
    :param chunks: mapping of positions to the chunks available at
        that position (each value is iterated by the chunk reader)
    :param ranges: list of (start, end) tuples to download,
        or a false value to download everything
    :param storage_method: not used by this method
    :param headers: optional headers to send with each chunk request.
        The mapping is copied, never modified.
    :type headers: `dict`
    :raises exc.OioException: when the reader of a position
        cannot be started
    """
    base_headers = dict(headers or {})
    ranges = ranges or [(None, None)]

    for meta_range_dict in get_meta_ranges(ranges, chunks):
        # Dict iteration order is arbitrary: sort the positions so that
        # the data is yielded in object order.
        for pos in sorted(meta_range_dict):
            meta_range = meta_range_dict[pos]
            meta_start, meta_end = meta_range
            # Start from fresh headers for each position, so a 'Range'
            # computed for a previous position is never sent again,
            # and the caller's dictionary is left untouched.
            req_headers = dict(base_headers)
            if meta_start is not None and meta_end is not None:
                req_headers['Range'] = http_header_from_ranges([meta_range])
            reader = io.ChunkReader(
                iter(chunks[pos]), io.READ_CHUNK_SIZE, req_headers,
                connection_timeout=self.connection_timeout,
                response_timeout=self.read_timeout,
                read_timeout=self.read_timeout)
            try:
                it = reader.get_iter()
            except Exception as err:
                raise exc.OioException(
                    "Error while downloading position %d: %s" % (pos, err))
            for part in it:
                for d in part['iter']:
                    yield d
def quorum_or_fail(self, successes, failures):
    """
    Compare the number of uploads against the quorum.

    :param successes: a list of chunk objects whose upload succeeded
    :type successes: `list` or `tuple`
    :param failures: a list of chunk objects whose upload failed
    :type failures: `list` or `tuple`
    :raises `exc.SourceReadError`: if there is an error while reading
        data from the client
    :raises `exc.SourceReadTimeout`: if there is a timeout while reading
        data from the client
    :raises `exc.OioTimeout`: if there is a timeout among the errors
    :raises `exc.OioException`: if quorum has not been reached
        for any other reason
    """
    if len(successes) >= self.quorum:
        return

    # Each argument may independently be a list or a tuple: normalize
    # them before concatenating, `list + tuple` raises a TypeError.
    all_chunks = list(successes) + list(failures)
    errors = group_chunk_errors(
        (chunk["url"], chunk.get("error", "success"))
        for chunk in all_chunks)
    new_exc = exc.OioException(
        "RAWX write failure, quorum not reached (%d/%d): %s" %
        (len(successes), self.quorum, errors))

    # The first failure carrying a source read error or a timeout
    # decides the type of the exception raised.
    for failed in failures:
        err = failed.get('error')
        if isinstance(err, exc.SourceReadError):
            raise exc.SourceReadError(new_exc)
        elif isinstance(err, green.SourceReadTimeout):
            # Never raise 'green' timeouts out of our API
            raise exc.SourceReadTimeout(new_exc)
        elif isinstance(err, (exc.OioTimeout, green.OioTimeout)):
            raise exc.OioTimeout(new_exc)
    raise new_exc
def get_stream(self):
    """
    Open the fragment readers and build the stream decoding them.

    :returns: a started `ECStream` reading from the valid sources
    :raises exceptions.OioException: when fewer than `ec_nb_data`
        readers answered with a 200 or 206 status
    """
    range_infos = self._get_range_infos()
    chunk_iter = iter(self.chunks)
    nb_data = self.storage_method.ec_nb_data

    # an eventlet GreenPool manages the readers,
    # an eventlet GreenPile spawns them and collects their results
    with green.ContextPool(nb_data) as pool:
        pile = GreenPile(pool)
        for _ in range(nb_data):
            pile.spawn(self._get_fragment, chunk_iter, range_infos,
                       self.storage_method)

        # TODO log failures?
        readers = [(reader, parts_iter)
                   for reader, parts_iter in pile
                   if reader.status in (200, 206)]

    # with EC we need at least ec_nb_data valid readers
    if len(readers) < nb_data:
        raise exceptions.OioException("Not enough valid sources to read")

    # all readers should return the same Content-Length
    # so just take the headers from one of them
    first_reader = readers[0][0]
    resp_headers = HeadersDict(first_reader.headers)
    fragment_length = int(resp_headers.get('Content-Length'))
    ec_stream = ECStream(self.storage_method,
                         [parts_iter for _, parts_iter in readers],
                         range_infos, self.meta_length, fragment_length)
    # start the stream
    ec_stream.start()
    return ec_stream
def _fetch_stream_backblaze(self, meta, chunks, ranges,
                            storage_method, key_file):
    """
    Generate the data of an object whose chunks are downloaded through
    `BackblazeChunkDownloadHandler`.

    :param meta: object metadata, `meta["length"]` is used as the size
        to download when no size is known from `ranges`
    :param chunks: chunks indexed by position, the "size" of the first
        chunk of each position is used
    :param ranges: when true, `ranges[pos]` provides the (offset, size)
        pair used while handling position `pos`
    :param storage_method: passed to `self._b2_credentials`
    :param key_file: passed to `self._b2_credentials`
    :raises exc.OioException: when a download returns nothing
    """
    backblaze_info = self._b2_credentials(storage_method, key_file)
    sent_bytes = 0
    chunk_start = 0
    size = None
    offset = 0
    for pos in range(len(chunks)):
        if ranges:
            offset, size = ranges[pos][0], ranges[pos][1]
        if size is None:
            size = int(meta["length"])
        chunk_size = int(chunks[pos][0]["size"])

        if sent_bytes >= size:
            break

        # only download chunks ending after the requested offset
        if chunk_start + chunk_size > offset:
            skip = offset - chunk_start if chunk_start < offset else 0
            if chunk_size + sent_bytes > size:
                wanted = size - sent_bytes
            else:
                wanted = chunk_size
            handler = BackblazeChunkDownloadHandler(
                meta, chunks[pos], skip, wanted,
                backblaze_info=backblaze_info)
            data = handler.get_stream()
            if not data:
                raise exc.OioException("Error while downloading")
            sent_bytes += len(data)
            yield data
        chunk_start += chunk_size
def fetch_stream(chunks, ranges, storage_method, headers=None, **kwargs):
    """
    Generate the data of an object, one chunk position after the other.

    :param chunks: mapping of positions to the chunks available at
        that position (each value is iterated by the chunk reader)
    :param ranges: list of (start, end) tuples to download,
        or a false value to download everything
    :param storage_method: not used by this function
    :param headers: optional headers to send with each chunk request.
        The mapping is copied, never modified.
    :type headers: `dict`
    :param kwargs: extra keyword arguments passed to `ChunkReader`
    :raises exc.UnrecoverableContent: when the reader of a position
        fails with `exc.NotFound`
    :raises exc.OioException: when the reader of a position
        cannot be started for any other reason
    """
    # `headers` defaults to None: always work on a real dictionary,
    # the original code failed with a TypeError when setting 'Range'.
    base_headers = dict(headers or {})
    ranges = ranges or [(None, None)]
    meta_range_list = get_meta_ranges(ranges, chunks)

    for meta_range_dict in meta_range_list:
        for pos in sorted(meta_range_dict.keys()):
            meta_range = meta_range_dict[pos]
            meta_start, meta_end = meta_range
            # Start from fresh headers for each position, so a 'Range'
            # computed for a previous position is never sent again.
            req_headers = dict(base_headers)
            if meta_start is not None and meta_end is not None:
                req_headers['Range'] = http_header_from_ranges(
                    (meta_range, ))
            reader = ChunkReader(iter(chunks[pos]), READ_CHUNK_SIZE,
                                 headers=req_headers, **kwargs)
            try:
                it = reader.get_iter()
            except exc.NotFound as err:
                raise exc.UnrecoverableContent(
                    "Cannot download position %d: %s" % (pos, err))
            except Exception as err:
                raise exc.OioException(
                    "Error while downloading position %d: %s" % (pos, err))
            for part in it:
                for dat in part['iter']:
                    yield dat
def _quorum_or_fail(self, successes, failures): quorum = self._check_quorum(successes) if not quorum: errors = utils.group_chunk_errors( ((chunk["url"], chunk.get("error", "success")) for chunk in successes + failures)) raise exc.OioException( "RAWX write failure, quorum not reached: %s" % errors)
def object_truncate(self, account, container, obj,
                    version=None, size=None, **kwargs):
    """
    Truncate object at specified size. Only shrink is supported.
    A download may occur if size is not on chunk boundaries.

    :param account: name of the account in which the object is stored
    :param container: name of the container in which the object is stored
    :param obj: name of the object to query
    :param version: version of the object to query
    :param size: new size of object
    :returns: the result of `self.container.content_truncate`
    :raises exc.OioException: when no chunk covers the requested size
    """
    # code copied from object_fetch (should be factorized !)
    meta, raw_chunks = self.object_locate(
        account, container, obj, version=version, **kwargs)
    storage_method = STORAGE_METHODS.load(meta['chunk_method'])
    chunks = _sort_chunks(raw_chunks, storage_method.ec)

    # Look for the first position whose first chunk covers `size`
    boundary_chunk = None
    for pos in sorted(chunks.keys()):
        candidate = chunks[pos][0]
        if (candidate['offset'] <= size
                <= candidate['offset'] + candidate['size']):
            boundary_chunk = candidate
            break
    if boundary_chunk is None:
        raise exc.OioException("No chunk found at position %d" % size)

    if boundary_chunk['offset'] != size:
        # retrieve partial chunk
        fetched = self.object_fetch(
            account, container, obj, version=version,
            ranges=[(boundary_chunk['offset'], size - 1)])
        # TODO implement a proper object_update
        meta_pos = int(boundary_chunk['pos'].split('.')[0])
        self.object_create(account, container, obj_name=obj,
                           data=fetched[1], meta_pos=meta_pos,
                           content_id=meta['id'])

    return self.container.content_truncate(
        account, container, obj, version=version, size=size, **kwargs)
def __init__(self, conf):
    """
    Prepare one indexing worker per configured meta2 volume.

    :param conf: configuration of the indexer. Its `volume_list` entry
        is a comma-separated list of volumes.
    :raises exc.OioException: when `volume_list` is missing or empty
    """
    super(Meta2Indexer, self).__init__(conf=conf)
    self.logger = get_logger(conf)

    raw_volumes = conf.get("volume_list")
    if not raw_volumes:
        raise exc.OioException("No meta2 volumes provided to index !")

    self.volumes = [volume.strip() for volume in raw_volumes.split(',')]
    # the pool is sized to the number of volumes
    self.pool = ContextPool(len(self.volumes))
    self.volume_workers = [Meta2IndexingWorker(volume, conf)
                           for volume in self.volumes]
def _put_meta_backblaze(storage_method, application_key):
    """
    Build the metadata required to upload an object to Backblaze.

    :param storage_method: storage method providing `account_id`
        and `bucket_name`
    :param application_key: Backblaze application key
    :returns: a dictionary with the account ID, the application key,
        the bucket name, an authorization token and an upload token
    :raises exc.OioException: when the application key is empty, or when
        the bucket name or the account ID is '0'
    """
    if (not application_key
            or storage_method.bucket_name == '0'
            or storage_method.account_id == '0'):
        raise exc.OioException("The client is missing backblaze parameters")

    account_id = storage_method.account_id
    bucket_name = storage_method.bucket_name
    meta = {
        'backblaze.account_id': account_id,
        'backblaze.application_key': application_key,
        'bucket_name': bucket_name,
    }

    # both tokens are obtained from the Backblaze client
    backblaze = Backblaze(account_id, application_key)
    meta['authorization'] = backblaze.authorization_token
    meta['uploadToken'] = \
        backblaze._get_upload_token_by_bucket_name(bucket_name)
    return meta
def quorum_or_fail(self, successes, failures):
    """
    Compare the number of uploads against the quorum.

    :param successes: a list of chunk objects whose upload succeeded
    :type successes: `list` or `tuple`
    :param failures: a list of chunk objects whose upload failed
    :type failures: `list` or `tuple`
    :raises `exc.OioException`: if quorum has not been reached
    """
    if len(successes) >= self.quorum:
        return

    # Each argument may independently be a list or a tuple: normalize
    # them before concatenating, `list + tuple` raises a TypeError.
    all_chunks = list(successes) + list(failures)
    errors = group_chunk_errors(
        (chunk["url"], chunk.get("error", "success"))
        for chunk in all_chunks)
    raise exc.OioException(
        "RAWX write failure, quorum not reached (%d/%d): %s" %
        (len(successes), self.quorum, errors))
def _fetch_stream(self, meta, chunks, ranges, storage_method, headers):
    """
    Generate the data of an object, one chunk position after the other.

    :param meta: not used by this method
    :param chunks: mapping of positions to the chunks available at
        that position (each value is iterated by the chunk reader)
    :param ranges: list of (start, end) tuples used to select positions,
        or a false value to select everything
    :param storage_method: not used by this method
    :param headers: optional headers passed to each chunk reader
    :raises exc.OioException: when a reader does not provide an iterator
    """
    headers = headers or {}
    ranges = ranges or [(None, None)]
    meta_ranges = get_meta_ranges(ranges, chunks)

    # NOTE(review): the range computed for each position is not turned
    # into a 'Range' header here, so every selected position seems to be
    # downloaded entirely. Confirm this is intended.
    # Dict iteration order is arbitrary: sort the positions so that
    # the data is yielded in object order.
    for pos in sorted(meta_ranges):
        reader = io.ChunkReader(iter(chunks[pos]), io.READ_CHUNK_SIZE,
                                headers)
        it = reader.get_iter()
        if not it:
            raise exc.OioException("Error while downloading")
        for part in it:
            for d in part['iter']:
                yield d
def stream(self, source, size):
    """
    Upload the data read from `source` to every chunk of the meta-chunk.

    A chunked PUT request is opened for each chunk, then the data is read
    from `source` and queued for every connection that is still alive.
    The quorum is checked after the connection phase, after each block
    of data, and after the responses have been collected.

    :param source: file-like object the data is read from
    :param size: number of bytes expected from `source`
    :returns: a tuple with the number of bytes transferred, the
        hexadecimal digest of `self.checksum`, and the list of
        successfully uploaded chunks followed by the failed ones
    :raises SourceReadTimeout: when reading from `source` takes too long
    :raises SourceReadError: when reading from `source` fails
    :raises exc.OioException: when the quorum is not reached
    """
    def _connect_put(chunk):
        """Open a chunked PUT request to the service hosting `chunk`."""
        raw_url = chunk["url"]
        parsed = urlparse(raw_url)
        try:
            chunk_path = parsed.path.split('/')[-1]
            h = {}
            h["transfer-encoding"] = "chunked"
            h[chunk_headers["content_id"]] = self.sysmeta['id']
            h[chunk_headers["content_version"]] = self.sysmeta['version']
            h[chunk_headers["content_path"]] = \
                utils.quote(self.sysmeta['content_path'])
            h[chunk_headers["content_chunkmethod"]] = \
                self.sysmeta['chunk_method']
            h[chunk_headers["content_policy"]] = self.sysmeta['policy']
            h[chunk_headers["container_id"]] = self.sysmeta['container_id']
            h[chunk_headers["chunk_pos"]] = chunk["pos"]
            h[chunk_headers["chunk_id"]] = chunk_path
            with ConnectionTimeout(io.CONNECTION_TIMEOUT):
                conn = io.http_connect(
                    parsed.netloc, 'PUT', parsed.path, h)
                conn.chunk = chunk
            return conn, chunk
        except (Exception, Timeout) as e:
            msg = str(e)
            logger.error("Failed to connect to %s (%s)", chunk, msg)
            chunk['error'] = msg
            return None, chunk

    meta_chunk = self.meta_chunk
    pile = GreenPile(len(meta_chunk))
    failed_chunks = []
    current_conns = []

    # Connect to all the services in parallel
    for chunk in meta_chunk:
        pile.spawn(_connect_put, chunk)
    results = [d for d in pile]
    for conn, chunk in results:
        if not conn:
            failed_chunks.append(chunk)
        else:
            current_conns.append(conn)

    if not self._check_quorum(current_conns):
        raise exc.OioException("RAWX write failure")

    bytes_transferred = 0
    try:
        with utils.ContextPool(len(meta_chunk)) as pool:
            for conn in current_conns:
                conn.failed = False
                conn.queue = Queue(io.PUT_QUEUE_DEPTH)
                pool.spawn(self._send_data, conn)

            while True:
                remaining_bytes = size - bytes_transferred
                if io.WRITE_CHUNK_SIZE < remaining_bytes:
                    read_size = io.WRITE_CHUNK_SIZE
                else:
                    read_size = remaining_bytes
                with SourceReadTimeout(io.CLIENT_TIMEOUT):
                    try:
                        data = source.read(read_size)
                    except (ValueError, IOError) as e:
                        raise SourceReadError(str(e))
                    if len(data) == 0:
                        # end of data: send the terminating chunk
                        for conn in current_conns:
                            conn.queue.put('0\r\n\r\n')
                        break
                self.checksum.update(data)
                bytes_transferred += len(data)

                # Build a new list instead of removing items from the
                # list being iterated, which made the loop skip the
                # connection following each removed one. The chunks of
                # dropped connections are recorded as failed, so they
                # still appear in the result.
                alive_conns = []
                for conn in current_conns:
                    if conn.failed:
                        failed_chunks.append(conn.chunk)
                    else:
                        conn.queue.put('%x\r\n%s\r\n' % (len(data), data))
                        alive_conns.append(conn)
                current_conns = alive_conns

                if not self._check_quorum(current_conns):
                    raise exc.OioException("RAWX write failure")

            # Wait for all the queued data to be sent
            for conn in current_conns:
                if conn.queue.unfinished_tasks:
                    conn.queue.join()

    except SourceReadTimeout:
        logger.warn('Source read timeout')
        raise
    except SourceReadError:
        logger.warn('Source read error')
        raise
    except Timeout:
        logger.exception('Timeout writing data')
        raise
    except Exception:
        logger.exception('Exception writing data')
        raise

    success_chunks = []

    # Collect the responses in parallel
    for conn in current_conns:
        if conn.failed:
            failed_chunks.append(conn.chunk)
            continue
        pile.spawn(self._get_response, conn)

    def _handle_resp(conn, resp):
        """Sort the chunk of `conn` according to the response status."""
        if resp:
            if resp.status == 201:
                success_chunks.append(conn.chunk)
            else:
                conn.failed = True
                conn.chunk['error'] = 'HTTP %s' % resp.status
                failed_chunks.append(conn.chunk)
                logger.error("Wrong status code from %s (%s)",
                             conn.chunk, resp.status)
        conn.close()

    # NOTE(review): a connection whose response is missing is neither
    # closed nor reported in the result. Confirm this is intended.
    for (conn, resp) in pile:
        if resp:
            _handle_resp(conn, resp)

    if not self._check_quorum(success_chunks):
        raise exc.OioException("RAWX write failure")

    meta_checksum = self.checksum.hexdigest()
    for chunk in success_chunks:
        chunk["size"] = bytes_transferred
        chunk["hash"] = meta_checksum

    return bytes_transferred, meta_checksum, success_chunks + failed_chunks
def _direct_request(self, method, url, headers=None, data=None, json=None,
                    params=None, admin_mode=False, pool_manager=None,
                    **kwargs):
    """
    Make an HTTP request.

    :param method: HTTP method to use (e.g. "GET")
    :type method: `str`
    :param url: URL to request
    :type url: `str`
    :param data: body of the request, replaced by the serialized `json`
        when `json` is provided
    :param json: object to serialize as the JSON body of the request
    :param params: query string parameters, `None` values are skipped
    :type params: `dict`
    :param pool_manager: pool manager to use instead of
        `self.pool_manager`
    :keyword admin_mode: allow operations on slave or worm namespaces
    :type admin_mode: `bool`
    :keyword timeout: optional timeout for the request (in seconds).
        May be a `urllib3.Timeout(connect=connection_timeout,
        read=read_timeout)`.
        This method also accepts `connection_timeout` and `read_timeout`
        as separate arguments.
    :type timeout: `float` or `urllib3.Timeout`
    :keyword headers: optional headers to add to the request
    :type headers: `dict`

    :returns: the response object and its body. The body is the decoded
        JSON document when the response data can be parsed as JSON,
        the raw response data otherwise.

    :raise oio.common.exceptions.OioTimeout: in case of read, write
        or connection timeout
    :raise oio.common.exceptions.OioNetworkException: in case of
        connection error
    :raise oio.common.exceptions.OioException: in other case
        of HTTP error
    :raise oio.common.exceptions.ClientException: in case of HTTP status
        code >= 400
    """
    # Keep only the keyword arguments listed in URLLIB3_REQUESTS_KWARGS
    out_kwargs = {
        k: v
        for k, v in kwargs.items() if k in URLLIB3_REQUESTS_KWARGS
    }

    # Ensure headers are all strings
    if headers:
        out_headers = {k: str(v) for k, v in headers.items()}
    else:
        out_headers = dict()
    # Admin mode may be enabled on the instance or for this request only
    if self.admin_mode or admin_mode:
        out_headers[ADMIN_HEADER] = '1'

    # Ensure there is a timeout
    if 'timeout' not in out_kwargs:
        out_kwargs['timeout'] = urllib3.Timeout(
            connect=kwargs.get('connection_timeout', CONNECTION_TIMEOUT),
            read=kwargs.get('read_timeout', READ_TIMEOUT))

    # Convert json and add Content-Type
    # (a false `json` value, like an empty dict, is not serialized)
    if json:
        out_headers["Content-Type"] = "application/json"
        data = jsonlib.dumps(json)

    out_kwargs['headers'] = out_headers
    out_kwargs['body'] = data

    # Add query string
    if params:
        out_param = []
        for k, v in params.items():
            if v is not None:
                # unicode values are encoded to UTF-8 before URL-encoding
                if isinstance(v, unicode):
                    v = unicode(v).encode('utf-8')
                out_param.append((k, v))
        encoded_args = urlencode(out_param)
        url += '?' + encoded_args

    if not pool_manager:
        pool_manager = self.pool_manager

    # Every error below is re-raised as an exception from the
    # `exceptions` module, with the traceback of the original error
    # (Python 2 three-expression raise).
    try:
        resp = pool_manager.request(method, url, **out_kwargs)
        body = resp.data
        if body:
            # Best effort: the body is kept as is when it is not JSON
            try:
                body = jsonlib.loads(body)
            except ValueError:
                pass
    except MaxRetryError as exc:
        # The actual cause of the failure is in `exc.reason`
        if isinstance(exc.reason, NewConnectionError):
            raise exceptions.OioNetworkException(exc), None, \
                sys.exc_info()[2]
        if isinstance(exc.reason, TimeoutError):
            raise exceptions.OioTimeout(exc), None, sys.exc_info()[2]
        raise exceptions.OioNetworkException(exc), None, sys.exc_info()[2]
    except (ProtocolError, ProxyError, ClosedPoolError) as exc:
        raise exceptions.OioNetworkException(exc), None, sys.exc_info()[2]
    except TimeoutError as exc:
        raise exceptions.OioTimeout(exc), None, sys.exc_info()[2]
    except HTTPError as exc:
        # NOTE(review): presumably the base class of the errors above,
        # hence handled last - confirm against the imports of the file
        raise exceptions.OioException(exc), None, sys.exc_info()[2]
    if resp.status >= 400:
        raise exceptions.from_response(resp, body)
    return resp, body
def _b2_credentials(self, storage_method, key_file):
    """
    Get the Backblaze credentials from `BackblazeUtils.get_credentials`.

    :param storage_method: passed to `BackblazeUtils.get_credentials`
    :param key_file: passed to `BackblazeUtils.get_credentials`
    :returns: whatever `BackblazeUtils.get_credentials` returns
    :raises exc.OioException: in place of `BackblazeUtilsException`,
        with the same message
    """
    try:
        credentials = BackblazeUtils.get_credentials(
            storage_method, key_file)
    except BackblazeUtilsException as err:
        raise exc.OioException(str(err))
    return credentials
def _direct_request(self, method, url, headers=None, data=None, json=None,
                    params=None, admin_mode=False, pool_manager=None,
                    force_master=False, **kwargs):
    """
    Make an HTTP request.

    :param method: HTTP method to use (e.g. "GET")
    :type method: `str`
    :param url: URL to request
    :type url: `str`
    :param data: body of the request, replaced by the serialized `json`
        when `json` is provided
    :param json: object to serialize as the JSON body of the request
    :param params: query string parameters, `None` values are skipped
    :type params: `dict`
    :param pool_manager: pool manager to use instead of
        `self.pool_manager`
    :keyword admin_mode: allow operations on slave or worm namespaces
    :type admin_mode: `bool`
    :keyword deadline: deadline for the request, in monotonic time.
        Supersedes `read_timeout`.
    :type deadline: `float` seconds
    :keyword timeout: optional timeout for the request (in seconds).
        May be a `urllib3.Timeout(connect=connection_timeout,
        read=read_timeout)`.
        This method also accepts `connection_timeout` and `read_timeout`
        as separate arguments.
    :type timeout: `float` or `urllib3.Timeout`
    :keyword headers: optional headers to add to the request
    :type headers: `dict`
    :keyword force_master: request will run on master service only.
    :type force_master: `bool`
    :keyword reqid: request ID to send with the request, truncated to
        `STRLEN_REQID` characters
    :keyword perfdata: dictionary in which the duration of the request
        and the values of the performance data header of the response
        are accumulated, under the `self.service_type` key
    :type perfdata: `dict`

    :returns: the response object and its body. The body is the decoded
        JSON document when the response declares the JSON content type,
        the raw response data otherwise.

    :raise oio.common.exceptions.OioTimeout: in case of read, write
        or connection timeout
    :raise oio.common.exceptions.OioNetworkException: in case of
        connection error
    :raise oio.common.exceptions.OioException: in other case
        of HTTP error, or when a JSON response cannot be decoded
    :raise oio.common.exceptions.ClientException: in case of HTTP status
        code >= 400
    """
    # Keep only the keyword arguments listed in URLLIB3_REQUESTS_KWARGS
    out_kwargs = {k: v for k, v in iteritems(kwargs)
                  if k in URLLIB3_REQUESTS_KWARGS}

    # Ensure headers are all strings
    if headers:
        out_headers = {k: text_type(v) for k, v in headers.items()}
    else:
        out_headers = dict()
    # Both flags may be set on the instance or for this request only
    if self.admin_mode or admin_mode:
        out_headers[ADMIN_HEADER] = '1'
    if self.force_master or force_master:
        out_headers[FORCEMASTER_HEADER] = '1'

    # Look for a request deadline, deduce the timeout from it.
    if kwargs.get('deadline', None) is not None:
        to = deadline_to_timeout(kwargs['deadline'], True)
        # An explicit read timeout can only shorten the deduced one
        to = min(to, kwargs.get('read_timeout', to))
        # This replaces any 'timeout' passed by the caller
        out_kwargs['timeout'] = urllib3.Timeout(
            connect=kwargs.get('connection_timeout', CONNECTION_TIMEOUT),
            read=to)
        # Shorten the deadline by 1% to compensate for the time spent
        # connecting and reading response.
        # NOTE(review): the header value is presumably in microseconds
        # (seconds * 1000000 below) - confirm against the server side
        out_headers[TIMEOUT_HEADER] = int(to * 990000.0)

    # Ensure there is a timeout
    if 'timeout' not in out_kwargs:
        out_kwargs['timeout'] = urllib3.Timeout(
            connect=kwargs.get('connection_timeout', CONNECTION_TIMEOUT),
            read=kwargs.get('read_timeout', READ_TIMEOUT))
    # No deadline was provided: advertise the read timeout instead
    if TIMEOUT_HEADER not in out_headers:
        to = out_kwargs['timeout']
        if isinstance(to, urllib3.Timeout):
            to = to.read_timeout
        else:
            to = float(to)
        out_headers[TIMEOUT_HEADER] = int(to * 1000000.0)

    # Look for a request ID
    if 'reqid' in kwargs:
        out_headers[REQID_HEADER] = str(kwargs['reqid'])

    # The request ID may also come from the `headers` argument
    if len(out_headers.get(REQID_HEADER, '')) > STRLEN_REQID:
        out_headers[REQID_HEADER] = \
            out_headers[REQID_HEADER][:STRLEN_REQID]
        self.__logger().warn('Request ID truncated to %d characters',
                             STRLEN_REQID)

    # Convert json and add Content-Type
    # (a false `json` value, like an empty dict, is not serialized)
    if json:
        out_headers["Content-Type"] = HTTP_CONTENT_TYPE_JSON
        data = jsonlib.dumps(json)

    # Trigger performance measurements
    perfdata = kwargs.get('perfdata', None)
    if perfdata is not None:
        out_headers[PERFDATA_HEADER] = 'enabled'

    # Explicitly keep or close the connection
    if 'Connection' not in out_headers:
        out_headers['Connection'] = self.connection

    out_kwargs['headers'] = out_headers
    out_kwargs['body'] = data

    # Add query string
    if params:
        out_param = []
        for k, v in params.items():
            if v is not None:
                # text values are encoded to UTF-8 before URL-encoding
                if isinstance(v, text_type):
                    v = text_type(v).encode('utf-8')
                out_param.append((k, v))
        encoded_args = urlencode(out_param)
        url += '?' + encoded_args

    if not pool_manager:
        pool_manager = self.pool_manager

    try:
        if perfdata is not None:
            request_start = monotonic_time()
        resp = pool_manager.request(method, url, **out_kwargs)
        if perfdata is not None:
            request_end = monotonic_time()
            # Accumulate the duration of the whole request
            service_perfdata = perfdata.setdefault(
                self.service_type, dict())
            service_perfdata['overall'] = service_perfdata.get(
                'overall', 0.0) + request_end - request_start
        body = resp.data
        # Only decode the body when the response declares it as JSON
        if body and resp.headers.get('Content-Type') \
                == HTTP_CONTENT_TYPE_JSON:
            try:
                body = jsonlib.loads(body.decode('utf-8'))
            except (UnicodeDecodeError, ValueError):
                self.__logger().warn(
                    "Response body isn't decodable JSON: %s", body)
                raise exceptions.OioException(
                    "Response body isn't decodable JSON")
        if perfdata is not None and PERFDATA_HEADER in resp.headers:
            service_perfdata = perfdata[self.service_type]
            # The header is a comma-separated list of 'key=value' pairs,
            # each value is divided by 1000000 before being accumulated
            for header_val in resp.headers[PERFDATA_HEADER].split(','):
                kv = header_val.split('=', 1)
                service_perfdata[kv[0]] = service_perfdata.get(
                    kv[0], 0.0) + float(kv[1]) / 1000000.0
    except urllib3.exceptions.HTTPError as exc:
        # NOTE(review): presumably always raises, `resp` would be
        # unbound below otherwise - confirm
        oio_exception_from_httperror(exc,
                                     reqid=out_headers.get(REQID_HEADER),
                                     url=url)
    if resp.status >= 400:
        raise exceptions.from_response(resp, body)
    return resp, body