def chunk_delete_many(self, chunks, cid=None,
                      concurrency=PARALLEL_CHUNKS_DELETE,
                      **kwargs):
    """
    :rtype: `list` of either `urllib3.response.HTTPResponse`
        or `urllib3.exceptions.HTTPError`, with an extra
        "chunk" attribute.
    """
    headers = kwargs['headers'].copy()
    if cid is not None:
        # This is only to get a nice access log
        headers['X-oio-chunk-meta-container-id'] = cid
    timeout = kwargs.get('timeout')
    if not timeout:
        timeout = urllib3.Timeout(CHUNK_TIMEOUT)

    def __delete_chunk(chunk_):
        try:
            resp = self.http_pool.request(
                "DELETE", self.resolve_url(chunk_['url']),
                headers=headers, timeout=timeout)
            resp.chunk = chunk_
            return resp
        except urllib3.exceptions.HTTPError as ex:
            ex.chunk = chunk_
            return ex

    pile = GreenPile(concurrency)
    for chunk in chunks:
        pile.spawn(__delete_chunk, chunk)
    resps = [resp for resp in pile if resp]
    return resps
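# Illustrative sketch (not part of the client) of the eventlet GreenPile
# fan-out pattern used by chunk_delete_many: spawn one greenthread per item,
# iterate the pile to collect results in spawn order, and return exceptions
# as values so a single failure does not abort the whole batch.
# `delete_one` is a hypothetical callable standing in for the real HTTP call.
from eventlet import GreenPile


def delete_all(urls, delete_one):
    def _safe_delete(url):
        try:
            return delete_one(url)
        except Exception as exc:
            # Report the failure instead of raising, like __delete_chunk above
            return exc

    pile = GreenPile(len(urls) or 1)
    for url in urls:
        pile.spawn(_safe_delete, url)
    # Iterating a GreenPile blocks until each result is available
    return [res for res in pile]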
def get_stream(self):
    range_infos = self._get_range_infos()
    chunk_iter = iter(self.chunks)

    # we use eventlet GreenPool to manage readers
    with green.ContextPool(self.storage_method.ec_nb_data) as pool:
        pile = GreenPile(pool)
        # we use eventlet GreenPile to spawn readers
        for _j in range(self.storage_method.ec_nb_data):
            pile.spawn(self._get_fragment, chunk_iter, range_infos,
                       self.storage_method)

        readers = []
        for reader, parts_iter in pile:
            if reader.status in (200, 206):
                readers.append((reader, parts_iter))
            # TODO log failures?

    # with EC we need at least ec_nb_data valid readers
    if len(readers) >= self.storage_method.ec_nb_data:
        # all readers should return the same Content-Length
        # so just take the headers from one of them
        resp_headers = HeadersDict(readers[0][0].headers)
        fragment_length = int(resp_headers.get('Content-Length'))
        read_iterators = [it for _, it in readers]
        stream = ECStream(self.storage_method, read_iterators,
                          range_infos, self.meta_length, fragment_length)
        # start the stream
        stream.start()
        return stream
    else:
        raise exceptions.ServiceUnavailable(
            'Not enough valid sources to read (%d/%d)' % (
                len(readers), self.storage_method.ec_nb_data))
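# Side sketch (assumes pyeclib is installed; k, m and ec_type are made-up
# example values, not necessarily what the storage policy uses): with a k+m
# erasure code, any k fragments are enough to decode, which is why get_stream
# only requires ec_nb_data valid readers.
from pyeclib.ec_iface import ECDriver

driver = ECDriver(k=6, m=3, ec_type='liberasurecode_rs_vand')
payload = b'x' * 1024
fragments = driver.encode(payload)               # k + m = 9 fragments
assert driver.decode(fragments[:6]) == payload   # any 6 of them suffice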
def chunk_delete_many(self, chunks, cid=None,
                      concurrency=PARALLEL_CHUNKS_DELETE,
                      **kwargs):
    """
    :rtype: `list` of either `urllib3.response.HTTPResponse`
        or `urllib3.exceptions.HTTPError`, with an extra
        "chunk" attribute.
    """
    headers = kwargs.pop('headers', None)
    # Actually this is not needed since ensure_request_id always sets it
    if headers is None:
        headers = dict()
    else:
        headers = headers.copy()
    if cid is not None:
        # This is only to get a nice access log
        headers['X-oio-chunk-meta-container-id'] = cid

    def __delete_chunk(chunk_):
        try:
            resp = self._request(
                "DELETE", chunk_['url'], headers=headers, **kwargs)
            resp.chunk = chunk_
            return resp
        except urllib3.exceptions.HTTPError as ex:
            ex.chunk = chunk_
            return ex

    pile = GreenPile(concurrency)
    for chunk in chunks:
        pile.spawn(__delete_chunk, chunk)
    resps = [resp for resp in pile if resp]
    return resps
def rebuild(self):
    pile = GreenPile(len(self.meta_chunk))
    nb_data = self.storage_method.ec_nb_data
    headers = {}
    for chunk in self.meta_chunk:
        pile.spawn(self._get_response, chunk, headers)

    # Sort all responses according to the chunk size
    total_resps = 0
    resps_by_size = dict()
    resps_without_chunk_size = list()
    for resp in pile:
        if not resp:
            continue
        chunk_size = int_value(
            resp.getheader(CHUNK_HEADERS['chunk_size'], None), None)
        if chunk_size is None:
            self.logger.warning('Missing chunk size')
            resps_without_chunk_size.append(resp)
            continue
        total_resps += 1
        resps_by_size.setdefault(chunk_size, list()).append(resp)

    # Select the chunk with the majority chunk size
    resps = None
    max_resps = 0
    assumed_chunk_size = None
    for chunk_size, resps in resps_by_size.items():
        nb_resp = len(resps)
        if nb_resp > max_resps:
            max_resps = nb_resp
            assumed_chunk_size = chunk_size
    if assumed_chunk_size is None:
        self.logger.warning(
            'No chunk available with chunk size information')
        resps = list()
    else:
        resps = resps_by_size[assumed_chunk_size]
        if max_resps != total_resps:
            self.logger.warning(
                '%d/%d chunks are not the same size as others (%d), '
                'they should be removed', total_resps - max_resps,
                total_resps, assumed_chunk_size)

    # Check the number of chunks available
    if max_resps < nb_data:
        # Add the chunks without size information
        # assuming they are the correct size
        resps = resps + resps_without_chunk_size
        if len(resps) < nb_data:
            self.logger.error(
                'Unable to read enough valid sources to rebuild')
            raise exceptions.UnrecoverableContent(
                'Not enough valid sources to rebuild')
        self.logger.warning(
            'Use chunk(s) without size information to rebuild a chunk')

    rebuild_iter = self._make_rebuild_iter(resps[:nb_data])
    return assumed_chunk_size, rebuild_iter
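# Standalone illustration (made-up sizes) of the "majority chunk size"
# selection performed above: group the reported sizes and keep the value with
# the most votes.
from collections import Counter

reported_sizes = [1048576, 1048576, 524288, 1048576]
assumed_chunk_size, votes = Counter(reported_sizes).most_common(1)[0]
# assumed_chunk_size == 1048576, votes == 3: the 524288 response is suspicious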
def _get_writers(self):
    """
    Initialize writers for all chunks of the metachunk and connect them
    """
    pile = GreenPile(len(self.meta_chunk))

    # we use eventlet GreenPile to spawn the writers
    for _pos, chunk in enumerate(self.meta_chunk):
        pile.spawn(self._get_writer, chunk)

    writers = [w for w in pile]
    return writers
def frag_iter():
    pile = GreenPile(len(resps))
    while True:
        for resp in resps:
            pile.spawn(_get_frag, resp)
        try:
            with Timeout(self.read_timeout):
                frag = [frag for frag in pile]
        except Timeout as to:
            logger.error('ERROR while rebuilding: %s', to)
            # Stop here too, otherwise `frag` may be unbound or stale
            break
        except Exception:
            logger.exception('ERROR while rebuilding')
            break
        if not all(frag):
            break
        rebuilt_frag = self._reconstruct(frag)
        yield rebuilt_frag
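# Companion sketch to the rebuild loop (assumes pyeclib; parameters are
# example values): once enough fragments have been fetched, reconstruct()
# recomputes the missing one, which is roughly what self._reconstruct wraps.
from pyeclib.ec_iface import ECDriver

driver = ECDriver(k=6, m=3, ec_type='liberasurecode_rs_vand')
fragments = driver.encode(b'payload' * 100)
survivors = fragments[:6]                     # pretend fragment #8 was lost
rebuilt = driver.reconstruct(survivors, [8])  # list with the rebuilt fragment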
def take_action(self, parsed_args):
    self.log.debug('take_action(%s)', parsed_args)

    digits = self.app.client_manager.meta1_digits
    concurrency = parsed_args.concurrency

    conf = {'namespace': self.app.client_manager.namespace}
    if parsed_args.proxy:
        conf.update({'proxyd_url': parsed_args.proxy})
    else:
        ns_conf = self.app.client_manager.sds_conf
        proxy = ns_conf.get('proxy')
        conf.update({'proxyd_url': proxy})

    workers = list()
    with green.ContextPool(concurrency) as pool:
        pile = GreenPile(pool)
        prefix_queue = Queue(16)

        # Prepare some workers
        for _ in range(concurrency):
            worker = WarmupWorker(self.app.client_manager.client_conf,
                                  self.log)
            workers.append(worker)
            pile.spawn(worker.run, prefix_queue)

        # Feed the queue
        trace_increment = 0.01
        trace_next = trace_increment
        sent, total = 0, float(count_prefixes(digits))
        for prefix in generate_prefixes(digits):
            sent += 1
            prefix_queue.put(prefix)
            # Display the progression
            ratio = float(sent) / total
            if ratio >= trace_next:
                self.log.info("... %d%%", int(ratio * 100.0))
                trace_next += trace_increment

        self.log.debug("Send the termination marker")
        prefix_queue.join()

    self.log.info("All the workers are done")
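# Minimal producer/consumer sketch in plain eventlet (the worker body is
# hypothetical): prefixes are pushed into a bounded Queue, workers drain it
# and call task_done(), and queue.join() only returns once every item was
# handled, which is the synchronisation take_action relies on.
import eventlet
from eventlet.queue import Queue


def worker(queue):
    while True:
        prefix = queue.get()
        try:
            pass  # warm up `prefix` here
        finally:
            queue.task_done()


pool = eventlet.GreenPool(4)
prefix_queue = Queue(16)
for _ in range(4):
    pool.spawn(worker, prefix_queue)
for prefix in ('AAA', 'AAB', 'AAC'):  # made-up prefixes
    prefix_queue.put(prefix)
prefix_queue.join()  # blocks until every put() has been task_done()'d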
def _get_results(self, writers):
    # get the results from writers
    success_chunks = []
    failed_chunks = []

    # we use eventlet GreenPile to read the responses from the writers
    pile = GreenPile(len(writers))
    for writer in writers:
        if writer.failed:
            failed_chunks.append(writer.chunk)
            continue
        pile.spawn(self._get_response, writer)

    for (writer, resp) in pile:
        self._dispatch_response(writer, resp, success_chunks, failed_chunks)

    self.quorum_or_fail(success_chunks, failed_chunks)

    return success_chunks + failed_chunks
def rebuild(self):
    pile = GreenPile(len(self.meta_chunk))
    nb_data = self.storage_method.ec_nb_data
    headers = {}
    for chunk in self.meta_chunk:
        pile.spawn(self._get_response, chunk, headers)

    resps = []
    for resp in pile:
        if not resp:
            continue
        resps.append(resp)
        if len(resps) >= self.storage_method.ec_nb_data:
            break
    else:
        logger.error('Unable to read enough valid sources to rebuild')
        raise exceptions.UnrecoverableContent(
            'Not enough valid sources to rebuild')

    rebuild_iter = self._make_rebuild_iter(resps[:nb_data])
    return rebuild_iter
def _get_results(self, writers):
    """
    Check the results of the writers.
    Failures are appended to the self.failed_chunks list.

    :returns: a list of chunks that have been uploaded.
    """
    success_chunks = []

    # we use eventlet GreenPile to read the responses from the writers
    pile = GreenPile(len(writers))
    for writer in writers:
        if writer.failed:
            # Already in failures list
            continue
        pile.spawn(self._get_response, writer)

    for (writer, resp) in pile:
        self._dispatch_response(writer, resp, success_chunks)

    self.quorum_or_fail(success_chunks, self.failed_chunks)

    return success_chunks
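# Illustration of the quorum idea behind quorum_or_fail (the numbers and the
# majority rule are assumptions for the example; the real threshold comes from
# the storage policy): the write succeeds only if enough copies were stored.
nb_copies = 3
quorum = nb_copies // 2 + 1       # e.g. 2 out of 3
success = ['chunk-a', 'chunk-b']
failures = ['chunk-c']
assert len(success) >= quorum     # accepted despite one failure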
def stream(self, source, size):
    bytes_transferred = 0
    meta_chunk = self.meta_chunk
    if self.chunk_checksum_algo:
        meta_checksum = hashlib.new(self.chunk_checksum_algo)
    else:
        meta_checksum = None
    pile = GreenPile(len(meta_chunk))
    failed_chunks = []
    current_conns = []

    for chunk in meta_chunk:
        pile.spawn(self._connect_put, chunk)

    for conn, chunk in pile:
        if not conn:
            failed_chunks.append(chunk)
        else:
            current_conns.append(conn)

    self.quorum_or_fail([co.chunk for co in current_conns], failed_chunks)

    bytes_transferred = 0
    try:
        with green.ContextPool(len(meta_chunk)) as pool:
            for conn in current_conns:
                conn.failed = False
                conn.queue = LightQueue(io.PUT_QUEUE_DEPTH)
                pool.spawn(self._send_data, conn)

            while True:
                buffer_size = self.buffer_size()
                if size is not None:
                    remaining_bytes = size - bytes_transferred
                    if buffer_size < remaining_bytes:
                        read_size = buffer_size
                    else:
                        read_size = remaining_bytes
                else:
                    read_size = buffer_size
                with green.SourceReadTimeout(self.read_timeout):
                    try:
                        data = source.read(read_size)
                    except (ValueError, IOError) as err:
                        raise SourceReadError(str(err))
                if len(data) == 0:
                    for conn in current_conns:
                        if not conn.failed:
                            conn.queue.put('')
                    break
                self.checksum.update(data)
                if meta_checksum:
                    meta_checksum.update(data)
                bytes_transferred += len(data)
                # copy current_conns to be able to remove a failed conn
                for conn in current_conns[:]:
                    if not conn.failed:
                        conn.queue.put(data)
                    else:
                        current_conns.remove(conn)
                        failed_chunks.append(conn.chunk)

                self.quorum_or_fail([co.chunk for co in current_conns],
                                    failed_chunks)

            for conn in current_conns:
                while conn.queue.qsize():
                    sleep(0)

    except green.SourceReadTimeout as err:
        logger.warn('Source read timeout (reqid=%s): %s', self.reqid, err)
        raise SourceReadTimeout(err)
    except SourceReadError as err:
        logger.warn('Source read error (reqid=%s): %s', self.reqid, err)
        raise
    except Timeout as to:
        logger.error('Timeout writing data (reqid=%s): %s', self.reqid, to)
        raise OioTimeout(to)
    except Exception:
        logger.exception('Exception writing data (reqid=%s)', self.reqid)
        raise

    success_chunks = []

    for conn in current_conns:
        if conn.failed:
            failed_chunks.append(conn.chunk)
            continue
        pile.spawn(self._get_response, conn)

    for (conn, resp) in pile:
        if resp:
            self._handle_resp(
                conn, resp,
                meta_checksum.hexdigest() if meta_checksum else None,
                success_chunks, failed_chunks)

    self.quorum_or_fail(success_chunks, failed_chunks)

    for chunk in success_chunks:
        chunk["size"] = bytes_transferred

    return bytes_transferred, success_chunks[0]['hash'], success_chunks
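# Standalone sketch (plain eventlet, hypothetical sender) of the
# per-connection queue used in stream(): the reader pushes buffers into a
# bounded LightQueue, a writer greenthread drains it, and an empty value is
# the end-of-stream sentinel, mirroring conn.queue.put('') above.
import eventlet
from eventlet.queue import LightQueue


def sender(queue):
    while True:
        data = queue.get()
        if not data:        # sentinel pushed after the last buffer
            break
        # write `data` to the backend connection here


queue = LightQueue(4)       # bounded, so a slow backend applies back-pressure
sender_thread = eventlet.spawn(sender, queue)
for buf in (b'abc', b'def', b''):
    queue.put(buf)
sender_thread.wait()        # wait for the sender to drain the queue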
def _stream(self, source, size, writers):
    bytes_transferred = 0

    # create EC encoding generator
    ec_stream = ec_encode(self.storage_method, len(self.meta_chunk))
    # init generator
    ec_stream.send(None)

    try:
        # we use eventlet GreenPool to manage writers
        with ContextPool(len(writers) * 2) as pool:
            # init writers in pool
            for writer in writers:
                writer.start(pool)

            def read(read_size):
                with SourceReadTimeout(self.read_timeout):
                    try:
                        data = source.read(read_size)
                    except (ValueError, IOError) as exc:
                        raise SourceReadError(str(exc))
                return data

            # the main write loop
            # Maintain a list of writers which continue writing
            # TODO(FVE): use an instance variable
            # to maintain the list of writers
            curr_writers = writers
            if size:
                while True:
                    buffer_size = self.buffer_size()
                    remaining_bytes = size - bytes_transferred
                    if buffer_size < remaining_bytes:
                        read_size = buffer_size
                    else:
                        read_size = remaining_bytes
                    data = read(read_size)
                    bytes_transferred += len(data)
                    if len(data) == 0:
                        break
                    curr_writers = self.encode_and_send(
                        ec_stream, data, curr_writers)
            else:
                while True:
                    data = read(self.buffer_size())
                    bytes_transferred += len(data)
                    if len(data) == 0:
                        break
                    curr_writers = self.encode_and_send(
                        ec_stream, data, curr_writers)

            # flush out buffered data
            self.encode_and_send(ec_stream, '', curr_writers)

            # trailer headers
            # metachunk size
            # metachunk hash
            metachunk_size = bytes_transferred
            metachunk_hash = self.checksum.hexdigest()

            finish_pile = GreenPile(pool)
            for writer in writers:
                finish_pile.spawn(writer.finish, metachunk_size,
                                  metachunk_hash)
            for just_failed in finish_pile:
                # Avoid reporting problems twice
                if just_failed and not any(x['url'] == just_failed['url']
                                           for x in self.failed_chunks):
                    self.failed_chunks.append(just_failed)

            return bytes_transferred

    except SourceReadTimeout as exc:
        self.logger.warn('%s (reqid=%s)', exc, self.reqid)
        raise exceptions.SourceReadTimeout(exc)
    except SourceReadError as exc:
        self.logger.warn('Source read error (reqid=%s): %s',
                         self.reqid, exc)
        raise
    except Timeout as to:
        self.logger.warn('Timeout writing data (reqid=%s): %s',
                         self.reqid, to)
        # Not the same class as the globally imported OioTimeout class
        raise exceptions.OioTimeout(to)
    except Exception:
        self.logger.exception('Exception writing data (reqid=%s)',
                              self.reqid)
        raise
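# Tiny illustration of the coroutine priming done with ec_stream.send(None)
# above: a generator used as a data sink must be advanced once before it can
# accept values through send().
def echo():
    received = None
    while True:
        received = yield received


gen = echo()
gen.send(None)                    # prime the generator up to its first yield
assert gen.send('data') == 'data'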