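# The snippets below all schedule their greenlets on a ContextPool. The class
# sketched here is an assumption about what that helper provides, not code taken
# from this file: an eventlet GreenPool that also acts as a context manager and
# kills any greenlet still running when the block exits, so an error path cannot
# leak readers or writers.
from eventlet import GreenPool


class ContextPool(GreenPool):
    """GreenPool whose context manager kills leftover greenlets on exit."""

    def __enter__(self):
        return self

    def __exit__(self, exc_type, value, traceback):
        # copy the set: kill() mutates coroutines_running while we iterate
        for coroutine in list(self.coroutines_running):
            coroutine.kill()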
def run(self):
    self.tool.start_time = self.tool.last_report = time.time()
    self.tool.log_report('START', force=True)

    try:
        with ContextPool(len(self.workers) + 1) as pool:
            # spawn workers
            for worker in self.workers:
                pool.spawn(worker.run)
            # spawn one worker to fill the queue
            pool.spawn(self._fill_queue_and_wait_all_items)

            # with the main thread, read the results from the reply queue
            while True:
                task_res = self.queue_reply.get()
                if task_res is None:  # end signal
                    break
                self.tool.update_counters(task_res)
                yield task_res
                self.tool.log_report('RUN')
    except Exception:  # pylint: disable=broad-except
        self.logger.exception('ERROR in local dispatcher')
        self.tool.success = False

    self.tool.log_report('DONE', force=True)
def rebuilder_pass(self, **kwargs):
    self.start_time = self.last_report = time.time()
    self.log_report('START', force=True)

    workers = list()
    with ContextPool(self.nworkers + 1) as pool:
        # spawn one worker for the retry queue
        rqueue = eventlet.Queue(self.nworkers)
        pool.spawn(self._read_retry_queue, rqueue, **kwargs)

        # spawn workers to rebuild
        queue = eventlet.Queue(self.nworkers * 10)
        for i in range(self.nworkers):
            worker = self._create_worker(**kwargs)
            workers.append(worker)
            pool.spawn(worker.rebuilder_pass, i, queue,
                       retry_queue=rqueue, **kwargs)

        # fill the queue (with the main thread)
        self._fill_queue(queue, **kwargs)

        # block until all items are rebuilt
        queue.join()
        # block until the retry queue is empty
        rqueue.join()

    self.log_report('DONE', force=True)
    return self.total_errors == 0
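# The rebuilder relies on eventlet.Queue's task tracking: queue.join() only
# returns once every item put on the queue has been matched by a task_done()
# call from a worker. A minimal, self-contained sketch of that contract
# (worker_pass and the integer items are illustrative, not from the source):
import eventlet
from eventlet import GreenPool


def worker_pass(queue):
    while True:
        item = queue.get()
        try:
            pass  # the real worker would rebuild `item` here
        finally:
            queue.task_done()  # always acknowledge, or queue.join() never returns


def run_pass(nworkers=4):
    pool = GreenPool(nworkers)
    # bounded queue: the producer blocks when workers fall behind (backpressure)
    queue = eventlet.Queue(nworkers * 10)
    for _ in range(nworkers):
        pool.spawn(worker_pass, queue)
    for item in range(100):  # fill the queue with the main greenthread
        queue.put(item)
    queue.join()  # block until every item has been acknowledged
    # the workers are still blocked in queue.get(); in the snippets above the
    # ContextPool kills them when the `with` block exits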
class Meta2Indexer(Daemon):
    """
    A daemon that spawns a greenlet running a Meta2IndexingWorker
    for each volume.
    """

    def __init__(self, conf):
        super(Meta2Indexer, self).__init__(conf=conf)
        self.logger = get_logger(conf)
        if not conf.get("volume_list"):
            raise exc.OioException("No meta2 volumes provided to index !")
        self.volumes = [x.strip() for x in conf.get('volume_list').split(',')]
        self.pool = ContextPool(len(self.volumes))
        self.volume_workers = [Meta2IndexingWorker(x, conf)
                               for x in self.volumes]

    def run(self, *args, **kwargs):
        for worker in self.volume_workers:
            self.pool.spawn(worker.run)
        self.pool.waitall()

    def stop(self):
        for worker in self.volume_workers:
            worker.stop()
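# A hedged usage sketch of the daemon above: only the 'volume_list' key is
# required by __init__, everything else is passed through to each
# Meta2IndexingWorker. The paths and the namespace value are illustrative,
# not taken from the source.
conf = {
    'namespace': 'OPENIO',
    'volume_list': '/var/lib/oio/sds/meta2-0, /var/lib/oio/sds/meta2-1',
}
indexer = Meta2Indexer(conf)  # raises OioException if volume_list is missing
indexer.run()  # one greenlet per volume, blocks in pool.waitall() until workers stop
# indexer.stop() would be called from elsewhere (e.g. a signal handler) to make
# the workers, and therefore run(), return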
def get_stream(self):
    range_infos = self._get_range_infos()
    chunk_iter = iter(self.chunks)

    # we use eventlet GreenPool to manage readers
    with ContextPool(self.storage_method.ec_nb_data) as pool:
        pile = GreenPile(pool)
        # we use eventlet GreenPile to spawn readers
        for _j in range(self.storage_method.ec_nb_data):
            pile.spawn(self._get_fragment, chunk_iter, range_infos,
                       self.storage_method)
        readers = []
        for reader, parts_iter in pile:
            if reader.status in (200, 206):
                readers.append((reader, parts_iter))
            # TODO log failures?

    # with EC we need at least ec_nb_data valid readers
    if len(readers) >= self.storage_method.ec_nb_data:
        # all readers should return the same Content-Length
        # so just take the headers from one of them
        resp_headers = HeadersDict(readers[0][0].headers)
        fragment_length = int(resp_headers.get('Content-Length'))
        read_iterators = [it for _, it in readers]
        stream = ECStream(self.storage_method, read_iterators, range_infos,
                          self.meta_length, fragment_length,
                          reqid=self.reqid, perfdata=self.perfdata,
                          logger=self.logger)
        # start the stream
        stream.start()
        return stream
    else:
        raise exceptions.ServiceUnavailable(
            'Not enough valid sources to read (%d/%d)' % (
                len(readers), self.storage_method.ec_nb_data))
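# GreenPile yields results in spawn order, not completion order, so the loop
# above inspects the readers in a deterministic order even though they run
# concurrently. A minimal sketch of that behaviour (fetch() is illustrative):
from eventlet import GreenPile, GreenPool


def fetch(index):
    return index * index


pool = GreenPool(3)
pile = GreenPile(pool)
for i in range(3):
    pile.spawn(fetch, i)
print(list(pile))  # [0, 1, 4]: spawn order, whatever order the greenlets finish in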
def rebuilder_pass(self, **kwargs):
    start_time = time.time()

    workers = list()
    with ContextPool(self.nworkers) as pool:
        queue = eventlet.Queue(self.nworkers * 10)

        # spawn workers to rebuild
        for i in range(self.nworkers):
            worker = self._create_worker(**kwargs)
            workers.append(worker)
            pool.spawn(worker.rebuilder_pass, i, queue)

        # fill the queue
        self._fill_queue(queue, **kwargs)

        # block until all items are rebuilt
        queue.join()

    passes = 0
    errors = 0
    total_items_processed = 0
    waiting_time = 0
    rebuilder_time = 0
    info = self._init_info(**kwargs)
    for worker in workers:
        passes += worker.passes
        errors += worker.errors
        total_items_processed += worker.total_items_processed
        waiting_time += worker.waiting_time
        rebuilder_time += worker.rebuilder_time
        info = self._compute_info(worker, info, **kwargs)

    end_time = time.time()
    elapsed = (end_time - start_time) or 0.000001
    self.logger.info(
        self._get_report(start_time, end_time, passes, errors,
                         waiting_time, rebuilder_time, elapsed,
                         total_items_processed, info, **kwargs))
def rebuilder_pass(self, **kwargs):
    self.start_time = self.last_report = time.time()
    self.log_report('START', force=True)

    workers = list()
    with ContextPool(self.concurrency + 1) as pool:
        # spawn one worker for the retry queue
        rqueue = eventlet.Queue(self.concurrency)
        pool.spawn(self._read_retry_queue, rqueue, **kwargs)

        # spawn workers to rebuild
        queue = eventlet.Queue(self.concurrency * 10)
        for i in range(self.concurrency):
            worker = self._create_worker(**kwargs)
            workers.append(worker)
            pool.spawn(worker.rebuilder_pass, i, queue,
                       retry_queue=rqueue, **kwargs)

        # fill the queue (with the main thread)
        try:
            self._fill_queue(queue, **kwargs)
        except Exception as exc:
            if self.running:
                self.logger.error("Failed to fill queue: %s", exc)
                self.success = False

        # block until all items are rebuilt
        queue.join()
        # block until the retry queue is empty
        rqueue.join()

    self.log_report('DONE', force=True)
    return self.success and self.total_errors == 0
def _stream(self, source, size, writers):
    bytes_transferred = 0

    # create EC encoding generator
    ec_stream = ec_encode(self.storage_method, len(self.meta_chunk))
    # init generator
    ec_stream.send(None)

    try:
        # we use eventlet GreenPool to manage writers
        with ContextPool(len(writers) * 2) as pool:
            # init writers in pool
            for writer in writers:
                writer.start(pool)

            def read(read_size):
                with SourceReadTimeout(self.read_timeout):
                    try:
                        data = source.read(read_size)
                    except (ValueError, IOError) as exc:
                        raise SourceReadError(str(exc))
                return data

            # the main write loop
            # Maintain a list of writers which continue writing
            # TODO(FVE): use an instance variable
            # to maintain the list of writers
            curr_writers = writers
            if size:
                while True:
                    buffer_size = self.buffer_size()
                    remaining_bytes = size - bytes_transferred
                    if buffer_size < remaining_bytes:
                        read_size = buffer_size
                    else:
                        read_size = remaining_bytes
                    data = read(read_size)
                    bytes_transferred += len(data)
                    if len(data) == 0:
                        break
                    curr_writers = self.encode_and_send(
                        ec_stream, data, curr_writers)
            else:
                while True:
                    data = read(self.buffer_size())
                    bytes_transferred += len(data)
                    if len(data) == 0:
                        break
                    curr_writers = self.encode_and_send(
                        ec_stream, data, curr_writers)

            # flush out buffered data
            self.encode_and_send(ec_stream, '', curr_writers)

            # trailer headers
            # metachunk size
            # metachunk hash
            metachunk_size = bytes_transferred
            metachunk_hash = self.checksum.hexdigest()

            finish_pile = GreenPile(pool)
            for writer in writers:
                finish_pile.spawn(writer.finish, metachunk_size,
                                  metachunk_hash)
            for just_failed in finish_pile:
                # Avoid reporting problems twice
                if just_failed and not any(x['url'] == just_failed['url']
                                           for x in self.failed_chunks):
                    self.failed_chunks.append(just_failed)

            return bytes_transferred
    except SourceReadTimeout as exc:
        self.logger.warn('%s (reqid=%s)', exc, self.reqid)
        raise exceptions.SourceReadTimeout(exc)
    except SourceReadError as exc:
        self.logger.warn('Source read error (reqid=%s): %s',
                         self.reqid, exc)
        raise
    except Timeout as to:
        self.logger.warn('Timeout writing data (reqid=%s): %s',
                         self.reqid, to)
        # Not the same class as the globally imported OioTimeout class
        raise exceptions.OioTimeout(to)
    except Exception:
        self.logger.exception('Exception writing data (reqid=%s)',
                              self.reqid)
        raise
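# ec_encode() is driven as a "push" coroutine: primed with send(None), fed data
# with send(data), and flushed with an empty send at the end; it returns None
# until it has buffered a full segment. A toy sketch of that generator protocol
# (the fixed segment size and the single-fragment output are illustrative, not
# the real erasure-coding driver):
def toy_encode(segment_size=4):
    buffered = b''
    fragments = None
    while True:
        data = yield fragments        # receive the next chunk from send()
        fragments = None
        if not data:                  # empty send() means "flush what is left"
            if buffered:
                fragments = [buffered]
                buffered = b''
            continue
        buffered += data
        if len(buffered) >= segment_size:
            # a real encoder would return k+m encoded fragments here
            fragments = [buffered[:segment_size]]
            buffered = buffered[segment_size:]


encoder = toy_encode()
encoder.send(None)           # prime the generator, like ec_stream.send(None) above
print(encoder.send(b'abc'))  # None: not enough data buffered yet
print(encoder.send(b'def'))  # [b'abcd']: a full segment was encoded
print(encoder.send(b''))     # [b'ef']: flush the remainder, like the empty encode_and_send() above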
def _decode_segments(self, fragment_iterators):
    """
    Read from the fragment iterators and yield full segments.
    """
    # we use eventlet Queue to read fragments
    queues = []
    # each iterator has its own queue
    for _j in range(len(fragment_iterators)):
        queues.append(LightQueue(1))

    def put_in_queue(fragment_iterator, queue):
        """
        Coroutine to read the fragments from the iterator
        """
        try:
            for fragment in fragment_iterator:
                # put the read fragment in the queue
                queue.put(fragment)
                # the queues are of size 1 so this coroutine blocks
                # until we decode a full segment
        except GreenletExit:
            # ignore
            pass
        except ChunkReadTimeout as err:
            self.logger.error('%s (reqid=%s)', err, self.reqid)
        except Exception:
            self.logger.exception("Exception on reading (reqid=%s)",
                                  self.reqid)
        finally:
            # make room so the final None can be put without blocking
            queue.resize(2)
            # put None to tell the decoding loop this is over
            queue.put(None)
            # close the iterator
            fragment_iterator.close()

    # we use eventlet GreenPool to manage the read of fragments
    with ContextPool(len(fragment_iterators)) as pool:
        # spawn coroutines to read the fragments
        for fragment_iterator, queue in zip(fragment_iterators, queues):
            pool.spawn(put_in_queue, fragment_iterator, queue)

        # main decoding loop
        while True:
            data = []
            # get the fragments from the queues
            for queue in queues:
                fragment = queue.get()
                data.append(fragment)

            if not all(data):
                # one of the readers returned None:
                # impossible to rebuild the segment
                break

            # actually decode the fragments into a segment
            try:
                segment = self.storage_method.driver.decode(data)
            except exceptions.ECError:
                # something terrible happened
                self.logger.exception(
                    "ERROR decoding fragments (reqid=%s)", self.reqid)
                raise

            yield segment
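# Each fragment reader above gets a LightQueue of size 1, so a reader can only
# stay one fragment ahead of the decoding loop: the queues give backpressure and
# keep the fragment streams aligned. A stripped-down sketch of that pattern
# (the producer data and the join() standing in for driver.decode() are
# illustrative):
from eventlet import GreenPool
from eventlet.queue import LightQueue


def producer(items, queue):
    for item in items:
        queue.put(item)  # blocks until the consumer has taken the previous item
    queue.put(None)      # tell the decoding loop this stream is finished


streams = [[b'a1', b'a2'], [b'b1', b'b2'], [b'c1', b'c2']]
queues = [LightQueue(1) for _ in streams]
pool = GreenPool(len(streams))
for items, queue in zip(streams, queues):
    pool.spawn(producer, items, queue)

while True:
    fragments = [queue.get() for queue in queues]  # one fragment per stream, in lockstep
    if not all(fragments):
        break                     # a stream ended (or failed): stop decoding
    print(b''.join(fragments))    # stands in for storage_method.driver.decode(fragments)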
def _stream(self, source, size, writers):
    bytes_transferred = 0

    # create EC encoding generator
    ec_stream = ec_encode(self.storage_method, len(self.meta_chunk))
    # init generator
    ec_stream.send(None)

    def send(data):
        self.checksum.update(data)
        self.global_checksum.update(data)
        # get the encoded fragments
        if self.perfdata is not None:
            ec_start = monotonic_time()
        fragments = ec_stream.send(data)
        if self.perfdata is not None:
            ec_end = monotonic_time()
            rawx_perfdata = self.perfdata.setdefault('rawx', dict())
            rawx_perfdata['ec'] = rawx_perfdata.get('ec', 0.0) \
                + ec_end - ec_start
        if fragments is None:
            # not enough data given
            return

        current_writers = list(writers)
        failed_chunks = list()
        for writer in current_writers:
            fragment = fragments[chunk_index[writer]]
            if not writer.failed:
                if writer.checksum:
                    writer.checksum.update(fragment)
                writer.send(fragment)
            else:
                current_writers.remove(writer)
                failed_chunks.append(writer.chunk)
        sleep(0)
        self.quorum_or_fail([w.chunk for w in current_writers],
                            failed_chunks)

    try:
        # we use eventlet GreenPool to manage writers
        with ContextPool(len(writers)) as pool:
            # convenient index to figure out which writer
            # handles the resulting fragments
            chunk_index = self._build_index(writers)

            # init writers in pool
            for writer in writers:
                writer.start(pool)

            def read(read_size):
                with SourceReadTimeout(self.read_timeout):
                    try:
                        data = source.read(read_size)
                    except (ValueError, IOError) as exc:
                        raise SourceReadError(str(exc))
                return data

            # the main write loop
            if size:
                while True:
                    buffer_size = self.buffer_size()
                    remaining_bytes = size - bytes_transferred
                    if buffer_size < remaining_bytes:
                        read_size = buffer_size
                    else:
                        read_size = remaining_bytes
                    data = read(read_size)
                    bytes_transferred += len(data)
                    if len(data) == 0:
                        break
                    send(data)
            else:
                while True:
                    data = read(self.buffer_size())
                    bytes_transferred += len(data)
                    if len(data) == 0:
                        break
                    send(data)

            # flush out buffered data
            send('')

            # wait for all data to be processed
            for writer in writers:
                writer.wait()

            # trailer headers
            # metachunk size
            # metachunk hash
            metachunk_size = bytes_transferred
            metachunk_hash = self.checksum.hexdigest()

            for writer in writers:
                writer.finish(metachunk_size, metachunk_hash)

            return bytes_transferred
    except SourceReadTimeout as exc:
        logger.warn('%s (reqid=%s)', exc, self.reqid)
        raise exceptions.SourceReadTimeout(exc)
    except SourceReadError as exc:
        logger.warn('Source read error (reqid=%s): %s',
                    self.reqid, exc)
        raise
    except Timeout as to:
        logger.error('Timeout writing data (reqid=%s): %s',
                     self.reqid, to)
        raise exceptions.OioTimeout(to)
    except Exception:
        logger.exception('Exception writing data (reqid=%s)',
                         self.reqid)
        raise