def get_stream(self):
    """
    Spawn the fragment readers and build the stream decoding them.

    `self._get_fragment` is spawned `storage_method.ec_nb_data` times,
    every call sharing the same iterator over `self.chunks`. Only the
    results whose reader status is 200 or 206 are kept.

    :returns: an `ECStream` on which `start()` has been called
    :raises exceptions.ServiceUnavailable: when fewer than `ec_nb_data`
        readers have been kept
    """
    range_infos = self._get_range_infos()
    chunk_source = iter(self.chunks)
    nb_data = self.storage_method.ec_nb_data

    # The eventlet pool runs the readers, the pile gathers what they return
    with green.ContextPool(nb_data) as pool:
        pile = GreenPile(pool)
        for _ in range(nb_data):
            pile.spawn(self._get_fragment, chunk_source, range_infos,
                       self.storage_method)

        # TODO log failures?
        valid_readers = [
            (reader, parts_iter) for reader, parts_iter in pile
            if reader.status in (200, 206)]

    # EC needs at least ec_nb_data valid readers
    if len(valid_readers) < nb_data:
        raise exceptions.ServiceUnavailable(
            'Not enough valid sources to read (%d/%d)' % (
                len(valid_readers), nb_data))

    # All the readers should announce the same Content-Length,
    # so the headers of the first one are used.
    first_reader = valid_readers[0][0]
    resp_headers = HeadersDict(first_reader.headers)
    fragment_length = int(resp_headers.get('Content-Length'))
    read_iterators = [parts_iter for _, parts_iter in valid_readers]

    stream = ECStream(self.storage_method, read_iterators, range_infos,
                      self.meta_length, fragment_length)
    stream.start()
    return stream
def take_action(self, parsed_args):
    """
    Feed every prefix from `generate_prefixes` to a pool of workers.

    `parsed_args.concurrency` `WarmupWorker` instances are spawned, all
    of them reading from one bounded queue. The progression is logged
    each time it passes a new 1% step, then the method waits on
    `prefix_queue.join()`.

    :param parsed_args: parsed command line; `concurrency` and `proxy`
        are read from it
    """
    self.log.debug('take_action(%s)', parsed_args)
    client_manager = self.app.client_manager
    digits = client_manager.meta1_digits
    concurrency = parsed_args.concurrency

    # NOTE(review): `conf` is built here but nothing below reads it, the
    # workers get `client_manager.client_conf` instead. Confirm whether
    # `parsed_args.proxy` was meant to reach the workers.
    conf = {'namespace': client_manager.namespace}
    conf['proxyd_url'] = (parsed_args.proxy or
                          client_manager.sds_conf.get('proxy'))

    workers = []
    with green.ContextPool(concurrency) as pool:
        pile = GreenPile(pool)
        prefix_queue = Queue(16)

        # Start the workers, they all consume the same queue
        for _ in range(concurrency):
            worker = WarmupWorker(client_manager.client_conf, self.log)
            workers.append(worker)
            pile.spawn(worker.run, prefix_queue)

        # Push the prefixes and report the progression
        step = 0.01
        next_trace = step
        total = float(count_prefixes(digits))
        for sent, prefix in enumerate(generate_prefixes(digits), 1):
            prefix_queue.put(prefix)
            ratio = float(sent) / total
            if ratio >= next_trace:
                self.log.info("... %d%%", int(ratio * 100.0))
                next_trace += step

        self.log.debug("Send the termination marker")
        prefix_queue.join()

    self.log.info("All the workers are done")
def _stream(self, source, size, writers):
    """
    Encode the data read from `source` and hand the resulting fragments
    to `writers`.

    :param source: object whose `read(n)` method provides the data
    :param size: number of bytes to read from `source`; when falsy
        (None or 0), `source` is read until it returns no more data
    :param writers: fragment writers; `failed`, `checksum`, `chunk`,
        `send`, `start`, `wait` and `finish` are used on each of them
    :returns: the number of bytes read from `source`
    :raises exceptions.SourceReadTimeout: when a read from `source`
        times out
    :raises SourceReadError: when `source.read` raises ValueError
        or IOError
    :raises exceptions.OioTimeout: when another `Timeout` is raised
    """
    bytes_transferred = 0

    # create the EC encoding generator, and prime it
    ec_stream = ec_encode(self.storage_method, len(self.meta_chunk))
    ec_stream.send(None)

    def send(data):
        """Encode `data` and push the fragments to the healthy writers."""
        self.checksum.update(data)
        self.global_checksum.update(data)
        # get the encoded fragments
        fragments = ec_stream.send(data)
        if fragments is None:
            # not enough data given
            return

        # Build fresh lists instead of removing items from the list
        # being iterated: a removal during iteration skips the next
        # item, which left the writer following a failed one without
        # its fragment while still counting it as healthy.
        active_chunks = []
        failed_chunks = []
        for writer in writers:
            if writer.failed:
                failed_chunks.append(writer.chunk)
                continue
            # `chunk_index` is bound below, before the first call
            fragment = fragments[chunk_index[writer]]
            if writer.checksum:
                writer.checksum.update(fragment)
            writer.send(fragment)
            active_chunks.append(writer.chunk)
        self.quorum_or_fail(active_chunks, failed_chunks)

    def read(read_size):
        """Read from `source`, under the source read timeout."""
        with green.SourceReadTimeout(self.read_timeout):
            try:
                data = source.read(read_size)
            except (ValueError, IOError) as exc:
                raise SourceReadError(str(exc))
        return data

    try:
        # we use eventlet GreenPool to manage writers
        with green.ContextPool(len(writers)) as pool:
            # convenient index to figure out which writer
            # handles the resulting fragments
            chunk_index = self._build_index(writers)

            # init writers in pool
            for writer in writers:
                writer.start(pool)

            # the main write loop
            while True:
                if size:
                    # never ask for more than what is left to transfer
                    remaining_bytes = size - bytes_transferred
                    if io.WRITE_CHUNK_SIZE < remaining_bytes:
                        read_size = io.WRITE_CHUNK_SIZE
                    else:
                        read_size = remaining_bytes
                else:
                    read_size = io.WRITE_CHUNK_SIZE
                data = read(read_size)
                if len(data) == 0:
                    break
                bytes_transferred += len(data)
                send(data)

            # flush out buffered data
            send('')

            # wait for all data to be processed
            for writer in writers:
                writer.wait()

            # trailer headers: metachunk size and metachunk hash
            metachunk_size = bytes_transferred
            metachunk_hash = self.checksum.hexdigest()

            for writer in writers:
                writer.finish(metachunk_size, metachunk_hash)

            return bytes_transferred

    except green.SourceReadTimeout as exc:
        logger.warn('%s', exc)
        raise exceptions.SourceReadTimeout(exc)
    except SourceReadError as exc:
        logger.warn('Source read error: %s', exc)
        raise
    except Timeout as to:
        logger.error('Timeout writing data: %s', to)
        raise exceptions.OioTimeout(to)
    except Exception:
        logger.exception('Exception writing data')
        raise
def _decode_segments(self, fragment_iterators):
    """
    Generator yielding the segments decoded from the fragments.

    Each fragment iterator is consumed by its own coroutine, which
    pushes the fragments into a dedicated queue of size 1. The decoding
    loop takes one item from every queue, stops as soon as one of them
    is falsy, and otherwise yields the result of
    `self.storage_method.driver.decode`.

    :param fragment_iterators: the iterators providing the fragments
    :raises exceptions.ECError: re-raised when the decoding fails
    """
    # one eventlet queue per fragment iterator
    queues = [Queue(1) for _ in range(len(fragment_iterators))]

    def feed_queue(frag_iter, frag_queue):
        """
        Coroutine pushing the fragments of one iterator into its queue
        """
        try:
            # the queue holds one item, so put() blocks
            # until a full segment has been decoded
            for fragment in frag_iter:
                frag_queue.put(fragment)
        except GreenletExit:
            # ignore
            pass
        except green.ChunkReadTimeout as err:
            logger.error('%s', err)
        except Exception:
            logger.exception("Exception on reading")
        finally:
            # make room for the end marker: None tells
            # the decoding loop that this reader is over
            frag_queue.resize(2)
            frag_queue.put(None)
            frag_iter.close()

    def pull(frag_queue):
        """
        Take the next item of a queue and mark it as done
        """
        item = frag_queue.get()
        frag_queue.task_done()
        return item

    # the eventlet pool runs the coroutines reading the fragments
    with green.ContextPool(len(fragment_iterators)) as pool:
        for frag_iter, frag_queue in zip(fragment_iterators, queues):
            pool.spawn(feed_queue, frag_iter, frag_queue)

        # main decoding loop
        while True:
            fragments = [pull(frag_queue) for frag_queue in queues]

            if not all(fragments):
                # one of the readers returned None
                # impossible to read segment
                break

            try:
                segment = self.storage_method.driver.decode(fragments)
            except exceptions.ECError:
                # something terrible happened
                logger.exception("ERROR decoding fragments")
                raise

            yield segment
def stream(self, source, size):
    """
    Send the data read from `source` to all the chunks of the metachunk.

    A connection is opened per chunk with `_connect_put`, the data is
    pushed into the queue of every connection still healthy, then the
    responses are collected with `_get_response`. `quorum_or_fail` is
    called after each of these steps.

    :param source: object whose `read(n)` method provides the data
    :param size: number of bytes to read, or None to read until
        `source` returns no more data
    :returns: a tuple made of the number of bytes transferred, the
        'hash' of the first successful chunk, and the list of
        successful chunks
    :raises SourceReadTimeout: when a read from `source` times out
    :raises SourceReadError: when `source.read` raises ValueError
        or IOError
    :raises OioTimeout: when another `Timeout` is raised
    """
    meta_chunk = self.meta_chunk
    meta_checksum = None
    if self.chunk_checksum_algo:
        meta_checksum = hashlib.new(self.chunk_checksum_algo)

    # open one connection per chunk
    pile = GreenPile(len(meta_chunk))
    for chunk in meta_chunk:
        pile.spawn(self._connect_put, chunk)

    failed_chunks = []
    current_conns = []
    for conn, chunk in pile:
        if conn:
            current_conns.append(conn)
        else:
            failed_chunks.append(chunk)

    self.quorum_or_fail([co.chunk for co in current_conns], failed_chunks)

    bytes_transferred = 0
    try:
        with green.ContextPool(len(meta_chunk)) as pool:
            # one sender coroutine and one queue per connection
            for conn in current_conns:
                conn.failed = False
                conn.queue = LightQueue(io.PUT_QUEUE_DEPTH)
                pool.spawn(self._send_data, conn)

            while True:
                read_size = self.buffer_size()
                if size is not None:
                    # do not read more than what is left to transfer
                    remaining_bytes = size - bytes_transferred
                    if remaining_bytes <= read_size:
                        read_size = remaining_bytes

                with green.SourceReadTimeout(self.read_timeout):
                    try:
                        data = source.read(read_size)
                    except (ValueError, IOError) as err:
                        raise SourceReadError(str(err))
                    if len(data) == 0:
                        # no more data: push the empty
                        # string to the healthy connections
                        for conn in current_conns:
                            if not conn.failed:
                                conn.queue.put('')
                        break

                self.checksum.update(data)
                if meta_checksum:
                    meta_checksum.update(data)
                bytes_transferred += len(data)

                # iterate over a copy, failed connections are removed
                for conn in list(current_conns):
                    if conn.failed:
                        current_conns.remove(conn)
                        failed_chunks.append(conn.chunk)
                    else:
                        conn.queue.put(data)

                self.quorum_or_fail([co.chunk for co in current_conns],
                                    failed_chunks)

            # yield to the senders until all the queues are empty
            for conn in current_conns:
                while conn.queue.qsize():
                    sleep(0)

    except green.SourceReadTimeout as err:
        logger.warn('Source read timeout (reqid=%s): %s', self.reqid, err)
        raise SourceReadTimeout(err)
    except SourceReadError as err:
        logger.warn('Source read error (reqid=%s): %s', self.reqid, err)
        raise
    except Timeout as to:
        logger.error('Timeout writing data (reqid=%s): %s', self.reqid, to)
        raise OioTimeout(to)
    except Exception:
        logger.exception('Exception writing data (reqid=%s)', self.reqid)
        raise

    # collect the responses of the connections which did not fail
    for conn in current_conns:
        if conn.failed:
            failed_chunks.append(conn.chunk)
        else:
            pile.spawn(self._get_response, conn)

    expected_hash = meta_checksum.hexdigest() if meta_checksum else None
    success_chunks = []
    for conn, resp in pile:
        if resp:
            self._handle_resp(conn, resp, expected_hash,
                              success_chunks, failed_chunks)
    self.quorum_or_fail(success_chunks, failed_chunks)

    for chunk in success_chunks:
        chunk["size"] = bytes_transferred

    return bytes_transferred, success_chunks[0]['hash'], success_chunks
def stream(self, source, size=None):
    """
    Send the data read from `source` to all the chunks of the metachunk.

    A connection is opened per chunk with `_connect_put`. Each piece of
    data is queued on every healthy connection, framed as
    '<hex length>\\r\\n<data>\\r\\n', and '0\\r\\n\\r\\n' is queued when
    the source is exhausted. The responses are then collected with
    `_get_response`. `quorum_or_fail` is called after each step.

    :param source: object whose `read(n)` method provides the data
    :param size: number of bytes to read, or None to read until
        `source` returns no more data
    :returns: a tuple made of the number of bytes transferred, the MD5
        hex digest of the data, and the list of successful chunks
    :raises green.SourceReadTimeout: re-raised when a read from
        `source` times out
    :raises SourceReadError: when `source.read` raises ValueError
        or IOError
    :raises exc.OioTimeout: when another `Timeout` is raised
    """
    meta_chunk = self.meta_chunk
    meta_checksum = hashlib.md5()

    # open one connection per chunk
    pile = GreenPile(len(meta_chunk))
    for chunk in meta_chunk:
        pile.spawn(self._connect_put, chunk)

    failed_chunks = []
    current_conns = []
    for conn, chunk in list(pile):
        if conn:
            current_conns.append(conn)
        else:
            failed_chunks.append(chunk)

    self.quorum_or_fail([co.chunk for co in current_conns], failed_chunks)

    bytes_transferred = 0
    try:
        with green.ContextPool(len(meta_chunk)) as pool:
            # one sender coroutine and one queue per connection
            for conn in current_conns:
                conn.failed = False
                conn.queue = Queue(io.PUT_QUEUE_DEPTH)
                pool.spawn(self._send_data, conn)

            while True:
                read_size = io.WRITE_CHUNK_SIZE
                if size is not None:
                    # do not read more than what is left to transfer
                    remaining_bytes = size - bytes_transferred
                    if remaining_bytes <= read_size:
                        read_size = remaining_bytes

                with green.SourceReadTimeout(self.read_timeout):
                    try:
                        data = source.read(read_size)
                    except (ValueError, IOError) as e:
                        raise SourceReadError(str(e))
                    if len(data) == 0:
                        # no more data: queue the terminating
                        # frame on the healthy connections
                        for conn in current_conns:
                            if not conn.failed:
                                conn.queue.put('0\r\n\r\n')
                        break

                self.checksum.update(data)
                meta_checksum.update(data)
                bytes_transferred += len(data)

                # iterate over a copy, failed connections are removed
                for conn in list(current_conns):
                    if conn.failed:
                        current_conns.remove(conn)
                        failed_chunks.append(conn.chunk)
                    else:
                        conn.queue.put('%x\r\n%s\r\n' % (len(data), data))

                self.quorum_or_fail([co.chunk for co in current_conns],
                                    failed_chunks)

            # wait for the queues which still have unfinished tasks
            for conn in current_conns:
                if conn.queue.unfinished_tasks:
                    conn.queue.join()

    except green.SourceReadTimeout:
        logger.warn('Source read timeout')
        raise
    except SourceReadError:
        logger.warn('Source read error')
        raise
    except Timeout as to:
        logger.exception('Timeout writing data')
        raise exc.OioTimeout(to)
    except Exception:
        logger.exception('Exception writing data')
        raise

    # collect the responses of the connections which did not fail
    for conn in current_conns:
        if conn.failed:
            failed_chunks.append(conn.chunk)
        else:
            pile.spawn(self._get_response, conn)

    meta_checksum_hex = meta_checksum.hexdigest()
    success_chunks = []
    for conn, resp in pile:
        if resp:
            self._handle_resp(conn, resp, meta_checksum_hex,
                              success_chunks, failed_chunks)
    self.quorum_or_fail(success_chunks, failed_chunks)

    for chunk in success_chunks:
        chunk["size"] = bytes_transferred
        chunk["hash"] = meta_checksum_hex

    return bytes_transferred, meta_checksum_hex, success_chunks