class EcChunkWriter(object):
    """
    Writes an EC chunk
    """

    def __init__(self, chunk, conn, write_timeout=None,
                 chunk_checksum_algo='md5', perfdata=None, **kwargs):
        self._chunk = chunk
        self._conn = conn
        self.failed = False
        self.bytes_transferred = 0
        if chunk_checksum_algo:
            self.checksum = hashlib.new(chunk_checksum_algo)
        else:
            self.checksum = None
        self.write_timeout = write_timeout or io.CHUNK_TIMEOUT
        # we use eventlet Queue to pass data to the send coroutine
        self.queue = LightQueue(io.PUT_QUEUE_DEPTH)
        self.reqid = kwargs.get('reqid')
        self.perfdata = perfdata
        self.logger = kwargs.get('logger', LOGGER)

    @property
    def chunk(self):
        return self._chunk

    @property
    def conn(self):
        return self._conn

    @classmethod
    def connect(cls, chunk, sysmeta, reqid=None,
                connection_timeout=None, write_timeout=None, **kwargs):
        raw_url = chunk.get("real_url", chunk["url"])
        parsed = urlparse(raw_url)
        chunk_path = parsed.path.split('/')[-1]
        hdrs = headers_from_object_metadata(sysmeta)
        if reqid:
            hdrs[REQID_HEADER] = reqid

        hdrs[CHUNK_HEADERS["chunk_pos"]] = chunk["pos"]
        hdrs[CHUNK_HEADERS["chunk_id"]] = chunk_path

        # metachunk_size & metachunk_hash are sent in the trailer
        trailers = (CHUNK_HEADERS["metachunk_size"],
                    CHUNK_HEADERS["metachunk_hash"])
        if kwargs.get('chunk_checksum_algo'):
            trailers = trailers + (CHUNK_HEADERS["chunk_hash"], )
        hdrs["Trailer"] = ', '.join(trailers)
        with ConnectionTimeout(connection_timeout or io.CONNECTION_TIMEOUT):
            perfdata = kwargs.get('perfdata', None)
            if perfdata is not None:
                connect_start = monotonic_time()
            conn = io.http_connect(parsed.netloc, 'PUT', parsed.path, hdrs)
            conn.set_cork(True)
            if perfdata is not None:
                connect_end = monotonic_time()
                perfdata_rawx = perfdata.setdefault('rawx', dict())
                perfdata_rawx[chunk['url']] = \
                    perfdata_rawx.get(chunk['url'], 0.0) \
                    + connect_end - connect_start
            conn.chunk = chunk
        return cls(chunk, conn, write_timeout=write_timeout,
                   reqid=reqid, **kwargs)

    def start(self, pool):
        """Spawn the send coroutine"""
        pool.spawn(self._send)

    def _send(self):
        """Send coroutine loop"""
        self.conn.upload_start = None
        while not self.failed:
            # fetch input data from the queue
            data = self.queue.get()
            # use HTTP chunked transfer encoding to write data to RAWX
            try:
                with ChunkWriteTimeout(self.write_timeout):
                    if self.perfdata is not None \
                            and self.conn.upload_start is None:
                        self.conn.upload_start = monotonic_time()
                    self.conn.send("%x\r\n" % len(data))
                    self.conn.send(data)
                    self.conn.send("\r\n")
                self.bytes_transferred += len(data)
                eventlet_yield()
            except (Exception, ChunkWriteTimeout) as exc:
                self.failed = True
                msg = str(exc)
                self.logger.warn("Failed to write to %s (%s, reqid=%s)",
                                 self.chunk, msg, self.reqid)
                self.chunk['error'] = 'write: %s' % msg

        # Drain the queue before quitting
        while True:
            try:
                self.queue.get_nowait()
            except Empty:
                break

    def wait(self):
        """
        Wait until all data in the queue
        has been processed by the send coroutine.
        """
        self.logger.debug("Waiting for %s to receive data",
                          self.chunk['url'])
        while self.queue.qsize() and not self.failed:
            eventlet_yield()

    def send(self, data):
        # do not send empty data because
        # this would end the chunked body
        if not data:
            return
        # put the data to send into the queue;
        # it will be processed by the send coroutine
        self.queue.put(data)

    def finish(self, metachunk_size, metachunk_hash):
        """
        Send metachunk_size and metachunk_hash as trailers.

        :returns: the chunk object if the upload has failed, else None
        """
        self.wait()
        if self.failed:
            self.logger.debug("NOT sending end marker and trailers to %s, "
                              "because upload has failed", self.chunk['url'])
            return self.chunk
        self.logger.debug("Sending end marker and trailers to %s",
                          self.chunk['url'])
        parts = [
            '0\r\n',
            '%s: %s\r\n' % (CHUNK_HEADERS['metachunk_size'],
                            metachunk_size),
            '%s: %s\r\n' % (CHUNK_HEADERS['metachunk_hash'],
                            metachunk_hash)
        ]
        if self.checksum:
            parts.append('%s: %s\r\n' % (CHUNK_HEADERS['chunk_hash'],
                                         self.checksum.hexdigest()))
        parts.append('\r\n')
        to_send = "".join(parts)
        try:
            with ChunkWriteTimeout(self.write_timeout):
                self.conn.send(to_send)
                # Last segment sent, disable TCP_CORK to flush buffers
                self.conn.set_cork(False)
        except (Exception, ChunkWriteTimeout) as exc:
            self.failed = True
            msg = str(exc)
            self.logger.warn("Failed to finish %s (%s, reqid=%s)",
                             self.chunk, msg, self.reqid)
            self.chunk['error'] = 'finish: %s' % msg
            return self.chunk
        return None

    def getresponse(self):
        """Read the HTTP response from the connection"""
        try:
            # As the server may buffer data before writing it to non-volatile
            # storage, we don't know if we have to wait while sending data or
            # while reading the response, thus we apply the same timeout to
            # both.
            with ChunkWriteTimeout(self.write_timeout):
                resp = self.conn.getresponse()
                return resp
        finally:
            if self.perfdata is not None:
                perfdata_rawx = self.perfdata.setdefault('rawx', dict())
                url_chunk = self.conn.chunk['url']
                upload_end = monotonic_time()
                perfdata_rawx[url_chunk] = \
                    perfdata_rawx.get(url_chunk, 0.0) \
                    + upload_end - self.conn.upload_start
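
# A minimal usage sketch of the writer life cycle (illustrative only, not
# part of the production path). It assumes `chunk` and `sysmeta` dicts
# shaped like the ones handled above, and a `pool` exposing spawn() (e.g.
# an eventlet GreenPool) for the send coroutine; only methods defined in
# EcChunkWriter are used, the helper itself is hypothetical.
def _example_upload_one_chunk(pool, chunk, sysmeta, data,
                              metachunk_size, metachunk_hash):
    """Hypothetical helper demonstrating the EcChunkWriter life cycle."""
    writer = EcChunkWriter.connect(chunk, sysmeta, reqid='example-reqid')
    writer.start(pool)   # spawn the _send() coroutine
    writer.send(data)    # queued, written to RAWX by the coroutine
    # finish() drains the queue and sends the trailers; it returns the
    # chunk object on failure, None on success
    if writer.finish(metachunk_size, metachunk_hash) is None:
        return writer.getresponse()
    return None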
    def _decode_segments(self, fragment_iterators):
        """
        Read from the fragment iterators and yield full segments.
        """
        # we use eventlet Queues to read the fragments
        queues = []
        # each iterator has its own queue
        for _j in range(len(fragment_iterators)):
            queues.append(LightQueue(1))

        def put_in_queue(fragment_iterator, queue):
            """
            Coroutine reading the fragments from the iterator.
            """
            try:
                for fragment in fragment_iterator:
                    # put the read fragment in the queue;
                    # the queues are of size 1 so this coroutine blocks
                    # until we decode a full segment
                    queue.put(fragment)
            except GreenletExit:
                # ignore
                pass
            except ChunkReadTimeout as err:
                self.logger.error('%s (reqid=%s)', err, self.reqid)
            except Exception:
                self.logger.exception("Exception on reading (reqid=%s)",
                                      self.reqid)
            finally:
                queue.resize(2)
                # put None to tell the decoding loop this is over
                queue.put(None)
                # close the iterator
                fragment_iterator.close()

        # we use an eventlet GreenPool to manage the fragment readers
        with ContextPool(len(fragment_iterators)) as pool:
            # spawn coroutines to read the fragments
            for fragment_iterator, queue in zip(fragment_iterators, queues):
                pool.spawn(put_in_queue, fragment_iterator, queue)

            # main decoding loop
            while True:
                data = []
                # get the fragments from the queues
                for queue in queues:
                    fragment = queue.get()
                    data.append(fragment)

                if not all(data):
                    # one of the readers returned None:
                    # impossible to read a full segment
                    break

                # actually decode the fragments into a segment
                if self.perfdata is not None:
                    ec_start = monotonic_time()
                try:
                    segment = self.storage_method.driver.decode(data)
                except exceptions.ECError:
                    # something terrible happened
                    self.logger.exception(
                        "ERROR decoding fragments (reqid=%s)", self.reqid)
                    raise
                finally:
                    if self.perfdata is not None:
                        ec_end = monotonic_time()
                        rawx_pdata = self.perfdata.setdefault('rawx', dict())
                        rawx_pdata['ec'] = rawx_pdata.get('ec', 0.0) \
                            + ec_end - ec_start

                yield segment
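
    # Standalone sketch of the size-1 queue fan-in used above (hypothetical
    # helper, never called; only LightQueue and ContextPool from this module
    # are assumed). Because each LightQueue holds a single item, every
    # reader coroutine blocks until the consuming loop has taken one
    # fragment from every queue, which keeps all readers in lockstep.
    def _example_lockstep_fanin(self, iterators):
        queues = [LightQueue(1) for _ in iterators]

        def produce(iterator, queue):
            for item in iterator:
                queue.put(item)  # blocks until the consumer takes it
            queue.resize(2)      # leave room for the end marker
            queue.put(None)

        with ContextPool(len(iterators)) as pool:
            for iterator, queue in zip(iterators, queues):
                pool.spawn(produce, iterator, queue)
            while True:
                batch = [queue.get() for queue in queues]
                if not all(batch):
                    break        # one of the producers is exhausted
                yield batch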
    def stream(self, source, size):
        bytes_transferred = 0
        meta_chunk = self.meta_chunk
        if self.chunk_checksum_algo:
            meta_checksum = hashlib.new(self.chunk_checksum_algo)
        else:
            meta_checksum = None
        pile = GreenPile(len(meta_chunk))
        failed_chunks = []
        current_conns = []

        for chunk in meta_chunk:
            pile.spawn(self._connect_put, chunk)

        for conn, chunk in pile:
            if not conn:
                failed_chunks.append(chunk)
            else:
                current_conns.append(conn)

        self.quorum_or_fail([co.chunk for co in current_conns],
                            failed_chunks)

        try:
            with green.ContextPool(len(meta_chunk)) as pool:
                for conn in current_conns:
                    conn.failed = False
                    conn.queue = LightQueue(io.PUT_QUEUE_DEPTH)
                    pool.spawn(self._send_data, conn)

                while True:
                    buffer_size = self.buffer_size()
                    if size is not None:
                        remaining_bytes = size - bytes_transferred
                        if buffer_size < remaining_bytes:
                            read_size = buffer_size
                        else:
                            read_size = remaining_bytes
                    else:
                        read_size = buffer_size
                    with green.SourceReadTimeout(self.read_timeout):
                        try:
                            data = source.read(read_size)
                        except (ValueError, IOError) as err:
                            raise SourceReadError(str(err))
                        if len(data) == 0:
                            for conn in current_conns:
                                if not conn.failed:
                                    conn.queue.put('')
                            break
                    self.checksum.update(data)
                    if meta_checksum:
                        meta_checksum.update(data)
                    bytes_transferred += len(data)
                    # copy current_conns to be able to remove a failed conn
                    for conn in current_conns[:]:
                        if not conn.failed:
                            conn.queue.put(data)
                        else:
                            current_conns.remove(conn)
                            failed_chunks.append(conn.chunk)

                    self.quorum_or_fail([co.chunk for co in current_conns],
                                        failed_chunks)

                for conn in current_conns:
                    while conn.queue.qsize():
                        sleep(0)

        except green.SourceReadTimeout as err:
            self.logger.warn('Source read timeout (reqid=%s): %s',
                             self.reqid, err)
            raise SourceReadTimeout(err)
        except SourceReadError as err:
            self.logger.warn('Source read error (reqid=%s): %s',
                             self.reqid, err)
            raise
        except Timeout as to:
            self.logger.warn('Timeout writing data (reqid=%s): %s',
                             self.reqid, to)
            raise OioTimeout(to)
        except Exception:
            self.logger.exception('Exception writing data (reqid=%s)',
                                  self.reqid)
            raise

        success_chunks = []

        for conn in current_conns:
            if conn.failed:
                failed_chunks.append(conn.chunk)
                continue
            pile.spawn(self._get_response, conn)

        for (conn, resp) in pile:
            if resp:
                self._handle_resp(
                    conn, resp,
                    meta_checksum.hexdigest() if meta_checksum else None,
                    success_chunks, failed_chunks)
        self.quorum_or_fail(success_chunks, failed_chunks)

        for chunk in success_chunks:
            chunk["size"] = bytes_transferred

        return bytes_transferred, success_chunks[0]['hash'], success_chunks
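
    # Hypothetical caller sketch (illustrative helper, never called): shows
    # how the return value of stream() is consumed, assuming meta_chunk and
    # the connection helpers above are already set up on the instance.
    def _example_consume_stream(self, source, size):
        bytes_transferred, checksum, chunks = self.stream(source, size)
        # after quorum_or_fail() succeeded, every surviving chunk carries
        # the metachunk size and the common hash
        assert all(chunk["size"] == bytes_transferred for chunk in chunks)
        return checksum, chunks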