def run(self):
    """
    Read chunks from the input file(s) and hand them to the workers.

    If self.stdin_fd is not -1, sys.stdin is closed and re-opened from that
    file descriptor first (presumably so that input arriving on standard
    input stays readable in this process -- confirm against the caller).

    self.file is opened with xopen in binary mode. When self.file2 is set,
    it is opened as well and paired chunks are read with
    dnaio.read_paired_chunks; otherwise single chunks are read with
    dnaio.read_chunks. Each chunk (or chunk pair) is passed, together with
    its running index, to self.send_to_worker.

    Afterwards, one "poison pill" (the value -1) is sent per connection,
    each to the worker whose index is read next from self.queue.

    If an exception is raised, every connection receives the value -2
    followed by a tuple (exception, formatted traceback).
    """
    # Re-attach standard input to the descriptor handed to this object
    if self.stdin_fd != -1:
        sys.stdin.close()
        sys.stdin = os.fdopen(self.stdin_fd)

    try:
        with xopen(self.file, 'rb') as first:
            if not self.file2:
                # Single input file
                single_chunks = dnaio.read_chunks(first, self.buffer_size)
                for index, data in enumerate(single_chunks):
                    self.send_to_worker(index, data)
            else:
                # Two input files: chunks are read as pairs
                with xopen(self.file2, 'rb') as second:
                    paired_chunks = dnaio.read_paired_chunks(
                        first, second, self.buffer_size)
                    for index, (data1, data2) in enumerate(paired_chunks):
                        self.send_to_worker(index, data1, data2)

        # One poison pill per connection; the recipient is whichever worker
        # index comes out of the queue next
        for _ in self.connections:
            ready_worker = self.queue.get()
            self.connections[ready_worker].send(-1)
    except Exception as e:
        # TODO better send this to a common "something went wrong" Queue
        for conn in self.connections:
            conn.send(-2)
            conn.send((e, traceback.format_exc()))
def reader_process(file, file2, connections, queue, buffer_size, stdin_fd):
    """
    Read chunks of FASTA or FASTQ data from *file* and send to a worker.

    file -- path of the input file; opened with xopen in binary mode.

    file2 -- path of a second input file, or a false value. If given, chunk
        pairs are read from both files with dnaio.read_paired_chunks;
        otherwise single chunks are read with dnaio.read_chunks.

    queue -- a Queue of worker indices. A worker writes its own index into
        this queue to notify the reader that it is ready to receive more
        data.

    connections -- a list of Connection objects, one for each worker.

    buffer_size -- passed through to the dnaio chunk readers.

    stdin_fd -- if not -1, sys.stdin is closed and re-opened from this file
        descriptor before any reading happens.

    The function repeatedly

    - reads a chunk from the file
    - reads a worker index from the Queue
    - sends the chunk to connections[index]

    and finally sends "poison pills" (the value -1) to all connections.

    For each chunk, the chunk index is sent first with send(), followed by
    the chunk data (one or two buffers) with send_bytes().

    If an exception is raised, every connection instead receives the value
    -2 followed by a tuple (exception, formatted traceback).
    """
    if stdin_fd != -1:
        sys.stdin.close()
        sys.stdin = os.fdopen(stdin_fd)
    try:
        with xopen(file, 'rb') as f:
            if file2:
                with xopen(file2, 'rb') as f2:
                    for chunk_index, (chunk1, chunk2) in enumerate(
                            dnaio.read_paired_chunks(f, f2, buffer_size)):
                        # Determine the worker that should get this chunk
                        pipe = connections[queue.get()]
                        pipe.send(chunk_index)
                        pipe.send_bytes(chunk1)
                        pipe.send_bytes(chunk2)
            else:
                for chunk_index, chunk in enumerate(
                        dnaio.read_chunks(f, buffer_size)):
                    # Determine the worker that should get this chunk
                    pipe = connections[queue.get()]
                    pipe.send(chunk_index)
                    pipe.send_bytes(chunk)

        # Send poison pills to all workers: one per connection, each to the
        # worker whose index is read next from the queue
        for _ in connections:
            connections[queue.get()].send(-1)
    except Exception as e:
        # TODO better send this to a common "something went wrong" Queue
        # Format the traceback once; it is the same for every worker
        error = (e, traceback.format_exc())
        for connection in connections:
            connection.send(-2)
            connection.send(error)
def reader_process(file, file2, connections, queue, buffer_size, stdin_fd):
    """
    Read chunks of FASTA or FASTQ data from *file* and send to a worker.

    file -- path of the input file; opened with xopen in binary mode.

    file2 -- path of a second input file, or a false value. If given, chunk
        pairs are read from both files with dnaio.read_paired_chunks;
        otherwise single chunks are read with dnaio.read_chunks.

    queue -- a Queue of worker indices. A worker writes its own index into
        this queue to notify the reader that it is ready to receive more
        data.

    connections -- a list of Connection objects, one for each worker.

    buffer_size -- passed through to the dnaio chunk readers.

    stdin_fd -- if not -1, sys.stdin is closed and re-opened from this file
        descriptor before any reading happens.

    The function repeatedly

    - reads a chunk from the file
    - reads a worker index from the Queue
    - sends the chunk to connections[index]

    and finally sends "poison pills" (the value -1) to all connections.

    For each chunk, the chunk index is sent first with send(), followed by
    the chunk data (one or two buffers) with send_bytes().

    If an exception is raised, every connection instead receives the value
    -2 followed by a tuple (exception, formatted traceback).
    """
    if stdin_fd != -1:
        sys.stdin.close()
        sys.stdin = os.fdopen(stdin_fd)
    try:
        with xopen(file, 'rb') as f:
            if file2:
                with xopen(file2, 'rb') as f2:
                    for chunk_index, (chunk1, chunk2) in enumerate(
                            dnaio.read_paired_chunks(f, f2, buffer_size)):
                        # Determine the worker that should get this chunk
                        pipe = connections[queue.get()]
                        pipe.send(chunk_index)
                        pipe.send_bytes(chunk1)
                        pipe.send_bytes(chunk2)
            else:
                for chunk_index, chunk in enumerate(
                        dnaio.read_chunks(f, buffer_size)):
                    # Determine the worker that should get this chunk
                    pipe = connections[queue.get()]
                    pipe.send(chunk_index)
                    pipe.send_bytes(chunk)

        # Send poison pills to all workers: one per connection, each to the
        # worker whose index is read next from the queue
        for _ in connections:
            connections[queue.get()].send(-1)
    except Exception as e:
        # TODO better send this to a common "something went wrong" Queue
        # Format the traceback once; it is the same for every worker
        error = (e, traceback.format_exc())
        for connection in connections:
            connection.send(-2)
            connection.send(error)