def test_join_flattens_filters(self):
    """Joining already-joined pipes should flatten into one filter list, not nest."""
    nested_once  = Pipes.join(ReprFilter())
    nested_twice = Pipes.join(nested_once, ReprFilter())
    combined     = Pipes.join(nested_twice, nested_twice)
    # Two filters per side, flattened: 4 total rather than 2 nested pipes.
    self.assertEqual(4, len(combined._filters))
def test_bad_exception(self):
    """Pipes.join must reject an empty argument list and non-pipe objects."""
    with self.assertRaises(CobaException):
        Pipes.join()

    with self.assertRaises(CobaException):
        Pipes.join(object())
def test_run(self):
    """Running a source->filter->sink pipeline should push every item through to the sink."""
    src = ListSource(list(range(10)))
    dst = ListSink()

    Pipes.join(src, ProcessNameFilter(), dst).run()

    # All ten items were processed on the main process.
    self.assertEqual(dst.items[0], ['MainProcess'] * 10)
def test_params(self):
    """A pipeline's params should merge its members' params (empty when none have any)."""
    no_params = Pipes.join(NoParamsSource(), NoParamsFilter(), NoParamsSink())
    self.assertEqual({}, no_params.params)

    one_param_filter = Pipes.join(NoParamsSource(), ParamsFilter(), NoParamsSink())
    self.assertEqual({'filter': 'ParamsFilter'}, one_param_filter.params)

    mixed_filters = Pipes.join(NoParamsSource(), NoParamsFilter(), ParamsFilter(), NoParamsSink())
    self.assertEqual({'filter': 'ParamsFilter'}, mixed_filters.params)

    all_with_params = Pipes.join(ParamsSource(), ParamsFilter(), ParamsSink())
    self.assertEqual({'source': 'ParamsSource', 'sink': 'ParamsSink', 'filter': 'ParamsFilter'}, all_with_params.params)
def test_join_source_filters_sink_repr(self):
    """str() of a full source->filters->sink pipeline lists every member, comma separated."""
    pipeline = Pipes.join(ReprSource(), ReprFilter(), ReprFilter(), ReprSink())
    self.assertEqual("ReprSource,ReprFilter,ReprFilter,ReprSink", str(pipeline))
def test_filter_order(self):
    """Joined filters must be applied left to right: add first, then multiply."""
    class PlusOne:
        def filter(self, values):
            return (v + 1 for v in values)

    class TimesTwo:
        def filter(self, values):
            return (v * 2 for v in values)

    actual = list(Pipes.join(PlusOne(), TimesTwo()).filter([1, 2]))

    # (1+1)*2 == 4 and (2+1)*2 == 6; the reverse order would give [3,5].
    self.assertEqual([4, 6], actual)
def map(self, filter: Filter[Any, Any], items: Iterable[Any]) -> Iterable[Any]:
    """Apply `filter` to `items` across a pool of worker processes, yielding results lazily.

    Args:
        filter: The filter each worker process applies to the items it receives.
        items: The items to pickle and distribute to the worker processes.

    Returns:
        An iterable of the workers' outputs, read from the shared stdout queue.
        Output order depends on worker scheduling and is not guaranteed to match
        the input order.
    """
    # Three shared queues: stdin feeds pickled items to workers, stdout carries
    # their results back, and stderr carries errors/log records. stdin is bounded
    # by the pool size so populate_tasks blocks rather than buffering everything.
    self._stdin  = QueueIO(Queue(maxsize=self._n_processes))
    self._stdout = QueueIO(Queue())
    self._stderr = QueueIO(Queue())

    # Without this multiprocessing.Queue() will output an ugly error message if a user ever hits ctrl-c.
    # By setting _ignore_epipe we prevent Queue() from displaying its message and we show our own friendly
    # message instead. In future versions of Python this could break but for now this works for 3.6-3.10.
    self._stdin._queue._ignore_epipe = True
    self._stdout._queue._ignore_epipe = True
    self._stderr._queue._ignore_epipe = True

    # Shared state coordinating the three background threads started below.
    self._threads = []
    self._completed = False        # set by populate_tasks when all items have been queued
    self._terminate = False        # external kill switch checked by both loops
    self._pool: List[Process] = []
    self._no_more_items = False

    def maintain_pool():
        # Keep the pool at _n_processes live workers until all queued work is
        # drained (or termination is requested), then shut everything down.
        finished = lambda: self._completed and (self._stdin._queue.qsize() == 0 or self._terminate)

        while not finished():
            if self._terminate:
                break

            # Drop dead workers (e.g. ones that exited after maxtasksperchild)
            # and spawn replacements so the pool stays at full strength.
            self._pool = [p for p in self._pool if p.is_alive()]

            for _ in range(self._n_processes - len(self._pool)):
                args = (filter, self._stdin, self._stdout, self._stderr, self._maxtasksperchild)
                process = Process(target=PipesPool.worker, args=args)
                process.start()
                self._pool.append(process)

            #I don't like this but it seems to be
            #the fastest/simplest way out of all my tests...
            time.sleep(0.1)

        if not self._terminate:
            # Graceful shutdown: one None sentinel per worker tells it to exit.
            for _ in self._pool:
                self._stdin.write(None)
        else:
            # Forced shutdown: kill the workers outright.
            for p in self._pool:
                p.terminate()

        for p in self._pool:
            p.join()

        # All workers are gone; close the output streams so readers stop.
        self._stderr.write(None)
        self._stdout.write(None)

    def populate_tasks():
        # Pickle each item and feed it to the workers via the bounded stdin queue.
        try:
            for item in items:
                if self._terminate:
                    break

                try:
                    self._stdin.write(pickle.dumps(item))
                except Exception as e:
                    if "pickle" in str(e) or "Pickling" in str(e):
                        # Unpicklable item: report a friendly explanation instead
                        # of the raw pickle error (unless it is already a
                        # CobaException with its own message).
                        message = str(e) if isinstance(e, CobaException) else (
                            f"We attempted to process your code on multiple processes but were unable to do so due to a pickle "
                            f"error. The exact error received was '{str(e)}'. Errors this kind can often be fixed in one of two "
                            f"ways: 1) evaluate the experiment in question on a single process with no limit on the tasks per child "
                            f"or 2) modify the named class to be picklable. The easiest way to make a given class picklable is to "
                            f"add `def __reduce__(self): return (<the class in question>, (<tuple of constructor arguments>))` to "
                            f"the class. For more information see https://docs.python.org/3/library/pickle.html#object.__reduce__."
                        )

                        self._stderr.write(message)

                        # I'm not sure what I think about this...
                        # It means pipes stops after a pickle error...
                        # This is how it has worked for a long time
                        # So we're leaving it as is for now...
                        break
                    else: #pragma: no cover
                        self._stderr.write((time.time(), current_process().name, e, traceback.format_tb(e.__traceback__)))

        except Exception as e:
            # Failure while iterating `items` itself; report it on stderr.
            self._stderr.write((time.time(), current_process().name, e, traceback.format_tb(e.__traceback__)))

        # Signals maintain_pool that no further items will be queued.
        self._completed = True

    # Forward worker stderr records to the caller-supplied stderr sink.
    log_thread = Thread(target=Pipes.join(self._stderr, Foreach(self._given_stderr)).run)
    log_thread.daemon = True
    log_thread.start()

    pool_thread = Thread(target=maintain_pool)
    pool_thread.daemon = True
    pool_thread.start()

    tasks_thread = Thread(target=populate_tasks)
    tasks_thread.daemon = True
    tasks_thread.start()

    self._threads.append(log_thread)
    self._threads.append(pool_thread)
    self._threads.append(tasks_thread)

    # Lazily yield results as workers produce them; ends when maintain_pool
    # writes the closing None sentinel to stdout.
    for item in self._stdout.read():
        yield item
def test_join_source_sink_repr(self):
    """str() of a source->sink pipeline names both members."""
    pipeline = Pipes.join(ReprSource(), ReprSink())
    self.assertEqual("ReprSource,ReprSink", str(pipeline))
def test_join_filters_repr(self):
    """str() of a filter-only pipeline lists each filter in order."""
    pipeline = Pipes.join(ReprFilter(), ReprFilter())
    self.assertEqual("ReprFilter,ReprFilter", str(pipeline))
def test_join_filters_sink_repr(self):
    """str() of a filters->sink pipeline lists the filters then the sink."""
    pipeline = Pipes.join(ReprFilter(), ReprFilter(), ReprSink())
    self.assertEqual("ReprFilter,ReprFilter,ReprSink", str(pipeline))
def test_join_source_filters_repr(self):
    """str() of a source->filters pipeline lists the source then each filter."""
    pipeline = Pipes.join(ReprSource(), ReprFilter(), ReprFilter())
    self.assertEqual("ReprSource,ReprFilter,ReprFilter", str(pipeline))
def test_exception(self):
    """An exception raised inside a filter should propagate out of run()."""
    pipeline = Pipes.join(ListSource(list(range(4))), ExceptionFilter(), ListSink())

    with self.assertRaises(Exception):
        pipeline.run()