# Module-level imports assumed for this method: warnings, functools.partial,
# itertools.chain, multiprocessing.Pool, and the package's chunker /
# compose_iter helpers.
def fetch(self):
    """Exhaust the pipeline. The result itself is lazy (a generator)."""
    chunk_feed = chunker(self._mol_feed, chunksize=self.chunksize)
    # get first chunk and check if it is saturated
    try:
        first_chunk = next(chunk_feed)
    except StopIteration:
        raise StopIteration('There are no molecules loaded to the pipeline.')

    if len(first_chunk) == 0:
        warnings.warn('There are zero molecules at the output of the VS '
                      'pipeline. The output file will be empty.')
    elif len(first_chunk) < self.chunksize and self.n_cpu > 1:
        warnings.warn('The chunksize (%i) seems to be too large.'
                      % self.chunksize)
        # fall back to per-method multithreading when there are fewer
        # molecules than cores
        if len(first_chunk) < self.n_cpu:
            warnings.warn('Falling back to sub-methods multithreading as '
                          'the number of molecules is less than cores '
                          '(%i < %i)' % (len(first_chunk), self.n_cpu))
            for func in self._pipe:
                if hasattr(func, 'n_cpu'):
                    func.n_cpu = self.n_cpu
                elif hasattr(func, 'n_jobs'):
                    func.n_jobs = self.n_cpu
                elif isinstance(func, partial):
                    for func2 in func.args:
                        if hasattr(func2, 'n_cpu'):
                            func2.n_cpu = self.n_cpu
                        elif hasattr(func2, 'n_jobs'):
                            func2.n_jobs = self.n_cpu
            # turn off VS-level multiprocessing
            self.n_cpu = 1

    # TODO: add some verbosity or a progress bar
    if self.n_cpu != 1:
        out = (Pool(self.n_cpu if self.n_cpu > 0 else None)
               .imap(partial(compose_iter, funcs=self._pipe),
                     (chunk for chunk in chain([first_chunk], chunk_feed))))
    else:
        out = (compose_iter(chunk, self._pipe)
               for chunk in chain([first_chunk], chunk_feed))
    # FIXME: use the joblib version as soon as it gets return_generator merged
    # out = Parallel(n_jobs=self.n_cpu)(
    #     delayed(compose_iter)(chunk, self._pipe)
    #     for chunk in chain([first_chunk], chunk_feed))

    # merge chunks into one iterable
    return chain.from_iterable(out)
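For context, fetch leans on two small helpers: chunker, which slices the molecule
iterator into fixed-size lists, and compose_iter, which threads one chunk through
every function in self._pipe. Below is a minimal, self-contained sketch of those
helpers; the names match the calls above, but the bodies are assumptions for
illustration, not the package's actual implementations.

from itertools import islice


def chunker(iterable, chunksize=100):
    """Yield successive lists of at most `chunksize` items (assumed behaviour)."""
    it = iter(iterable)
    while True:
        chunk = list(islice(it, chunksize))
        if not chunk:
            break
        yield chunk


def compose_iter(iterable, funcs):
    """Pipe a chunk through every function in `funcs` and return a list
    (assumed behaviour matching the compose_iter(chunk, self._pipe) call above)."""
    for func in funcs:
        iterable = func(iterable)
    return list(iterable)


# Toy usage: uppercase then tag each item, two items per chunk.
pipe = [lambda mols: (m.upper() for m in mols),
        lambda mols: (m + '!' for m in mols)]
chunks = chunker('abcde', chunksize=2)
print([compose_iter(chunk, pipe) for chunk in chunks])
# [['A!', 'B!'], ['C!', 'D!'], ['E!']]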
def test_chunks():
    chunks = chunker('ABCDEFG', 2)
    assert_equal(list(chunks),
                 [['A', 'B'], ['C', 'D'], ['E', 'F'], ['G']])
def test_chunks():
    chunks = chunker('ABCDEFG', 2)
    assert list(chunks) == [['A', 'B'], ['C', 'D'], ['E', 'F'], ['G']]