def iter(self, iters, executor=None):
    """Iterate over dependencies and yield results.

    :param iters: dict with iterators over dependencies
    :param executor: Executor to punt computation tasks to. If None,
        will compute inside the plugin's thread.
    """
    deps_by_kind = self.dependencies_by_kind()

    # Merge iterators of data that has the same kind
    kind_iters = dict()
    for kind, deps in deps_by_kind.items():
        kind_iters[kind] = strax.merge_iters(
            strax.sync_iters(
                strax.same_length,
                {d: iters[d] for d in deps}))

    if len(deps_by_kind) > 1:
        # Sync iterators of different kinds by time
        kind_iters = strax.sync_iters(
            partial(strax.same_stop, func=strax.endtime),
            kind_iters)

    iters = kind_iters
    pending = []
    try:
        yield from self._inner_iter(iters, pending, executor)
    finally:
        # Guarantee cleanup even when the consumer closes the generator
        # early or _inner_iter raises; previously cleanup was skipped
        # in both of those cases.
        self.cleanup(wait_for=pending)
def iter(self, iters, executor=None):
    """Iterate over dependencies and yield results.

    :param iters: dict with iterators over dependencies
    :param executor: Executor to punt computation tasks to. If None,
        will compute inside the plugin's thread.
    """
    deps_by_kind = self.dependencies_by_kind()

    if len(deps_by_kind) > 1:
        # Sync the iterators that provide time info for each data kind
        # (first in deps_by_kind lists) by endtime
        iters.update(
            strax.sync_iters(
                partial(strax.same_stop, func=strax.endtime),
                {d[0]: iters[d[0]] for d in deps_by_kind.values()}))

    # Convert to iterators over merged data for each kind
    new_iters = dict()
    for kind, deps in deps_by_kind.items():
        if len(deps) > 1:
            synced_iters = strax.sync_iters(
                strax.same_length,
                {d: iters[d] for d in deps})
            new_iters[kind] = strax.merge_iters(synced_iters.values())
        else:
            new_iters[kind] = iters[deps[0]]
    iters = new_iters

    if self.rechunk_input:
        iters = self.rechunk_input(iters)

    pending = []
    try:
        for chunk_i in itertools.count():
            # Wait until the chunk is actually available. The original
            # one-shot `if` slept once and then called next() anyway,
            # even if the chunk was still not ready.
            while not self.is_ready(chunk_i):
                if self.source_finished():
                    return
                print(f"{self.__class__.__name__} waiting for chunk {chunk_i}")
                time.sleep(2)

            try:
                compute_kwargs = {k: next(iters[k]) for k in deps_by_kind}
            except StopIteration:
                # A dependency is exhausted: we are done.
                return

            if self.parallel and executor is not None:
                new_f = executor.submit(self.do_compute,
                                        chunk_i=chunk_i,
                                        **compute_kwargs)
                # Drop futures that already completed; keep the rest so
                # cleanup can wait on them.
                pending = [f for f in pending + [new_f] if not f.done()]
                yield new_f
            else:
                yield self.do_compute(chunk_i=chunk_i, **compute_kwargs)
    finally:
        # Previously cleanup was skipped when StopIteration caused an
        # early return; try/finally covers every exit path (including
        # exceptions and early generator close).
        self.cleanup(wait_for=pending)
def iter(self, iters, executor=None):
    """Iterate over dependencies and yield results.

    :param iters: dict with iterators over dependencies
    :param executor: Executor to punt computation tasks to. If None,
        will compute inside the plugin's thread.
    """
    deps_by_kind = self.dependencies_by_kind()

    if len(deps_by_kind) > 1:
        # Sync the iterators that provide time info for each data kind
        # (first in deps_by_kind lists) by endtime
        iters.update(
            strax.sync_iters(
                partial(strax.same_stop, func=strax.endtime),
                {d[0]: iters[d[0]] for d in deps_by_kind.values()}))

    # Convert to iterators over merged data for each kind
    new_iters = dict()
    for kind, deps in deps_by_kind.items():
        if len(deps) > 1:
            synced_iters = strax.sync_iters(
                strax.same_length,
                {d: iters[d] for d in deps})
            new_iters[kind] = strax.merge_iters(synced_iters.values())
        else:
            new_iters[kind] = iters[deps[0]]
    iters = new_iters

    if self.rechunk_input:
        iters = self.rechunk_input(iters)

    pending = []
    try:
        for chunk_i in itertools.count():
            if not self.check_next_ready_or_done(chunk_i):
                return
            try:
                compute_kwargs = {k: next(iters[k]) for k in deps_by_kind}
            except StopIteration:
                # A dependency is exhausted: we are done.
                return

            if self.parallel and executor is not None:
                new_f = executor.submit(self.do_compute,
                                        chunk_i=chunk_i,
                                        **compute_kwargs)
                # Drop futures that already completed; keep the rest so
                # close() can wait on them.
                pending = [f for f in pending + [new_f] if not f.done()]
                yield new_f
            else:
                yield self.do_compute(chunk_i=chunk_i, **compute_kwargs)
    finally:
        # Single close point (resolves the "avoid duplication" TODO):
        # runs on normal return, on any exception, and on early
        # generator close. Plain try/finally adds no closures, so
        # picklability is unaffected.
        self.close(wait_for=tuple(pending))
def test_sync_iters(source, source_skipper):
    """Syncing two sources by same_stop returns both keys, synchronized."""
    result = strax.sync_iters(
        strax.same_stop,
        {'s1': source, 's2': source_skipper})
    assert set(result.keys()) == {'s1', 's2'}
    _do_sync_check(list(result['s1']), list(result['s2']))