# Unary wrapper: applies the IntervalSet method named by the free variable
# `name` (bound in the enclosing scope) to each grouped IntervalSet and
# returns the raw per-key results as a plain dict.
def method(self, *args, profile=False, **kwargs):
    with perf_count(name, profile):
        selfmap = self.get_grouped_intervals()
        keys = selfmap.keys()

        def func(set1):
            return getattr(IntervalSet, name)(set1, *args, **kwargs)
        return {v: func(selfmap[v]) for v in keys}
# Unary wrapper that stays in the system: applies the IntervalSet method
# named `name` to each grouped IntervalSet, drops groups whose result is
# empty, and wraps the rest back into an IntervalSetMapping.
def method(self, *args, profile=False, **kwargs):
    with perf_count(name, profile):
        selfmap = self.get_grouped_intervals()
        keys_to_process = selfmap.keys()

        def func(set1):
            return getattr(IntervalSet, name)(set1, *args, **kwargs)
        results_map = {v: func(selfmap[v]) for v in keys_to_process}
        return IntervalSetMapping(
            IntervalSetMapping._remove_empty_intervalsets(results_map))
# Binary wrapper: pairs up the grouped IntervalSets of `self` and `other`
# by key (substituting an empty IntervalSet when a key is missing on one
# side), applies the IntervalSet method named `name` to each pair, drops
# empty results, and wraps the rest into a new IntervalSetMapping.
def method(self, other, *args, profile=False, **kwargs):
    with perf_count(name, profile):
        selfmap = self.get_grouped_intervals()
        othermap = other.get_grouped_intervals()
        keys = set(selfmap.keys()).union(othermap.keys())

        def func(set1, set2):
            return getattr(IntervalSet, name)(set1, set2, *args, **kwargs)
        results_map = {
            v: func(selfmap.get(v, IntervalSet([])),
                    othermap.get(v, IntervalSet([])))
            for v in keys
        }
        return IntervalSetMapping(
            IntervalSetMapping._remove_empty_intervalsets(results_map))
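# Illustrative sketch (not from the original source). The wrappers above
# reference a free variable `name`, so they are presumably produced by
# factory functions and installed once per IntervalSet method name. The toy
# classes below (DemoSet, DemoMapping, and _make_binary_method, all
# hypothetical stand-ins) show how such a closure-based factory works.

class DemoSet:
    def __init__(self, items):
        self.items = items

    def union(self, other):
        return DemoSet(self.items + other.items)


class DemoMapping:
    def __init__(self, grouped):
        self.grouped = grouped  # dict mapping key -> DemoSet


def _make_binary_method(name):
    # `name` is captured by this closure, just as in the wrappers above.
    def method(self, other):
        keys = set(self.grouped.keys()).union(other.grouped.keys())
        return DemoMapping({
            k: getattr(DemoSet, name)(self.grouped.get(k, DemoSet([])),
                                      other.grouped.get(k, DemoSet([])))
            for k in keys
        })
    return method


DemoMapping.union = _make_binary_method('union')

a = DemoMapping({'video1': DemoSet([1, 2])})
b = DemoMapping({'video1': DemoSet([3]), 'video2': DemoSet([4])})
print(a.union(b).grouped['video1'].items)  # [1, 2, 3]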
def run(self, query, args, combiner=union_combiner,
        randomize=True, chunksize=1,
        progress=False, profile=False,
        print_error=True):
    """Dispatches all tasks to workers and waits until everything finishes.

    See class documentation for an example of how to use run().

    Exceptions raised in `query` are suppressed, and the unsuccessful
    subset of `args` is returned at the end. However, such errors can be
    printed as soon as they occur.

    Args:
        query: A function that can return partial results for any batch
            of input arguments.
        args: A potentially long list of input arguments to execute the
            query with.
        combiner (optional): A function that takes two partial results
            and returns the combination of the two. Defaults to
            union_combiner, which assumes the partial results have a
            `union` method.
        randomize (optional): Whether to create and dispatch tasks in
            random order. Defaults to True.
        chunksize (optional): The size of the input batch for each task.
            Defaults to 1.
        progress (optional): Whether to display a progress bar.
            Defaults to False.
        profile (optional): Whether to output wall time of various
            internal stages to stdout. Defaults to False.
        print_error (optional): Whether to output task errors to stdout.
            Defaults to True.

    Returns:
        A pair ``(query_output, args_with_err)`` where ``query_output``
        is the combined result from successful tasks, and
        ``args_with_err`` is the subset of `args` that failed to
        execute.
    """
    with perf_count("Executing query in Runtime", enable=profile):
        with _WorkerPoolContext(self._get_worker_pool(query)) as pool:
            total_work = len(args)
            with tqdm(total=total_work, disable=not progress) as pbar:
                with perf_count("Executing in workers", enable=profile):
                    args_with_err = []
                    with perf_count("Dispatching tasks", enable=profile):
                        if randomize:
                            # Note: shuffles the caller's list in place.
                            random.shuffle(args)
                        async_results = pool.map(
                            _create_tasks(args, chunksize),
                            _get_callback(pbar, args_with_err,
                                          print_error))
                    # Fold successful partial results together; failed
                    # tasks are recorded in args_with_err by the callback.
                    combined_result = None
                    for future in async_results:
                        try:
                            r = future.get()
                        except TaskException:
                            continue
                        if combined_result is None:
                            combined_result = r
                        else:
                            combined_result = combiner(combined_result, r)
                    if combined_result is None and total_work > 0:
                        raise RekallRuntimeException("All tasks failed!")
                    return (combined_result, args_with_err)
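# Illustrative usage sketch (not from the original source; the class
# documentation referenced in the docstring is not shown here). It assumes
# `Runtime.inline()` exists as a convenience constructor for a local,
# single-process worker pool; `query` and the inputs are hypothetical.

from rekall.runtime import Runtime  # assumed module path

def query(batch):
    # Return a partial result for one batch of input arguments. A Python
    # set works with the default union_combiner, which calls `.union()`.
    return {x * 2 for x in batch}

rt = Runtime.inline()  # assumed convenience constructor
result, failed = rt.run(query, list(range(100)),
                        chunksize=10, progress=True)
# `result` is the union of all partial results; `failed` is the subset
# of inputs whose tasks raised an exception.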