コード例 #1
0
        def method(self, *args, profile=False, **kwargs):
            """Apply the IntervalSet operation `name` to each grouped
            IntervalSet and return the raw per-key results.

            Args:
                *args: Positional arguments forwarded to the IntervalSet
                    method.
                profile (optional): Whether to report wall time via
                    perf_count. Defaults to False.
                **kwargs: Keyword arguments forwarded to the IntervalSet
                    method.

            Returns:
                A dict mapping each grouping key to the value returned by
                calling the method named `name` on that key's IntervalSet.
            """
            with perf_count(name, profile):
                selfmap = self.get_grouped_intervals()
                keys = selfmap.keys()

                def func(set1):
                    return getattr(IntervalSet, name)(set1, *args, **kwargs)

                # Build the results inside the perf_count block so the timed
                # region actually covers the per-group work; previously the
                # comprehension ran after the timer had already stopped.
                results = {v: func(selfmap[v]) for v in keys}
            return results
コード例 #2
0
        def method(self, *args, profile=False, **kwargs):
            """Run the IntervalSet operation `name` on every grouped
            IntervalSet and wrap the results in a new IntervalSetMapping,
            dropping keys whose result is empty.

            Args:
                *args: Positional arguments forwarded to the IntervalSet
                    method.
                profile (optional): Whether to report wall time via
                    perf_count. Defaults to False.
                **kwargs: Keyword arguments forwarded to the IntervalSet
                    method.

            Returns:
                An IntervalSetMapping of the non-empty per-key results.
            """
            with perf_count(name, profile):
                grouped = self.get_grouped_intervals()
                operation = getattr(IntervalSet, name)

                def apply_op(interval_set):
                    return operation(interval_set, *args, **kwargs)

                per_key_results = {
                    key: apply_op(grouped[key]) for key in grouped.keys()
                }
            return IntervalSetMapping(
                IntervalSetMapping._remove_empty_intervalsets(per_key_results))
コード例 #3
0
        def method(self, other, *args, profile=False, **kwargs):
            """Apply the binary IntervalSet operation `name` pairwise across
            the grouped intervals of `self` and `other`.

            Keys present on only one side are paired with an empty
            IntervalSet. Results that come back empty are dropped.

            Args:
                other: The IntervalSetMapping-like object providing the
                    second operand for each key.
                *args: Extra positional arguments forwarded to the
                    IntervalSet method.
                profile (optional): Whether to report wall time via
                    perf_count. Defaults to False.
                **kwargs: Keyword arguments forwarded to the IntervalSet
                    method.

            Returns:
                An IntervalSetMapping of the non-empty per-key results.
            """
            with perf_count(name, profile):
                left = self.get_grouped_intervals()
                right = other.get_grouped_intervals()
                # Union of both key sets: operate on every key that appears
                # on either side.
                all_keys = set(left.keys()) | set(right.keys())

                def combine(key):
                    # Fresh empty IntervalSet per missing key, matching the
                    # original behavior (no shared instance).
                    lhs = left.get(key, IntervalSet([]))
                    rhs = right.get(key, IntervalSet([]))
                    return getattr(IntervalSet, name)(lhs, rhs, *args,
                                                      **kwargs)

                merged = {key: combine(key) for key in all_keys}
            return IntervalSetMapping(
                IntervalSetMapping._remove_empty_intervalsets(merged))
コード例 #4
0
    def run(self,
            query,
            args,
            combiner=union_combiner,
            randomize=True,
            chunksize=1,
            progress=False,
            profile=False,
            print_error=True):
        """Dispatches all tasks to workers and waits until everything finishes.

        See class documentation for an example of how to use run().
        Exception raised in `query` are suppressed and the unsuccessful subset
        of `args` is returned at the end. However, such errors can be printed
        as soon as they occur.

        Args:
            query: A function that can return partial results for any batch of
                input arguments.
            args: A potentially long list of input arguments to execute the
                query with. The list is not modified by this call.
            combiner (optional): A function that takes two partial results and
                returns the combination of the two.
                Defaults to union_combiner which assumes the partial results
                have a `union` method.
            randomize (optional): Whether to create and dispatch tasks in
                random order.
                Defaults to True.
            chunksize (optional): The size of the input batch for each task.
                Defaults to 1.
            progress (optional): Whether to display a progress bar.
                Defaults to False.
            profile (optional): Whether to output wall time of various internal
                stages to stdout.
                Defaults to False.
            print_error (optional): Whether to output task errors to stdout.
                Defaults to True.

        Returns:
            A pair ``(query_output, args_with_err)`` where ``query_output`` is
            the combined results from successful tasks, and ``args_with_err``
            is a list that is a subset of args that failed to execute.

        Raises:
            RekallRuntimeException: If there was work to do but every task
                failed.
        """
        with perf_count("Executing query in Runtime", enable=profile):
            with _WorkerPoolContext(self._get_worker_pool(query)) as pool:
                total_work = len(args)
                with tqdm(total=total_work, disable=not progress) as pbar:
                    with perf_count("Executing in workers", enable=profile):
                        # Populated by the callback as individual tasks fail.
                        args_with_err = []
                        with perf_count("Dispatching tasks", enable=profile):
                            if randomize:
                                # Shuffle a local copy so the caller's list is
                                # not mutated as a side effect (the original
                                # shuffled `args` in place).
                                args = list(args)
                                random.shuffle(args)
                            async_results = pool.map(
                                _create_tasks(args, chunksize),
                                _get_callback(pbar, args_with_err,
                                              print_error))
                        # Fold successful partial results together; failed
                        # tasks are skipped (their args are already recorded
                        # in args_with_err by the callback).
                        combined_result = None
                        for future in async_results:
                            try:
                                r = future.get()
                            except TaskException:
                                continue
                            if combined_result is None:
                                combined_result = r
                            else:
                                combined_result = combiner(combined_result, r)
                        if combined_result is None and total_work > 0:
                            raise RekallRuntimeException("All tasks failed!")
                        return (combined_result, args_with_err)