def __call__(self, *args, **kwargs):
    node_id = "%s-%s" % (self.trace_name, self.call_number)
    r = super(TracingProxy, self).__call__(*args, **kwargs)
    assert is_result_proxy(r)
    factory = r.__factory__
    factory.node_id = node_id
    # Collect every distinct result proxy passed in, preserving order; these
    # are the nodes this call depends on in the trace graph.
    deps = []
    deps_ids = set()
    for a in args:
        if is_result_proxy(a) and id(a) not in deps_ids:
            deps.append(a)
            deps_ids.add(id(a))
    for k in kwargs.values():
        if is_result_proxy(k) and id(k) not in deps_ids:
            deps.append(k)
            deps_ids.add(id(k))
    errors, placeholders = scan_args(args, kwargs)
    if errors:
        self.tracer.schedule_activity(node_id, self.trace_name)
        self.tracer.flush_scheduled()
        error_factory = first(errors).__factory__
        self.tracer.error(node_id, str(error_factory.value))
    for dep in deps:
        self.tracer.add_dependency(dep.__factory__.node_id, node_id)
    return r

def parallel_reduce(f, iterable, initializer=sentinel):
    """Like reduce() but optimized to maximize parallel execution.

    The reduce function must be associative and commutative.

    The reduction will start as soon as two results are available,
    regardless of their "position". For example, with four values a, b,
    c and d where a and b finish first, the following reduction is
    possible:

        a ----|
        b ----+-- f(a, b) ------|
        c --------|             +-- f(f(a, b), f(c, d))
        d --------+-- f(c, d) --|

    The iterable must have at least one element, otherwise a ValueError
    will be raised.

    The improvement over the built-in reduce() is obtained by starting
    the reduction as soon as any two results are available. The number
    of reduce operations is always constant and equal to
    len(iterable) - 1, regardless of the shape of the reduction graph.
    """
    if initializer is not sentinel:
        iterable = itertools.chain([initializer], iterable)
    # Separate the result proxies (values still being computed by tasks)
    # from the plain values that are already available.
    results, non_results = [], []
    for x in iterable:
        if is_result_proxy(x):
            results.append(x)
        else:
            non_results.append(x)
    # Reduce the plain values pairwise right away; an odd leftover element
    # is kept aside in reminder.
    i = iter(non_results)
    reminder = sentinel
    for x in i:
        try:
            y = next(i)
            results.append(f(x, y))
        except StopIteration:
            reminder = x
            if not results:  # len(iterable) == 1
                # Wrap the value in a result for uniform interface
                return result(x, -1)
    if not results:  # len(iterable) == 0
        raise ValueError(
            'parallel_reduce() of empty sequence with no initial value')
    if is_result_proxy(results[0]):
        # Order the pending results by finish order and reduce them as
        # they complete.
        results = [(r.__factory__, r) for r in results]
        heapq.heapify(results)
        return _parallel_reduce_recurse(f, results, reminder)
    else:
        # Looks like we don't use a task for reduction, fallback on reduce.
        # Don't drop the odd leftover element, if any.
        if reminder is not sentinel:
            results.append(reminder)
        return reduce(f, results)

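# A minimal usage sketch for parallel_reduce, not part of the original
# module. The names `add` and `pairs` are hypothetical; `add` is assumed to
# be a task proxy whose calls return result proxies. parallel_reduce itself
# relies on the standard-library itertools and heapq modules and, on
# Python 3, on functools.reduce.
def _example_parallel_sum(add, pairs):
    # Start one addition per pair; the calls run in parallel and each
    # returns a result proxy immediately.
    partial_sums = [add(x, y) for x, y in pairs]
    # Combine the partial sums as soon as any two of them finish, instead
    # of strictly left to right; addition is associative and commutative,
    # as parallel_reduce requires.
    return parallel_reduce(add, partial_sums)
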
def first(result, *results):
    """Return the first finished result from a list of results.

    If none is finished yet - all of the results are placeholders -
    return the first placeholder from the list.
    """
    rs = []
    for r in i_or_args(result, results):
        if is_result_proxy(r):
            rs.append(r)
        else:
            # A plain value is already computed, so it finishes "first".
            return r
    return min(rs, key=_order_key)

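# Illustrative only (hypothetical task proxies task_a and task_b): pick
# whichever of two concurrent results finishes first.
#
#     fastest = first(task_a(x), task_b(x))
#
# Passing a plain value short-circuits: first(42, task_a(x)) returns 42,
# because a non-proxy argument counts as already finished.
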
def finish_order(result, *results):
    """Return the results in their finish order.

    The results that aren't finished yet will be at the end, with their
    relative order preserved.
    """
    rs = []
    for r in i_or_args(result, results):
        if is_result_proxy(r):
            rs.append(r)
        else:
            # Plain values are already computed; yield them right away.
            yield r
    for r in sorted(rs, key=_order_key):
        yield r

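# Illustrative only (hypothetical proxies r1..r3 and a hypothetical
# `process` function): handle results as they complete rather than in the
# order the tasks were scheduled.
#
#     for r in finish_order(r1, r2, r3):
#         process(r)
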
def _workflow_wrapper(self, factory, input_data, *extra_args):
    # Instantiate every registered dependency proxy and build the workflow.
    wf_kwargs = {}
    for dep_name, proxy in self.proxy_factory_registry.items():
        wf_kwargs[dep_name] = proxy(*extra_args)
    func = factory(**wf_kwargs)
    try:
        args, kwargs = self.deserialize_input(input_data)
    except Exception:
        logger.exception('Cannot deserialize the input:')
        raise ValueError('Cannot deserialize the input: %r' % (input_data,))
    result = func(*args, **kwargs)
    # Can't use isinstance(result, restart_type) directly because if the
    # result is a single result proxy it will be evaluated. This also
    # fixes another issue: on Python 2, isinstance() swallows any
    # exception, while on Python 3 it doesn't.
    if not is_result_proxy(result) and isinstance(result, restart_type):
        try:
            traversed_input, (error, placeholders) = traverse_data(
                [result.args, result.kwargs])
        except Exception:
            logger.exception('Cannot traverse the restart arguments:')
            raise ValueError(
                'Cannot traverse the restart arguments: %r, %r'
                % (result.args, result.kwargs))
        wait(error)  # raise if not None
        if placeholders:
            raise SuspendTask
        r_args, r_kwargs = traversed_input
        try:
            serialized_input = self.serialize_restart_input(
                *r_args, **r_kwargs)
        except Exception:
            logger.exception('Cannot serialize the restart arguments:')
            raise ValueError(
                'Cannot serialize the restart arguments: %r, %r'
                % (result.args, result.kwargs))
        raise Restart(serialized_input)
    try:
        traversed_result, (error, placeholders) = traverse_data(result)
    except Exception:
        logger.exception('Cannot traverse the result:')
        raise ValueError('Cannot traverse the result: %r' % (result,))
    wait(error)  # raise if not None
    if placeholders:
        raise SuspendTask
    try:
        return self.serialize_result(traversed_result)
    except Exception:
        logger.exception('Cannot serialize the result:')
        raise ValueError('Cannot serialize the result: %r' % (result,))

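# Note on the control flow above: _workflow_wrapper signals its outcome
# through exceptions. wait(error) re-raises the first task error found in
# the traversed data, SuspendTask is raised while placeholders (unfinished
# results) remain so the workflow can be resumed later, and Restart carries
# the freshly serialized input for a new run.
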
def __call__(self, *args, **kwargs):
    node_id = "%s-%s" % (self.trace_name, self.call_number)
    # Walk the arguments once, collecting the first error and every result
    # proxy they contain.
    ((t_args, t_kwargs), (err, results)) = traverse_data(
        [args, kwargs], f=collect_err_and_results, initial=(None, None))
    r = super(TracingProxy, self).__call__(*t_args, **t_kwargs)
    assert is_result_proxy(r)
    factory = r.__factory__
    factory.node_id = node_id
    if err is not None:
        self.tracer.schedule_activity(node_id, self.trace_name)
        self.tracer.flush_scheduled()
        error_factory = err.__factory__
        self.tracer.error(node_id, str(error_factory.value))
    # Record an edge from every result this call depends on to this node.
    for dep in results or []:
        self.tracer.add_dependency(dep.__factory__.node_id, node_id)
    return r

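# Illustrative note (hypothetical node ids): if this proxy is called with a
# result produced by another traced call, e.g. a(b()), an edge
# "b-0" -> "a-0" is recorded through tracer.add_dependency, which lets the
# tracer reconstruct the dependency graph of the run.
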
def collect_err_and_results(result, value):
    # Fold step used with traverse_data: accumulate (first_error, proxies).
    err, results = result
    if not is_result_proxy(value):
        return result
    try:
        wait(value)
    except TaskError:
        # Keep the error that finished first.
        if err is None:
            err = value
        else:
            err = first(err, value)
    except SuspendTask:
        # A placeholder: not finished yet, nothing to record.
        pass
    else:
        if results is None:
            results = []
        results.append(value)
    return err, results

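# Illustrative only (hypothetical proxies): folding collect_err_and_results
# over a few values by hand, the way traverse_data drives it. Assuming r1
# and r2 finished successfully and e1 failed with a TaskError:
#
#     acc = (None, None)
#     for v in (r1, e1, 'plain', r2):
#         acc = collect_err_and_results(acc, v)
#     # acc == (e1, [r1, r2]); 'plain' is skipped as it is not a proxy.
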
def traverse_data(value, f=check_err_and_placeholders, initial=(None, False),
                  seen=frozenset(), make_list=True):
    if is_result_proxy(value):
        try:
            wait(value)
        except (TaskError, SuspendTask):
            # Failed or not finished yet: keep the proxy itself.
            return value, f(initial, value)
        # Finished: substitute the proxy with its actual value.
        return value.__wrapped__, f(initial, value)
    if isinstance(value, (bytes, uni)):
        # Strings are iterable but must be treated as scalars.
        return value, f(initial, value)
    res = initial
    # Note: collections.Iterable/Mapping/Sized moved to collections.abc in
    # Python 3.3 and were removed from collections in Python 3.10.
    if isinstance(value, collections.Iterable):
        if id(value) in seen:
            raise ValueError('Recursive structure.')
        seen = seen | frozenset([id(value)])
    if isinstance(value, collections.Mapping):
        d = {}
        for k, v in value.items():
            # Keys must stay hashable, hence make_list=False for them.
            k_, res = traverse_data(k, f, res, seen, make_list=False)
            v_, res = traverse_data(v, f, res, seen, make_list=make_list)
            d[k_] = v_
        return d, res
    if (isinstance(value, collections.Iterable)
            and isinstance(value, collections.Sized)):
        lst = []
        for x in value:
            x_, res = traverse_data(x, f, res, seen, make_list=make_list)
            lst.append(x_)
        if make_list:
            return lst, res
        return tuple(lst), res
    if isinstance(value, collections.Iterable):
        raise ValueError('Unsized iterables not allowed.')
    return value, f(initial, value)
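
# Illustrative only (hypothetical result proxy `total`): traverse_data
# returns the structure with finished proxies unwrapped, plus whatever the
# fold function f accumulated (by default, errors and placeholders).
#
#     data = {'total': total, 'tags': ['a', 'b']}
#     plain, (error, placeholders) = traverse_data(data)
#     # `plain` mirrors `data`, with `total` replaced by its value if it
#     # finished; `error` is the first failed proxy (or None) and
#     # `placeholders` is truthy if anything is still pending.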