Example #1
def __call__(self, *args, **kwargs):
    node_id = "%s-%s" % (self.trace_name, self.call_number)
    r = super(TracingProxy, self).__call__(*args, **kwargs)
    assert is_result_proxy(r)
    factory = r.__factory__
    factory.node_id = node_id
    # Collect the result proxies passed as arguments: they are this
    # node's dependencies. Deduplicate by identity, preserving order.
    deps = []
    deps_ids = set()
    for a in args:
        if is_result_proxy(a) and id(a) not in deps_ids:
            deps.append(a)
            deps_ids.add(id(a))
    for k in kwargs.values():
        if is_result_proxy(k) and id(k) not in deps_ids:
            deps.append(k)
            deps_ids.add(id(k))
    errors, placeholders = scan_args(args, kwargs)
    if errors:
        self.tracer.schedule_activity(node_id, self.trace_name)
        self.tracer.flush_scheduled()
        error_factory = first(errors).__factory__
        self.tracer.error(node_id, str(error_factory.value))
    for dep in deps:
        self.tracer.add_dependency(dep.__factory__.node_id, node_id)
    return r
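The dependency list is deduplicated by object identity rather than by equality: comparing or hashing a lazy result proxy could force a premature evaluation, while id() never touches the wrapped value. A minimal, standalone sketch of this ordered dedup-by-identity pattern (unique_by_identity is an illustrative name, not part of the library):

def unique_by_identity(values):
    # Preserve first-seen order; key on id() so the values themselves
    # are never compared or hashed.
    seen, out = set(), []
    for v in values:
        if id(v) not in seen:
            seen.add(id(v))
            out.append(v)
    return out

a, b = object(), object()
assert unique_by_identity([a, b, a, b]) == [a, b]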
Example #2
def parallel_reduce(f, iterable, initializer=sentinel):
    """Like reduce() but optimized to maximize parallel execution.

    The reduce function must be associative and commutative.

    The reduction will start as soon as two results are available, regardless
    of their "position". For example, the following reduction is possible:

     5 ----1-----|
    15           --------------4----------|
    15           |                        -------------12|
    15           |                        |              -------------17|
  R 15           |                        |              |              -------------21
    15 ----------|---2-----|              |              |              |
    15           |         --------------8|              |              |
    10 ---------3|         |                             |              |
    60 --------------------|-----------------------------|--------4-----|
    50 --------------------|----------------------------5|
    20 -------------------6|

    The iterable must have at least one element; otherwise, a ValueError is
    raised.

    The improvement over the built-in reduce() is obtained by starting the
    reduction as soon as any two results are available. The number of reduce
    operations is always len(iterable) - 1, regardless of what the reduction
    graph looks like.
    """
    if initializer is not sentinel:
        iterable = itertools.chain([initializer], iterable)
    # Split the values: result proxies can be reduced in parallel, while
    # plain values are reduced locally, two at a time.
    results, non_results = [], []
    for x in iterable:
        if is_result_proxy(x):
            results.append(x)
        else:
            non_results.append(x)
    i = iter(non_results)
    remainder = sentinel
    for x in i:
        try:
            y = next(i)
            results.append(f(x, y))
        except StopIteration:
            # Odd number of plain values: carry the leftover one forward.
            remainder = x
            if not results:  # len(iterable) == 1
                # Wrap the value in a result for a uniform interface.
                return result(x, -1)
    if not results:  # len(iterable) == 0
        raise ValueError(
            'parallel_reduce() of empty sequence with no initial value')
    if is_result_proxy(results[0]):
        results = [(r.__factory__, r) for r in results]
        heapq.heapify(results)
        return _parallel_reduce_recurse(f, results, remainder)
    else:
        # No tasks are involved; fall back to the built-in reduce().
        return reduce(f, results)
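The scheduling idea from the docstring, reducing whichever two operands become available first, can be sketched with the standard library alone. The helper below (eager_parallel_reduce is an illustrative name, not the library's API) folds finished results back into the pool as futures complete and performs exactly len(values) - 1 applications of f:

import concurrent.futures
import operator

def eager_parallel_reduce(f, values, max_workers=4):
    # values must be non-empty; f must be associative and commutative.
    ready = list(values)
    pending = set()
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as ex:
        while pending or len(ready) > 1:
            # Schedule a reduction as soon as any two operands exist.
            while len(ready) >= 2:
                pending.add(ex.submit(f, ready.pop(), ready.pop()))
            # Fold whichever result finishes first back into the pool.
            done, pending = concurrent.futures.wait(
                pending, return_when=concurrent.futures.FIRST_COMPLETED)
            ready.extend(fut.result() for fut in done)
    return ready[0]

print(eager_parallel_reduce(operator.add, range(10)))  # 45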
Example #3
def first(result, *results):
    """Return the first finished result from a list of results.

    If none of them is finished yet (i.e. all of the results are
    placeholders), return the first placeholder from the list.
    """
    rs = []
    for r in i_or_args(result, results):
        if is_result_proxy(r):
            rs.append(r)
        else:
            # A plain value is, by definition, already finished.
            return r
    return min(rs, key=_order_key)
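Note that first() never blocks: it picks the minimum of an order key (_order_key) that was recorded on each proxy as results arrived. The closest standard-library analogue does block, namely concurrent.futures.wait with FIRST_COMPLETED; a small sketch for comparison (first_done is an illustrative name):

import concurrent.futures
import time

def first_done(futures):
    # Block until at least one future finishes, then return one of them.
    done, _ = concurrent.futures.wait(
        futures, return_when=concurrent.futures.FIRST_COMPLETED)
    return next(iter(done))

with concurrent.futures.ThreadPoolExecutor() as ex:
    slow = ex.submit(lambda: time.sleep(0.2) or 'slow')
    fast = ex.submit(lambda: 'fast')
    print(first_done([slow, fast]).result())  # almost always 'fast'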
Example #4
def finish_order(result, *results):
    """Return the results in their finish order.

    The results that aren't finished yet will be at the end, with their
    relative order preserved.
    """
    rs = []
    for r in i_or_args(result, results):
        if is_result_proxy(r):
            rs.append(r)
        else:
            # Plain values are already finished; yield them right away.
            yield r
    # Unfinished placeholders sort after every finished result.
    for r in sorted(rs, key=_order_key):
        yield r
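finish_order() is the generator counterpart of first(): plain values are yielded right away and proxies are yielded sorted by the same _order_key, again without blocking. The blocking standard-library analogue is concurrent.futures.as_completed, sketched here for comparison:

import concurrent.futures
import time

def job(delay, tag):
    time.sleep(delay)
    return tag

with concurrent.futures.ThreadPoolExecutor() as ex:
    fs = [ex.submit(job, 0.2, 'slow'), ex.submit(job, 0.0, 'fast')]
    print([f.result() for f in concurrent.futures.as_completed(fs)])
    # almost always ['fast', 'slow']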
Example #5
def _workflow_wrapper(self, factory, input_data, *extra_args):
    wf_kwargs = {}
    for dep_name, proxy in self.proxy_factory_registry.items():
        wf_kwargs[dep_name] = proxy(*extra_args)
    func = factory(**wf_kwargs)
    try:
        args, kwargs = self.deserialize_input(input_data)
    except Exception:
        logger.exception('Cannot deserialize the input:')
        raise ValueError('Cannot deserialize the input: %r' % (input_data,))
    result = func(*args, **kwargs)
    # Can't use isinstance(result, restart_type) directly because, if the
    # result is a single result proxy, it would be evaluated. This also
    # avoids another issue: on Python 2 isinstance() swallows any
    # exception, while on Python 3 it doesn't.
    if not is_result_proxy(result) and isinstance(result, restart_type):
        try:
            traversed_input, (error, placeholders) = traverse_data(
                [result.args, result.kwargs])
        except Exception:
            logger.exception('Cannot traverse the restart arguments:')
            raise ValueError(
                'Cannot traverse the restart arguments: %r, %r' %
                (result.args, result.kwargs))
        wait(error)  # raise if not None
        if placeholders:
            raise SuspendTask
        r_args, r_kwargs = traversed_input
        try:
            serialized_input = self.serialize_restart_input(*r_args, **r_kwargs)
        except Exception:
            logger.exception('Cannot serialize the restart arguments:')
            raise ValueError(
                'Cannot serialize the restart arguments: %r, %r' %
                (result.args, result.kwargs))
        raise Restart(serialized_input)
    try:
        traversed_result, (error, placeholders) = traverse_data(result)
    except Exception:
        logger.exception('Cannot traverse the result:')
        raise ValueError('Cannot traverse the result: %r' % result)
    wait(error)  # raise if not None
    if placeholders:
        raise SuspendTask
    try:
        return self.serialize_result(traversed_result)
    except Exception:
        logger.exception('Cannot serialize the result:')
        raise ValueError('Cannot serialize the result: %r' % (result,))
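The order of the guards in the isinstance() check above matters because isinstance() falls back to the object's __class__ attribute, which is exactly the hook transparent proxies override; asking isinstance() about an unevaluated proxy can therefore force its evaluation. A standalone demonstration of the mechanism (RestartMarker and EvalOnTouchProxy are illustrative names):

class RestartMarker(object):
    pass

class EvalOnTouchProxy(object):
    # Transparent proxies typically forward __class__ to the wrapped
    # value, so merely reading it stands in for evaluating the result.
    @property
    def __class__(self):
        print('proxy evaluated!')
        return RestartMarker

p = EvalOnTouchProxy()
print(isinstance(p, RestartMarker))  # prints 'proxy evaluated!', then True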
Example #6
def __call__(self, *args, **kwargs):
    node_id = "%s-%s" % (self.trace_name, self.call_number)
    # Walk the arguments once, collecting the earliest finished error and
    # the result proxies they contain (see collect_err_and_results below).
    ((t_args, t_kwargs), (err, results)) = traverse_data(
        [args, kwargs], f=collect_err_and_results, initial=(None, None)
    )
    r = super(TracingProxy, self).__call__(*t_args, **t_kwargs)
    assert is_result_proxy(r)
    factory = r.__factory__
    factory.node_id = node_id
    if err is not None:
        self.tracer.schedule_activity(node_id, self.trace_name)
        self.tracer.flush_scheduled()
        error_factory = err.__factory__
        self.tracer.error(node_id, str(error_factory.value))
    for dep in results or []:
        self.tracer.add_dependency(dep.__factory__.node_id, node_id)
    return r
Example #7
def collect_err_and_results(result, value):
    # Fold step for traverse_data: threads an (err, results) accumulator
    # through the traversal, keeping the earliest error and collecting
    # every finished result proxy.
    err, results = result
    if not is_result_proxy(value):
        return result
    try:
        wait(value)
    except TaskError:
        # Keep whichever error finished first.
        if err is None:
            err = value
        else:
            err = first(err, value)
    except SuspendTask:
        # A placeholder: not finished yet, ignore it.
        pass
    else:
        if results is None:
            results = []
        results.append(value)
    return err, results
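The (err, results) pair is a fold accumulator that traverse_data threads through every leaf. The same shape can be shown with functools.reduce over plain values (collect here is a simplified stand-in: it keeps the first error seen, whereas the library keeps the earliest finished one via first()):

import functools

def collect(acc, value):
    err, results = acc
    if isinstance(value, Exception):
        err = err if err is not None else value
    else:
        results = (results or []) + [value]
    return err, results

acc = functools.reduce(collect, [1, ValueError('boom'), 2], (None, None))
print(acc)  # (ValueError('boom'), [1, 2])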
Example #8
def traverse_data(value, f=check_err_and_placeholders, initial=(None, False),
                  seen=frozenset(), make_list=True):
    if is_result_proxy(value):
        try:
            wait(value)
        except (TaskError, SuspendTask):
            # Errored or still a placeholder: pass the proxy through as-is.
            return value, f(initial, value)
        return value.__wrapped__, f(initial, value)

    if isinstance(value, (bytes, uni)):
        # Strings are iterable but must be treated as atomic values.
        return value, f(initial, value)

    res = initial

    # These ABCs live in collections.abc on Python 3 (they were removed
    # from the collections top level in 3.10).
    if isinstance(value, collections.abc.Iterable):
        if id(value) in seen:
            raise ValueError('Recursive structure.')
        seen = seen | frozenset([id(value)])

    if isinstance(value, collections.abc.Mapping):
        d = {}
        for k, v in value.items():
            # Keys are rebuilt with make_list=False so that nested
            # sequences become hashable tuples.
            k_, res = traverse_data(k, f, res, seen, make_list=False)
            v_, res = traverse_data(v, f, res, seen, make_list=make_list)
            d[k_] = v_
        return d, res
    if (
        isinstance(value, collections.abc.Iterable)
        and isinstance(value, collections.abc.Sized)
    ):
        items = []
        for x in value:
            x_, res = traverse_data(x, f, res, seen, make_list=make_list)
            items.append(x_)
        if make_list:
            return items, res
        return tuple(items), res
    if isinstance(value, collections.abc.Iterable):
        raise ValueError('Unsized iterables not allowed.')
    return value, f(initial, value)
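Stripped of the proxy handling, traverse_data is a fold-over-structure traversal: it rebuilds the container while threading an accumulator through f at every leaf. A self-contained simplification under that reading (walk is an illustrative name; unsized iterables are simply treated as leaves here):

import collections.abc

def walk(value, f, acc, seen=frozenset(), make_list=True):
    # Strings are iterable but treated as atomic leaves.
    if isinstance(value, (bytes, str)):
        return value, f(acc, value)
    if isinstance(value, collections.abc.Iterable):
        if id(value) in seen:
            raise ValueError('Recursive structure.')
        seen = seen | {id(value)}
    if isinstance(value, collections.abc.Mapping):
        d = {}
        for k, v in value.items():
            # Keys are rebuilt with make_list=False so they stay hashable.
            k_, acc = walk(k, f, acc, seen, make_list=False)
            v_, acc = walk(v, f, acc, seen, make_list)
            d[k_] = v_
        return d, acc
    if (isinstance(value, collections.abc.Iterable)
            and isinstance(value, collections.abc.Sized)):
        items = []
        for x in value:
            x_, acc = walk(x, f, acc, seen, make_list)
            items.append(x_)
        return (items if make_list else tuple(items)), acc
    return value, f(acc, value)

# Count the leaves while rebuilding the structure.
data = {'a': [1, 2, (3, {'b': 4})], 'c': 'atomic'}
rebuilt, count = walk(data, lambda n, leaf: n + 1, 0)
print(rebuilt)  # {'a': [1, 2, [3, {'b': 4}]], 'c': 'atomic'}
print(count)    # 8 (dict keys count as leaves too)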