Beispiel #1
0
def _call_node_run(
    node: Node,
    catalog: DataCatalog,
    inputs: Dict[str, Any],
    is_async: bool,
    run_id: str = None,
) -> Dict[str, Any]:
    """Execute ``node`` on already-loaded ``inputs`` and notify the node hooks.

    Calls the ``on_node_error`` hook when ``node.run`` raises (the exception
    is then re-raised to the caller) and the ``after_node_run`` hook when it
    succeeds.

    Args:
        node: The ``Node`` to run.
        catalog: The ``DataCatalog`` handed to the hooks; it is neither read
            nor written here.
        inputs: The inputs passed to ``node.run`` and forwarded to the hooks.
        is_async: Flag forwarded to the hooks unchanged.
        run_id: Identifier forwarded to the hooks unchanged.

    Returns:
        Whatever ``node.run(inputs)`` returned.

    Raises:
        Exception: Anything raised by ``node.run``, after the
            ``on_node_error`` hook has been called.

    """
    hook_manager = get_hook_manager()
    # Keyword arguments shared by both hook calls below.
    hook_context = {
        "node": node,
        "catalog": catalog,
        "inputs": inputs,
        "is_async": is_async,
        "run_id": run_id,
    }
    try:
        node_outputs = node.run(inputs)
    except Exception as exc:
        hook_manager.hook.on_node_error(  # pylint: disable=no-member
            error=exc, **hook_context
        )
        raise exc
    else:
        # Only reached when the node ran without raising.
        hook_manager.hook.after_node_run(  # pylint: disable=no-member
            outputs=node_outputs, **hook_context
        )
        return node_outputs
Beispiel #2
0
def _run_node_sequential(node: Node, catalog: DataCatalog, run_id: str = None) -> Node:
    """Load inputs, run ``node`` and save its outputs, one step at a time.

    The ``before_node_run`` hook is called once all inputs are loaded,
    ``on_node_error`` is called if ``node.run`` raises (the exception is then
    re-raised), and ``after_node_run`` is called on success before any output
    is saved. All hooks receive ``is_async=False``.

    Args:
        node: The ``Node`` to run.
        catalog: The ``DataCatalog`` the inputs are loaded from and the
            outputs are saved to.
        run_id: Identifier forwarded to the hooks unchanged.

    Returns:
        The node argument.

    Raises:
        Exception: Anything raised by ``node.run``, after the
            ``on_node_error`` hook has been called.

    """
    inputs = {}
    for dataset_name in node.inputs:
        inputs[dataset_name] = catalog.load(dataset_name)

    hook_manager = get_hook_manager()
    # Keyword arguments shared by every hook call in this function.
    hook_context = {
        "node": node,
        "catalog": catalog,
        "inputs": inputs,
        "is_async": False,
        "run_id": run_id,
    }
    hook_manager.hook.before_node_run(**hook_context)  # pylint: disable=no-member

    try:
        node_outputs = node.run(inputs)
    except Exception as exc:
        hook_manager.hook.on_node_error(  # pylint: disable=no-member
            error=exc, **hook_context
        )
        raise exc

    hook_manager.hook.after_node_run(  # pylint: disable=no-member
        outputs=node_outputs, **hook_context
    )

    for dataset_name, dataset in node_outputs.items():
        catalog.save(dataset_name, dataset)
    return node
Beispiel #3
0
def _run_node_async(node: Node,
                    catalog: DataCatalog,
                    run_id: str = None) -> Node:
    """Run ``node``, loading its inputs and saving its outputs via a thread pool.

    Each input load and each output save is submitted to a
    ``ThreadPoolExecutor``; ``node.run`` itself is called directly in the
    calling thread. The ``before_node_run`` hook is called once every input
    has been loaded, ``on_node_error`` if ``node.run`` raises (the exception
    is then re-raised), and ``after_node_run`` on success before the saves
    are submitted. All hooks receive ``is_async=True``.

    Args:
        node: The ``Node`` to run.
        catalog: The ``DataCatalog`` the inputs are loaded from and the
            outputs are saved to.
        run_id: Identifier forwarded to the hooks unchanged.

    Returns:
        The node argument.

    Raises:
        Exception: Anything raised while loading an input, by ``node.run``
            (after ``on_node_error`` has been called), or by a save task.

    """
    with ThreadPoolExecutor() as executor:
        # Submit one load task per input name; the dict itself is only
        # touched from this thread.
        load_futures = {}
        for dataset_name in node.inputs:
            load_futures[dataset_name] = executor.submit(catalog.load, dataset_name)
        wait(load_futures.values(), return_when=ALL_COMPLETED)

        # .result() re-raises any exception that occurred inside a load task.
        inputs = {}
        for dataset_name, load_future in load_futures.items():
            inputs[dataset_name] = load_future.result()

        hook_manager = get_hook_manager()
        # Keyword arguments shared by every hook call in this function.
        hook_context = {
            "node": node,
            "catalog": catalog,
            "inputs": inputs,
            "is_async": True,
            "run_id": run_id,
        }
        hook_manager.hook.before_node_run(**hook_context)  # pylint: disable=no-member

        try:
            node_outputs = node.run(inputs)
        except Exception as exc:
            hook_manager.hook.on_node_error(  # pylint: disable=no-member
                error=exc, **hook_context
            )
            raise exc

        hook_manager.hook.after_node_run(  # pylint: disable=no-member
            outputs=node_outputs, **hook_context
        )

        save_futures = {
            executor.submit(catalog.save, dataset_name, dataset)
            for dataset_name, dataset in node_outputs.items()
        }

        # Raise the exception of the first completed save task that failed.
        for finished in as_completed(save_futures):
            save_error = finished.exception()
            if save_error:
                raise save_error
    return node
Beispiel #4
0
def run_node(node: Node, catalog: DataCatalog) -> Node:
    """Execute one ``Node``, reading its inputs from and writing its outputs
    to the ``catalog``.

    Args:
        node: The ``Node`` to run.
        catalog: The ``DataCatalog`` each name in ``node.inputs`` is loaded
            from and each entry returned by ``node.run`` is saved to.

    Returns:
        The node argument.

    """
    # Load every declared input, in the order the node lists them.
    loaded_inputs = {}
    for dataset_name in node.inputs:
        loaded_inputs[dataset_name] = catalog.load(dataset_name)

    node_outputs = node.run(loaded_inputs)

    # Save each produced dataset under its output name.
    for dataset_name, dataset in node_outputs.items():
        catalog.save(dataset_name, dataset)
    return node