def _call_node_run(
    node: Node,
    catalog: DataCatalog,
    inputs: Dict[str, Any],
    is_async: bool,
    run_id: str = None,
) -> Dict[str, Any]:
    """Run ``node`` on ``inputs`` and report the outcome to the node hooks.

    Args:
        node: The ``Node`` to run.
        catalog: Forwarded unchanged to the hooks.
        inputs: Mapping passed to ``node.run`` and forwarded to the hooks.
        is_async: Forwarded unchanged to the hooks.
        run_id: Forwarded unchanged to the hooks.

    Returns:
        The value returned by ``node.run(inputs)``.

    Raises:
        Exception: Whatever ``node.run`` raised, re-raised after the
            ``on_node_error`` hook has been called.
    """
    hooks = get_hook_manager().hook
    # Keyword arguments shared by both the error hook and the success hook.
    hook_kwargs = {
        "node": node,
        "catalog": catalog,
        "inputs": inputs,
        "is_async": is_async,
        "run_id": run_id,
    }

    try:
        outputs = node.run(inputs)
    except Exception as exc:
        hooks.on_node_error(error=exc, **hook_kwargs)  # pylint: disable=no-member
        raise exc
    else:
        # Kept outside the ``try`` body so that a failure inside the
        # ``after_node_run`` hook is not reported through ``on_node_error``.
        hooks.after_node_run(outputs=outputs, **hook_kwargs)  # pylint: disable=no-member
        return outputs
def _run_node_sequential(node: Node, catalog: DataCatalog, run_id: str = None) -> Node:
    """Run ``node`` with its inputs loaded and its outputs saved one at a time.

    The inputs are loaded from ``catalog``, the ``before_node_run`` hook is
    called, the node is run through ``_call_node_run`` (which calls the
    ``on_node_error`` / ``after_node_run`` hooks), and every output is then
    saved back to ``catalog``.

    Args:
        node: The ``Node`` to run.
        catalog: A ``DataCatalog`` used to load the node's inputs and save
            its outputs.
        run_id: Forwarded unchanged to the hooks.

    Returns:
        The node argument.

    Raises:
        Exception: Whatever ``node.run`` raised, after the ``on_node_error``
            hook has been called.
    """
    inputs = {name: catalog.load(name) for name in node.inputs}
    is_async = False

    hook_manager = get_hook_manager()
    hook_manager.hook.before_node_run(  # pylint: disable=no-member
        node=node, catalog=catalog, inputs=inputs, is_async=is_async, run_id=run_id
    )

    # The run / on_node_error / after_node_run sequence lives in the shared
    # helper so the sequential and async paths cannot drift apart.
    outputs = _call_node_run(node, catalog, inputs, is_async, run_id=run_id)

    for name, data in outputs.items():
        catalog.save(name, data)
    return node
def _run_node_async(node: Node, catalog: DataCatalog, run_id: str = None) -> Node:
    """Run ``node`` with its inputs loaded and its outputs saved on a thread pool.

    Every input is loaded by submitting ``catalog.load`` to a
    ``ThreadPoolExecutor``; once all loads have completed the
    ``before_node_run`` hook is called and the node is run through
    ``_call_node_run`` (which calls the ``on_node_error`` /
    ``after_node_run`` hooks). Every output is then saved by submitting
    ``catalog.save`` to the same pool.

    Args:
        node: The ``Node`` to run.
        catalog: A ``DataCatalog`` used to load the node's inputs and save
            its outputs.
        run_id: Forwarded unchanged to the hooks.

    Returns:
        The node argument.

    Raises:
        Exception: Whatever a load, ``node.run`` or a save raised. For saves,
            the exception of the first completed save that failed is raised.
    """
    with ThreadPoolExecutor() as pool:
        # Only this thread touches the dict; the workers just run catalog.load.
        load_futures = {
            name: pool.submit(catalog.load, name) for name in node.inputs
        }
        wait(load_futures.values(), return_when=ALL_COMPLETED)
        # ``result()`` re-raises here if the corresponding load failed.
        inputs = {name: future.result() for name, future in load_futures.items()}

        is_async = True
        hook_manager = get_hook_manager()
        hook_manager.hook.before_node_run(  # pylint: disable=no-member
            node=node, catalog=catalog, inputs=inputs, is_async=is_async, run_id=run_id
        )

        # The run / on_node_error / after_node_run sequence lives in the shared
        # helper so the sequential and async paths cannot drift apart.
        outputs = _call_node_run(node, catalog, inputs, is_async, run_id=run_id)

        save_futures = {
            pool.submit(catalog.save, name, data) for name, data in outputs.items()
        }
        for future in as_completed(save_futures):
            exception = future.exception()
            if exception:
                raise exception
    return node
def run_node(node: Node, catalog: DataCatalog) -> Node:
    """Execute one ``Node``, reading its inputs from and writing its outputs
    to the ``catalog``.

    Args:
        node: The ``Node`` to run.
        catalog: A ``DataCatalog`` containing the node's inputs and outputs.

    Returns:
        The node argument.

    """
    # Load every declared input before running the node.
    loaded = {}
    for input_name in node.inputs:
        loaded[input_name] = catalog.load(input_name)

    results = node.run(loaded)

    # Persist each produced dataset under its output name.
    for output_name, value in results.items():
        catalog.save(output_name, value)

    return node