def test_exe_output():
    """End-to-end check that executing a single source node produces one
    output block with the expected records, schemas, alias, and log entry."""
    env = make_test_env()
    env.add_module(core)
    g = Graph(env)
    # env.add_storage("python://test")
    # rt = env.runtimes[0]
    # TODO: this is error because no data copy between SAME storage engines (but DIFFERENT storage urls) currently
    # ec = env.get_run_context(g, current_runtime=rt, target_storage=env.storages[0])
    # ec = env.get_run_context(g, current_runtime=rt, target_storage=rt.as_storage())
    output_alias = "node_output"
    node = g.create_node(key="node", snap=snap_dl_source, output_alias=output_alias)
    exe = env.get_executable(node)
    exec_result = ExecutionManager(exe).execute()
    with env.md_api.begin():
        block = exec_result.get_output_block(env)
        assert block is not None
        assert block.as_records() == mock_dl_output
        assert block.nominal_schema is TestSchema4
        assert len(block.realized_schema.fields) == len(TestSchema4.fields)
        # The alias row must point at the block we just produced
        alias_row = env.md_api.execute(
            select(Alias).filter(Alias.alias == output_alias)
        ).scalar_one_or_none()
        assert alias_row.data_block_id == block.data_block_id
        # Exactly one block log, recording the output direction
        assert env.md_api.count(select(DataBlockLog)) == 1
        block_log = env.md_api.execute(select(DataBlockLog)).scalar_one_or_none()
        assert block_log.data_block_id == block.data_block_id
        assert block_log.direction == Direction.OUTPUT
def test_non_terminating_snap():
    """A snap that returns nothing should yield no output block."""

    def never_stop(input: Optional[DataBlock] = None) -> DataFrame:
        pass

    env = make_test_env()
    g = Graph(env)
    node = g.create_node(key="node", snap=never_stop)
    exe = env.get_executable(node)
    exec_result = ExecutionManager(exe).execute()
    assert exec_result.get_output_block(env) is None
def test_non_terminating_pipe():
    """A pipe that returns nothing should produce no output, even when run
    to exhaustion."""

    def never_stop(input: Optional[DataBlock] = None) -> DataFrame:
        pass

    env = make_test_env()
    g = Graph(env)
    rt = env.runtimes[0]
    ec = env.get_run_context(g, current_runtime=rt)
    node = g.create_node(key="node", pipe=never_stop)
    em = ExecutionManager(ec)
    # NOTE(review): this test uses the older pipe/run-context API, unlike its
    # siblings which go through env.get_executable — presumably intentional,
    # but worth confirming during any API migration.
    result = em.execute(node, to_exhaustion=True)
    assert result is None
def test_non_terminating_function_with_reference_input():
    """A function with a Reference input that returns nothing should yield
    no output block, even when fed by an upstream dataframe source."""

    def never_stop(input: Optional[Reference]) -> DataFrame:
        # Does not use input but doesn't matter cause reference
        pass

    env = make_test_env()
    g = Graph(env)
    source = g.create_node(
        function="core.import_dataframe",
        params={"dataframe": pd.DataFrame({"a": range(10)})},
    )
    node = g.create_node(key="node", function=never_stop, input=source)
    # Run the upstream source first so the reference input has data available
    source_exe = env.get_executable(source)
    # TODO: reference inputs need to log too? (So they know when to update)
    # with env.md_api.begin():
    #     assert env.md_api.count(select(DataBlockLog)) == 1
    ExecutionManager(source_exe).execute()
    node_exe = env.get_executable(node)
    result = ExecutionManager(node_exe).execute()
    assert result.get_output_block(env) is None
def test_exe():
    """Executing a source snap with no output should still record exactly one
    SnapLog with the node's key, graph hash, empty states, and empty params."""
    env = make_test_env()
    g = Graph(env)
    node = g.create_node(key="node", snap=snap_t1_source)
    exe = env.get_executable(node)
    exec_result = ExecutionManager(exe).execute()
    with env.md_api.begin():
        assert not exec_result.output_blocks
        assert env.md_api.count(select(SnapLog)) == 1
        snap_log = env.md_api.execute(select(SnapLog)).scalar_one_or_none()
        assert snap_log.node_key == node.key
        assert snap_log.graph_id == g.get_metadata_obj().hash
        assert snap_log.node_start_state == {}
        assert snap_log.node_end_state == {}
        assert snap_log.snap_key == node.snap.key
        assert snap_log.snap_params == {}
def run(
    self, graph: Graph, target_storage: Storage = None, **kwargs
) -> Iterator[ExecutionManager]:
    """Yield an ExecutionManager bound to a run context for `graph`.

    Intended for use as a (generator-based) context manager: the caller
    receives the manager, and any exception raised inside the `with` body
    propagates after being logged.

    NOTE(review): `target_storage` should be annotated `Optional[Storage]`;
    left as-is because `Optional` may not be imported in this module's header.

    Args:
        graph: the graph to execute against.
        target_storage: storage for outputs; passed through to
            `get_run_context` (may be None).
        **kwargs: forwarded to `get_run_context`.

    Yields:
        An ExecutionManager wrapping the newly created run context.
    """
    from snapflow.core.execution import ExecutionManager

    # self.session.begin_nested()
    ec = self.get_run_context(graph, target_storage=target_storage, **kwargs)
    em = ExecutionManager(ec)
    logger.debug(f"executing on graph {graph.adjacency_list()}")
    try:
        yield em
        # self.session.commit()
        logger.debug("COMMITTED")
    except Exception:
        # self.session.rollback()
        logger.debug("ROLLED")
        # Bare `raise` (instead of `raise e`) preserves the original traceback
        raise
    finally:
        # TODO:
        # self.validate_and_clean_data_blocks(delete_intermediate=True)
        pass
def make_test_execution_manager(**kwargs) -> ExecutionManager:
    """Build an ExecutionManager around a freshly made test run context.

    All keyword arguments are forwarded to `make_test_run_context`.
    """
    ctx = make_test_run_context(**kwargs)
    return ExecutionManager(ctx)