def test_natural_schema_translation():
    """Check that the resolved schema translation equals the declared mapping.

    A node is created with an explicit ``schema_translation``; the translation
    computed from a block's realized schema to the node's single non-recursive
    input schema must serialize to that same mapping.
    """
    # TODO (carried over from the original, which gave no further detail)
    run_ctx = make_test_run_context()
    env = run_ctx.env
    test_graph = Graph(env)
    declared = {"f1": "mapped_f1"}
    node = test_graph.create_node(
        key="node1",
        function=function_t1_to_t2,
        input="n0",
        schema_translation=declared,
    )
    interface = node.get_interface()
    data_block = DataBlockMetadata(
        nominal_schema_key="_test.TestSchema1",
        realized_schema_key="_test.TestSchema1",
    )
    with env.md_api.begin():
        # Resolve source and target schemas in the same order as the call
        # arguments were originally evaluated.
        source_schema = data_block.realized_schema(env)
        node_input = interface.get_single_non_recursive_input()
        target = env.get_schema(node_input.schema_like)
        resolved = get_schema_translation(
            env,
            source_schema,
            target_schema=target,
            declared_schema_translation=declared,
        )
        assert resolved.as_dict() == declared
def test_declared_schema_translation():
    """Check that a declared translation is returned by get_schema_translation.

    The node declares ``{"f1": "mapped_f1"}`` as its schema translation; the
    translation computed between the block's realized schema and the node's
    single non-recursive input schema must have the same dict form.
    """
    run_ctx = make_test_run_context()
    env = run_ctx.env
    test_graph = Graph(env)
    declared_mapping = {"f1": "mapped_f1"}
    node = test_graph.create_node(
        key="node1",
        function=function_t1_to_t2,
        input="n0",
        schema_translation=declared_mapping,
    )
    node_interface = node.get_interface()
    metadata_block = DataBlockMetadata(
        nominal_schema_key="_test.TestSchema1",
        realized_schema_key="_test.TestSchema1",
    )
    with env.md_api.begin():
        realized = metadata_block.realized_schema(env)
        single_input = node_interface.get_single_non_recursive_input()
        target = env.get_schema(single_input.schema_like)
        result = get_schema_translation(
            env,
            realized,
            target_schema=target,
            declared_schema_translation=declared_mapping,
        )
        assert result.as_dict() == declared_mapping
def test_inputs():
    """Check that interfaces and bound interfaces can be built for wired nodes.

    Covers a node wired at creation time (``inputs=``), a node wired afterwards
    (``set_inputs``), and binding with and without an explicit input stream.
    """
    run_ctx = make_test_run_context()
    env = run_ctx.env
    g = graph()

    source_node = g.create_node(function=function_t1_source)
    transform_node = g.create_node(
        function=function_t1_to_t2, inputs={"input": source_node}
    )
    interface = transform_node.instantiate(env).get_interface()
    assert interface is not None

    multi_node = g.create_node(function=function_multiple_input)
    multi_node.set_inputs({"input": source_node})
    interface = multi_node.instantiate(env).get_interface()
    assert interface is not None

    source_inst = source_node.instantiate(env)
    multi_inst = multi_node.instantiate(env)
    with env.md_api.begin():
        # Source node: bind without supplying any input streams.
        source_exe = Executable(
            node=source_inst,
            function=source_inst.function,
            execution_context=run_ctx,
        )
        manager = NodeInterfaceManager(source_exe)
        bound = manager.get_bound_interface()
        assert bound is not None

        # Multi-input node: bind with an explicit managed stream for "input".
        multi_exe = Executable(
            node=multi_inst,
            function=multi_inst.function,
            execution_context=run_ctx,
        )
        manager = NodeInterfaceManager(multi_exe)
        data_block = DataBlockMetadata(
            nominal_schema_key="_test.TestSchema1",
            realized_schema_key="_test.TestSchema1",
        )
        env.md_api.add(data_block)
        input_stream = StreamBuilder().as_managed_stream(run_ctx)
        bound = manager.get_bound_interface({"input": input_stream})
        assert bound is not None
def test_inputs():
    """Check that interfaces and bound interfaces can be built for wired nodes.

    Covers a node wired at creation time (``upstream=``), a node wired
    afterwards (``set_upstream``), and binding with and without an explicit
    input stream inside a metadata session.
    """
    run_ctx = make_test_run_context()
    env = run_ctx.env
    g = graph()

    source_node = g.create_node(pipe=pipe_t1_source)
    transform_node = g.create_node(
        pipe=pipe_t1_to_t2, upstream={"input": source_node}
    )
    interface = transform_node.instantiate(env).get_interface()
    assert interface is not None

    multi_node = g.create_node(pipe=pipe_multiple_input)
    multi_node.set_upstream({"input": source_node})
    interface = multi_node.instantiate(env).get_interface()
    assert interface is not None

    run_ctx.graph = g.instantiate(env)
    with env.session_scope() as sess:
        # Source node: bind without supplying any input streams.
        manager = NodeInterfaceManager(
            ctx=run_ctx, sess=sess, node=source_node.instantiate(env)
        )
        bound = manager.get_bound_interface()
        assert bound is not None

        # Multi-input node: bind with an explicit managed stream for "input".
        manager = NodeInterfaceManager(
            ctx=run_ctx, sess=sess, node=multi_node.instantiate(env)
        )
        data_block = DataBlockMetadata(
            nominal_schema_key="_test.TestSchema1",
            realized_schema_key="_test.TestSchema1",
        )
        sess.add(data_block)
        input_stream = StreamBuilder().as_managed_stream(run_ctx, sess)
        bound = manager.get_bound_interface({"input": input_stream})
        assert bound is not None
def test_data_block_methods():
    """Exercise naming, schema lookup and record counting on block metadata.

    Verifies that a stored block has no name until ``get_name_for_storage`` is
    called, that the three schema accessors return the schemas matching their
    keys, and that ``compute_record_count`` sets ``record_count`` to 1 after a
    single record has been put into storage.
    """
    env = make_test_env()
    block = DataBlockMetadata(
        id=get_datablock_id(),
        inferred_schema_key="_test.TestSchema1",
        nominal_schema_key="_test.TestSchema2",
        realized_schema_key="_test.TestSchema3",
    )
    storage = env.get_default_local_python_storage()
    rows = [{"a": 1}]
    stored_block = StoredDataBlockMetadata(
        id=get_stored_datablock_id(),
        data_block_id=block.id,
        data_block=block,
        storage_url=storage.url,
        data_format=RecordsFormat,
    )
    with env.md_api.begin():
        for obj in (block, stored_block):
            env.md_api.add(obj)

        # The storage name is only assigned on first request.
        assert stored_block.name is None
        storage_name = stored_block.get_name_for_storage()
        assert len(storage_name) > 10
        assert stored_block.name == storage_name

        # The name is requested a second time here, as in the original test.
        storage.get_api().put(stored_block.get_name_for_storage(), rows)

        assert block.inferred_schema(env) == TestSchema1
        assert block.nominal_schema(env) == TestSchema2
        assert block.realized_schema(env) == TestSchema3

        block.compute_record_count()
        assert block.record_count == 1
def setup(self):
    """Prepare a run context, graph, four data blocks and four nodes.

    A metadata session is started via ``md_api.begin()`` and entered manually;
    it is not exited in this method. The data blocks and the graph metadata
    object are added through ``md_api``.
    """

    def new_block(schema_key):
        # Each fixture block uses the same key for nominal and realized schema.
        return DataBlockMetadata(
            nominal_schema_key=schema_key,
            realized_schema_key=schema_key,
        )

    run_ctx = make_test_run_context()
    self.ctx = run_ctx
    self.env = run_ctx.env

    self.sess = self.env.md_api.begin()
    self.sess.__enter__()

    self.g = Graph(self.env)
    self.graph = self.g.get_metadata_obj()

    self.dr1t1 = new_block("_test.TestSchema1")
    self.dr2t1 = new_block("_test.TestSchema1")
    self.dr1t2 = new_block("_test.TestSchema2")
    self.dr2t2 = new_block("_test.TestSchema2")

    self.node_source = self.g.create_node(
        key="snap_source", snap=snap_t1_source
    )
    self.node1 = self.g.create_node(
        key="snap1", snap=snap_t1_sink, input="snap_source"
    )
    self.node2 = self.g.create_node(
        key="snap2", snap=snap_t1_to_t2, input="snap_source"
    )
    self.node3 = self.g.create_node(
        key="snap3", snap=snap_generic, input="snap_source"
    )

    fixtures = (self.dr1t1, self.dr2t1, self.dr1t2, self.dr2t2, self.graph)
    for obj in fixtures:
        self.env.md_api.add(obj)
def setup(self):
    """Prepare a run context, graph, four data blocks and four nodes.

    A new metadata session is obtained from the environment after the nodes
    are created, and the data blocks plus the graph metadata object are added
    to it. The session is stored on ``self.sess`` and not closed here.
    """

    def new_block(schema_key):
        # Each fixture block uses the same key for nominal and realized schema.
        return DataBlockMetadata(
            nominal_schema_key=schema_key,
            realized_schema_key=schema_key,
        )

    run_ctx = make_test_run_context()
    self.ctx = run_ctx
    self.env = run_ctx.env
    self.g = Graph(self.env)
    self.graph = self.g.get_metadata_obj()

    self.dr1t1 = new_block("_test.TestSchema1")
    self.dr2t1 = new_block("_test.TestSchema1")
    self.dr1t2 = new_block("_test.TestSchema2")
    self.dr2t2 = new_block("_test.TestSchema2")

    self.node_source = self.g.create_node(
        key="pipe_source", pipe=pipe_t1_source
    )
    self.node1 = self.g.create_node(
        key="pipe1", pipe=pipe_t1_sink, upstream="pipe_source"
    )
    self.node2 = self.g.create_node(
        key="pipe2", pipe=pipe_t1_to_t2, upstream="pipe_source"
    )
    self.node3 = self.g.create_node(
        key="pipe3", pipe=pipe_generic, upstream="pipe_source"
    )

    self.sess = self.env._get_new_metadata_session()
    fixtures = (self.dr1t1, self.dr2t1, self.dr1t2, self.dr2t2, self.graph)
    for obj in fixtures:
        self.sess.add(obj)
def test_multi_env():
    """Check that two environments sharing one metadata DB count rows separately.

    Both environments use the same metadata storage URL but different random
    keys. After the first adds one ``DataBlockMetadata``, the second must still
    count zero before adding its own.
    """
    shared_db_url = get_tmp_sqlite_db_url()

    def new_environment():
        # Fresh random key per environment; the metadata database is shared.
        config = EnvironmentConfiguration(
            key=f"_test_{rand_str()}",
            metadata_storage_url=shared_db_url,
            settings=SnapflowSettings(add_core_module=False),
        )
        return Environment.from_config(config)

    first_env = new_environment()
    with first_env.md_api.begin():
        first_env.md_api.add(DataBlockMetadata(realized_schema_key="Any"))
        first_env.md_api.flush()
        assert first_env.md_api.count(select(DataBlockMetadata)) == 1

    second_env = new_environment()
    with second_env.md_api.begin():
        # The block added through the first environment must not be counted.
        assert second_env.md_api.count(select(DataBlockMetadata)) == 0
        second_env.md_api.add(DataBlockMetadata(realized_schema_key="Any"))
        second_env.md_api.flush()
        assert second_env.md_api.count(select(DataBlockMetadata)) == 1
def test_natural_schema_translation():
    """Check that the resolved schema translation equals the declared mapping.

    The translation is computed inside a metadata session, from the block's
    realized schema to the schema of the node interface's first input.
    """
    # TODO (carried over from the original, which gave no further detail)
    run_ctx = make_test_run_context()
    env = run_ctx.env
    test_graph = Graph(env)
    declared = {"f1": "mapped_f1"}
    node = test_graph.create_node(
        key="node1",
        pipe=pipe_t1_to_t2,
        upstream="n0",
        schema_translation=declared,
    )
    interface = node.get_interface()
    data_block = DataBlockMetadata(
        nominal_schema_key="_test.TestSchema1",
        realized_schema_key="_test.TestSchema1",
    )
    with env.session_scope() as sess:
        # Resolve source and target schemas in the same order as the call
        # arguments were originally evaluated.
        source_schema = data_block.realized_schema(env, sess)
        target = interface.inputs[0].schema(env, sess)
        resolved = get_schema_translation(
            env,
            sess,
            source_schema,
            target_schema=target,
            declared_schema_translation=declared,
        )
        assert resolved.as_dict() == declared
def create_stored_datablock(self) -> StoredDataBlockMetadata:
    """Build a new stored data block on the execution context's target storage.

    The underlying ``DataBlockMetadata`` gets a fresh id, the realized schema
    key ``"Any"``, no inferred or nominal schema, no record count, and is
    attributed to ``self.node.key``. The stored block gets its own fresh id,
    the target storage URL and no data format. Nothing is added to a session
    in this method.

    Returns:
        The newly constructed ``StoredDataBlockMetadata``.
    """
    new_block = DataBlockMetadata(
        id=get_datablock_id(),
        inferred_schema_key=None,
        nominal_schema_key=None,
        realized_schema_key="Any",
        record_count=None,
        created_by_node_key=self.node.key,
    )
    return StoredDataBlockMetadata(  # type: ignore
        id=get_stored_datablock_id(),
        data_block_id=new_block.id,
        data_block=new_block,
        storage_url=self.execution_context.target_storage.url,
        data_format=None,
    )
def test_generic_schema_resolution():
    """Check that a generic node resolves its nominal output schema to TestSchema1.

    The input block has nominal schema key ``_test.TestSchema1`` and realized
    schema key ``_test.TestSchema2``; the bound interface must have exactly one
    input and resolve the nominal output schema to ``TestSchema1``.
    """
    run_ctx = make_test_run_context()
    env = run_ctx.env
    test_graph = Graph(env)
    generic_node = test_graph.create_node(
        key="node1", pipe=pipe_generic, upstream="n0"
    )
    with env.session_scope() as sess:
        manager = NodeInterfaceManager(ctx=run_ctx, sess=sess, node=generic_node)
        data_block = DataBlockMetadata(
            nominal_schema_key="_test.TestSchema1",
            realized_schema_key="_test.TestSchema2",
        )
        sess.add(data_block)
        sess.flush([data_block])
        input_stream = block_as_stream(data_block, run_ctx, sess)
        bound = manager.get_bound_interface({"input": input_stream})
        assert len(bound.inputs) == 1
        assert bound.resolve_nominal_output_schema(env, sess) is TestSchema1
def test_generic_schema_resolution():
    """Check that a generic node resolves its nominal output schema to TestSchema1.

    The input block has nominal schema key ``_test.TestSchema1`` and realized
    schema key ``_test.TestSchema2``; the bound interface must have exactly one
    input and resolve the nominal output schema to ``TestSchema1``.
    """
    run_ctx = make_test_run_context()
    env = run_ctx.env
    test_graph = Graph(env)
    generic_node = test_graph.create_node(
        key="node1", function=function_generic, input="n0"
    )
    with env.md_api.begin():
        executable = Executable(
            node=generic_node,
            function=generic_node.function,
            execution_context=run_ctx,
        )
        manager = NodeInterfaceManager(executable)
        data_block = DataBlockMetadata(
            nominal_schema_key="_test.TestSchema1",
            realized_schema_key="_test.TestSchema2",
        )
        env.md_api.add(data_block)
        env.md_api.flush([data_block])
        input_stream = block_as_stream(data_block, run_ctx)
        bound = manager.get_bound_interface({"input": input_stream})
        assert len(bound.inputs) == 1
        assert bound.resolve_nominal_output_schema(env) is TestSchema1