Example #1
0
def test_natural_schema_translation():
    """Check the translation resolved for a node input against a data block.

    A node is created with a declared ``schema_translation``; the test then
    asks ``get_schema_translation`` to translate the block's realized schema
    into the schema of the node's single non-recursive input and asserts the
    result, as a dict, equals the declared mapping.
    """
    # TODO: a declared translation is passed in below, so the "natural"
    # (undeclared) path is presumably not exercised yet -- confirm and extend.
    run_ctx = make_test_run_context()
    environment = run_ctx.env
    declared = {"f1": "mapped_f1"}
    node = Graph(environment).create_node(
        key="node1",
        function=function_t1_to_t2,
        input="n0",
        schema_translation=declared,
    )
    interface = node.get_interface()
    data_block = DataBlockMetadata(
        nominal_schema_key="_test.TestSchema1",
        realized_schema_key="_test.TestSchema1",
    )
    with environment.md_api.begin():
        source_schema = data_block.realized_schema(environment)
        node_input = interface.get_single_non_recursive_input()
        target = environment.get_schema(node_input.schema_like)
        resolved = get_schema_translation(
            environment,
            source_schema,
            target_schema=target,
            declared_schema_translation=declared,
        )
        assert resolved.as_dict() == declared
Example #2
0
def test_declared_schema_translation():
    """Assert that a declared schema translation is what gets resolved.

    The node is created with ``schema_translation={"f1": "mapped_f1"}`` and
    the same mapping is handed to ``get_schema_translation`` as the declared
    translation; the resolved translation's dict form must equal it.
    """
    exec_ctx = make_test_run_context()
    test_env = exec_ctx.env
    test_graph = Graph(test_env)
    mapping = {"f1": "mapped_f1"}
    node_kwargs = dict(
        key="node1",
        function=function_t1_to_t2,
        input="n0",
        schema_translation=mapping,
    )
    node_interface = test_graph.create_node(**node_kwargs).get_interface()
    schema_key = "_test.TestSchema1"
    metadata = DataBlockMetadata(
        nominal_schema_key=schema_key,
        realized_schema_key=schema_key,
    )
    with test_env.md_api.begin():
        realized = metadata.realized_schema(test_env)
        wanted = test_env.get_schema(
            node_interface.get_single_non_recursive_input().schema_like
        )
        result = get_schema_translation(
            test_env,
            realized,
            target_schema=wanted,
            declared_schema_translation=mapping,
        )
        assert result.as_dict() == mapping
Example #3
0
def test_inputs():
    """Smoke-test interface lookup and input binding for function nodes.

    Declares a source node plus two downstream nodes wired to it, checks that
    each instantiated downstream node exposes an interface, then checks that
    ``NodeInterfaceManager.get_bound_interface`` returns something both with
    no arguments (source node) and with a managed stream bound to ``"input"``
    (multiple-input node).
    """
    run_ctx = make_test_run_context()
    environment = run_ctx.env
    declared_graph = graph()

    source_decl = declared_graph.create_node(function=function_t1_source)
    transform_decl = declared_graph.create_node(
        function=function_t1_to_t2, inputs={"input": source_decl}
    )
    assert transform_decl.instantiate(environment).get_interface() is not None

    multi_decl = declared_graph.create_node(function=function_multiple_input)
    multi_decl.set_inputs({"input": source_decl})
    assert multi_decl.instantiate(environment).get_interface() is not None

    source_node = source_decl.instantiate(environment)
    multi_node = multi_decl.instantiate(environment)
    with environment.md_api.begin():
        source_exe = Executable(
            node=source_node,
            function=source_node.function,
            execution_context=run_ctx,
        )
        source_manager = NodeInterfaceManager(source_exe)
        assert source_manager.get_bound_interface() is not None

        multi_exe = Executable(
            node=multi_node,
            function=multi_node.function,
            execution_context=run_ctx,
        )
        multi_manager = NodeInterfaceManager(multi_exe)
        # A block is stored in the metadata session before the stream is bound.
        stored_block = DataBlockMetadata(
            nominal_schema_key="_test.TestSchema1",
            realized_schema_key="_test.TestSchema1",
        )
        environment.md_api.add(stored_block)
        managed_stream = StreamBuilder().as_managed_stream(run_ctx)
        bound = multi_manager.get_bound_interface({"input": managed_stream})
        assert bound is not None
Example #4
0
def test_inputs():
    """Smoke-test interface lookup and upstream binding for pipe nodes.

    Declares a source pipe node and two downstream nodes fed from it, checks
    each instantiated downstream node exposes an interface, then, inside a
    metadata session, checks ``get_bound_interface`` returns something for the
    source node (no inputs) and for the multiple-input node (managed stream
    bound to ``"input"``).
    """
    run_ctx = make_test_run_context()
    environment = run_ctx.env
    declared_graph = graph()

    source_decl = declared_graph.create_node(pipe=pipe_t1_source)
    transform_decl = declared_graph.create_node(
        pipe=pipe_t1_to_t2, upstream={"input": source_decl}
    )
    assert transform_decl.instantiate(environment).get_interface() is not None

    multi_decl = declared_graph.create_node(pipe=pipe_multiple_input)
    multi_decl.set_upstream({"input": source_decl})
    assert multi_decl.instantiate(environment).get_interface() is not None

    run_ctx.graph = declared_graph.instantiate(environment)
    with environment.session_scope() as session:
        source_node = source_decl.instantiate(environment)
        source_manager = NodeInterfaceManager(
            ctx=run_ctx, sess=session, node=source_node
        )
        assert source_manager.get_bound_interface() is not None

        multi_node = multi_decl.instantiate(environment)
        multi_manager = NodeInterfaceManager(
            ctx=run_ctx, sess=session, node=multi_node
        )
        # A block is added to the session before the stream is bound.
        stored_block = DataBlockMetadata(
            nominal_schema_key="_test.TestSchema1",
            realized_schema_key="_test.TestSchema1",
        )
        session.add(stored_block)
        managed_stream = StreamBuilder().as_managed_stream(run_ctx, session)
        bound = multi_manager.get_bound_interface({"input": managed_stream})
        assert bound is not None
def test_data_block_methods():
    """Exercise naming, schema accessors and record counting on data blocks.

    Creates a ``DataBlockMetadata`` with three distinct schema keys and a
    ``StoredDataBlockMetadata`` pointing at the default local Python storage,
    then asserts that:

    * the stored block has no name until ``get_name_for_storage`` is called,
      after which ``name`` holds the returned value (longer than 10 chars);
    * the inferred / nominal / realized schema accessors return
      ``TestSchema1`` / ``TestSchema2`` / ``TestSchema3`` respectively;
    * after one record is written under that name, ``compute_record_count``
      leaves ``record_count`` at 1.
    """
    environment = make_test_env()
    data_block = DataBlockMetadata(
        id=get_datablock_id(),
        inferred_schema_key="_test.TestSchema1",
        nominal_schema_key="_test.TestSchema2",
        realized_schema_key="_test.TestSchema3",
    )
    storage = environment.get_default_local_python_storage()
    stored_block = StoredDataBlockMetadata(
        id=get_stored_datablock_id(),
        data_block_id=data_block.id,
        data_block=data_block,
        storage_url=storage.url,
        data_format=RecordsFormat,
    )
    single_record = [{"a": 1}]
    with environment.md_api.begin():
        for metadata_obj in (data_block, stored_block):
            environment.md_api.add(metadata_obj)

        # The name is only populated once it has been requested.
        assert stored_block.name is None
        storage_name = stored_block.get_name_for_storage()
        assert len(storage_name) > 10
        assert stored_block.name == storage_name

        storage.get_api().put(stored_block.get_name_for_storage(), single_record)

        assert data_block.inferred_schema(environment) == TestSchema1
        assert data_block.nominal_schema(environment) == TestSchema2
        assert data_block.realized_schema(environment) == TestSchema3

        data_block.compute_record_count()
        assert data_block.record_count == 1
Example #6
0
 def setup(self):
     """Build the shared fixtures used by the tests in this class.

     Creates a test run context, opens a metadata session on its
     environment, builds a ``Graph`` with one source node and three nodes
     fed from ``"snap_source"``, creates four ``DataBlockMetadata`` objects
     (two keyed to ``_test.TestSchema1``, two to ``_test.TestSchema2``) and
     adds the blocks and the graph's metadata object to ``env.md_api``.
     """
     ctx = make_test_run_context()
     self.ctx = ctx
     self.env = ctx.env
     # The context returned by begin() is entered by hand so it stays open
     # after this method returns.
     # NOTE(review): nothing in this method calls __exit__ -- presumably a
     # matching teardown closes the session; confirm.
     self.sess = self.env.md_api.begin()
     self.sess.__enter__()
     self.g = Graph(self.env)
     self.graph = self.g.get_metadata_obj()
     # Naming: dr<N>t<M> is block number N using TestSchema<M> for both the
     # nominal and the realized schema key.
     self.dr1t1 = DataBlockMetadata(
         nominal_schema_key="_test.TestSchema1",
         realized_schema_key="_test.TestSchema1",
     )
     self.dr2t1 = DataBlockMetadata(
         nominal_schema_key="_test.TestSchema1",
         realized_schema_key="_test.TestSchema1",
     )
     self.dr1t2 = DataBlockMetadata(
         nominal_schema_key="_test.TestSchema2",
         realized_schema_key="_test.TestSchema2",
     )
     self.dr2t2 = DataBlockMetadata(
         nominal_schema_key="_test.TestSchema2",
         realized_schema_key="_test.TestSchema2",
     )
     # One source node; the other three nodes all take it as their input.
     self.node_source = self.g.create_node(key="snap_source",
                                           snap=snap_t1_source)
     self.node1 = self.g.create_node(key="snap1",
                                     snap=snap_t1_sink,
                                     input="snap_source")
     self.node2 = self.g.create_node(key="snap2",
                                     snap=snap_t1_to_t2,
                                     input="snap_source")
     self.node3 = self.g.create_node(key="snap3",
                                     snap=snap_generic,
                                     input="snap_source")
     # Register the blocks and the graph metadata with the open session.
     self.env.md_api.add(self.dr1t1)
     self.env.md_api.add(self.dr2t1)
     self.env.md_api.add(self.dr1t2)
     self.env.md_api.add(self.dr2t2)
     self.env.md_api.add(self.graph)
 def setup(self):
     """Build the shared fixtures used by the tests in this class.

     Creates a test run context and a ``Graph`` on its environment, four
     ``DataBlockMetadata`` objects (two keyed to ``_test.TestSchema1``, two
     to ``_test.TestSchema2``), one source node plus three nodes whose
     upstream is ``"pipe_source"``, and finally a new metadata session to
     which the blocks and the graph's metadata object are added.
     """
     run_ctx = make_test_run_context()
     self.ctx = run_ctx
     self.env = run_ctx.env
     self.g = Graph(self.env)
     self.graph = self.g.get_metadata_obj()

     def new_block(schema_key):
         # Nominal and realized schema share the same key in these fixtures.
         return DataBlockMetadata(
             nominal_schema_key=schema_key,
             realized_schema_key=schema_key,
         )

     self.dr1t1 = new_block("_test.TestSchema1")
     self.dr2t1 = new_block("_test.TestSchema1")
     self.dr1t2 = new_block("_test.TestSchema2")
     self.dr2t2 = new_block("_test.TestSchema2")

     make_node = self.g.create_node
     self.node_source = make_node(key="pipe_source", pipe=pipe_t1_source)
     self.node1 = make_node(
         key="pipe1", pipe=pipe_t1_sink, upstream="pipe_source"
     )
     self.node2 = make_node(
         key="pipe2", pipe=pipe_t1_to_t2, upstream="pipe_source"
     )
     self.node3 = make_node(
         key="pipe3", pipe=pipe_generic, upstream="pipe_source"
     )

     self.sess = self.env._get_new_metadata_session()
     fixtures = (self.dr1t1, self.dr2t1, self.dr1t2, self.dr2t2, self.graph)
     for fixture in fixtures:
         self.sess.add(fixture)
Example #8
0
def test_multi_env():
    """Check data block counts for two environments sharing one metadata DB.

    Two environments are created from configurations that use the same
    SQLite URL but different random keys. After the first environment adds
    and flushes one ``DataBlockMetadata``, the second must still count zero
    blocks, and one after adding its own.
    """
    shared_db_url = get_tmp_sqlite_db_url()

    def new_environment():
        # Each call draws a fresh random key but reuses the same database.
        config = EnvironmentConfiguration(
            key=f"_test_{rand_str()}",
            metadata_storage_url=shared_db_url,
            settings=SnapflowSettings(add_core_module=False),
        )
        return Environment.from_config(config)

    first_env = new_environment()
    with first_env.md_api.begin():
        first_api = first_env.md_api
        first_api.add(DataBlockMetadata(realized_schema_key="Any"))
        first_api.flush()
        assert first_api.count(select(DataBlockMetadata)) == 1

    second_env = new_environment()
    with second_env.md_api.begin():
        second_api = second_env.md_api
        assert second_api.count(select(DataBlockMetadata)) == 0
        second_api.add(DataBlockMetadata(realized_schema_key="Any"))
        second_api.flush()
        assert second_api.count(select(DataBlockMetadata)) == 1
Example #9
0
def test_natural_schema_translation():
    """Check the translation resolved for a pipe node's first input.

    A node is created with a declared ``schema_translation``; inside a
    metadata session the test asks ``get_schema_translation`` to translate
    the block's realized schema into the schema of the interface's first
    input and asserts the result, as a dict, equals the declared mapping.
    """
    # TODO: a declared translation is passed in below, so the "natural"
    # (undeclared) path is presumably not exercised yet -- confirm and extend.
    run_ctx = make_test_run_context()
    environment = run_ctx.env
    declared = {"f1": "mapped_f1"}
    node = Graph(environment).create_node(
        key="node1",
        pipe=pipe_t1_to_t2,
        upstream="n0",
        schema_translation=declared,
    )
    interface = node.get_interface()
    data_block = DataBlockMetadata(
        nominal_schema_key="_test.TestSchema1",
        realized_schema_key="_test.TestSchema1",
    )
    with environment.session_scope() as session:
        source_schema = data_block.realized_schema(environment, session)
        target = interface.inputs[0].schema(environment, session)
        resolved = get_schema_translation(
            environment,
            session,
            source_schema,
            target_schema=target,
            declared_schema_translation=declared,
        )
        assert resolved.as_dict() == declared
Example #10
0
 def create_stored_datablock(self) -> StoredDataBlockMetadata:
     """Create a new stored data block record for this node's output.

     A fresh ``DataBlockMetadata`` is built with realized schema ``"Any"``,
     no inferred or nominal schema, no record count, and this node's key as
     its creator. It is wrapped in a ``StoredDataBlockMetadata`` that points
     at the execution context's target storage and has no data format yet.

     Returns:
         The new ``StoredDataBlockMetadata``; the data block is reachable
         through its ``data_block`` attribute.
     """
     new_block = DataBlockMetadata(
         id=get_datablock_id(),
         inferred_schema_key=None,
         nominal_schema_key=None,
         realized_schema_key="Any",
         record_count=None,
         created_by_node_key=self.node.key,
     )
     return StoredDataBlockMetadata(  # type: ignore
         id=get_stored_datablock_id(),
         data_block_id=new_block.id,
         data_block=new_block,
         storage_url=self.execution_context.target_storage.url,
         data_format=None,
     )
Example #11
0
def test_generic_schema_resolution():
    """Check nominal output schema resolution for a generic pipe node.

    A block whose nominal schema key is ``_test.TestSchema1`` (and realized
    key ``_test.TestSchema2``) is bound as the node's ``"input"`` stream; the
    bound interface must have exactly one input and resolve its nominal
    output schema to ``TestSchema1``.
    """
    run_ctx = make_test_run_context()
    environment = run_ctx.env
    generic_node = Graph(environment).create_node(
        key="node1", pipe=pipe_generic, upstream="n0"
    )
    with environment.session_scope() as session:
        manager = NodeInterfaceManager(
            ctx=run_ctx, sess=session, node=generic_node
        )
        data_block = DataBlockMetadata(
            nominal_schema_key="_test.TestSchema1",
            realized_schema_key="_test.TestSchema2",
        )
        session.add(data_block)
        session.flush([data_block])
        input_stream = block_as_stream(data_block, run_ctx, session)
        bound = manager.get_bound_interface({"input": input_stream})
        assert len(bound.inputs) == 1
        resolved = bound.resolve_nominal_output_schema(environment, session)
        assert resolved is TestSchema1
Example #12
0
def test_generic_schema_resolution():
    """Check nominal output schema resolution for a generic function node.

    A block whose nominal schema key is ``_test.TestSchema1`` (and realized
    key ``_test.TestSchema2``) is bound as the node's ``"input"`` stream; the
    bound interface must have exactly one input and resolve its nominal
    output schema to ``TestSchema1``.
    """
    run_ctx = make_test_run_context()
    environment = run_ctx.env
    generic_node = Graph(environment).create_node(
        key="node1", function=function_generic, input="n0"
    )
    with environment.md_api.begin():
        executable = Executable(
            node=generic_node,
            function=generic_node.function,
            execution_context=run_ctx,
        )
        manager = NodeInterfaceManager(executable)
        data_block = DataBlockMetadata(
            nominal_schema_key="_test.TestSchema1",
            realized_schema_key="_test.TestSchema2",
        )
        environment.md_api.add(data_block)
        environment.md_api.flush([data_block])
        input_stream = block_as_stream(data_block, run_ctx)
        bound = manager.get_bound_interface({"input": input_stream})
        assert len(bound.inputs) == 1
        resolved = bound.resolve_nominal_output_schema(environment)
        assert resolved is TestSchema1