Esempio n. 1
0
def test_exe_output():
    """Execute a source node and verify its output block, alias and block log."""
    alias_name = "node_output"
    env = make_test_env()
    env.add_module(core)
    graph = Graph(env)
    # TODO: running against a separate target storage (e.g. "python://test") is
    # not exercised here: per the original note, data is currently not copied
    # between the SAME storage engine when the storage urls are DIFFERENT.
    source_node = graph.create_node(
        key="node", snap=snap_dl_source, output_alias=alias_name
    )
    executable = env.get_executable(source_node)
    result = ExecutionManager(executable).execute()
    with env.md_api.begin():
        block = result.get_output_block(env)
        assert block is not None
        assert block.as_records() == mock_dl_output
        assert block.nominal_schema is TestSchema4
        assert len(block.realized_schema.fields) == len(TestSchema4.fields)

        # The alias must point at the block that was just produced.
        alias_query = select(Alias).filter(Alias.alias == alias_name)
        alias_row = env.md_api.execute(alias_query).scalar_one_or_none()
        assert alias_row.data_block_id == block.data_block_id

        # A single OUTPUT log entry is expected for that block.
        assert env.md_api.count(select(DataBlockLog)) == 1
        log_row = env.md_api.execute(select(DataBlockLog)).scalar_one_or_none()
        assert log_row.data_block_id == block.data_block_id
        assert log_row.direction == Direction.OUTPUT
Esempio n. 2
0
def test_data_block_methods():
    """Check naming, schema lookup and record counting on block metadata."""
    env = make_test_env()
    block_md = DataBlockMetadata(
        id=get_datablock_id(),
        inferred_schema_key="_test.TestSchema1",
        nominal_schema_key="_test.TestSchema2",
        realized_schema_key="_test.TestSchema3",
    )
    storage = env.get_default_local_python_storage()
    rows = [{"a": 1}]
    stored_md = StoredDataBlockMetadata(
        id=get_stored_datablock_id(),
        data_block_id=block_md.id,
        data_block=block_md,
        storage_url=storage.url,
        data_format=RecordsFormat,
    )
    with env.md_api.begin():
        env.md_api.add(block_md)
        env.md_api.add(stored_md)

        # The name starts unset and is populated by get_name_for_storage().
        assert stored_md.name is None
        generated_name = stored_md.get_name_for_storage()
        assert len(generated_name) > 10
        assert stored_md.name == generated_name

        # Write the records under the stored block's name.
        storage_api = storage.get_api()
        storage_api.put(stored_md.get_name_for_storage(), rows)

        assert block_md.inferred_schema(env) == TestSchema1
        assert block_md.nominal_schema(env) == TestSchema2
        assert block_md.realized_schema(env) == TestSchema3

        block_md.compute_record_count()
        assert block_md.record_count == 1
Esempio n. 3
0
def test_worker_output():
    """Run a source pipe through a Worker and verify the output block and alias."""
    env = make_test_env()
    env.add_module(core)
    graph = Graph(env)
    # env.add_storage("python://test")
    with env.session_scope() as sess:
        runtime = env.runtimes[0]
        # TODO: targeting env.storages[0] is an error because there is currently
        # no data copy between SAME storage engines with DIFFERENT storage urls,
        # so the runtime's own storage is used as the target instead.
        run_ctx = env.get_run_context(
            graph, current_runtime=runtime, target_storage=runtime.as_storage()
        )
        alias_name = "node_output"
        node = graph.create_node(
            key="node", pipe=pipe_dl_source, output_alias=alias_name
        )
        worker = Worker(run_ctx)
        interface_mgr = NodeInterfaceManager(run_ctx, sess, node)
        bound_interface = interface_mgr.get_bound_interface()
        executable = Executable(
            node.key,
            CompiledPipe(node.pipe.key, node.pipe),
            bound_interface,
        )
        run_result = worker.execute(executable)

        raw_block = run_result.output_block
        assert raw_block is not None
        merged_block = sess.merge(raw_block)
        block = merged_block.as_managed_data_block(run_ctx, sess)
        assert block.as_records() == mock_dl_output
        assert block.nominal_schema is TestSchema4
        assert len(block.realized_schema.fields) == len(TestSchema4.fields)

        # The alias must point at the block that was just produced.
        alias_row = sess.query(Alias).filter(Alias.alias == alias_name).first()
        assert alias_row.data_block_id == block.data_block_id
Esempio n. 4
0
def test_node_no_inputs():
    """A node built from a source pipe is hashable and declares no inputs."""
    env = make_test_env()
    graph = Graph(env)
    source_pipe = pipe(pipe_t1_source)
    node1 = graph.create_node(key="node1", pipe=source_pipe)

    # Using the node as a dict key exercises its hash implementation.
    lookup = {node1: node1}
    assert lookup[node1] is node1  # Test hash

    interface = node1.get_interface()
    assert interface.inputs == []
    assert interface.output is not None
    assert node1.declared_inputs == {}
Esempio n. 5
0
def test_non_terminating_snap():
    """Executing a snap that produces nothing yields no output block."""

    # Snap with an optional input that never returns any data.
    def never_stop(input: Optional[DataBlock] = None) -> DataFrame:
        pass

    env = make_test_env()
    graph = Graph(env)
    snap_node = graph.create_node(key="node", snap=never_stop)
    executable = env.get_executable(snap_node)
    manager = ExecutionManager(executable)
    result = manager.execute()
    assert result.get_output_block(env) is None
Esempio n. 6
0
def test_node_no_inputs():
    """A node built from a source function is hashable and declares no inputs."""
    env = make_test_env()
    graph = Graph(env)
    source_fn = datafunction(function_t1_source)
    node1 = graph.create_node(key="node1", function=source_fn)

    # Using the node as a dict key exercises its hash implementation.
    lookup = {node1: node1}
    assert lookup[node1] is node1  # Test hash

    interface = node1.get_interface()
    assert interface.inputs == {}
    assert interface.outputs != {}
    assert node1.declared_inputs == {}
Esempio n. 7
0
def test_node_inputs():
    """A sink pipe wired to an upstream node exposes one input named "input"."""
    env = make_test_env()
    graph = Graph(env)

    source_pipe = pipe(pipe_t1_source)
    upstream_node = graph.create_node(key="node", pipe=source_pipe)

    sink_pipe = pipe(pipe_t1_sink)
    node1 = graph.create_node(key="node1", pipe=sink_pipe, upstream=upstream_node)

    interface = node1.get_interface()
    assert len(interface.inputs) == 1
    assert interface.output == make_default_output_annotation()
    declared_names = list(node1.declared_inputs.keys())
    assert declared_names == ["input"]
Esempio n. 8
0
def test_node_inputs():
    """A sink snap wired to another node exposes one input named "input"."""
    env = make_test_env()
    graph = Graph(env)

    source_snap = Snap(snap_t1_source)
    upstream_node = graph.create_node(key="node", snap=source_snap)

    sink_snap = Snap(snap_t1_sink)
    node1 = graph.create_node(key="node1", snap=sink_snap, input=upstream_node)

    interface = node1.get_interface()
    assert len(interface.inputs) == 1
    assert interface.output == make_default_output()
    declared_names = list(node1.declared_inputs.keys())
    assert declared_names == ["input"]
Esempio n. 9
0
def test_node_inputs():
    """A sink function wired to another node exposes one input named "input"."""
    env = make_test_env()
    graph = Graph(env)

    source_fn = datafunction(function_t1_source)
    upstream_node = graph.create_node(key="node", function=source_fn)

    sink_fn = datafunction(function_t1_sink)
    node1 = graph.create_node(key="node1", function=sink_fn, input=upstream_node)

    interface = node1.get_interface()
    assert len(interface.inputs) == 1
    assert interface.outputs == DEFAULT_OUTPUTS
    declared_names = list(node1.declared_inputs.keys())
    assert declared_names == ["input"]
Esempio n. 10
0
def test_node_params():
    """The "test" param given at node creation reaches the function when run."""
    env = make_test_env()
    graph = Graph(env)
    seen_values = []

    # Records whatever value arrives for the `test` parameter.
    def function_ctx(ctx: DataFunctionContext, test: str):
        seen_values.append(test)

    ctx_node = graph.create_node(
        key="ctx", function=function_ctx, params={"test": 1}
    )
    env.run_node(ctx_node, graph)
    assert seen_values == [1]
Esempio n. 11
0
def test_any_schema_interface():
    """Unparameterized DataBlock/DataFrame annotations map to the "Any" schema."""
    env = make_test_env()
    env.add_module(core)

    # Pipe whose input and output annotations carry no explicit schema.
    def pipe_any(input: DataBlock) -> DataFrame:
        pass

    wrapped = pipe(pipe_any)
    interface = wrapped.get_interface()
    first_input = interface.inputs[0]
    assert first_input.schema_like == "Any"
    assert interface.output.schema_like == "Any"
Esempio n. 12
0
def test_any_schema_interface():
    """Unparameterized DataBlock/DataFrame annotations map to the "Any" schema."""
    env = make_test_env()
    env.add_module(core)

    # Function whose input and output annotations carry no explicit schema.
    def function_any(input: DataBlock) -> DataFrame:
        pass

    wrapped = datafunction(function_any)
    interface = wrapped.get_interface()
    single_input = interface.get_single_non_recursive_input()
    assert single_input.schema_like == "Any"
    default_output = interface.get_default_output()
    assert default_output.schema_like == "Any"
Esempio n. 13
0
def test_node_config():
    """The "test" config value set on the node is readable from the pipe context."""
    env = make_test_env()
    graph = Graph(env)
    seen_values = []

    # Records the "test" config value visible through the context.
    def pipe_ctx(ctx: PipeContext):
        seen_values.append(ctx.get_config_value("test"))

    node_config = {"test": 1, "extra_arg": 2}
    ctx_node = graph.create_node(key="ctx", pipe=pipe_ctx, config=node_config)
    with env.run(graph) as executor:
        executor.execute(ctx_node)
    assert seen_values == [1]
Esempio n. 14
0
def test_non_terminating_pipe():
    """Running a pipe that produces nothing to exhaustion returns None."""

    # Pipe with an optional input that never returns any data.
    def never_stop(input: Optional[DataBlock] = None) -> DataFrame:
        pass

    env = make_test_env()
    graph = Graph(env)
    runtime = env.runtimes[0]
    run_ctx = env.get_run_context(graph, current_runtime=runtime)
    pipe_node = graph.create_node(key="node", pipe=never_stop)
    manager = ExecutionManager(run_ctx)
    assert manager.execute(pipe_node, to_exhaustion=True) is None
Esempio n. 15
0
def test_pipe_interface(pipe: PipeLike, expected: PipeInterface):
    """Check that a pipe-like object resolves to the expected interface.

    The interface is derived directly from ``pipe`` and compared with
    ``expected``; the same comparison is then repeated on a node instantiated
    from a ``DeclaredNode`` wrapping that pipe.

    Args:
        pipe: Either a ``Pipe`` instance or a plain callable pipe definition.
        expected: The interface both derivations are expected to equal.

    Raises:
        TypeError: If ``pipe`` is neither a ``Pipe`` nor a callable.
    """
    env = make_test_env()
    if isinstance(pipe, Pipe):
        val = pipe.get_interface()
    elif isinstance(pipe, Callable):
        val = PipeInterface.from_pipe_definition(pipe)
    else:
        # A bare `raise` with no active exception only produces
        # "RuntimeError: No active exception to re-raise"; raise something
        # that names the actual problem instead.
        raise TypeError(
            f"Expected a Pipe or a callable, got {type(pipe).__name__}"
        )
    assert val == expected
    node = DeclaredNode(key="_test", pipe=pipe, upstream={"input": "mock"}).instantiate(
        env
    )
    assert node.get_interface() == expected
Esempio n. 16
0
def test_generated_schema():
    """Store a GeneratedSchema built from an inferred schema and read it back."""
    inferred = infer_schema_from_records(sample_records)
    stored = GeneratedSchema(key=inferred.key, definition=asdict(inferred))
    env = make_test_env()
    with env.session_scope() as sess:
        sess.add(stored)

        # Query the row back by key and compare its schema with the original.
        by_key = sess.query(GeneratedSchema).filter(
            GeneratedSchema.key == inferred.key
        )
        fetched = by_key.first()
        restored = fetched.as_schema()
        assert asdict(restored) == asdict(inferred)

        # The environment lookup finds the same key and returns None otherwise.
        looked_up = env.get_generated_schema(inferred.key, sess)
        assert looked_up.key == inferred.key
        assert env.get_generated_schema("pizza", sess) is None
Esempio n. 17
0
def test_exe():
    """Execute a source snap and verify the single SnapLog row it leaves behind."""
    env = make_test_env()
    graph = Graph(env)
    snap_node = graph.create_node(key="node", snap=snap_t1_source)
    executable = env.get_executable(snap_node)
    result = ExecutionManager(executable).execute()
    with env.md_api.begin():
        assert not result.output_blocks
        assert env.md_api.count(select(SnapLog)) == 1

        snap_log = env.md_api.execute(select(SnapLog)).scalar_one_or_none()
        # Identity of the logged run.
        assert snap_log.node_key == snap_node.key
        assert snap_log.graph_id == graph.get_metadata_obj().hash
        # State is empty both before and after the run.
        assert snap_log.node_start_state == {}
        assert snap_log.node_end_state == {}
        # The snap itself and its (empty) params are recorded.
        assert snap_log.snap_key == snap_node.snap.key
        assert snap_log.snap_params == {}
Esempio n. 18
0
def test_node_params():
    """The declared "test" param is readable from the snap context when run."""
    env = make_test_env()
    graph = Graph(env)
    seen_values = []

    # Records the value of the declared "test" param.
    @Param("test", "str")
    def snap_ctx(ctx: SnapContext):
        seen_values.append(ctx.get_param("test"))

    # "extra_arg" is passed in addition to the declared "test" param.
    node_params = {"test": 1, "extra_arg": 2}
    ctx_node = graph.create_node(key="ctx", snap=snap_ctx, params=node_params)
    env.run_node(ctx_node, graph)
    assert seen_values == [1]
Esempio n. 19
0
def test_cast_to_schema(cast_level, inferred, nominal, expected):
    """Cast an inferred schema to a nominal one and check the outcome.

    ``expected`` is either the ERROR sentinel (a SchemaTypeError must be
    raised), the WARN sentinel (a UserWarning must be emitted), or a field
    list describing the realized schema each resulting field is compared to.
    """
    inferred_schema = create_quick_schema("Inf", fields=inferred)
    nominal_schema = create_quick_schema("Nom", fields=nominal)
    if expected not in (ERROR, WARN):
        # Anything other than a sentinel is a field list for the target schema.
        expected = create_quick_schema("Exp", fields=expected)
    env = make_test_env()
    with env.md_api.begin():
        if expected == ERROR:
            with pytest.raises(SchemaTypeError):
                cast_to_realized_schema(
                    env, inferred_schema, nominal_schema, cast_level
                )
        elif expected == WARN:
            with pytest.warns(UserWarning):
                cast_to_realized_schema(
                    env, inferred_schema, nominal_schema, cast_level
                )
        else:
            realized = cast_to_realized_schema(
                env, inferred_schema, nominal_schema, cast_level
            )
            for field in realized.fields:
                wanted = expected.get_field(field.name)
                assert field == wanted
Esempio n. 20
0
def test_worker():
    """Run a source pipe through a Worker and expect no output block."""
    env = make_test_env()
    graph = Graph(env)
    runtime = env.runtimes[0]
    run_ctx = env.get_run_context(graph, current_runtime=runtime)
    with env.session_scope() as sess:
        node = graph.create_node(key="node", pipe=pipe_t1_source)
        worker = Worker(run_ctx)
        interface_mgr = NodeInterfaceManager(run_ctx, sess, node)
        bound_interface = interface_mgr.get_bound_interface()
        compiled = CompiledPipe(node.pipe.key, node.pipe)
        executable = Executable(node.key, compiled, bound_interface)
        run_result = worker.execute(executable)
        assert run_result.output_block is None
Esempio n. 21
0
def make_graph() -> Graph:
    """Build a seven-node test graph on a fresh environment with `core` loaded.

    node1 and node2 are sources; node3 and node4 convert them with
    function_t1_to_t2; node5, node6 and node7 all read from node4, and node7
    additionally takes node3 as its "other_t2" input.
    """
    env = make_test_env()
    env.add_module(core)
    graph = Graph(env)

    # Sources.
    graph.create_node(key="node1", function=function_t1_source)
    graph.node(key="node2", function=function_t1_source)

    # Conversions, one per source.
    graph.node(key="node3", function=function_t1_to_t2, input="node1")
    graph.node(key="node4", function=function_t1_to_t2, input="node2")

    # Consumers of node4.
    graph.node(key="node5", function=function_generic, input="node4")
    graph.node(key="node6", function=function_self, input="node4")
    multi_inputs = {"input": "node4", "other_t2": "node3"}
    graph.node(key="node7", function=function_multiple_input, inputs=multi_inputs)
    return graph
Esempio n. 22
0
def test_non_terminating_function_with_reference_input():
    """A function with a Reference input that produces nothing yields no block."""

    def never_stop(input: Optional[Reference]) -> DataFrame:
        # Does not use input but doesn't matter cause reference
        pass

    env = make_test_env()
    graph = Graph(env)
    frame = pd.DataFrame({"a": range(10)})
    source = graph.create_node(
        function="core.import_dataframe",
        params={"dataframe": frame},
    )
    node = graph.create_node(key="node", function=never_stop, input=source)

    # Run the source first so the reference input has data behind it.
    source_exe = env.get_executable(source)
    # TODO: reference inputs need to log too? (So they know when to update)
    # with env.md_api.begin():
    #     assert env.md_api.count(select(DataBlockLog)) == 1
    ExecutionManager(source_exe).execute()

    node_exe = env.get_executable(node)
    result = ExecutionManager(node_exe).execute()
    assert result.get_output_block(env) is None
Esempio n. 23
0
def test_schema_translation():
    """Translating an implementing schema to its base inverts the field mapping."""
    env = make_test_env()
    base_fields = [("f1", "Unicode"), ("f2", "Integer")]
    impl_fields = [("g1", "Unicode"), ("g2", "Integer")]
    # The implementation is declared as base-field -> implementing-field.
    base_to_impl = {"f1": "g1", "f2": "g2"}

    t_base = create_quick_schema("t_base", fields=base_fields)
    t_impl = create_quick_schema(
        "t_impl",
        fields=impl_fields,
        implementations=[Implementation("t_base", base_to_impl)],
    )
    for schema in (t_base, t_impl):
        env.add_schema(schema)

    with env.session_scope() as sess:
        translation = get_schema_translation(
            env, sess, source_schema=t_impl, target_schema=t_base
        )
        assert translation.translation == {"g1": "f1", "g2": "f2"}
Esempio n. 24
0
def test_any_schema():
    """The "Any" schema resolved through the environment has no fields."""
    env = make_test_env()
    env.add_module(core)
    with env.session_scope() as sess:
        any_schema = env.get_schema("Any", sess)
    # Checked after the session scope has exited, as in the original test.
    assert any_schema.fields == []