def test_node_with_query_file():
    """Check that BigQueryNode reads its query from ``query_file``.

    The query template is written to a file in the working directory. The
    node's ``_query`` is expected to equal the raw template when no
    ``query_params`` are given, and the formatted template when they are.
    The file is removed in a ``finally`` clause so a failing assertion does
    not leave it behind.
    """
    query_str = "SELECT {value}"
    query_params = {"value": 1}

    test_file = "bigquery_node_test.sql"

    with open(test_file, "w") as f:
        f.write(query_str)

    try:
        # No params: the template is expected to be kept verbatim.
        with FeatureDAG():
            a = BigQueryNode(name="query a",
                             query_file=test_file,
                             project="my-project")

        assert a._query == query_str

        # With params: the template is expected to be formatted.
        with FeatureDAG():
            a = BigQueryNode(
                name="query a",
                query_file=test_file,
                query_params=query_params,
                project="my-project",
            )

        assert a._query == query_str.format(**query_params)
    finally:
        # Always clean up the temporary SQL file, even on failure.
        os.remove(test_file)
# Example #2
# 0
def test_same_node_cant_be_added_to_dag_twice():
    """A node created inside a DAG context cannot be passed to ``add_node`` again."""
    with FeatureDAG() as feature_dag:
        node = FeatureNode(name="query")

    # Explicitly adding the node created above is expected to be rejected.
    with pytest.raises(ValueError):
        feature_dag.add_node(node)
def test_node_with_non_existent_file():
    """A ``query_file`` path that does not exist is expected to raise FileNotFoundError."""
    missing_file = "does_not_exist.sql"

    with FeatureDAG(), pytest.raises(FileNotFoundError):
        BigQueryNode(
            name="query a",
            query_file=missing_file,
            project="my-project",
        )
def test_node_project_specified_in_dag():
    """A node built without ``project`` exposes the project given in ``dag_params``."""
    dag_project = "my-project"
    dag_params = {"project": dag_project}

    with FeatureDAG(dag_params=dag_params):
        node = BigQueryNode(name="query a", query="SELECT 1")

    assert node.project == dag_project
# Example #5
# 0
def test_assign_node_to_self_fail():
    """Linking a node to itself with ``>>`` is expected to raise ValueError."""
    with FeatureDAG():
        node = FeatureNode(name="query a")

        with pytest.raises(ValueError):
            node >> node
def test_calc_cache_without_input_tables():
    """Cache tags match for identical queries and differ for different queries."""
    dag_params = {"project": "my-project"}

    with FeatureDAG(dag_params=dag_params):
        first = BigQueryNode(name="query a", query="SELECT 1")
        same_query = BigQueryNode(name="query b", query="SELECT 1")
        other_query = BigQueryNode(name="query c", query="SELECT 2")

    # Same SQL text, different node names -> equal tags expected.
    assert first._calc_current_cache_tag() == same_query._calc_current_cache_tag()
    # Different SQL text -> different tags expected.
    assert first._calc_current_cache_tag() != other_query._calc_current_cache_tag()
# Example #7
# 0
def test_node_cant_be_assigned_twice():
    """Repeating an existing ``parent >> child`` link is expected to raise ValueError."""
    with FeatureDAG():
        parent = FeatureNode(name="query a")
        child = FeatureNode(name="query b")

        # First link succeeds; the identical second link must be rejected.
        parent >> child
        with pytest.raises(ValueError):
            parent >> child
# Example #8
# 0
def test_simple_dag():
    """A single ``>>`` records the link in both ``children`` and ``parents``."""
    with FeatureDAG():
        parent = FeatureNode(name="query a")
        child = FeatureNode(name="query b")

        parent >> child

    assert child in parent.children
    assert parent in child.parents
def test_node_with_both_query_and_file():
    """Passing both ``query`` and ``query_file`` is expected to raise ValueError."""
    conflicting_kwargs = {
        "name": "query a",
        "query": "test",
        "query_file": "test.sql",
        "project": "my-project",
    }

    with FeatureDAG(), pytest.raises(ValueError):
        BigQueryNode(**conflicting_kwargs)
def test_node_with_query_str():
    """Check ``_query`` for an inline query with and without ``query_params``."""
    template = "SELECT {value}"
    params = {"value": 1}

    # Without params the stored query is expected to equal the raw template.
    with FeatureDAG():
        node = BigQueryNode(name="query a", query=template, project="my-project")
    assert node._query == template

    # With params the stored query is expected to be the formatted template.
    with FeatureDAG():
        node = BigQueryNode(
            name="query a",
            query=template,
            query_params=params,
            project="my-project",
        )
    expected_query = template.format(**params)
    assert node._query == expected_query
# Example #11
# 0
def test_assign_child_node_fail():
    """Making an existing child the parent of its own parent via a list must fail."""
    with FeatureDAG():
        parent = FeatureNode(name="query a")
        child = FeatureNode(name="query b")
        other = FeatureNode(name="query c")

        parent >> child

        # This path goes through rrshift, which is only used when a list of
        # nodes assigns another node as a parent.
        with pytest.raises(ValueError):
            [other, child] >> parent
# Example #12
# 0
def test_bracket_middle_dag():
    """Check links when a list of nodes sits in the middle or on the left of ``>>``."""
    # Case 1: fan-out then fan-in through a bracketed middle layer.
    with FeatureDAG():
        head = FeatureNode(name="query a")
        left = FeatureNode(name="query b")
        right = FeatureNode(name="query c")
        tail = FeatureNode(name="query d")

        head >> [left, right] >> tail

    for middle in (left, right):
        assert middle in head.children
        assert head in middle.parents
        assert tail in middle.children
        assert middle in tail.parents

    # Drop every reference to the first graph's nodes (including the loop
    # variable) before building the second graph with the same node names.
    del head, left, right, tail, middle

    # Case 2: a chain a -> b -> c, with both c and a feeding d.
    with FeatureDAG():
        head = FeatureNode(name="query a")
        second = FeatureNode(name="query b")
        third = FeatureNode(name="query c")
        tail = FeatureNode(name="query d")

        head >> second
        second >> third
        [third, head] >> tail

    # head -> second
    assert second in head.children
    assert head in second.parents
    # second -> third
    assert third in second.children
    assert second in third.parents
    # head -> tail
    assert tail in head.children
    assert head in tail.parents
    # third -> tail
    assert tail in third.children
    assert third in tail.parents
# Example #13
# 0
def test_bracket_children_dag():
    """``node >> [x, y]`` links the node as parent of every node in the list."""
    with FeatureDAG():
        parent = FeatureNode(name="query a")
        first_child = FeatureNode(name="query b")
        second_child = FeatureNode(name="query c")

        parent >> [first_child, second_child]

    assert first_child in parent.children
    assert parent in first_child.parents

    assert second_child in parent.children
    assert parent in second_child.parents
# Example #14
# 0
def test_double_arrow_dag():
    """A chained ``a >> b >> c`` creates the links a -> b and b -> c."""
    with FeatureDAG():
        first = FeatureNode(name="query a")
        second = FeatureNode(name="query b")
        third = FeatureNode(name="query c")

        first >> second >> third

    # first -> second
    assert second in first.children
    assert first in second.parents
    # second -> third
    assert third in second.children
    assert second in third.parents
# Example #15
# 0
def test_bracket_parent_dag():
    """``[x, y] >> node`` links every node in the list as a parent of the node."""
    with FeatureDAG():
        first_parent = FeatureNode(name="query a")
        second_parent = FeatureNode(name="query b")
        child = FeatureNode(name="query c")

        [first_parent, second_parent] >> child

    assert child in first_parent.children
    assert first_parent in child.parents

    assert child in second_parent.children
    assert second_parent in child.parents
# Example #16
# 0
def test_assign_parent_to_node_fail():
    """Links that would point back to an upstream node are expected to raise ValueError."""
    with FeatureDAG():
        root = FeatureNode(name="query a")
        middle = FeatureNode(name="query b")

        root >> middle
        # Direct back-link: middle's parent cannot become its child.
        with pytest.raises(ValueError):
            middle >> root

        leaf = FeatureNode(name="query c")
        middle >> leaf
        # Indirect back-link: root is two levels upstream of leaf.
        with pytest.raises(ValueError):
            leaf >> root
# Example #17
# 0
def test_compact_state():
    """``compact_state`` drops state entries for nodes no longer in the DAG."""
    with FeatureDAG() as feature_dag:
        node = FeatureNode(name="query a")

    feature_dag.run_feature_graph()

    # Remove the node from the DAG; its state entry should still be present.
    feature_dag._nodes.remove(node)
    assert node.node_id in feature_dag._state_dict.keys()

    # After compaction the orphaned entry is expected to be gone.
    feature_dag.compact_state()
    assert node.node_id not in feature_dag._state_dict.keys()
# Example #18
# 0
def test_clear_state():
    """A node's stored cache tag is set by a DAG run and reset by ``clear_state``."""
    with FeatureDAG() as feature_dag:
        node = FeatureNode(name="query a")

    # NOTE(review): _get_state_cache_tag is read without being called, so it
    # is presumably a property -- confirm against FeatureNode.
    assert node._get_state_cache_tag is None

    feature_dag.run_feature_graph()
    assert node._get_state_cache_tag is not None

    node.clear_state()
    assert node._get_state_cache_tag is None
# Example #19
# 0
def test_state_stored_with_file():
    """Check that node state survives across DAG instances sharing ``state_db``.

    A node is run in a first DAG backed by a state file, then a second DAG is
    created on the same file and the node with the same name is expected to
    be reported as not stale. The state file is removed in a ``finally``
    clause so a failing assertion does not leave it on disk.
    """
    state_db = "./dag_state.sqlite"
    # Start from a clean slate in case an earlier run left the file behind.
    if os.path.exists(state_db):
        os.remove(state_db)

    try:
        with FeatureDAG(state_db=state_db) as dag:
            a = FeatureNode(name="query a")

        assert a.is_node_stale is True

        dag.run_feature_graph()

        assert a.is_node_stale is False

        # Drop the first DAG and node before re-opening the same state file.
        del a, dag

        with FeatureDAG(state_db=state_db):
            a = FeatureNode(name="query a")

        assert a.is_node_stale is False

        # The state file must exist at this point (previously enforced
        # implicitly by an unconditional os.remove).
        assert os.path.exists(state_db)
    finally:
        # Guarded so a failure before the file exists is not masked by
        # FileNotFoundError.
        if os.path.exists(state_db):
            os.remove(state_db)
# Example #20
# 0
def test_state_stored():
    """A node is run once, marked not stale, and skipped on the next DAG run."""
    with FeatureDAG() as feature_dag:
        node = FeatureNode(name="query a")

    assert node.is_node_stale is True

    # Replace run() with a mock so calls to it can be counted.
    node.run = Mock()

    # First run: the stale node must be executed exactly once.
    feature_dag.run_feature_graph()
    node.run.assert_called_once()
    assert node.is_node_stale is False

    # Second run: the node is up to date and must not be re-run.
    node.run.reset_mock()
    feature_dag.run_feature_graph()
    node.run.assert_not_called()
def test_node_project_not_specified():
    """With no project on the node or the DAG, LookupError is expected."""
    with FeatureDAG(), pytest.raises(LookupError):
        BigQueryNode(name="query a", query="SELECT 1")
# Example #22
# 0
def test_two_nodes_cant_have_same_name():
    """Creating a second node with an already-used name in one DAG must fail."""
    duplicate_name = "query"

    with FeatureDAG():
        # Keep a reference to the first node while the duplicate is attempted.
        first_node = FeatureNode(name=duplicate_name)
        with pytest.raises(ValueError):
            FeatureNode(name=duplicate_name)
def test_node_with_neither_query_or_file():
    """Omitting both ``query`` and ``query_file`` is expected to raise ValueError."""
    with FeatureDAG(), pytest.raises(ValueError):
        BigQueryNode(name="query a", project="my-project")
def test_calc_cache_with_input_tables():
    """Check cache tags for nodes that declare ``input_tables``.

    Expectations:
      * same query and same input table metadata -> equal tags;
      * different query and different input table -> different tags;
      * same query and table but a later ``modified`` time -> different tags.

    ``client.get_table`` is replaced by a mock whose *return value* carries
    the table metadata. Keyword arguments passed straight to ``MagicMock``
    configure the mock itself, not the object it returns when called, so the
    metadata has to be attached through ``return_value`` to be visible on the
    looked-up table.
    """
    # Local import: only this test needs timedelta.
    from datetime import timedelta

    def fake_get_table(table_id, modified):
        """Return a ``get_table`` stand-in yielding a table-like mock."""
        table = MagicMock(
            project="my-project",
            dataset_id="my_fake_dataset",
            table_id=table_id,
            modified=modified,
        )
        return MagicMock(return_value=table)

    # NOTE(review): this constructs a real BigQuery client and so presumably
    # needs credentials in the test environment -- confirm.
    client = bigquery.Client()

    mod_timestamp = datetime.now()

    with FeatureDAG(dag_params={"project": "my-project"}):

        client.get_table = fake_get_table("my_fake_table", mod_timestamp)
        a = BigQueryNode(
            name="query a",
            query="SELECT 1",
            input_tables="my_fake_dataset.my_fake_table",
            client=client,
        )
        node_a_catch_tag = a._calc_current_cache_tag()
        b = BigQueryNode(
            name="query b",
            query="SELECT 1",
            input_tables="my_fake_dataset.my_fake_table",
            client=client,
        )
        node_b_catch_tag = b._calc_current_cache_tag()

        client.get_table = fake_get_table("my_other_fake_table", mod_timestamp)
        c = BigQueryNode(
            name="query c",
            query="SELECT 2",
            input_tables="my_fake_dataset.my_other_fake_table",
            client=client,
        )

    assert node_a_catch_tag == node_b_catch_tag
    assert node_a_catch_tag != c._calc_current_cache_tag()

    del a
    del b

    # Also test date changed
    with FeatureDAG(dag_params={"project": "my-project"}):

        client.get_table = fake_get_table("my_fake_table", mod_timestamp)
        a = BigQueryNode(
            name="query a",
            query="SELECT 1",
            input_tables="my_fake_dataset.my_fake_table",
            client=client,
        )
        node_a_catch_tag = a._calc_current_cache_tag()

        # Use a fixed offset rather than a second datetime.now() call so the
        # two timestamps are guaranteed to differ.
        client.get_table = fake_get_table(
            "my_fake_table", mod_timestamp + timedelta(seconds=1)
        )
        b = BigQueryNode(
            name="query b",
            query="SELECT 1",
            input_tables="my_fake_dataset.my_fake_table",
            client=client,
        )

    assert node_a_catch_tag != b._calc_current_cache_tag()