Esempio n. 1
0
def test_get_node_when_node_is_disabled(raw_manifest):
    """Looking up "x" yields a Node carrying that id.

    Per the test name, "x" is presumably a disabled entry in the
    ``raw_manifest`` fixture (the fixture itself is defined elsewhere).
    """
    looked_up = Manifest(raw_manifest).get_node("x")

    assert looked_up.id == "x"
    assert isinstance(looked_up, Node)
def manifest() -> Manifest:
    """Build a Manifest with enabled nodes "a" and "b" plus a disabled "x".

    All three ids appear in the child map with no children.
    """
    enabled_nodes = {
        node_id: {"unique_id": node_id, "config": {"enabled": True}}
        for node_id in ("a", "b")
    }
    disabled_nodes = [{"unique_id": "x", "config": {"enabled": False}}]
    # A fresh empty list per id, so entries never share a children list.
    childless = {node_id: [] for node_id in ("a", "b", "x")}

    return Manifest({
        "nodes": enabled_nodes,
        "child_map": childless,
        "disabled": disabled_nodes,
    })
Esempio n. 3
0
def manifest() -> Manifest:
    """Build a Manifest with two models and one source.

    ``source_a`` is the parent of ``stg_a``; ``stg_x`` has no parent in the
    child map.
    """
    models = {
        model_id: {"unique_id": model_id, "resource_type": "model"}
        for model_id in ("stg_a", "stg_x")
    }
    sources = {
        "source_a": {"unique_id": "source_a", "resource_type": "source"},
    }
    child_map = {"source_a": ["stg_a"], "stg_a": [], "stg_x": []}

    return Manifest({
        "nodes": models,
        "child_map": child_map,
        "disabled": [],
        "sources": sources,
    })
Esempio n. 4
0
def test_no_rejoin_models(raw_manifest):
    """no_rejoin_models splits the fixture's nodes into the expected id lists."""
    passing, failing = no_rejoin_models(Manifest(raw_manifest))

    passing_ids = [model.id for model in passing]
    assert passing_ids == ["S", "D", "3", "4"]

    failing_ids = [model.id for model in failing]
    assert failing_ids == ["1", "2"]
def manifest() -> Manifest:
    """Build a Manifest where source ``source_1`` feeds ``mart_1`` directly.

    ``staging_1`` is present but has no parent in the child map.
    """

    def entry(unique_id, resource_type, fqn):
        # Key order mirrors the raw manifest layout used by this fixture.
        return {
            "unique_id": unique_id,
            "resource_type": resource_type,
            "fqn": fqn,
        }

    models = {
        "staging_1": entry("staging_1", "model", ["foo", "staging", "bar"]),
        "mart_1": entry("mart_1", "model", ["foo", "marts", "bar"]),
    }
    sources = {
        "source_1": entry("source_1", "source", ["foo", "bar"]),
    }
    child_map = {
        "source_1": ["mart_1"],
        "staging_1": [],
        "mart_1": [],
    }

    return Manifest({
        "nodes": models,
        "child_map": child_map,
        "disabled": [],
        "sources": sources,
    })
Esempio n. 6
0
def test_rule_instances_are_callable(empty_raw_manifest):
    """Calling a Rule instance invokes its wrapped function with the manifest."""
    # Named `rule_func` rather than `callable` so the builtin is not shadowed.
    rule_func = Mock()
    dummy_manifest = Manifest(empty_raw_manifest)

    rule = Rule(id="id", name="basic_rule", func=rule_func)
    rule(dummy_manifest)

    rule_func.assert_called_once_with(dummy_manifest)
Esempio n. 7
0
def test_rule_engine_returns_results_for_rule_set(empty_raw_manifest):
    """Running an engine holding one rule yields exactly one Result."""
    failing_nodes = [Node({})]

    def always_fails(_manifest):
        # No passes; every call reports the same single failing node.
        return [], failing_nodes

    failing_rule = Rule("this always fails!", "test", always_fails)
    engine = RuleEngine([failing_rule])

    outcome = engine.run(Manifest(empty_raw_manifest))

    assert len(outcome) == 1
    assert isinstance(outcome[0], Result)
Esempio n. 8
0
def no_rejoin_models(
    manifest: Manifest,
) -> Tuple[List[Node], List[Node]]:
    """Split the manifest's nodes by whether they are rejoin nodes.

    Rejoin nodes are whatever ``__find_rejoin_nodes`` reports for the
    manifest graph; a node matches when its id is among them.

    Returns:
        ``(passes, failures)`` as lists. ``partition`` is assumed to yield
        the non-matching items first and the matching items second.
    """
    rejoin_ids = __find_rejoin_nodes(manifest.graph)

    compliant, offending = partition(
        lambda candidate: candidate.id in rejoin_ids,
        manifest.nodes(),
    )
    return list(compliant), list(offending)
def no_references_to_marts_from_staging(
    manifest: Manifest,
) -> Tuple[List[Node], List[Node]]:
    """Split nodes by whether a staging node has a mart as a direct parent.

    Args:
        manifest: Manifest whose ``graph`` and nodes are inspected.

    Returns:
        ``(passes, failures)`` as lists. ``partition`` is assumed to yield
        the non-matching items first and the matching items second.
    """

    def staging_depends_on_mart(node: Node) -> bool:
        # Only staging nodes can break this rule, so skip the graph walk and
        # the per-parent manifest lookups for everything else.
        if not node.is_staging:
            return False
        # any() stops at the first mart parent instead of collecting them all.
        return any(
            manifest.get_node(parent_id).is_mart
            for parent_id in manifest.graph.predecessors(node.id)
        )

    passes, failures = partition(staging_depends_on_mart, manifest.nodes())
    return list(passes), list(failures)
Esempio n. 10
0
def no_references_to_source_from_marts(
    manifest: Manifest,
) -> Tuple[List[Node], List[Node]]:
    """Split nodes by whether a mart node has a source as a direct parent.

    Args:
        manifest: Manifest whose ``graph`` and nodes are inspected.

    Returns:
        ``(passes, failures)`` as lists. ``partition`` is assumed to yield
        the non-matching items first and the matching items second.
    """

    def mart_depends_on_source(node: Node) -> bool:
        # Only mart nodes can break this rule, so skip the graph walk and
        # the per-parent manifest lookups for everything else.
        if not node.is_mart:
            return False
        # any() stops at the first source parent instead of collecting them all.
        return any(
            manifest.get_node(parent_id).is_source
            for parent_id in manifest.graph.predecessors(node.id)
        )

    passes, failures = partition(mart_depends_on_source, manifest.nodes())
    return list(passes), list(failures)
Esempio n. 11
0
def test_manifest_graph(raw_manifest):
    """Manifest.graph has the same nodes and edges as a hand-built DiGraph."""
    expected = nx.DiGraph()
    # Same insertion order as adding the edges and nodes one at a time.
    expected.add_edges_from([("a", "b"), ("a", "c")])
    expected.add_nodes_from(["e", "s", "x"])

    actual = Manifest(raw_manifest).graph

    assert actual.nodes == expected.nodes
    assert actual.edges == expected.edges
def no_references_outside_of_its_own_staging_area(
    manifest: Manifest,
) -> Tuple[List[Node], List[Node]]:
    """Split nodes by whether a staging node reads staging from another area.

    A node matches when it is a staging node and at least one direct parent
    is also a staging node whose ``area`` differs from the node's own.

    Args:
        manifest: Manifest whose ``graph`` and nodes are inspected.

    Returns:
        ``(passes, failures)`` as lists. ``partition`` is assumed to yield
        the non-matching items first and the matching items second.
    """

    def staging_depends_on_staging_in_another_area(node: Node) -> bool:
        # Only staging nodes can break this rule; skip the graph walk otherwise.
        if not node.is_staging:
            return False
        for parent_id in manifest.graph.predecessors(node.id):
            # Resolve each parent once and reuse it for both checks.
            parent = manifest.get_node(parent_id)
            if parent.is_staging and parent.area != node.area:
                return True
        return False

    passes, failures = partition(
        staging_depends_on_staging_in_another_area, manifest.nodes()
    )
    return list(passes), list(failures)
Esempio n. 13
0
def test_apply_splits_nodes_using_callable(empty_raw_manifest):
    """Rule.apply hands back the (passes, failures) pair its callable produced."""
    expected_passes = [Node({"original_file_path": "filepath1"})]
    expected_failures = [
        Node({"original_file_path": file_path})
        for file_path in ("filepath2", "filepath3")
    ]
    dummy_manifest = Manifest(empty_raw_manifest)
    rule = Rule(
        "warning",
        "basic_rule",
        lambda _: (expected_passes, expected_failures),
    )

    outcome = rule.apply(dummy_manifest)

    assert outcome[0] == expected_passes
    assert outcome[1] == expected_failures
def staging_models_have_single_source(
    manifest: Manifest,
) -> Tuple[List[Node], List[Node]]:
    """Split nodes by whether a staging node has more than one source parent.

    Args:
        manifest: Manifest whose ``graph`` and nodes are inspected.

    Returns:
        ``(passes, failures)`` as lists. ``partition`` is assumed to yield
        the non-matching items first and the matching items second.
    """

    def staging_model_has_more_than_one_source(node: Node) -> bool:
        # Only staging nodes can break this rule, so skip the graph walk and
        # the per-parent manifest lookups for everything else.
        if not node.is_staging:
            return False
        source_parents = [
            parent_id
            for parent_id in manifest.graph.predecessors(node.id)
            if manifest.get_node(parent_id).is_source
        ]
        # Already a list: no extra copy needed before counting.
        return len(source_parents) > 1

    passes, failures = partition(
        staging_model_has_more_than_one_source, manifest.nodes()
    )
    return list(passes), list(failures)
def no_orphaned_models(manifest: Manifest) -> Tuple[List[Node], List[Node]]:
    """Split nodes by whether a staging or mart node has no parents at all.

    A node matches (is an orphan) when it is a staging or mart node and the
    manifest graph lists no predecessors for it.

    Args:
        manifest: Manifest whose ``graph`` and nodes are inspected.

    Returns:
        ``(passes, failures)`` as lists. ``partition`` is assumed to yield
        the non-matching items first and the matching items second.
    """

    def is_orphan(node: Node) -> bool:
        # Only staging and mart nodes are subject to this rule; skip the
        # graph lookup for everything else.
        if not (node.is_staging or node.is_mart):
            return False
        upstream = list(manifest.graph.predecessors(node.id))
        return not upstream

    passes, failures = partition(is_orphan, manifest.nodes())
    return list(passes), list(failures)
Esempio n. 16
0
def manifest() -> Manifest:
    """Build a Manifest with one single-source and one two-source staging model.

    ``staging.b`` is fed by source ``a`` only; ``staging.z`` is fed by both
    ``x`` and ``y``.
    """

    def entry(unique_id, fqn, resource_type):
        # Key order mirrors the raw manifest layout used by this fixture.
        return {
            "unique_id": unique_id,
            "fqn": fqn,
            "resource_type": resource_type,
        }

    staging_models = {
        "staging.b": entry("staging.b", ["staging", "b"], "model"),
        "staging.z": entry("staging.z", ["staging", "z"], "model"),
    }
    sources = {
        source_id: entry(source_id, [source_id], "source")
        for source_id in ("a", "x", "y")
    }
    child_map = {
        "a": ["staging.b"],
        "staging.b": [],
        "x": ["staging.z"],
        "y": ["staging.z"],
        "staging.z": [],
    }

    return Manifest(
        {
            "nodes": staging_models,
            "child_map": child_map,
            "disabled": [],
            "sources": sources,
        }
    )
Esempio n. 17
0
def manifest() -> Manifest:
    """Build a Manifest where ``staging_1`` (area_1) feeds ``staging_2`` (area_2).

    There are no sources and no disabled nodes.
    """
    staging_models = {
        model_id: {
            "unique_id": model_id,
            "resource_type": "model",
            "fqn": ["foo", "staging", area],
        }
        for model_id, area in (
            ("staging_1", "area_1"),
            ("staging_2", "area_2"),
        )
    }
    child_map = {"staging_1": ["staging_2"], "staging_2": []}

    return Manifest({
        "nodes": staging_models,
        "child_map": child_map,
        "disabled": [],
        "sources": {},
    })
Esempio n. 18
0
def check(input, config, add_rules_paths, html=True, browser=False):
    """Check dbt DAG against configured rules."""
    # NOTE(review): the docstring above is left untouched on purpose. If this
    # function is registered as a CLI command, that text may be surfaced as
    # user-facing help; confirm before editing it.
    #
    # Parameters, as used below:
    #   input           - open file-like object holding the manifest JSON.
    #   config          - passed to ConfigIO.read(); rebound to its result.
    #   add_rules_paths - iterable of extra rule locations; each one extends
    #                     the rule engine.
    #   html / browser  - the HTML report is rendered when either is truthy;
    #                     `browser` additionally opens it.
    from pathlib import Path  # local import keeps this block self-contained

    config = ConfigIO.read(config)
    manifest = Manifest(json.load(input))
    rule_engine = RuleEngine.with_configured_rules(config)
    for rule_path in add_rules_paths:
        rule_engine.extend(RuleEngine.with_configured_rules(config, rule_path))

    results = rule_engine.run(manifest)
    report_to_terminal(results)
    metric_results = MetricEngine().run(manifest)
    report = to_html_report(results, metric_results)
    # Round-trip through MyEncoder so the report becomes plain JSON data.
    oliver_twist = json.loads(MyEncoder().encode(report))
    output_json(oliver_twist)
    if html or browser:
        logger.debug("Generating HTML report...")
        render_html_report(oliver_twist)
        if browser:
            # Path.as_uri() builds a well-formed file:// URL (percent-escaped,
            # correct on Windows drive paths), unlike manual concatenation.
            report_path = Path.cwd() / "target" / "index.html"
            webbrowser.open(report_path.as_uri())

    exit_message(results)
Esempio n. 19
0
    def run(self, manifest: Manifest) -> List[MetricResult]:
        """Compute graph metrics for every node in the manifest graph.

        Returns one MetricResult per graph node, built from the node itself
        followed by its degree, in-degree, out-degree, closeness and
        betweenness centrality and its pagerank, in that order.
        """
        dag = manifest.graph
        # One per-node score lookup per metric, in MetricResult argument order.
        per_metric_scores = (
            nx.centrality.degree_centrality(dag),
            nx.centrality.in_degree_centrality(dag),
            nx.centrality.out_degree_centrality(dag),
            nx.centrality.closeness_centrality(dag),
            nx.centrality.betweenness_centrality(dag),
            nx.link_analysis.pagerank_alg.pagerank(dag),
        )

        return [
            MetricResult(
                manifest.get_node(node_id),
                *(scores[node_id] for scores in per_metric_scores),
            )
            for node_id in dag.nodes
        ]
def test_metric_engine_returns_results(raw_manifest):
    """MetricEngine.run yields three results for the ``raw_manifest`` fixture."""
    parsed = Manifest(raw_manifest)
    engine = MetricEngine()

    metric_results = engine.run(parsed)

    assert len(metric_results) == 3
Esempio n. 21
0
def test_nodes_returns_all_nodes(raw_manifest):
    """Manifest.nodes() yields the fixture's nodes in the expected id order."""
    all_nodes = Manifest(raw_manifest).nodes()

    observed_ids = [entry.id for entry in all_nodes]

    assert observed_ids == ["a", "b", "c", "e", "s", "x"]
Esempio n. 22
0
def manifest() -> Manifest:
    """Build a Manifest mixing owned and owner-less models and sources.

    Models cover view, table, incremental and ephemeral materializations;
    ``meta.owner`` is variously "Joe", missing, empty, or whitespace-only.
    No entry has children and nothing is disabled.
    """

    def model(unique_id, materialized, meta):
        # Key order mirrors the raw manifest layout used by this fixture.
        return {
            "unique_id": unique_id,
            "resource_type": "model",
            "config": {"materialized": materialized},
            "meta": meta,
        }

    def source(unique_id, meta):
        return {
            "unique_id": unique_id,
            "resource_type": "source",
            "meta": meta,
        }

    models = [
        model("physical_node_1", "view", {"owner": "Joe"}),
        model("physical_node_2", "table", {"owner": "Joe"}),
        model("physical_node_3", "incremental", {"owner": "Joe"}),
        model("ephemeral_node_1", "ephemeral", {"owner": "Joe"}),
        model("ephemeral_node_2", "ephemeral", {}),
        model("no_owner_physical_node_1", "incremental", {}),
        model("no_owner_physical_node_2", "incremental", {"owner": ""}),
        model("no_owner_physical_node_3", "incremental", {"owner": "   "}),
    ]
    sources = [
        source("source_1", {"owner": "Joe"}),
        source("no_owner_source_1", {"owner": "  "}),
        source("no_owner_source_2", {"owner": ""}),
        source("no_owner_source_3", {}),
    ]

    return Manifest({
        "nodes": {entry["unique_id"]: entry for entry in models},
        # Every model, then every source, each with a fresh empty child list.
        "child_map": {entry["unique_id"]: [] for entry in models + sources},
        "disabled": [],
        "sources": {entry["unique_id"]: entry for entry in sources},
    })
Esempio n. 23
0
def test_get_node_when_node_does_not_exist(raw_manifest):
    """Asking for the id "foo" raises KeyError."""
    parsed = Manifest(raw_manifest)

    with pytest.raises(KeyError):
        parsed.get_node("foo")
Esempio n. 24
0
def no_owner_on_physical_models(
        manifest: Manifest) -> Tuple[List[Node], List[Node]]:
    """Split nodes by whether a DB-relation node has a none-or-blank owner.

    Returns:
        ``(passes, failures)`` as lists. ``partition`` is assumed to yield
        the non-matching items first and the matching items second.
    """

    def is_unowned_relation(node):
        return node.is_db_relation and __is_none_or_blank(node.owner)

    compliant, offending = partition(is_unowned_relation, manifest.nodes())
    return list(compliant), list(offending)
Esempio n. 25
0
def no_disabled_models(manifest: Manifest) -> Tuple[List[Node], List[Node]]:
    """Split the manifest's nodes by whether they are disabled.

    Returns:
        ``(passes, failures)`` as lists. ``partition`` is assumed to yield
        the non-matching items first and the matching items second.
    """

    def is_disabled(node):
        return not node.is_enabled

    enabled, disabled = partition(is_disabled, manifest.nodes())
    return list(enabled), list(disabled)