def test_get_node_when_node_is_disabled(raw_manifest):
    """Looking up id "x" yields a ``Node`` whose id is "x".

    NOTE(review): the test name implies "x" is a disabled node in the
    ``raw_manifest`` fixture; that fixture is not visible here -- confirm.
    """
    fetched = Manifest(raw_manifest).get_node("x")

    assert fetched.id == "x"
    assert isinstance(fetched, Node)
def manifest() -> Manifest:
    """Build a manifest with enabled nodes "a" and "b" and a disabled node "x".

    None of the three ids has children in the child map.
    """
    enabled_nodes = {
        "a": {"unique_id": "a", "config": {"enabled": True}},
        "b": {"unique_id": "b", "config": {"enabled": True}},
    }
    disabled_nodes = [
        {"unique_id": "x", "config": {"enabled": False}},
    ]
    child_map = {"a": [], "b": [], "x": []}

    return Manifest({
        "nodes": enabled_nodes,
        "child_map": child_map,
        "disabled": disabled_nodes,
    })
def manifest() -> Manifest:
    """Build a manifest with models "stg_a" and "stg_x" and source "source_a".

    In the child map "stg_a" is a child of "source_a"; "stg_x" is not listed
    as a child of anything.
    """
    models = {
        "stg_a": {"unique_id": "stg_a", "resource_type": "model"},
        "stg_x": {"unique_id": "stg_x", "resource_type": "model"},
    }
    sources = {
        "source_a": {"unique_id": "source_a", "resource_type": "source"},
    }
    child_map = {
        "source_a": ["stg_a"],
        "stg_a": [],
        "stg_x": [],
    }

    return Manifest({
        "nodes": models,
        "child_map": child_map,
        "disabled": [],
        "sources": sources,
    })
def test_no_rejoin_models(raw_manifest):
    """``no_rejoin_models`` splits the fixture's nodes into the expected ids."""
    passing, failing = no_rejoin_models(Manifest(raw_manifest))

    passing_ids = [model.id for model in passing]
    assert passing_ids == ["S", "D", "3", "4"]

    failing_ids = [model.id for model in failing]
    assert failing_ids == ["1", "2"]
def manifest() -> Manifest:
    """Build a manifest where "mart_1" is a direct child of "source_1".

    "staging_1" is present as a model but has no parent or child in the
    child map.
    """
    staging_model = {
        "unique_id": "staging_1",
        "resource_type": "model",
        "fqn": ["foo", "staging", "bar"],
    }
    mart_model = {
        "unique_id": "mart_1",
        "resource_type": "model",
        "fqn": ["foo", "marts", "bar"],
    }
    source = {
        "unique_id": "source_1",
        "resource_type": "source",
        "fqn": ["foo", "bar"],
    }

    return Manifest({
        "nodes": {"staging_1": staging_model, "mart_1": mart_model},
        "child_map": {
            "source_1": ["mart_1"],
            "staging_1": [],
            "mart_1": [],
        },
        "disabled": [],
        "sources": {"source_1": source},
    })
def test_rule_instances_are_callable(empty_raw_manifest):
    """Calling a ``Rule`` forwards the manifest to its wrapped function."""
    # Named ``rule_func`` rather than ``callable`` so the builtin stays usable.
    rule_func = Mock()
    stub_manifest = Manifest(empty_raw_manifest)

    Rule(id="id", name="basic_rule", func=rule_func)(stub_manifest)

    rule_func.assert_called_once_with(stub_manifest)
def test_rule_engine_returns_results_for_rule_set(empty_raw_manifest):
    """Running an engine holding one rule yields exactly one ``Result``."""
    failing_nodes = [Node({})]

    def always_fail(m):
        # No passes; every call reports the same failing nodes.
        return [], failing_nodes

    rule = Rule("this always fails!", "test", always_fail)
    results = RuleEngine([rule]).run(Manifest(empty_raw_manifest))

    assert len(results) == 1
    assert isinstance(results[0], Result)
def no_rejoin_models(
    manifest: Manifest,
) -> Tuple[List[Node], List[Node]]:
    """Split the manifest's nodes by whether they are rejoin nodes.

    Rejoin nodes are whatever ``__find_rejoin_nodes`` reports for the
    manifest graph; membership is tested by node id.

    Returns:
        ``(passes, failures)`` as lists, in the order ``partition`` yields
        them. NOTE(review): assumes ``partition`` puts items for which the
        predicate is falsy first (more_itertools convention) -- confirm.
    """
    rejoin_ids = __find_rejoin_nodes(manifest.graph)

    passing, failing = partition(
        lambda candidate: candidate.id in rejoin_ids,
        manifest.nodes(),
    )
    return list(passing), list(failing)
def no_references_to_marts_from_staging(
    manifest: Manifest,
) -> Tuple[List[Node], List[Node]]:
    """Split nodes by whether a staging node has a mart among its predecessors.

    Returns:
        ``(passes, failures)`` as lists. NOTE(review): assumes ``partition``
        puts items for which the predicate is falsy first -- confirm.
    """

    def staging_depends_on_mart(node: Node):
        # Collect the ids of every direct predecessor that is a mart.
        mart_parent_ids = []
        for parent_id in manifest.graph.predecessors(node.id):
            if manifest.get_node(parent_id).is_mart:
                mart_parent_ids.append(parent_id)
        return node.is_staging and len(mart_parent_ids) > 0

    passing, failing = partition(staging_depends_on_mart, manifest.nodes())
    return list(passing), list(failing)
def no_references_to_source_from_marts(
    manifest: Manifest,
) -> Tuple[List[Node], List[Node]]:
    """Split nodes by whether a mart node has a source among its predecessors.

    Returns:
        ``(passes, failures)`` as lists. NOTE(review): assumes ``partition``
        puts items for which the predicate is falsy first -- confirm.
    """

    def mart_depends_on_source(node: Node):
        parent_ids = manifest.graph.predecessors(node.id)
        source_parent_ids = [
            parent_id
            for parent_id in parent_ids
            if manifest.get_node(parent_id).is_source
        ]
        has_source_parent = len(source_parent_ids) > 0
        return node.is_mart and has_source_parent

    passing, failing = partition(mart_depends_on_source, manifest.nodes())
    return list(passing), list(failing)
def test_manifest_graph(raw_manifest):
    """The manifest graph has edges a->b and a->c plus lone nodes e, s and x."""
    expected = nx.DiGraph()
    expected.add_edges_from([("a", "b"), ("a", "c")])
    expected.add_nodes_from(["e", "s", "x"])

    actual = Manifest(raw_manifest).graph

    assert actual.nodes == expected.nodes
    assert actual.edges == expected.edges
def no_references_outside_of_its_own_staging_area(
    manifest: Manifest,
) -> Tuple[List[Node], List[Node]]:
    """Split nodes by whether a staging node references another staging area.

    A node matches when it is a staging node and at least one of its direct
    predecessors is a staging node whose ``area`` differs from its own.

    Returns:
        ``(passes, failures)`` as lists. NOTE(review): assumes ``partition``
        puts items for which the predicate is falsy first -- confirm.
    """

    def staging_depends_on_staging_in_another_area(node: Node):
        different_staging_area_refs = []
        for parent_id in manifest.graph.predecessors(node.id):
            # Resolve each predecessor once and reuse it for both checks.
            parent = manifest.get_node(parent_id)
            if parent.is_staging and parent.area != node.area:
                different_staging_area_refs.append(parent_id)
        return node.is_staging and len(different_staging_area_refs) > 0

    passes, failures = partition(
        staging_depends_on_staging_in_another_area, manifest.nodes()
    )
    return list(passes), list(failures)
def test_apply_splits_nodes_using_callable(empty_raw_manifest):
    """``Rule.apply`` returns the passes and failures its function produced."""
    expected_passes = [Node({"original_file_path": "filepath1"})]
    expected_failures = [
        Node({"original_file_path": "filepath2"}),
        Node({"original_file_path": "filepath3"}),
    ]
    stub_manifest = Manifest(empty_raw_manifest)
    rule = Rule(
        "warning",
        "basic_rule",
        lambda _: (expected_passes, expected_failures),
    )

    outcome = rule.apply(stub_manifest)

    assert outcome[0] == expected_passes
    assert outcome[1] == expected_failures
def staging_models_have_single_source(
    manifest: Manifest,
) -> Tuple[List[Node], List[Node]]:
    """Split nodes by whether a staging node has more than one source parent.

    Returns:
        ``(passes, failures)`` as lists. NOTE(review): assumes ``partition``
        puts items for which the predicate is falsy first -- confirm.
    """

    def staging_model_has_more_than_one_source(node: Node):
        # Count direct predecessors that are sources.
        source_count = 0
        for parent_id in manifest.graph.predecessors(node.id):
            if manifest.get_node(parent_id).is_source:
                source_count += 1
        return node.is_staging and source_count > 1

    passing, failing = partition(
        staging_model_has_more_than_one_source,
        manifest.nodes(),
    )
    return list(passing), list(failing)
def no_orphaned_models(manifest: Manifest) -> Tuple[List[Node], List[Node]]:
    """Split nodes by whether they are orphaned staging or mart nodes.

    A node counts as an orphan when it is a staging or mart node and has no
    predecessors in the manifest graph.

    Returns:
        ``(passes, failures)`` as lists. NOTE(review): assumes ``partition``
        puts items for which the predicate is falsy first -- confirm.
    """

    def is_orphan(node: Node) -> bool:
        parent_ids = list(manifest.graph.predecessors(node.id))
        is_staging_or_mart = node.is_staging or node.is_mart
        return is_staging_or_mart and not parent_ids

    passing, failing = partition(is_orphan, manifest.nodes())
    return list(passing), list(failing)
def manifest() -> Manifest:
    """Build a manifest with one single-source and one two-source staging model.

    "staging.b" is a child of source "a" only; "staging.z" is a child of both
    source "x" and source "y".
    """

    def model(unique_id, fqn):
        return {"unique_id": unique_id, "fqn": fqn, "resource_type": "model"}

    def source(unique_id):
        return {
            "unique_id": unique_id,
            "fqn": [unique_id],
            "resource_type": "source",
        }

    nodes = {
        "staging.b": model("staging.b", ["staging", "b"]),
        "staging.z": model("staging.z", ["staging", "z"]),
    }
    sources = {
        "a": source("a"),
        "x": source("x"),
        "y": source("y"),
    }
    child_map = {
        "a": ["staging.b"],
        "staging.b": [],
        "x": ["staging.z"],
        "y": ["staging.z"],
        "staging.z": [],
    }

    return Manifest(
        {
            "nodes": nodes,
            "child_map": child_map,
            "disabled": [],
            "sources": sources,
        }
    )
def manifest() -> Manifest:
    """Build a manifest where "staging_2" is a child of "staging_1".

    The two models carry different last ``fqn`` segments ("area_1" and
    "area_2"); there are no sources and no disabled nodes.
    """
    first_model = {
        "unique_id": "staging_1",
        "resource_type": "model",
        "fqn": ["foo", "staging", "area_1"],
    }
    second_model = {
        "unique_id": "staging_2",
        "resource_type": "model",
        "fqn": ["foo", "staging", "area_2"],
    }

    return Manifest({
        "nodes": {"staging_1": first_model, "staging_2": second_model},
        "child_map": {
            "staging_1": ["staging_2"],
            "staging_2": [],
        },
        "disabled": [],
        "sources": {},
    })
def check(input, config, add_rules_paths, html=True, browser=False):
    """Check dbt DAG against configured rules."""
    # NOTE(review): the docstring above is kept verbatim because it may be
    # surfaced as CLI help text -- the decorators are not visible here.
    #
    # input: open file-like object holding the manifest JSON.
    # config: value handed to ConfigIO.read to obtain the configuration.
    # add_rules_paths: iterable of extra rule locations; each one extends the
    #     rule engine with the rules configured for that path.
    # html / browser: the HTML report is rendered when either is truthy; the
    #     rendered report is additionally opened in a browser when `browser`
    #     is truthy.

    # Local import so this block does not depend on the file's import header.
    from pathlib import Path

    config = ConfigIO.read(config)
    manifest = Manifest(json.load(input))

    rule_engine = RuleEngine.with_configured_rules(config)
    for rule_path in add_rules_paths:
        rule_engine.extend(RuleEngine.with_configured_rules(config, rule_path))

    results = rule_engine.run(manifest)
    report_to_terminal(results)

    metric_results = MetricEngine().run(manifest)
    report = to_html_report(results, metric_results)
    # Round-trip through the custom encoder to get plain JSON-compatible data.
    serialized_report = json.loads(MyEncoder().encode(report))
    output_json(serialized_report)

    if html or browser:
        logger.debug("Generating HTML report...")
        render_html_report(serialized_report)

    if browser:
        # as_uri() gives a well-formed, percent-encoded file URL on every
        # platform; plain concatenation breaks on Windows paths and on paths
        # containing spaces or other reserved characters.
        report_path = Path.cwd() / "target" / "index.html"
        webbrowser.open(report_path.as_uri())

    exit_message(results)
def run(self, manifest: Manifest) -> List[MetricResult]:
    """Compute graph metrics for every node in the manifest graph.

    Returns:
        One ``MetricResult`` per graph node, built from the node object and
        its degree, in-degree, out-degree, closeness and betweenness
        centrality followed by its pagerank score.
    """
    graph = manifest.graph

    # One per-node score lookup per metric, kept in the positional order
    # that MetricResult receives them.
    score_tables = (
        nx.centrality.degree_centrality(graph),
        nx.centrality.in_degree_centrality(graph),
        nx.centrality.out_degree_centrality(graph),
        nx.centrality.closeness_centrality(graph),
        nx.centrality.betweenness_centrality(graph),
        nx.link_analysis.pagerank_alg.pagerank(graph),
    )

    return [
        MetricResult(
            manifest.get_node(node_id),
            *(table[node_id] for table in score_tables),
        )
        for node_id in graph.nodes
    ]
def test_metric_engine_returns_results(raw_manifest):
    """The metric engine yields three results for the ``raw_manifest`` fixture."""
    manifest_under_test = Manifest(raw_manifest)
    engine = MetricEngine()

    metric_results = engine.run(manifest_under_test)

    assert len(metric_results) == 3
def test_nodes_returns_all_nodes(raw_manifest):
    """``Manifest.nodes()`` yields the expected ids in the expected order."""
    expected_ids = ["a", "b", "c", "e", "s", "x"]

    actual_ids = [entry.id for entry in Manifest(raw_manifest).nodes()]

    assert actual_ids == expected_ids
def manifest() -> Manifest:
    """Build a manifest of models and sources with varied ``meta.owner`` values.

    Models cover view, table, incremental and ephemeral materializations;
    owners are "Joe", missing, empty or a single space. No entry has children
    in the child map and nothing is disabled.
    """

    def model(unique_id, materialized, meta):
        return {
            "unique_id": unique_id,
            "resource_type": "model",
            "config": {"materialized": materialized},
            "meta": meta,
        }

    def source(unique_id, meta):
        return {
            "unique_id": unique_id,
            "resource_type": "source",
            "meta": meta,
        }

    models = [
        model("physical_node_1", "view", {"owner": "Joe"}),
        model("physical_node_2", "table", {"owner": "Joe"}),
        model("physical_node_3", "incremental", {"owner": "Joe"}),
        model("ephemeral_node_1", "ephemeral", {"owner": "Joe"}),
        model("ephemeral_node_2", "ephemeral", {}),
        model("no_owner_physical_node_1", "incremental", {}),
        model("no_owner_physical_node_2", "incremental", {"owner": ""}),
        model("no_owner_physical_node_3", "incremental", {"owner": " "}),
    ]
    source_entries = [
        source("source_1", {"owner": "Joe"}),
        source("no_owner_source_1", {"owner": " "}),
        source("no_owner_source_2", {"owner": ""}),
        source("no_owner_source_3", {}),
    ]

    nodes = {entry["unique_id"]: entry for entry in models}
    sources = {entry["unique_id"]: entry for entry in source_entries}
    # Every model id, then every source id, each with its own empty child list.
    child_map = {unique_id: [] for unique_id in [*nodes, *sources]}

    return Manifest({
        "nodes": nodes,
        "child_map": child_map,
        "disabled": [],
        "sources": sources,
    })
def test_get_node_when_node_does_not_exist(raw_manifest):
    """``get_node`` raises ``KeyError`` for the id "foo"."""
    # Build the manifest outside the ``raises`` block so that only the lookup
    # itself is allowed to raise.
    subject = Manifest(raw_manifest)

    with pytest.raises(KeyError):
        subject.get_node("foo")
def no_owner_on_physical_models(
    manifest: Manifest,
) -> Tuple[List[Node], List[Node]]:
    """Split nodes by whether a db-relation node has a none-or-blank owner.

    Returns:
        ``(passes, failures)`` as lists. NOTE(review): assumes ``partition``
        puts items for which the predicate is falsy first -- confirm.
    """

    def is_unowned_db_relation(node):
        return node.is_db_relation and __is_none_or_blank(node.owner)

    passing, failing = partition(is_unowned_db_relation, manifest.nodes())
    return list(passing), list(failing)
def no_disabled_models(manifest: Manifest) -> Tuple[List[Node], List[Node]]:
    """Split the manifest's nodes by whether they are not enabled.

    Returns:
        ``(passes, failures)`` as lists. NOTE(review): assumes ``partition``
        puts items for which the predicate is falsy first -- confirm.
    """

    def is_disabled(node):
        return not node.is_enabled

    passing, failing = partition(is_disabled, manifest.nodes())
    return list(passing), list(failing)