def deserialize_graph(self):
    """Read the serialized graph file for this graph type from the
    project's target path and return a populated Linker."""
    graph_path = os.path.join(
        self.project['target-path'],
        'graph-{}.yml'.format(self.graph_type),
    )
    result = Linker()
    result.read_graph(graph_path)
    return result
def link_graph(self, linker: Linker, manifest: Manifest):
    """Register every source and node from the manifest in the linker,
    wiring node dependencies, then fail if the graph contains a cycle."""
    for src in manifest.sources.values():
        linker.add_node(src.unique_id)
    for graph_node in manifest.nodes.values():
        self.link_node(linker, graph_node, manifest)
    detected = linker.find_cycles()
    if detected:
        raise RuntimeError("Found a cycle: {}".format(detected))
def deserialize_graph(self):
    """Load the dependency graph file from the project's target path
    and return a Linker populated from it."""
    logger.info("Loading dependency graph file")
    graph_path = os.path.join(
        self.project['target-path'],
        dbt.compilation.graph_file_name,
    )
    result = Linker()
    result.read_graph(graph_path)
    return result
def compile_archives(self):
    """Compile every archive in the project, write its SQL, record it in
    a fresh Linker, write the 'archive' graph file, and return the list
    of archives."""
    graph = Linker()
    archives = self.get_archives(self.project)
    for item in archives:
        compiled_sql = item.compile()
        graph.update_node_data(tuple(item.fqn), item.serialize())
        self.__write(item.build_path(), compiled_sql)
    self.write_graph_file(graph, 'archive')
    return archives
def compile(self):
    """Load all projects into a manifest, write it out, link the flat
    graph, write the graph file, and print per-resource-type stats.

    Returns a tuple of (linked flat graph, Linker).
    """
    graph = Linker()
    projects = self.get_all_projects()
    manifest = dbt.loader.GraphLoader.load_all(self.project.cfg, projects)
    self.write_manifest_file(manifest)

    flat = manifest.to_flat_graph()
    self._check_resource_uniqueness(flat)
    linked = self.link_graph(graph, flat)

    # Tally resource types across both nodes and macros.
    counts = defaultdict(int)
    everything = itertools.chain(
        linked.get('nodes').items(),
        linked.get('macros').items(),
    )
    for _, entry in everything:
        counts[entry.get('resource_type')] += 1

    self.write_graph_file(graph)
    print_compile_stats(counts)
    return linked, graph
def compile(self):
    """Load all projects into a flat graph, resolve ref() calls, link
    the graph, write the graph file, and print per-resource-type stats.

    Returns a tuple of (linked flat graph, Linker).
    """
    graph = Linker()
    root = self.project.cfg
    projects = self.get_all_projects()

    flat = dbt.loader.GraphLoader.load_all(root, projects)
    flat = dbt.parser.process_refs(flat, root.get('name'))
    linked = self.link_graph(graph, flat)

    # Tally resource types across both nodes and macros.
    counts = defaultdict(int)
    everything = itertools.chain(
        linked.get('nodes').items(),
        linked.get('macros').items(),
    )
    for _, entry in everything:
        counts[entry.get('resource_type')] += 1

    self.write_graph_file(graph)
    print_compile_stats(counts)
    return linked, graph
def compile(self, dry=False):
    """Compile enabled models, schema tests, and (outside test runs)
    analyses; write the graph file.

    Returns a tuple of counts: (written models, written schema tests,
    written analyses).
    """
    graph = Linker()

    # Gather model sources from this project and every dependency.
    sources = self.model_sources(this_project=self.project)
    for dep_project in dependency_projects(self.project):
        sources.extend(
            self.model_sources(
                this_project=self.project, own_project=dep_project))

    enabled = [m for m in sources if m.is_enabled]
    compiled_models, written_models = self.compile_models(graph, enabled)

    # TODO : only compile schema tests for enabled models
    written_schema_tests = self.compile_schema_tests(graph)

    self.validate_models_unique(compiled_models)
    self.validate_models_unique(written_schema_tests)
    self.write_graph_file(graph)

    # Analyses are skipped for test runs.
    written_analyses = []
    if self.create_template.label != 'test':
        written_analyses = self.compile_analyses(graph, compiled_models)

    return len(written_models), len(written_schema_tests), len(
        written_analyses)
def compile(self):
    """Load macros and nodes for all projects into a flat graph, resolve
    ref() calls, link the graph, and print per-resource-type stats.

    :return: tuple of (linked flat graph dict, populated Linker).
    """
    linker = Linker()
    root_project = self.project.cfg
    all_projects = self.get_all_projects()

    all_macros = self.load_all_macros(root_project, all_projects)
    all_nodes = self.load_all_nodes(root_project, all_projects)
    flat_graph = {'nodes': all_nodes, 'macros': all_macros}
    flat_graph = dbt.parser.process_refs(flat_graph)
    linked_graph = self.link_graph(linker, flat_graph)

    # Tally resource types for nodes and macros in a single chained pass,
    # consistent with the sibling compile() implementations (previously
    # two separate, otherwise-identical loops).
    stats = defaultdict(int)
    for node_name, node in itertools.chain(
            linked_graph.get('nodes').items(),
            linked_graph.get('macros').items()):
        stats[node.get('resource_type')] += 1

    print_compile_stats(stats)
    return linked_graph, linker
def compile(self):
    """Load all projects into a manifest, validate it, warn about unused
    resource config paths, link the graph, write the graph file, and
    print per-resource-type stats.

    Returns a tuple of (Manifest, Linker).
    """
    graph = Linker()
    projects = self.get_all_projects()
    manifest = dbt.loader.GraphLoader.load_all(self.config, projects)
    self.write_manifest_file(manifest)

    self._check_resource_uniqueness(manifest)
    self.config.warn_for_unused_resource_config_paths(
        manifest.get_resource_fqns(), manifest.disabled)

    self.link_graph(graph, manifest)

    # Tally resource types across both nodes and macros.
    counts = defaultdict(int)
    for _, entry in itertools.chain(
            manifest.nodes.items(), manifest.macros.items()):
        counts[entry.resource_type] += 1

    self.write_graph_file(graph)
    print_compile_stats(counts)
    return manifest, graph
def compile(self, limit_to=None):
    """Compile the project's resources, restricted to the resource kinds
    named in *limit_to*.

    :param limit_to: iterable of resource-kind names ('models', 'tests',
        'analyses', 'archives') selecting what to compile, or None.
        NOTE(review): when limit_to is None every branch below is
        skipped, so nothing at all is compiled -- confirm this is
        intended rather than "None means compile everything".
    :return: dict mapping resource-kind labels to counts of written
        items.
    """
    linker = Linker()
    all_models = self.get_models()
    # Collect macros from this project and from each dependency project.
    all_macros = self.get_macros(this_project=self.project)
    for project in dependency_projects(self.project):
        all_macros.extend(
            self.get_macros(this_project=self.project, own_project=project))
    self.macro_generator = self.generate_macros(all_macros)
    if limit_to is not None and 'models' in limit_to:
        # Skip disabled models and empty model files.
        enabled_models = [
            model for model in all_models
            if model.is_enabled and not model.is_empty
        ]
    else:
        enabled_models = []
    compiled_models, written_models = self.compile_models(
        linker, enabled_models)
    # TODO : only compile schema tests for enabled models
    if limit_to is not None and 'tests' in limit_to:
        written_schema_tests = self.compile_schema_tests(linker)
        written_data_tests = self.compile_data_tests(linker)
    else:
        written_schema_tests = []
        written_data_tests = []
    self.validate_models_unique(compiled_models)
    self.validate_models_unique(written_schema_tests)
    self.write_graph_file(linker, self.create_template.label)
    # Analyses are never compiled during test or archive runs.
    if limit_to is not None and 'analyses' in limit_to and \
            self.create_template.label not in ['test', 'archive']:
        written_analyses = self.compile_analyses(linker, compiled_models)
    else:
        written_analyses = []
    if limit_to is not None and 'archives' in limit_to:
        compiled_archives = self.compile_archives()
    else:
        compiled_archives = []
    return {
        "models": len(written_models),
        "schema tests": len(written_schema_tests),
        "data tests": len(written_data_tests),
        "archives": len(compiled_archives),
        "analyses": len(written_analyses)
    }
def compile(self, manifest: Manifest, write=True):
    """Link the manifest into a new Linker, optionally write the graph
    file, print compile stats, and return the Linker."""
    graph = Linker()
    self.link_graph(graph, manifest)
    compile_stats = _generate_stats(manifest)
    if write:
        self.write_graph_file(graph, manifest)
    print_compile_stats(compile_stats)
    return graph
def compile(self, manifest, write=True):
    """Link the manifest into a new Linker, tally resource types across
    nodes and macros, optionally write the graph file, print compile
    stats, and return the Linker."""
    graph = Linker()
    self.link_graph(graph, manifest)

    counts = defaultdict(int)
    everything = itertools.chain(
        manifest.nodes.items(), manifest.macros.items())
    for _, entry in everything:
        counts[entry.resource_type] += 1

    if write:
        self.write_graph_file(graph, manifest)
    print_compile_stats(counts)
    return graph
def compile(self):
    """Compile models, then schema tests, data tests, archives, and
    analyses; validate uniqueness; write the graph file.

    Returns a dict mapping resource-kind labels to counts of compiled
    items.
    """
    graph = Linker()
    models = self.get_models()

    # Collect macros from this project and from each dependency project.
    macros = self.get_macros(this_project=self.project)
    for dep_project in dbt.utils.dependency_projects(self.project):
        macros.extend(
            self.get_macros(
                this_project=self.project, own_project=dep_project))
    self.macro_generator = self.generate_macros(macros)

    # Skip disabled models and empty model files.
    enabled = [m for m in models if m.is_enabled and not m.is_empty]
    compiled_models, written_models = self.compile_models(graph, enabled)

    # Each remaining resource kind is compiled by its own function,
    # every one taking (linker, compiled_models).
    compilers = {
        'schema tests': self.compile_schema_tests,
        'data tests': self.compile_data_tests,
        'archives': self.compile_archives,
        'analyses': self.compile_analyses
    }
    compiled = {'models': written_models}
    for kind, compile_fn in compilers.items():
        compiled[kind] = compile_fn(graph, compiled_models)

    # Duplicate models are errors; duplicate tests only warn.
    self.validate_models_unique(
        compiled['models'], dbt.utils.compiler_error)
    self.validate_models_unique(
        compiled['data tests'], dbt.utils.compiler_warning)
    self.validate_models_unique(
        compiled['schema tests'], dbt.utils.compiler_warning)

    self.write_graph_file(graph)
    return {kind: len(items) for kind, items in compiled.items()}
def link_node(self, linker: Linker, node: NonSourceNode, manifest: Manifest):
    """Add the node to the linker and record an edge for each of its
    dependencies, looking each one up among the manifest's nodes and
    sources; report any dependency that is in neither."""
    linker.add_node(node.unique_id)
    for dep_id in node.depends_on_nodes:
        if dep_id in manifest.nodes:
            target = manifest.nodes[dep_id]
        elif dep_id in manifest.sources:
            target = manifest.sources[dep_id]
        else:
            dbt.exceptions.dependency_not_found(node, dep_id)
            continue
        linker.dependency(node.unique_id, target.unique_id)
def compile(self, dry=False):
    """Compile enabled models, schema tests, data tests, archives, and
    (outside test/archive runs) analyses; write the graph file.

    Returns a dict mapping resource-kind labels to counts of written
    items.
    """
    graph = Linker()
    models = self.get_models()

    # Collect macros from this project and from each dependency project.
    macros = self.get_macros(this_project=self.project)
    for dep_project in dependency_projects(self.project):
        macros.extend(
            self.get_macros(
                this_project=self.project, own_project=dep_project))
    self.macro_generator = self.generate_macros(macros)

    enabled = [m for m in models if m.is_enabled]
    compiled_models, written_models = self.compile_models(graph, enabled)

    # TODO : only compile schema tests for enabled models
    written_schema_tests = self.compile_schema_tests(graph)
    written_data_tests = self.compile_data_tests(graph)

    self.validate_models_unique(compiled_models)
    self.validate_models_unique(written_schema_tests)
    self.write_graph_file(graph, self.create_template.label)

    # Analyses are skipped for test and archive runs.
    written_analyses = []
    if self.create_template.label not in ['test', 'archive']:
        written_analyses = self.compile_analyses(graph, compiled_models)

    compiled_archives = self.compile_archives()

    return {
        "models": len(written_models),
        "schema tests": len(written_schema_tests),
        "data tests": len(written_data_tests),
        "archives": len(compiled_archives),
        "analyses": len(written_analyses)
    }
def write_graph_file(self, linker: Linker, manifest: Manifest):
    """Write the linker's graph to the target path, unless JSON output
    is disabled via dbt.flags.WRITE_JSON."""
    if not dbt.flags.WRITE_JSON:
        return
    destination = os.path.join(self.config.target_path, graph_file_name)
    linker.write_graph(destination, manifest)