def get_selected(self, include, exclude, resource_types, tags, required):
    """Return the set of node names selected by the given criteria.

    Selection happens in two passes: include/exclude spec matching over
    the subgraph of valid nodes, then a per-node filter on resource
    type, tags, and the ``required`` flag (via ``self._is_match``).
    """
    graph = self.linker.graph

    # No include spec means "select everything".
    include = coalesce(include, ['*'])
    exclude = coalesce(exclude, [])
    tags = coalesce(tags, [])

    # Restrict spec matching to the runnable portion of the graph.
    runnable = self.get_valid_nodes(graph)
    spec_matches = self.select_nodes(
        graph.subgraph(runnable), include, exclude
    )

    return {
        name for name in spec_matches
        if self._is_match(name, resource_types, tags, required)
    }
def get_selected(self, include, exclude, resource_types, tags, required):
    """Return the set of node names selected by the given criteria.

    When ``include`` is omitted, both fqn-style and source-style
    selectors default to matching everything. Spec matching runs over
    the subgraph of valid nodes; the survivors are then filtered on
    resource type, tags, and ``required`` via ``self._is_match``.
    """
    graph = self.linker.graph

    include = coalesce(include, ['fqn:*', 'source:*'])
    exclude = coalesce(exclude, [])
    tags = coalesce(tags, [])

    valid = self.get_valid_nodes(graph)
    candidates = self.select_nodes(graph.subgraph(valid), include, exclude)

    matched = set()
    for candidate in candidates:
        if not self._is_match(candidate, resource_types, tags, required):
            continue
        matched.add(candidate)
    return matched
def get_selected(self, include, exclude, resource_types, tags, required):
    """Return the set of node names selected by the given criteria.

    Spec matching runs over the subgraph of ``self.full_graph``
    restricted to graph members (as determined by
    ``self._is_graph_member``); the survivors are then filtered on
    resource type, tags, and ``required`` via ``self._is_match``.
    """
    include = coalesce(include, ['fqn:*', 'source:*'])
    exclude = coalesce(exclude, [])
    tags = coalesce(tags, [])

    # Only nodes recognized as graph members are eligible for selection.
    members = set(filter(self._is_graph_member, self.full_graph.nodes()))
    subgraph = self.full_graph.subgraph(members)

    chosen = self.select_nodes(subgraph, include, exclude)
    return {
        name for name in chosen
        if self._is_match(name, resource_types, tags, required)
    }
def get_selected(self, include, exclude, resource_types, tags):
    """Return the set of node names selected by the given criteria.

    Spec matching runs over the subgraph of valid nodes. A survivor is
    kept when its resource type is in ``resource_types`` and either the
    tag filter is empty or the node shares at least one tag with it.
    """
    graph = self.linker.graph

    include = coalesce(include, ['*'])
    exclude = coalesce(exclude, [])
    tags = coalesce(tags, [])

    valid = self.get_valid_nodes(graph)
    candidates = self.select_nodes(graph.subgraph(valid), include, exclude)

    # Hoisted out of the loop; identical on every iteration.
    wanted_tags = set(tags)

    selected = set()
    for unique_id in candidates:
        node = self.manifest.nodes[unique_id]
        if node.resource_type not in resource_types:
            continue
        # An empty tag filter matches every node; otherwise the node
        # must share at least one tag with the filter.
        if wanted_tags and not (set(node.tags) & wanted_tags):
            continue
        selected.add(unique_id)
    return selected
def parse_node(self, node, node_path, package_project_config, tags=None,
               fqn_extra=None, fqn=None, agate_table=None,
               archive_config=None, column_name=None):
    """Parse a node, given an UnparsedNode and any other required
    information.

    agate_table should be set if the node came from a seed file.
    archive_config should be set if the node is an Archive node.
    column_name should be set if the node is a Test node associated
    with a particular column.

    Returns the validated ParsedNode.
    """
    logger.debug("Parsing {}".format(node_path))

    tags = coalesce(tags, [])
    fqn_extra = coalesce(fqn_extra, [])

    if fqn is None:
        fqn = self.get_fqn(node.path, package_project_config, fqn_extra)

    config = SourceConfig(self.root_project_config,
                          package_project_config, fqn, node.resource_type)

    # BUGFIX: `config` used to be passed a second time in the
    # package_project_config slot of this call. The helper currently
    # ignores that parameter, so behavior is unchanged, but pass the
    # correct object so the call matches the helper's signature.
    parsed_dict = self._build_intermediate_node_dict(
        config, node.serialize(), node_path, package_project_config,
        tags, fqn, agate_table, archive_config, column_name)

    parsed_node = ParsedNode(**parsed_dict)

    # Render raw_sql via jinja and finalize schema/alias/config.
    self._render_with_context(parsed_node, config)
    self._update_parsed_node_info(parsed_node, config)

    parsed_node.validate()
    return parsed_node
def parse_node(self, node, node_path, package_project_config, tags=None,
               fqn_extra=None, fqn=None, agate_table=None,
               archive_config=None, column_name=None):
    """Parse a node, given an UnparsedNode and any other required
    information.

    agate_table should be set if the node came from a seed file.
    archive_config should be set if the node is an Archive node.
    column_name should be set if the node is a Test node associated
    with a particular column.

    Returns the validated ParsedNode.
    """
    logger.debug("Parsing {}".format(node_path))

    tags = coalesce(tags, [])
    fqn_extra = coalesce(fqn_extra, [])

    if fqn is None:
        fqn = self.get_fqn(node.path, package_project_config, fqn_extra)

    config = SourceConfig(
        self.root_project_config, package_project_config, fqn,
        node.resource_type)

    # BUGFIX: `config` used to be passed a second time in the
    # package_project_config slot of this call. The helper currently
    # ignores that parameter, so behavior is unchanged, but pass the
    # correct object so the call matches the helper's signature.
    parsed_dict = self._build_intermediate_node_dict(
        config, node.serialize(), node_path, package_project_config,
        tags, fqn, agate_table, archive_config, column_name
    )

    parsed_node = ParsedNode(**parsed_dict)

    # Render raw_sql via jinja and finalize schema/alias/config.
    self._render_with_context(parsed_node, config)
    self._update_parsed_node_info(parsed_node, config)

    parsed_node.validate()
    return parsed_node
def _build_intermediate_node_dict(self, config, node_dict, node_path, package_project_config, tags, fqn, agate_table, snapshot_config, column_name): """Update the unparsed node dictionary and build the basis for an intermediate ParsedNode that will be passed into the renderer """ # because this takes and returns dicts, subclasses can safely override # this and mutate its results using super() both before and after. if agate_table is not None: node_dict['agate_table'] = agate_table # Set this temporarily. Not the full config yet (as config() hasn't # been called from jinja yet). But the Var() call below needs info # about project level configs b/c they might contain refs. # TODO: Restructure this? config_dict = coalesce(snapshot_config, {}) config_dict.update(config.config) empty = ( 'raw_sql' in node_dict and len(node_dict['raw_sql'].strip()) == 0 ) node_dict.update({ 'refs': [], 'sources': [], 'depends_on': { 'nodes': [], 'macros': [], }, 'unique_id': node_path, 'empty': empty, 'fqn': fqn, 'tags': tags, 'config': config_dict, # Set these temporarily so get_rendered() has access to a schema, # database, and alias. 'schema': self.default_schema, 'database': self.default_database, 'alias': node_dict.get('name'), }) # if there's a column, it should end up part of the ParsedNode if column_name is not None: node_dict['column_name'] = column_name return node_dict
def _build_intermediate_node_dict(self, config, node_dict, node_path, package_project_config, tags, fqn, agate_table, archive_config, column_name): """Update the unparsed node dictionary and build the basis for an intermediate ParsedNode that will be passed into the renderer """ # because this takes and returns dicts, subclasses can safely override # this and mutate its results using super() both before and after. if agate_table is not None: node_dict['agate_table'] = agate_table # Set this temporarily. Not the full config yet (as config() hasn't # been called from jinja yet). But the Var() call below needs info # about project level configs b/c they might contain refs. # TODO: Restructure this? config_dict = coalesce(archive_config, {}) config_dict.update(config.config) empty = ( 'raw_sql' in node_dict and len(node_dict['raw_sql'].strip()) == 0 ) node_dict.update({ 'refs': [], 'sources': [], 'depends_on': { 'nodes': [], 'macros': [], }, 'unique_id': node_path, 'empty': empty, 'fqn': fqn, 'tags': tags, 'config': config_dict, # Set these temporarily so get_rendered() has access to a schema, # database, and alias. 'schema': self.default_schema, 'database': self.default_database, 'alias': node_dict.get('name'), }) # if there's a column, it should end up part of the ParsedNode if column_name is not None: node_dict['column_name'] = column_name return node_dict
def get_nodes_from_multiple_specs(
    self, graph, specs, nodes=None, check_existence=False, exclude=False
):
    """Accumulate the nodes matched by each spec in ``specs``.

    Starts from ``nodes`` when given (that set is updated in place and
    also returned), otherwise from a fresh empty set. With
    ``exclude=True`` each spec's matches are removed instead of added.
    When ``check_existence`` is set, specs that match nothing are
    reported via alert_non_existence.
    """
    result: Set[str] = coalesce(nodes, set())
    for raw_spec in split_specs(specs):
        matches = self.get_nodes_from_intersection_spec(graph, raw_spec)
        if check_existence:
            alert_non_existence(raw_spec, matches)
        if exclude:
            result.difference_update(matches)
        else:
            result.update(matches)
    return result
def select_nodes(self, graph, raw_include_specs, raw_exclude_specs):
    """Apply include specs, then subtract exclude specs, over ``graph``.

    An empty/missing include list falls back to selecting everything
    ('fqn:*' and 'source:*'); only explicitly-provided include specs
    are checked for existence.
    """
    raw_exclude_specs = coalesce(raw_exclude_specs, [])

    if raw_include_specs:
        check_existence = True
    else:
        raw_include_specs = ['fqn:*', 'source:*']
        check_existence = False

    included = self.get_nodes_from_multiple_specs(
        graph, raw_include_specs, check_existence=check_existence
    )
    return self.get_nodes_from_multiple_specs(
        graph, raw_exclude_specs, nodes=included, exclude=True
    )
def parse_node(cls, node, node_path, root_project_config,
               package_project_config, all_projects, tags=None,
               fqn_extra=None, fqn=None, macros=None, agate_table=None,
               archive_config=None):
    """Parse a node, given an UnparsedNode and any other required
    information.

    agate_table should be set if the node came from a seed file.
    archive_config should be set if the node is an Archive node.

    Serializes the node to a dict, renders its raw_sql through jinja
    (capturing refs/macros/config), resolves schema and alias, attaches
    hooks, and returns a ParsedNode.
    """
    logger.debug("Parsing {}".format(node_path))

    # Work on the serialized dict form of the node from here on.
    node = node.serialize()

    if agate_table is not None:
        node['agate_table'] = agate_table

    tags = coalesce(tags, [])
    fqn_extra = coalesce(fqn_extra, [])
    macros = coalesce(macros, {})

    # Dependency lists are filled in as raw_sql is rendered below.
    node.update({
        'refs': [],
        'depends_on': {
            'nodes': [],
            'macros': [],
        }
    })

    if fqn is None:
        fqn = cls.get_fqn(node.get('path'), package_project_config,
                          fqn_extra)

    config = dbt.model.SourceConfig(root_project_config,
                                    package_project_config, fqn,
                                    node['resource_type'])

    node['unique_id'] = node_path
    node['empty'] = ('raw_sql' in node and
                     len(node['raw_sql'].strip()) == 0)
    node['fqn'] = fqn
    node['tags'] = tags
    # Stashed so the jinja config() call can reach it; deleted again
    # before the ParsedNode is constructed at the bottom.
    node['config_reference'] = config

    # Set this temporarily. Not the full config yet (as config() hasn't
    # been called from jinja yet). But the Var() call below needs info
    # about project level configs b/c they might contain refs.
    # TODO: Restructure this?
    config_dict = coalesce(archive_config, {})
    config_dict.update(config.config)
    node['config'] = config_dict

    # Set this temporarily so get_rendered() has access to a schema & alias
    profile = dbt.utils.get_profile_from_project(root_project_config)
    default_schema = profile.get('schema', 'public')
    node['schema'] = default_schema
    default_alias = node.get('name')
    node['alias'] = default_alias

    context = dbt.context.parser.generate(node, root_project_config,
                                          {"macros": macros})

    # Render raw_sql; capture_macros=True records macro usage on `node`.
    dbt.clients.jinja.get_rendered(node.get('raw_sql'), context, node,
                                   capture_macros=True)

    # Clean up any open conns opened by adapter functions that hit the db
    db_wrapper = context['adapter']
    adapter = db_wrapper.adapter
    profile = db_wrapper.profile
    adapter.release_connection(profile, node.get('name'))

    # Special macro defined in the global project
    schema_override = config.config.get('schema')
    get_schema = context.get('generate_schema_name',
                             lambda x: default_schema)
    node['schema'] = get_schema(schema_override)
    node['alias'] = config.config.get('alias', default_alias)

    # Overwrite node config with values collected during rendering.
    config_dict = node.get('config', {})
    config_dict.update(config.config)
    node['config'] = config_dict

    for hook_type in dbt.hooks.ModelHookType.Both:
        node['config'][hook_type] = dbt.hooks.get_hooks(node, hook_type)

    del node['config_reference']

    return ParsedNode(**node)
def parse_node(cls, node, node_path, root_project_config,
               package_project_config, all_projects, tags=None,
               fqn_extra=None, fqn=None, macros=None, agate_table=None,
               archive_config=None, column_name=None):
    """Parse a node, given an UnparsedNode and any other required
    information.

    agate_table should be set if the node came from a seed file.
    archive_config should be set if the node is an Archive node.
    column_name should be set if the node is a Test node associated
    with a particular column.

    Serializes the node to a dict, builds a ParsedNode, renders its
    raw_sql through jinja, resolves schema and alias, attaches hooks,
    validates, and returns the ParsedNode.
    """
    logger.debug("Parsing {}".format(node_path))

    # Work on the serialized dict form of the node from here on.
    node = node.serialize()

    if agate_table is not None:
        node['agate_table'] = agate_table

    tags = coalesce(tags, [])
    fqn_extra = coalesce(fqn_extra, [])
    macros = coalesce(macros, {})

    # Dependency lists are filled in as raw_sql is rendered below.
    node.update({
        'refs': [],
        'depends_on': {
            'nodes': [],
            'macros': [],
        }
    })

    if fqn is None:
        fqn = cls.get_fqn(node.get('path'), package_project_config,
                          fqn_extra)

    config = dbt.model.SourceConfig(root_project_config,
                                    package_project_config, fqn,
                                    node['resource_type'])

    node['unique_id'] = node_path
    node['empty'] = ('raw_sql' in node and
                     len(node['raw_sql'].strip()) == 0)
    node['fqn'] = fqn
    node['tags'] = tags

    # Set this temporarily. Not the full config yet (as config() hasn't
    # been called from jinja yet). But the Var() call below needs info
    # about project level configs b/c they might contain refs.
    # TODO: Restructure this?
    config_dict = coalesce(archive_config, {})
    config_dict.update(config.config)
    node['config'] = config_dict

    # Set this temporarily so get_rendered() has access to a schema & alias
    default_schema = getattr(root_project_config.credentials, 'schema',
                             'public')
    node['schema'] = default_schema
    default_alias = node.get('name')
    node['alias'] = default_alias

    # if there's a column, it should end up part of the ParsedNode
    if column_name is not None:
        node['column_name'] = column_name

    # make a manifest with just the macros to get the context
    manifest = Manifest(macros=macros, nodes={}, docs={},
                        generated_at=dbt.utils.timestring())

    parsed_node = ParsedNode(**node)

    context = dbt.context.parser.generate(parsed_node,
                                          root_project_config,
                                          manifest, config)

    # Render raw_sql; capture_macros=True records macro usage on the
    # shallow dict view of parsed_node.
    dbt.clients.jinja.get_rendered(parsed_node.raw_sql, context,
                                   parsed_node.to_shallow_dict(),
                                   capture_macros=True)

    # Clean up any open conns opened by adapter functions that hit the db
    db_wrapper = context['adapter']
    adapter = db_wrapper.adapter
    # NOTE(review): runtime_config is never used below -- looks like a
    # refactor leftover; confirm before removing.
    runtime_config = db_wrapper.config
    adapter.release_connection(parsed_node.name)

    # Special macro defined in the global project
    schema_override = config.config.get('schema')
    get_schema = context.get('generate_schema_name',
                             lambda x: default_schema)
    parsed_node.schema = get_schema(schema_override)
    parsed_node.alias = config.config.get('alias', default_alias)

    # Overwrite node config with values collected during rendering.
    config_dict = parsed_node.get('config', {})
    config_dict.update(config.config)
    parsed_node.config = config_dict

    for hook_type in dbt.hooks.ModelHookType.Both:
        parsed_node.config[hook_type] = dbt.hooks.get_hooks(
            parsed_node, hook_type)

    parsed_node.validate()

    return parsed_node
def parse_node(node, node_path, root_project_config,
               package_project_config, all_projects, tags=None,
               fqn_extra=None, fqn=None, macros=None):
    """Parse an unparsed node dict and return the enriched dict.

    Renders the node's raw_sql through jinja (capturing refs/macros/
    config), resolves its schema, attaches hooks, and returns the
    updated dict. Unlike later class-based versions, this takes and
    returns plain dicts; ``tags`` defaults to a set here.
    """
    logger.debug("Parsing {}".format(node_path))

    # Never mutate the caller's dict.
    node = copy.deepcopy(node)

    tags = coalesce(tags, set())
    fqn_extra = coalesce(fqn_extra, [])
    macros = coalesce(macros, {})

    # Dependency lists are filled in as raw_sql is rendered below.
    node.update({
        'refs': [],
        'depends_on': {
            'nodes': [],
            'macros': [],
        }
    })

    if fqn is None:
        fqn = get_fqn(node.get('path'), package_project_config, fqn_extra)

    config = dbt.model.SourceConfig(root_project_config,
                                    package_project_config, fqn)

    node['unique_id'] = node_path
    node['empty'] = (len(node.get('raw_sql').strip()) == 0)
    node['fqn'] = fqn
    node['tags'] = tags
    # Stashed so the jinja config() call can reach it; deleted before
    # return.
    node['config_reference'] = config

    # Set this temporarily. Not the full config yet (as config() hasn't been
    # called from jinja yet). But the Var() call below needs info about
    # project level configs b/c they might contain refs.
    # TODO: Restructure this?
    config_dict = node.get('config', {})
    config_dict.update(config.config)
    node['config'] = config_dict

    # Set this temporarily so get_rendered() below has access to a schema
    profile = dbt.utils.get_profile_from_project(root_project_config)
    default_schema = profile.get('schema', 'public')
    node['schema'] = default_schema

    context = dbt.context.parser.generate(node, root_project_config,
                                          {"macros": macros})

    # Render raw_sql; capture_macros=True records macro usage on `node`.
    dbt.clients.jinja.get_rendered(node.get('raw_sql'), context, node,
                                   capture_macros=True)

    # Clean up any open connections opened by adapter functions that hit
    # the db
    db_wrapper = context['adapter']
    adapter = db_wrapper.adapter
    profile = db_wrapper.profile
    adapter.release_connection(profile, node.get('name'))

    # Special macro defined in the global project
    schema_override = config.config.get('schema')
    get_schema = context.get('generate_schema_name',
                             lambda x: default_schema)
    node['schema'] = get_schema(schema_override)

    # Overwrite node config with values collected during rendering.
    config_dict = node.get('config', {})
    config_dict.update(config.config)
    node['config'] = config_dict

    for hook_type in dbt.hooks.ModelHookType.Both:
        node['config'][hook_type] = dbt.hooks.get_hooks(node, hook_type)

    del node['config_reference']

    return node