def _pluralize(string: Union[str, NodeType]) -> str:
    """Return the plural form of a node type or of an arbitrary string.

    A value accepted by ``NodeType(...)`` is pluralized through the resulting
    member's ``pluralize()``; any value rejected with ``ValueError`` simply
    gets an ``s`` appended.
    """
    try:
        node_type = NodeType(string)
    except ValueError:
        # not a known node type: fall back to naive pluralization
        return f'{string}s'
    return node_type.pluralize()
def merge_from_artifact(
    self,
    adapter,
    other: 'WritableManifest',
    selected: AbstractSet[UniqueID],
) -> None:
    """Replace unselected nodes with their counterparts from ``other``.

    A node from ``other`` replaces the node stored here under the same
    unique ID only when all of the following hold:

    * this manifest already has a (truthy) node for that unique ID,
    * the incoming node's resource type is refable,
    * the incoming node is not ephemeral,
    * the unique ID is not in ``selected``,
    * ``adapter.get_relation`` finds nothing for the current node's
      database/schema/identifier.

    Replacements are stored as ``node.replace(deferred=True)``.
    """
    refable_types = set(NodeType.refable())
    merged = set()

    for unique_id, candidate in other.nodes.items():
        existing = self.nodes.get(unique_id)
        if not existing:
            continue
        if candidate.resource_type not in refable_types:
            continue
        if candidate.is_ephemeral:
            continue
        if unique_id in selected:
            continue
        # checked last so the adapter is only queried for real candidates
        if adapter.get_relation(
            existing.database, existing.schema, existing.identifier
        ):
            continue
        merged.add(unique_id)
        self.nodes[unique_id] = candidate.replace(deferred=True)

    # log up to 5 items
    sample = list(islice(merged, 5))
    logger.debug(
        f'Merged {len(merged)} items from state (sample: {sample})'
    )
def _check_resource_uniqueness(manifest):
    """Check refable nodes for duplicate names and duplicate aliases.

    Walks every node in ``manifest.nodes`` whose resource type is refable.
    A repeated ``name`` is reported through
    ``dbt.exceptions.raise_duplicate_resource_name`` and a repeated
    ``"<schema>.<alias>"`` through ``dbt.exceptions.raise_ambiguous_alias``.
    """
    refable_types = NodeType.refable()
    seen_names = {}
    seen_aliases = {}

    for node in manifest.nodes.values():
        if node.resource_type not in refable_types:
            continue

        name = node.name
        alias = f"{node.schema}.{node.alias}"

        name_clash = seen_names.get(name)
        if name_clash is not None:
            dbt.exceptions.raise_duplicate_resource_name(name_clash, node)

        alias_clash = seen_aliases.get(alias)
        if alias_clash is not None:
            dbt.exceptions.raise_ambiguous_alias(alias_clash, node)

        seen_names[name] = node
        seen_aliases[alias] = node
def parse_file(self, block: FileBlock) -> None:
    """Parse one YAML file block and feed its test blocks to ``parse_tests``.

    The file is loaded, registered with ``self.results`` and, when it has
    content, rendered with ``self.raw_renderer``. Each documentable node type
    then gets a parser for its pluralized key, and every block that parser
    yields is passed to ``self.parse_tests``.

    Raises:
        CompilationException: if rendering fails; the message names the file
            and the project and chains the original error.
    """
    dct = self._yaml_from_file(block.file)
    # mark the file as seen, even if there are no macros in it
    self.results.get_file(block.file)
    if not dct:
        return

    try:
        dct = self.raw_renderer.render_data(dct)
    except CompilationException as exc:
        raise CompilationException(
            f'Failed to render {block.path.original_file_path} from '
            f'project {self.project.project_name}: {exc}') from exc

    yaml_block = YamlBlock.from_file_block(block, dct)
    self._parse_format_version(yaml_block)

    parser: YamlDocsReader
    for key in NodeType.documentable():
        plural = key.pluralize()
        # choose the parser class first, then build it with shared arguments
        if key == NodeType.Source:
            parser_cls = SourceParser
        elif key == NodeType.Macro:
            parser_cls = MacroPatchParser
        elif key == NodeType.Analysis:
            parser_cls = AnalysisPatchParser
        else:
            parser_cls = TestablePatchParser
        parser = parser_cls(self, yaml_block, plural)
        for test_block in parser.parse():
            self.parse_tests(test_block)
def build_query(self):
    """Build the selection query dict from the parsed arguments.

    ``include`` and ``exclude`` come from ``self.args``; the resource types
    are the executable node types and ``tags`` is an empty list.
    """
    return dict(
        include=self.args.models,
        exclude=self.args.exclude,
        resource_types=NodeType.executable(),
        tags=[],
    )
def _check_resource_uniqueness(
    manifest: Manifest,
    config: RuntimeConfig,
) -> None:
    """Check refable nodes for duplicate names and duplicate relation names.

    For every refable node in ``manifest.nodes`` the adapter's relation class
    (looked up from ``config.credentials.type``) renders the node's full
    relation name. A repeated ``name`` is reported through
    ``dbt.exceptions.raise_duplicate_resource_name``; a repeated relation
    name through ``dbt.exceptions.raise_ambiguous_alias``.
    """
    refable_types = NodeType.refable()
    seen_names: Dict[str, ManifestNode] = {}
    seen_relations: Dict[str, ManifestNode] = {}

    for node in manifest.nodes.values():
        if node.resource_type not in refable_types:
            continue
        # appease mypy - sources aren't refable!
        assert not isinstance(node, ParsedSourceDefinition)

        name = node.name
        # the full node name is really defined by the adapter's relation
        relation_cls = get_relation_class_by_name(config.credentials.type)
        full_node_name = str(
            relation_cls.create_from(config=config, node=node)
        )

        name_clash = seen_names.get(name)
        if name_clash is not None:
            dbt.exceptions.raise_duplicate_resource_name(name_clash, node)

        relation_clash = seen_relations.get(full_node_name)
        if relation_clash is not None:
            dbt.exceptions.raise_ambiguous_alias(
                relation_clash, node, full_node_name
            )

        seen_names[name] = node
        seen_relations[full_node_name] = node
def _get_relation_name(self, node: ParsedNode):
    """Return the node's relation name as a string, or ``None``.

    ``None`` is returned for nodes whose resource type is not refable and for
    ephemeral models. Otherwise the adapter's ``Relation`` class builds the
    relation from ``self.config`` and the node.
    """
    if node.resource_type not in NodeType.refable():
        return None
    if node.is_ephemeral_model:
        return None
    relation_cls = get_adapter(self.config).Relation
    return str(relation_cls.create_from(self.config, node))
def find_disabled_by_name(
    self, name: str, package: Optional[str] = None
) -> Optional[ManifestNode]:
    """Search ``self.disabled`` for a refable node by name and package.

    Returns whatever ``NameSearcher.search`` returns for the disabled nodes.
    """
    refable_types = NodeType.refable()
    return NameSearcher(name, package, refable_types).search(self.disabled)
class SchemaYamlRenderer(BaseRenderer):
    """Renderer that reports which schema YAML keypaths should be rendered.

    ``tests`` and ``description`` entries under documentable sections are
    reported as not renderable by ``should_render_keypath``.
    """

    # pluralized names of every documentable node type (top-level YAML keys)
    DOCUMENTABLE_NODES = frozenset(
        n.pluralize() for n in NodeType.documentable()
    )

    @property
    def name(self):
        return 'Rendering yaml'

    def _is_norender_key(self, keypath: Keypath) -> bool:
        """Return True if the keypath points at tests or a description.

        The shapes being matched look like::

            models:
                - name: blah
                - description: blah
                  tests: ...
                - columns:
                    - name:
                    - description: blah
                      tests: ...

        Concretely: element 1 is ``tests``/``description``, or element 1 is
        ``columns`` and element 3 is ``tests``/``description``. Those values
        aren't rendered.
        """
        unrendered = ('tests', 'description')
        depth = len(keypath)
        if depth >= 2 and keypath[1] in unrendered:
            return True
        if depth < 4 or keypath[1] != 'columns':
            return False
        return keypath[3] in unrendered

    # don't render descriptions or test keyword arguments
    def should_render_keypath(self, keypath: Keypath) -> bool:
        """Return False for keypaths whose values must stay unrendered.

        Keypaths shorter than three elements, and keypaths whose first
        element is not a documentable section, are always rendered.
        """
        if len(keypath) < 2:
            return True
        section = keypath[0]
        if section not in self.DOCUMENTABLE_NODES:
            return True
        if len(keypath) < 3:
            return True

        field = keypath[2]
        if section == NodeType.Source.pluralize():
            if field == 'description':
                return False
            if field == 'tables':
                return not self._is_norender_key(keypath[3:])
            return True

        if section == NodeType.Macro.pluralize():
            if field == 'arguments':
                return not self._is_norender_key(keypath[3:])
            return not self._is_norender_key(keypath[1:])

        # any other documentable section
        return not self._is_norender_key(keypath[1:])
def _get_cache_schemas(self, manifest: Manifest) -> Set[BaseRelation]:
    """Get the set of schema relations that the cache logic needs to
    populate.

    Each relation is built from an executable node and stripped of its
    identifier; non-executable nodes are ignored.
    """
    schemas: Set[BaseRelation] = set()
    for node in manifest.nodes.values():
        # the cache only cares about executable nodes
        if node.resource_type not in NodeType.executable():
            continue
        relation = self.Relation.create_from(self.config, node)
        schemas.add(relation.without_identifier())
    return schemas
def get_node_selector(self) -> ResourceTypeSelector:
    """Return a selector restricted to executable resource types.

    Raises:
        InternalException: if either the manifest or the graph is unset.
    """
    not_ready = self.manifest is None or self.graph is None
    if not_ready:
        raise InternalException(
            'manifest and graph must be set to get perform node selection'
        )
    selector_kwargs = dict(
        graph=self.graph,
        manifest=self.manifest,
        previous_state=self.previous_state,
        resource_types=NodeType.executable(),
    )
    return ResourceTypeSelector(**selector_kwargs)
def search(self, included_nodes: Set[UniqueId],
           selector: str) -> Iterator[UniqueId]:
    """Yield the included nodes whose resource type matches ``selector``.

    Raises:
        RuntimeException: if ``selector`` is not a valid ``NodeType`` value.
            As this is a generator, the error surfaces on first iteration.
    """
    try:
        wanted_type = NodeType(selector)
    except ValueError as exc:
        raise RuntimeException(
            f'Invalid resource_type selector "{selector}"') from exc

    yield from (
        unique_id
        for unique_id, real_node in self.parsed_nodes(included_nodes)
        if real_node.resource_type == wanted_type
    )
def add_nodes(self, new_nodes: Mapping[str, ManifestNode]):
    """Add the given dict of new nodes to the manifest.

    A unique ID that is already present is reported through
    ``raise_duplicate_resource_name``. Refable nodes are also added to the
    refs cache when that cache exists.
    """
    for unique_id, node in new_nodes.items():
        if unique_id in self.nodes:
            raise_duplicate_resource_name(node, self.nodes[unique_id])
        self.nodes[unique_id] = node

        # fixup the cache if it exists.
        cache_exists = self._refs_cache is not None
        if cache_exists and node.resource_type in NodeType.refable():
            self._refs_cache.add_node(node)
def run(self):
    """Run ``CompileRunner`` over the selected executable nodes.

    Builds the selection query from ``self.args``, runs it through a
    ``RunManager``, prints a timestamped 'Done.' line and returns the
    manager's results.
    """
    query = dict(
        include=self.args.models,
        exclude=self.args.exclude,
        resource_types=NodeType.executable(),
        tags=[],
    )
    manager = RunManager(self.config, query, CompileRunner)
    results = manager.run()

    dbt.ui.printer.print_timestamped_line('Done.')
    return results
def _get_cache_schemas(self, manifest, exec_only=False):
    """Map each node's information_schema relation to its schema name.

    Every manifest node (or, with ``exec_only``, every executable node)
    yields a relation with the identifier ``information_schema``; the key is
    that relation's ``information_schema_only()`` form and the value is a
    one-element set holding the node's schema.
    """
    schema_map = SchemaSearchMap()
    for node in manifest.nodes.values():
        if not exec_only or node.resource_type in NodeType.executable():
            info_relation = self.Relation.create(
                database=node.database,
                schema=node.schema,
                identifier='information_schema',
                quote_policy=self.config.quoting,
            )
            key = info_relation.information_schema_only()
            # NOTE(review): this assignment replaces any set stored earlier
            # under the same key rather than extending it - confirm that a
            # later node overriding an earlier schema is intended.
            schema_map[key] = {node.schema}
    return schema_map
def run(self):
    """Run ``CompileRunner`` over the selected executable nodes.

    The ``RunManager`` is built from the project, its ``target-path`` and
    the arguments; the query selects by ``self.args.models`` and
    ``self.args.exclude``. Prints a timestamped 'Done.' line and returns the
    runner's results.
    """
    target_path = self.project['target-path']
    runner = RunManager(self.project, target_path, self.args)

    query = dict(
        include=self.args.models,
        exclude=self.args.exclude,
        resource_types=NodeType.executable(),
        tags=set(),
    )
    results = runner.run(query, CompileRunner)

    dbt.ui.printer.print_timestamped_line('Done.')
    return results
def _get_cache_schemas(self, manifest, exec_only=False):
    """Get a mapping of each node's "information_schema" relations to a
    set of all schemas expected in that information_schema.

    There may be keys that are technically duplicates on the database side,
    for example all of '"foo"', 'foo', '"FOO"' and 'FOO' could coexist as
    databases, and values could overlap as appropriate. Values are expected
    to be lowercase strings (that is up to ``SchemaSearchMap.add``).

    With ``exec_only`` set, nodes whose resource type is not executable are
    left out.
    """
    schema_map = SchemaSearchMap()
    for node in manifest.nodes.values():
        if not exec_only or node.resource_type in NodeType.executable():
            schema_map.add(self.Relation.create_from(self.config, node))
    # result is a map whose keys are information_schema Relations without
    # identifiers that have appropriate database prefixes, and whose values
    # are sets of lowercase schema names that are valid members of those
    # schemas
    return schema_map
def raise_duplicate_resource_name(node_1, node_2):
    """Raise a compiler error describing two resources that share a name.

    The message shows how the clashing name would be referenced (``ref``,
    the source representation, ``doc`` or the bare quoted name, depending on
    ``node_1``'s resource type) and lists both nodes' unique IDs and files.

    Returns silently, without raising, when ``node_1`` is a test tagged
    ``schema``.
    """
    duped_name = node_1.name
    resource_type = node_1.resource_type

    # branch order matters: the schema-test escape hatch is only reached for
    # types that are not refable, sources or documentation
    if resource_type in NodeType.refable():
        get_func = 'ref("{}")'.format(duped_name)
    elif resource_type == NodeType.Source:
        duped_name = node_1.get_full_source_name()
        get_func = node_1.get_source_representation()
    elif resource_type == NodeType.Documentation:
        get_func = 'doc("{}")'.format(duped_name)
    elif resource_type == NodeType.Test and 'schema' in node_1.tags:
        return
    else:
        get_func = '"{}"'.format(duped_name)

    template = (
        'dbt found two resources with the name "{}". Since these resources '
        'have the same name,\ndbt will be unable to find the correct resource '
        'when {} is used. To fix this,\nchange the name of one of '
        'these resources:\n- {} ({})\n- {} ({})'
    )
    message = template.format(
        duped_name,
        get_func,
        node_1.unique_id, node_1.original_file_path,
        node_2.unique_id, node_2.original_file_path,
    )
    raise_compiler_error(message)
def _check_resource_uniqueness(manifest):
    """Report refable nodes that share a name or a schema-qualified alias.

    Only nodes with a refable resource type are examined. The first node
    seen under a name (or under ``"<schema>.<alias>"``) is remembered; a
    later node with the same key is handed, together with the remembered
    one, to ``dbt.exceptions.raise_duplicate_resource_name`` or
    ``dbt.exceptions.raise_ambiguous_alias`` respectively.
    """
    by_name = {}
    by_alias = {}
    for node in manifest.nodes.values():
        if node.resource_type in NodeType.refable():
            name = node.name
            alias = "{}.{}".format(node.schema, node.alias)

            previous = by_name.get(name)
            if previous is not None:
                dbt.exceptions.raise_duplicate_resource_name(previous, node)

            previous = by_alias.get(alias)
            if previous is not None:
                dbt.exceptions.raise_ambiguous_alias(previous, node)

            by_name[name] = node
            by_alias[alias] = node
def find_refable_by_name(self, name, package):
    """Find any valid target for "ref()" in the graph by its name and
    package name, or None for any package.

    Delegates to ``self._find_by_name`` over the ``'nodes'`` collection,
    restricted to refable resource types.
    """
    refable_types = NodeType.refable()
    return self._find_by_name(name, package, 'nodes', refable_types)
def is_refable(self):
    """Return True if this node's resource type is a refable type."""
    refable_types = NodeType.refable()
    return self.resource_type in refable_types
def run(self):
    """Report database relations that no enabled dbt model accounts for.

    Collects the (database, schema) pairs of every non-source node in the
    manifest, lists the relations the adapter finds in those schemas, and
    compares them (by lower-cased schema and identifier) with the relations
    expected from enabled, non-ephemeral refable nodes and archives.

    Returns:
        The list of database relations that match no model; empty when
        everything in the checked schemas is accounted for.
    """
    adapter = dbt.adapters.factory.get_adapter(self.config)
    manifest = self._get_manifest()

    schemas = set()
    model_relations = set()
    refable_types = NodeType.refable()

    # Look up all of the relations dbt knows about
    for node in manifest.nodes.values():
        # Sources are not built by dbt: their schemas are not checked and
        # they never contribute a model relation, so skip them outright.
        if node["resource_type"] == "source":
            continue
        schemas.add((node["database"], node["schema"]))

        node = node.to_dict()
        is_refable = (
            node["resource_type"] in refable_types
            or node["resource_type"] == "archive"
        )
        is_enabled = check_is_enabled(node)
        is_ephemeral = node["config"]["materialized"] == "ephemeral"
        if is_refable and is_enabled and not is_ephemeral:
            model_relations.add(
                (node["schema"].lower(), node["alias"].lower())
            )

    # Look up all of the relations in the DB, keyed by (schema, identifier)
    database_relations_map = {}
    for database_name, schema_name in schemas:
        for relation in adapter.list_relations(database_name, schema_name):
            relation_id = (
                relation.schema.lower(), relation.identifier.lower()
            )
            database_relations_map[relation_id] = relation

    logger.info("Comparing local models to the database catalog. "
                "Checking schemas:")
    for _, schema_name in schemas:
        logger.info("- {}".format(schema_name))

    problems = set(database_relations_map) - model_relations

    # Always bound, so the all-clear path returns an empty list too.
    problem_relation_list = []
    if not problems:
        logger.info(
            dbt.ui.printer.green(
                "All clear! There are no relations in the checked schemas "
                "in the database that are not defined in dbt models."))
        return problem_relation_list

    logger.info(
        dbt.ui.printer.yellow(
            "Warning: The following relations do not match any models "
            "found in this project:"))
    for relation_id in problems:
        relation = database_relations_map[relation_id]
        problem_relation_list.append(relation)
        # The adapter may not know a relation's type; don't crash on None.
        if relation.type:
            relation_type = relation.type.upper()
        else:
            relation_type = "UNKNOWN"
        logger.info("{} {}".format(relation_type, relation))
    return problem_relation_list
def is_refable(cls, node):
    """Return True if the node's ``resource_type`` entry is a refable type.

    ``node`` is read with ``.get``, so a missing ``resource_type`` is looked
    up as ``None``.
    """
    resource_type = node.get('resource_type')
    return resource_type in NodeType.refable()
def find_refable_by_name(flat_graph, target_name, target_package):
    """Look up a refable node in ``flat_graph`` by name and package.

    Delegates to ``find_by_name`` over the ``'nodes'`` subgraph, restricted
    to refable resource types.
    """
    refable_types = NodeType.refable()
    return find_by_name(
        flat_graph, target_name, target_package, 'nodes', refable_types
    )
def find_disabled_by_name(self, name, package=None):
    """Search ``self.disabled`` for a refable node by name and package.

    Returns whatever ``dbt.utils.find_in_list_by_name`` returns.
    """
    refable_types = NodeType.refable()
    return dbt.utils.find_in_list_by_name(
        self.disabled, name, package, refable_types
    )
def __init__(self, manifest: 'Manifest'):
    """Record the refable resource types, then run the base initializer."""
    # Set before delegating; presumably the base initializer reads
    # ``_cached_types`` - confirm against the parent class.
    self._cached_types = {*NodeType.refable()}
    super().__init__(manifest)