Exemple #1
0
def _pluralize(string: Union[str, NodeType]) -> str:
    """Return the plural form of ``string``.

    When ``string`` is a valid ``NodeType`` value, the result of that node
    type's own ``pluralize()`` is returned. Otherwise (``NodeType(...)``
    raised ``ValueError``) a plain ``s`` is appended.
    """
    try:
        node_type = NodeType(string)
    except ValueError:
        # not a known node type: fall back to naive pluralization
        return f'{string}s'
    return node_type.pluralize()
Exemple #2
0
    def merge_from_artifact(
        self,
        adapter,
        other: 'WritableManifest',
        selected: AbstractSet[UniqueID],
    ) -> None:
        """Replace unselected nodes of this manifest with those of ``other``.

        A node from ``other`` is copied in (with ``deferred=True``) only when
        every one of these holds:

        - this manifest already holds a truthy node under the same unique ID
        - the other node's resource type is refable and it is not ephemeral
        - the unique ID is not in ``selected``
        - ``adapter.get_relation`` finds nothing truthy for the current
          node's database/schema/identifier

        The number of merged nodes and a sample of up to five unique IDs are
        written to the debug log.
        """
        refable_types = set(NodeType.refable())
        merged = set()
        for unique_id, candidate in other.nodes.items():
            existing = self.nodes.get(unique_id)
            if not existing:
                continue
            if candidate.resource_type not in refable_types:
                continue
            if candidate.is_ephemeral:
                continue
            if unique_id in selected:
                continue
            # checked last, so the adapter is only queried for nodes that
            # passed every cheaper test
            if adapter.get_relation(
                    existing.database, existing.schema, existing.identifier):
                continue
            merged.add(unique_id)
            self.nodes[unique_id] = candidate.replace(deferred=True)

        # log up to 5 items
        sample = list(islice(merged, 5))
        logger.debug(
            f'Merged {len(merged)} items from state (sample: {sample})')
Exemple #3
0
def _check_resource_uniqueness(manifest):
    """Check that refable nodes have unique names and unique aliases.

    Walks ``manifest.nodes`` and, for every node whose resource type is in
    ``NodeType.refable()``, calls
    ``dbt.exceptions.raise_duplicate_resource_name`` if an earlier node had
    the same ``name`` and ``dbt.exceptions.raise_ambiguous_alias`` if an
    earlier node had the same ``"<schema>.<alias>"`` string.
    """
    seen_names = {}
    seen_aliases = {}

    for node in manifest.nodes.values():
        if node.resource_type not in NodeType.refable():
            continue

        node_name = node.name
        qualified_alias = "{}.{}".format(node.schema, node.alias)

        name_owner = seen_names.get(node_name)
        if name_owner is not None:
            dbt.exceptions.raise_duplicate_resource_name(name_owner, node)

        alias_owner = seen_aliases.get(qualified_alias)
        if alias_owner is not None:
            dbt.exceptions.raise_ambiguous_alias(alias_owner, node)

        # remember this node so later nodes are compared against it
        seen_names[node_name] = node
        seen_aliases[qualified_alias] = node
Exemple #4
0
    def parse_file(self, block: FileBlock) -> None:
        """Render one yaml file block and run every patch parser over it.

        The file is always registered through ``self.results.get_file``. If
        the loaded yaml is empty nothing else happens. Otherwise the data is
        rendered, wrapped in a ``YamlBlock``, its format version is parsed,
        and for each documentable node type the matching parser is run; every
        block a parser yields is passed to ``self.parse_tests``.

        Raises ``CompilationException`` (chained to the original) when
        rendering the yaml fails.
        """
        contents = self._yaml_from_file(block.file)
        # mark the file as seen, even if there are no macros in it
        self.results.get_file(block.file)
        if not contents:
            return

        try:
            contents = self.raw_renderer.render_data(contents)
        except CompilationException as exc:
            raise CompilationException(
                f'Failed to render {block.path.original_file_path} from '
                f'project {self.project.project_name}: {exc}') from exc

        yaml_block = YamlBlock.from_file_block(block, contents)

        self._parse_format_version(yaml_block)

        # node types with a dedicated parser; all others use the testable one
        dedicated_parsers = {
            NodeType.Source: SourceParser,
            NodeType.Macro: MacroPatchParser,
            NodeType.Analysis: AnalysisPatchParser,
        }
        for key in NodeType.documentable():
            plural = key.pluralize()
            parser_cls = dedicated_parsers.get(key, TestablePatchParser)
            parser: YamlDocsReader = parser_cls(self, yaml_block, plural)
            for test_block in parser.parse():
                self.parse_tests(test_block)
Exemple #5
0
 def build_query(self):
     """Return the node-selection query for this task.

     The query includes ``self.args.models``, excludes ``self.args.exclude``,
     is limited to ``NodeType.executable()`` resource types and carries an
     empty tag list.
     """
     args = self.args
     query = {
         "include": args.models,
         "exclude": args.exclude,
         "resource_types": NodeType.executable(),
         "tags": [],
     }
     return query
Exemple #6
0
def _check_resource_uniqueness(
    manifest: Manifest,
    config: RuntimeConfig,
) -> None:
    """Check that refable nodes have unique names and unique relations.

    For every node in ``manifest.nodes`` whose resource type is in
    ``NodeType.refable()``:

    - ``dbt.exceptions.raise_duplicate_resource_name`` is called if an
      earlier node had the same ``name``
    - ``dbt.exceptions.raise_ambiguous_alias`` is called if an earlier node
      had the same relation string, where the relation is built by the
      relation class looked up from ``config.credentials.type``
    """
    nodes_by_name: Dict[str, ManifestNode] = {}
    nodes_by_relation: Dict[str, ManifestNode] = {}

    for node in manifest.nodes.values():
        if node.resource_type not in NodeType.refable():
            continue
        # appease mypy - sources aren't refable!
        assert not isinstance(node, ParsedSourceDefinition)

        node_name = node.name
        # the full node name is really defined by the adapter's relation
        relation_cls = get_relation_class_by_name(config.credentials.type)
        full_node_name = str(
            relation_cls.create_from(config=config, node=node)
        )

        name_owner = nodes_by_name.get(node_name)
        if name_owner is not None:
            dbt.exceptions.raise_duplicate_resource_name(name_owner, node)

        relation_owner = nodes_by_relation.get(full_node_name)
        if relation_owner is not None:
            dbt.exceptions.raise_ambiguous_alias(
                relation_owner, node, full_node_name)

        # remember this node so later nodes are compared against it
        nodes_by_name[node_name] = node
        nodes_by_relation[full_node_name] = node
Exemple #7
0
 def _get_relation_name(self, node: ParsedNode):
     """Return the string form of the node's relation, or ``None``.

     ``None`` is returned when the node's resource type is not in
     ``NodeType.refable()`` or when the node is an ephemeral model.
     Otherwise the relation is created with the adapter's ``Relation``
     class and converted with ``str``.
     """
     if node.resource_type not in NodeType.refable():
         return None
     if node.is_ephemeral_model:
         return None
     relation_cls = get_adapter(self.config).Relation
     return str(relation_cls.create_from(self.config, node))
Exemple #8
0
 def find_disabled_by_name(
         self,
         name: str,
         package: Optional[str] = None) -> Optional[ManifestNode]:
     """Search ``self.disabled`` for a refable node by name and package.

     Builds a ``NameSearcher`` for ``name``/``package`` restricted to
     ``NodeType.refable()`` and returns whatever its ``search`` yields for
     the disabled nodes.
     """
     refable_types = NodeType.refable()
     searcher: NameSearcher = NameSearcher(name, package, refable_types)
     return searcher.search(self.disabled)
Exemple #9
0
class SchemaYamlRenderer(BaseRenderer):
    """Renderer that decides which keypaths of a schema yaml get rendered."""

    # plural names of every documentable node type
    DOCUMENTABLE_NODES = frozenset(
        node_type.pluralize() for node_type in NodeType.documentable()
    )

    @property
    def name(self):
        """Human-readable name of this renderer."""
        return 'Rendering yaml'

    def _is_norender_key(self, keypath: Keypath) -> bool:
        """Tell whether ``keypath`` points at a tests/description entry.

        The keypath is relative to a structure like:

        models:
            - name: blah
            - description: blah
              tests: ...
            - columns:
                - name:
                - description: blah
                  tests: ...

        Return True if it's tests or description - those aren't rendered
        """
        unrendered = ('tests', 'description')

        # entry-level key, e.g. (<index>, 'description', ...)
        if len(keypath) >= 2 and keypath[1] in unrendered:
            return True

        # column-level key, e.g. (<index>, 'columns', <index>, 'tests', ...)
        return (len(keypath) >= 4 and keypath[1] == 'columns'
                and keypath[3] in unrendered)

    # don't render descriptions or test keyword arguments
    def should_render_keypath(self, keypath: Keypath) -> bool:
        """Return False for keypaths that must be left unrendered.

        Keypaths shorter than three elements, or whose first element is not
        a documentable section, are always rendered.
        """
        if (len(keypath) < 2
                or keypath[0] not in self.DOCUMENTABLE_NODES
                or len(keypath) < 3):
            return True

        section = keypath[0]
        if section == NodeType.Source.pluralize():
            field = keypath[2]
            if field == 'description':
                return False
            if field == 'tables' and self._is_norender_key(keypath[3:]):
                return False
            return True

        if section == NodeType.Macro.pluralize():
            # macro arguments nest one level deeper than the macro itself
            if keypath[2] == 'arguments':
                subpath = keypath[3:]
            else:
                subpath = keypath[1:]
            return not self._is_norender_key(subpath)

        # any other documentable section
        return not self._is_norender_key(keypath[1:])
Exemple #10
0
 def _get_cache_schemas(self, manifest: Manifest) -> Set[BaseRelation]:
     """Collect the schema relations the cache logic needs to populate.

     One relation without identifier is produced for every manifest node
     whose resource type is in ``NodeType.executable()``; duplicates
     collapse because the result is a set.
     """
     schemas: Set[BaseRelation] = set()
     for node in manifest.nodes.values():
         # the cache only cares about executable nodes
         if node.resource_type not in NodeType.executable():
             continue
         relation = self.Relation.create_from(self.config, node)
         schemas.add(relation.without_identifier())
     return schemas
Exemple #11
0
 def get_node_selector(self) -> ResourceTypeSelector:
     """Build a ``ResourceTypeSelector`` limited to executable node types.

     Raises ``InternalException`` when either ``self.manifest`` or
     ``self.graph`` is still ``None``.
     """
     not_ready = self.manifest is None or self.graph is None
     if not_ready:
         raise InternalException(
             'manifest and graph must be set to get perform node selection')
     selector = ResourceTypeSelector(
         graph=self.graph,
         manifest=self.manifest,
         previous_state=self.previous_state,
         resource_types=NodeType.executable(),
     )
     return selector
Exemple #12
0
 def search(self, included_nodes: Set[UniqueId],
            selector: str) -> Iterator[UniqueId]:
     """Yield the included nodes whose resource type equals ``selector``.

     ``selector`` is converted with ``NodeType(selector)``; when that fails
     a ``RuntimeException`` chained to the ``ValueError`` is raised. As this
     is a generator, the error surfaces when iteration starts.
     """
     try:
         wanted_type = NodeType(selector)
     except ValueError as exc:
         raise RuntimeException(
             f'Invalid resource_type selector "{selector}"') from exc
     yield from (
         node
         for node, real_node in self.parsed_nodes(included_nodes)
         if real_node.resource_type == wanted_type
     )
Exemple #13
0
 def add_nodes(self, new_nodes: Mapping[str, ManifestNode]):
     """Add the given mapping of unique ID -> node to the manifest.

     ``raise_duplicate_resource_name`` is called for any unique ID that is
     already present; if that call returns, the existing entry is
     overwritten. Refable nodes are also added to the refs cache when one
     exists.
     """
     for unique_id, node in new_nodes.items():
         if unique_id in self.nodes:
             raise_duplicate_resource_name(node, self.nodes[unique_id])
         self.nodes[unique_id] = node

         # fixup the cache if it exists.
         refs_cache = self._refs_cache
         if refs_cache is None:
             continue
         if node.resource_type in NodeType.refable():
             refs_cache.add_node(node)
Exemple #14
0
    def run(self):
        """Run ``CompileRunner`` over the selected executable nodes.

        The selection includes ``self.args.models``, excludes
        ``self.args.exclude`` and is limited to ``NodeType.executable()``.
        Prints a timestamped 'Done.' line and returns the run results.
        """
        args = self.args
        selection = {
            "include": args.models,
            "exclude": args.exclude,
            "resource_types": NodeType.executable(),
            "tags": [],
        }
        manager = RunManager(self.config, selection, CompileRunner)
        results = manager.run()

        dbt.ui.printer.print_timestamped_line('Done.')

        return results
Exemple #15
0
 def _get_cache_schemas(self, manifest, exec_only=False):
     """Map each node's information_schema relation to its schema names.

     :param manifest: manifest whose ``nodes`` are scanned.
     :param exec_only: when true, nodes whose resource type is not in
         ``NodeType.executable()`` are skipped.
     :return: a ``SchemaSearchMap`` keyed by
         ``relation.information_schema_only()``; each value is the set of
         schema names of all nodes that produced that key.
     """
     info_schema_name_map = SchemaSearchMap()
     # loop-invariant, and only needed when filtering is requested
     executable_types = NodeType.executable() if exec_only else None
     for node in manifest.nodes.values():
         if exec_only and node.resource_type not in executable_types:
             continue
         relation = self.Relation.create(
             database=node.database,
             schema=node.schema,
             identifier='information_schema',
             quote_policy=self.config.quoting,
         )
         key = relation.information_schema_only()
         # Accumulate instead of assigning: several nodes can produce the
         # same key while living in different schemas, and a plain
         # assignment would keep only the last node's schema.
         # NOTE(review): relies on SchemaSearchMap being dict-like
         # (``setdefault``) - confirm against its definition.
         info_schema_name_map.setdefault(key, set()).add(node.schema)
     return info_schema_name_map
Exemple #16
0
    def run(self):
        """Run ``CompileRunner`` over the selected executable nodes.

        A ``RunManager`` is created from the project, its ``target-path``
        entry and the task arguments. The selection includes
        ``self.args.models``, excludes ``self.args.exclude`` and is limited
        to ``NodeType.executable()``. Prints a timestamped 'Done.' line and
        returns the run results.
        """
        project = self.project
        manager = RunManager(project, project['target-path'], self.args)

        selection = {
            "include": self.args.models,
            "exclude": self.args.exclude,
            "resource_types": NodeType.executable(),
            "tags": set()
        }
        results = manager.run(selection, CompileRunner)

        dbt.ui.printer.print_timestamped_line('Done.')

        return results
Exemple #17
0
    def _get_cache_schemas(self, manifest, exec_only=False):
        """Build the map of "information_schema" relations to schema names.

        Every manifest node (or, with ``exec_only``, every node whose
        resource type is in ``NodeType.executable()``) is turned into a
        relation and added to a ``SchemaSearchMap``.

        There may be keys that are technically duplicates on the database
        side, for example all of '"foo", 'foo', '"FOO"' and 'FOO' could
        coexist as databases, and values could overlap as appropriate. All
        values are lowercase strings.
        """
        schema_map = SchemaSearchMap()
        for node in manifest.nodes.values():
            skip = (
                exec_only
                and node.resource_type not in NodeType.executable()
            )
            if skip:
                continue
            schema_map.add(self.Relation.create_from(self.config, node))
        # result is a map whose keys are information_schema Relations without
        # identifiers that have appropriate database prefixes, and whose values
        # are sets of lowercase schema names that are valid members of those
        # schemas
        return schema_map
Exemple #18
0
    def _get_cache_schemas(self, manifest, exec_only=False):
        """Return a ``SchemaSearchMap`` filled from the manifest's nodes.

        Each node is converted to a relation and added to the map; when
        ``exec_only`` is true, nodes whose resource type is not in
        ``NodeType.executable()`` are left out.

        There may be keys that are technically duplicates on the database
        side, for example all of '"foo", 'foo', '"FOO"' and 'FOO' could
        coexist as databases, and values could overlap as appropriate. All
        values are lowercase strings.
        """
        result = SchemaSearchMap()
        for node in manifest.nodes.values():
            if not exec_only or node.resource_type in NodeType.executable():
                relation = self.Relation.create_from(self.config, node)
                result.add(relation)
        # result is a map whose keys are information_schema Relations without
        # identifiers that have appropriate database prefixes, and whose values
        # are sets of lowercase schema names that are valid members of those
        # schemas
        return result
Exemple #19
0
def raise_duplicate_resource_name(node_1, node_2):
    """Raise a compiler error about two resources sharing one name.

    The message names the duplicated resource and the lookup expression
    that becomes ambiguous, which depends on ``node_1``'s resource type:
    ``ref(...)`` for refable nodes, the source representation for sources,
    ``doc(...)`` for documentation, and the quoted name otherwise.

    Returns without raising when ``node_1`` is a test tagged ``schema``.
    """
    duped_name = node_1.name
    resource_type = node_1.resource_type

    if resource_type in NodeType.refable():
        get_func = 'ref("{}")'.format(duped_name)
    elif resource_type == NodeType.Source:
        duped_name = node_1.get_full_source_name()
        get_func = node_1.get_source_representation()
    elif resource_type == NodeType.Documentation:
        get_func = 'doc("{}")'.format(duped_name)
    elif resource_type == NodeType.Test and 'schema' in node_1.tags:
        # duplicate schema tests are tolerated
        return
    else:
        get_func = '"{}"'.format(duped_name)

    message = (
        'dbt found two resources with the name "{}". Since these resources '
        'have the same name,\ndbt will be unable to find the correct resource '
        'when {} is used. To fix this,\nchange the name of one of '
        'these resources:\n- {} ({})\n- {} ({})'
    ).format(
        duped_name, get_func,
        node_1.unique_id, node_1.original_file_path,
        node_2.unique_id, node_2.original_file_path,
    )
    raise_compiler_error(message)
Exemple #20
0
def _check_resource_uniqueness(manifest):
    """Reject refable nodes that share a name or a ``schema.alias`` pair.

    Nodes whose resource type is not in ``NodeType.refable()`` are ignored.
    A repeated ``name`` is reported through
    ``dbt.exceptions.raise_duplicate_resource_name`` and a repeated
    ``"<schema>.<alias>"`` string through
    ``dbt.exceptions.raise_ambiguous_alias``.
    """
    first_by_name = {}
    first_by_alias = {}

    for node in manifest.nodes.values():
        if node.resource_type not in NodeType.refable():
            continue

        name = node.name
        alias = "{}.{}".format(node.schema, node.alias)

        previous = first_by_name.get(name)
        if previous is not None:
            dbt.exceptions.raise_duplicate_resource_name(previous, node)

        previous = first_by_alias.get(alias)
        if previous is not None:
            dbt.exceptions.raise_ambiguous_alias(previous, node)

        # record the node for comparison with the ones that follow
        first_by_name[name] = node
        first_by_alias[alias] = node
Exemple #21
0
 def find_refable_by_name(self, name, package):
     """Look up a valid "ref()" target in the graph.

     Searches the 'nodes' section for ``name`` in ``package`` (``None``
     meaning any package), restricted to ``NodeType.refable()`` types, and
     returns the result of ``self._find_by_name``.
     """
     refable_types = NodeType.refable()
     return self._find_by_name(name, package, 'nodes', refable_types)
Exemple #22
0
 def is_refable(self):
     """Tell whether this object's resource type is in ``NodeType.refable()``."""
     resource_type = self.resource_type
     return resource_type in NodeType.refable()
Exemple #23
0
    def run(self):
        """Report database relations that no dbt node accounts for.

        Collects the lowercased (schema, alias) pair of every enabled,
        non-ephemeral node that is refable or an archive, lists the
        relations found by the adapter in every (database, schema) used by
        a non-source node, and logs each database relation that has no
        matching node.

        :return: list of the relation objects that matched no node.
        """
        # Look up all of the relations in the DB
        adapter = dbt.adapters.factory.get_adapter(self.config)
        manifest = self._get_manifest()
        # loop-invariant: computed once instead of once per node
        refable_types = NodeType.refable()

        schemas = set()
        model_relations = set()
        # Look up all of the relations dbt knows about
        for node in manifest.nodes.values():
            if node["resource_type"] == "source":
                continue
            schemas.add((node["database"], node["schema"]))
            node = node.to_dict()
            is_refable = (node["resource_type"] in refable_types
                          or node["resource_type"] == "archive")
            is_enabled = check_is_enabled(node)
            is_ephemeral = node["config"]["materialized"] == "ephemeral"
            if is_refable and is_enabled and not is_ephemeral:
                rel = (node["schema"].lower(), node["alias"].lower())
                model_relations.add(rel)

        db_relations = []
        for database_name, schema_name in schemas:
            db_relations.extend(
                adapter.list_relations(database_name, schema_name))

        # index the database relations by their lowercased (schema, name)
        database_relations = set()
        database_relations_map = dict()
        for relation in db_relations:
            relation_id = (relation.schema.lower(),
                           relation.identifier.lower())
            database_relations_map[relation_id] = relation
            database_relations.add(relation_id)

        logger.info("Comparing local models to the database catalog. "
                    "Checking schemas:")
        for database_name, schema_name in schemas:
            logger.info("- {}".format(schema_name))

        problems = database_relations - model_relations

        if not problems:
            # the trailing space keeps the two literals from running
            # together ("databasethat")
            logger.info(
                dbt.ui.printer.green(
                    "All clear! There are no relations in the checked "
                    "schemas in the database "
                    "that are not defined in dbt models."))
        else:
            logger.info(
                dbt.ui.printer.yellow(
                    "Warning: The following relations do not match any models "
                    "found in this project:"))

        problem_relation_list = []  # Get a list of relations to return

        for relation_id in problems:
            relation = database_relations_map[relation_id]
            problem_relation_list.append(relation)
            # a relation's type may be None; don't call .upper() on it
            if relation.type is None:
                relation_type = 'UNKNOWN'
            else:
                relation_type = relation.type.upper()
            logger.info("{} {}".format(relation_type, relation))

        return problem_relation_list
Exemple #24
0
 def is_refable(cls, node):
     """Tell whether ``node``'s 'resource_type' is in ``NodeType.refable()``.

     ``node`` is read with ``.get``, so a missing 'resource_type' is
     compared as ``None``.
     """
     resource_type = node.get('resource_type')
     return resource_type in NodeType.refable()
Exemple #25
0
def find_refable_by_name(flat_graph, target_name, target_package):
    """Find a refable node in the 'nodes' section of ``flat_graph``.

    Delegates to ``find_by_name`` with the resource types from
    ``NodeType.refable()`` and returns its result.
    """
    refable_types = NodeType.refable()
    return find_by_name(
        flat_graph, target_name, target_package, 'nodes', refable_types
    )
Exemple #26
0
 def find_disabled_by_name(self, name, package=None):
     """Search ``self.disabled`` for a refable node by name and package.

     Delegates to ``dbt.utils.find_in_list_by_name`` with the resource
     types from ``NodeType.refable()`` and returns its result.
     """
     refable_types = NodeType.refable()
     return dbt.utils.find_in_list_by_name(
         self.disabled, name, package, refable_types
     )
Exemple #27
0
 def find_refable_by_name(self, name, package):
     """Return the "ref()" target matching ``name`` and ``package``.

     ``package`` may be ``None`` to match any package. The search covers
     the 'nodes' section and only ``NodeType.refable()`` resource types; it
     is carried out by ``self._find_by_name``.
     """
     candidates = NodeType.refable()
     return self._find_by_name(name, package, 'nodes', candidates)
Exemple #28
0
 def __init__(self, manifest: 'Manifest'):
     """Record the refable node types, then run the parent initializer.

     ``_cached_types`` is assigned before ``super().__init__`` is called.
     """
     refable_types = NodeType.refable()
     self._cached_types = set(refable_types)
     super().__init__(manifest)
Exemple #29
0
 def find_disabled_by_name(self, name, package=None):
     """Look up a disabled refable node by ``name`` (and ``package``).

     The lookup over ``self.disabled`` is done by
     ``dbt.utils.find_in_list_by_name``, limited to the resource types in
     ``NodeType.refable()``.
     """
     lookup = dbt.utils.find_in_list_by_name
     return lookup(self.disabled, name, package, NodeType.refable())