Esempio n. 1
0
def _process_docs_for_source(
    context: Dict[str, Any],
    source: ParsedSourceDefinition,
):
    """Render the description fields of a source in place.

    The table description, the source description and every column
    description on ``source`` are passed through ``get_rendered`` with
    ``context`` and the result is written back onto the same attribute.
    """
    # Render both top-level descriptions before assigning either, so a
    # failure while rendering leaves ``source`` untouched.
    rendered_table = get_rendered(source.description, context)
    rendered_source = get_rendered(source.source_description, context)
    source.description = rendered_table
    source.source_description = rendered_source

    for col in source.columns.values():
        col.description = get_rendered(col.description, context)
Esempio n. 2
0
 def test__parse_basic_source(self):
     # Parse the SINGLE_TABLE_SOURCE fixture as 'test_one.yml' and check that
     # exactly one source is produced and that it equals the expected node.
     yaml_block = self.file_block_for(SINGLE_TABLE_SOURCE, 'test_one.yml')
     self.parser.parse_file(yaml_block)
     self.assert_has_results_length(self.parser.results, sources=1)

     parsed_sources = list(self.parser.results.sources.values())
     actual = parsed_sources[0]

     # Every field the expected definition is constructed with.
     expected_fields = dict(
         package_name='snowplow',
         source_name='my_source',
         schema='my_source',
         name='my_table',
         loader='',
         freshness=FreshnessThreshold(),
         external=ExternalTable(),
         source_description='',
         identifier='my_table',
         fqn=['snowplow', 'my_source', 'my_table'],
         database='test',
         unique_id='source.snowplow.my_source.my_table',
         root_path=get_abs_os_path('./dbt_modules/snowplow'),
         path=normalize('models/test_one.yml'),
         original_file_path=normalize('models/test_one.yml'),
         resource_type=NodeType.Source,
     )
     self.assertEqual(actual, ParsedSourceDefinition(**expected_fields))
Esempio n. 3
0
    def parse_source(
            self, target: UnpatchedSourceDefinition) -> ParsedSourceDefinition:
        """Build a ParsedSourceDefinition from an unpatched source target.

        Table-level values are combined with source-level ones: freshness
        and quoting are merged, tags are unioned, and ``loaded_at_field``
        falls back to the source when the table does not set it.

        Raises InternalException if the calculated node config is not a
        SourceConfig.
        """
        src, tbl = target.source, target.table
        parser_ref = ParserRef.from_target(tbl)

        merged_freshness = merge_freshness(src.freshness, tbl.freshness)
        merged_quoting = src.quoting.merged(tbl.quoting)

        # Union of source and table tags: de-duplicated, then sorted.
        unique_tags = sorted(set(src.tags) | set(tbl.tags))

        node_config = self.config_generator.calculate_node_config(
            config_calls=[],
            fqn=target.fqn,
            resource_type=NodeType.Source,
            project_name=self.project.project_name,
            base=False,
        )
        if not isinstance(node_config, SourceConfig):
            raise InternalException(
                f'Calculated a {type(node_config)} for a source, but expected '
                f'a SourceConfig')

        # Used when the source does not name a database of its own.
        fallback_database = self.root_project.credentials.database

        return ParsedSourceDefinition(
            package_name=target.package_name,
            database=(src.database or fallback_database),
            schema=(src.schema or src.name),
            identifier=(tbl.identifier or tbl.name),
            root_path=target.root_path,
            path=target.path,
            original_file_path=target.original_file_path,
            columns=parser_ref.column_info,
            unique_id=target.unique_id,
            name=tbl.name,
            description=(tbl.description or ''),
            external=tbl.external,
            source_name=src.name,
            source_description=(src.description or ''),
            source_meta=(src.meta or {}),
            meta=(tbl.meta or {}),
            loader=src.loader,
            loaded_at_field=(tbl.loaded_at_field or src.loaded_at_field),
            freshness=merged_freshness,
            quoting=merged_quoting,
            resource_type=NodeType.Source,
            fqn=target.fqn,
            tags=unique_tags,
            config=node_config,
        )
Esempio n. 4
0
    def generate_source_node(self, source, table, path, package_name, root_dir,
                             refs):
        """Build a ParsedSourceDefinition for one table of a source.

        Table-level ``freshness`` and ``quoting`` are deep-merged over the
        source-level values, and ``loaded_at_field`` falls back to the
        source when the table does not define it.
        """
        node_id = self.get_path(NodeType.Source, package_name, source.name,
                                table.name)

        render_context = {'doc': dbt.context.parser.docs(source, refs.docrefs)}
        table_doc = table.get('description', '')
        source_doc = source.get('description', '')
        # The rendered text is discarded and the raw descriptions are stored.
        # NOTE(review): presumably rendering runs only so the 'doc' context
        # function can record references into refs.docrefs - confirm.
        for raw_text in (table_doc, source_doc):
            get_rendered(raw_text, render_context)

        merged_freshness = dbt.utils.deep_merge(source.get('freshness', {}),
                                                table.get('freshness', {}))

        # 'or {}' lets an explicit "quoting: null" behave like an empty dict
        merged_quoting = dbt.utils.deep_merge(source.get('quoting') or {},
                                              table.get('quoting') or {})

        fallback_database = self.root_project_config.credentials.database
        return ParsedSourceDefinition(
            package_name=package_name,
            database=source.get('database', fallback_database),
            schema=source.get('schema', source.name),
            identifier=table.get('identifier', table.name),
            root_path=root_dir,
            path=path,
            original_file_path=path,
            columns=refs.column_info,
            unique_id=node_id,
            name=table.name,
            description=table_doc,
            source_name=source.name,
            source_description=source_doc,
            loader=source.get('loader', ''),
            docrefs=refs.docrefs,
            loaded_at_field=table.get('loaded_at_field',
                                      source.get('loaded_at_field')),
            freshness=merged_freshness,
            quoting=merged_quoting,
            resource_type=NodeType.Source,
            fqn=[package_name, source.name, table.name])
Esempio n. 5
0
    def generate_source_node(self, block: TargetBlock,
                             refs: ParserRef) -> ParsedSourceDefinition:
        """Build a ParsedSourceDefinition from a block holding a SourceTarget.

        The node is attributed to this parser's project; the block's
        original file path is used for both ``path`` and
        ``original_file_path``.
        """
        target = block.target
        assert isinstance(target, SourceTarget)
        src, tbl = target.source, target.table
        project_name = self.project.project_name

        node_id = '.'.join(
            (NodeType.Source, project_name, src.name, tbl.name)
        )

        table_doc = tbl.description or ''
        source_doc = src.description or ''
        collect_docrefs(src, refs, None, table_doc, source_doc)

        merged_freshness = self._calculate_freshness(src, tbl)
        merged_quoting = src.quoting.merged(tbl.quoting)
        file_path = block.path.original_file_path

        return ParsedSourceDefinition(
            package_name=project_name,
            database=(src.database or self.default_database),
            schema=(src.schema or src.name),
            identifier=(tbl.identifier or tbl.name),
            root_path=self.project.project_root,
            path=file_path,
            original_file_path=file_path,
            columns=refs.column_info,
            unique_id=node_id,
            name=tbl.name,
            description=table_doc,
            external=tbl.external,
            source_name=src.name,
            source_description=source_doc,
            loader=src.loader,
            docrefs=refs.docrefs,
            loaded_at_field=(tbl.loaded_at_field or src.loaded_at_field),
            freshness=merged_freshness,
            quoting=merged_quoting,
            resource_type=NodeType.Source,
            fqn=[project_name, src.name, tbl.name],
        )
Esempio n. 6
0
def make_source(pkg,
                source_name,
                table_name,
                path=None,
                loader=None,
                identifier=None,
                fqn_extras=None):
    """Build a ParsedSourceDefinition fixture with fixed database/schema values.

    Parameters:
        pkg: package name; also the first element of the fqn.
        source_name: name of the source.
        table_name: name of the table within the source.
        path: file path used for both ``path`` and ``original_file_path``;
            defaults to 'models/schema.yml'.
        loader: loader recorded on the node; defaults to 'my_loader'.
        identifier: table identifier; defaults to ``table_name``.
        fqn_extras: extra fqn components placed between the package name and
            the source name; defaults to none.

    Returns:
        The constructed ParsedSourceDefinition.
    """
    if path is None:
        path = 'models/schema.yml'
    if loader is None:
        loader = 'my_loader'
    if identifier is None:
        identifier = table_name
    if fqn_extras is None:
        fqn_extras = []

    fqn = [pkg] + fqn_extras + [source_name, table_name]

    return ParsedSourceDefinition(
        fqn=fqn,
        database='dbt',
        schema='dbt_schema',
        unique_id=f'source.{pkg}.{source_name}.{table_name}',
        package_name=pkg,
        root_path='/usr/dbt/some-project',
        path=path,
        original_file_path=path,
        name=table_name,
        source_name=source_name,
        # Pass the resolved argument through; a hard-coded literal here would
        # silently ignore a caller-supplied loader.
        loader=loader,
        identifier=identifier,
        resource_type=NodeType.Source,
        loaded_at_field='loaded_at',
        tags=[],
        source_description='',
    )
Esempio n. 7
0
def parse_manifest(
    manifest: dict[str, Any]
) -> tuple[dict[str, ParsedModelNode | CompiledModelNode] | None,
           dict[str, ParsedSeedNode | CompiledSeedNode] | None,
           dict[str, ParsedGenericTestNode | CompiledGenericTestNode] | None,
           dict[str, ParsedSourceDefinition] | None, ]:
    """
    Parse the manifest.

    Written against the V4 manifest layout. The schema version is not
    validated by this function.

    Parameters
    ----------
    manifest : dict[str, Any]
        The raw manifest.

    Returns
    -------
    dbt manifest parsed into a tuple of dicts containing: models_nodes, seed_nodes, test_nodes and source_nodes.
    The three node dicts are `None` when the manifest has no 'nodes' entry
    (or it is `None`); source_nodes is `None` when it has no 'sources' entry
    (or it is `None`).

    Raises
    ------
    KeyError :
        If an entry under 'nodes' or 'sources' has no 'resource_type' key.

    Notes
    -----
    Nodes of any other resource type are skipped silently. Test nodes
    without 'test_metadata' are skipped and reported through the logger.

    Source
    ------
    https://docs.getdbt.com/reference/artifacts/manifest-json
    """
    model_nodes = None
    seed_nodes = None
    test_nodes = None

    raw_nodes = manifest.get('nodes')
    if raw_nodes is not None:
        model_nodes = {}
        seed_nodes = {}
        test_nodes = {}
        # One pass over the nodes, dispatching on resource type; each output
        # dict keeps the manifest's relative ordering.
        for node_name, node in raw_nodes.items():
            resource_type = node["resource_type"]
            # A "compiled" key selects the Compiled* class over the Parsed* one.
            is_compiled = "compiled" in node
            if resource_type == NodeType.Model:
                model_cls = CompiledModelNode if is_compiled else ParsedModelNode
                model_nodes[node_name] = model_cls(**node)
            elif resource_type == NodeType.Seed:
                seed_cls = CompiledSeedNode if is_compiled else ParsedSeedNode
                seed_nodes[node_name] = seed_cls(**node)
            elif resource_type == NodeType.Test:
                # Only tests carrying "test_metadata" are parsed as generic tests.
                if "test_metadata" not in node:
                    logger.info(f"Ignoring unsupported {node_name}")
                    continue
                if is_compiled:
                    test_nodes[node_name] = CompiledGenericTestNode(**node)
                else:
                    test_nodes[node_name] = ParsedGenericTestNode(**node)

    source_nodes = None
    raw_sources = manifest.get('sources')
    if raw_sources is not None:
        source_nodes = {
            source_name: ParsedSourceDefinition(**source)
            for source_name, source in raw_sources.items()
            if source['resource_type'] == NodeType.Source
        }

    return model_nodes, seed_nodes, test_nodes, source_nodes