Exemple #1
0
 def transform(self, node: IntermediateSnapshotNode) -> ParsedSnapshotNode:
     """Convert an intermediate snapshot node into a parsed snapshot node.

     The node is round-tripped through its dict form and then passed to
     ``set_snapshot_attributes``.

     :param node: The intermediate node to convert.
     :returns: The parsed snapshot node.
     :raises CompilationException: If a ValidationError is raised while
         building the parsed node or setting its snapshot attributes.
     """
     try:
         parsed_node = ParsedSnapshotNode.from_dict(node.to_dict())
         self.set_snapshot_attributes(parsed_node)
         return parsed_node
     except ValidationError as exc:
         # chain the validation error so its traceback is kept as the cause
         raise CompilationException(
             validator_error_message(exc), node
         ) from exc
Exemple #2
0
 def validate(self):
     """Validate the credentials (when set) and the serialized profile.

     :raises DbtProfileError: If a ValidationError is raised.
     """
     try:
         if self.credentials:
             self.credentials.to_dict(validate=True)
         profile_info = self.to_profile_info(serialize_credentials=True)
         ProfileConfig.from_dict(profile_info)
     except ValidationError as err:
         message = validator_error_message(err)
         raise DbtProfileError(message) from err
Exemple #3
0
    def validate(self):
        """Check the serialized configuration against its contract.

        :raises DbtProjectError: If the configuration fails validation.
        """
        try:
            serialized = self.serialize()
            Configuration.validate(serialized)
        except ValidationError as exc:
            message = validator_error_message(exc)
            raise DbtProjectError(message) from exc
Exemple #4
0
 def render_update(
     self, node: IntermediateNode, config: ContextConfig
 ) -> None:
     """Render the node with its context, then update the parsed node.

     :param node: The node to render and update.
     :param config: The context config passed to both steps.
     :raises CompilationException: If either step raises a ValidationError.
     """
     try:
         self.render_with_context(node, config)
         self.update_parsed_node(node, config)
     except ValidationError as err:
         # a ValidationError here most likely means bad types in config()
         raise CompilationException(
             validator_error_message(err), node=node
         ) from err
Exemple #5
0
 def validate(self):
     """Validate the credentials (when set) and the profile info.

     :raises DbtProfileError: If a ValidationError is raised.
     """
     try:
         if self.credentials:
             credentials_dict = self.credentials.to_dict(omit_none=True)
             self.credentials.validate(credentials_dict)
         profile_dict = self.to_profile_info(serialize_credentials=True)
         ProfileConfig.validate(profile_dict)
     except ValidationError as err:
         message = validator_error_message(err)
         raise DbtProfileError(message) from err
Exemple #6
0
def package_config_from_data(packages_data):
    """Build a PackageConfig from raw package data.

    ``None`` is treated as an empty package list.

    :param packages_data: The raw packages dictionary, or None.
    :returns: The result of ``PackageConfig.from_dict``.
    :raises DbtProjectError: If the data fails validation.
    """
    data = {'packages': []} if packages_data is None else packages_data

    try:
        return PackageConfig.from_dict(data)
    except ValidationError as exc:
        detail = validator_error_message(exc)
        raise DbtProjectError(
            'Invalid package config: {}'.format(detail)
        ) from exc
Exemple #7
0
    def validate(self):
        """Check the serialized configuration against its contract, then
        optionally check the version.

        The version check only runs when ``self.args`` has a truthy
        ``version_check`` attribute.

        :raises DbtProjectError: If the configuration fails validation.
        """
        try:
            serialized = self.serialize()
            Configuration.from_dict(serialized)
        except ValidationError as exc:
            message = validator_error_message(exc)
            raise DbtProjectError(message) from exc

        wants_version_check = getattr(self.args, 'version_check', False)
        if wants_version_check:
            self.validate_version()
Exemple #8
0
 def _yaml_from_file(self,
                     source_file: SourceFile) -> Optional[Dict[str, Any]]:
     """Load the yaml contents of a source file.

     :param source_file: The file whose ``contents`` are parsed.
     :returns: Whatever ``load_yaml_text`` returns for the contents.
     :raises CompilationException: If loading the yaml raises a
         ValidationException. The message includes the project name, the
         file's relative path and the reason.
     """
     path: str = source_file.path.relative_path
     try:
         return load_yaml_text(source_file.contents)
     except ValidationException as e:
         reason = validator_error_message(e)
         # chain the original error so its traceback is kept as the cause
         raise CompilationException('Error reading {}: {} - {}'.format(
             self.project.project_name, path, reason)) from e
Exemple #9
0
def error_context(
    path: str,
    key: str,
    data: Any,
    cause: Union[str, ValidationException, JSONValidationException],
) -> str:
    """Build a message describing an invalid config entry found while parsing.

    :param path: The path reported in the message.
    :param key: The config key reported in the message.
    :param data: The data reported in the message.
    :param cause: Either the reason as a string, a ValidationError (rendered
        with ``validator_error_message``), or an object whose ``msg``
        attribute holds the reason.
    :returns: The formatted error message.
    """
    if isinstance(cause, str):
        reason = cause
    else:
        reason = (
            validator_error_message(cause)
            if isinstance(cause, ValidationError)
            else cause.msg
        )
    template = (
        'Invalid {key} config given in {path} @ {key}: {data} - {reason}'
    )
    return template.format(key=key, path=path, data=data, reason=reason)
Exemple #10
0
    def create_test_node(
        self,
        target: Union[UnpatchedSourceDefinition, UnparsedNodeUpdate],
        path: str,
        config: ContextConfig,
        tags: List[str],
        fqn: List[str],
        name: str,
        raw_sql: str,
        test_metadata: Dict[str, Any],
        column_name: Optional[str],
    ) -> ParsedSchemaTestNode:
        """Assemble, validate and build a ParsedSchemaTestNode.

        :returns: The node built from the assembled dictionary.
        :raises CompilationException: If the assembled dictionary fails
            validation; the exception carries an error node built from the
            target's name and file path.
        """
        node_dict = {
            'alias': name,
            'schema': self.default_schema,
            'database': self.default_database,
            'fqn': fqn,
            'name': name,
            'root_path': self.project.project_root,
            'resource_type': self.resource_type,
            'tags': tags,
            'path': path,
            'original_file_path': target.original_file_path,
            'package_name': self.project.project_name,
            'raw_sql': raw_sql,
            'unique_id': self.generate_unique_id(name),
            'config': self.config_dict(config),
            'test_metadata': test_metadata,
            'column_name': column_name,
            'checksum': FileHash.empty().to_dict(omit_none=True),
        }
        try:
            ParsedSchemaTestNode.validate(node_dict)
            return ParsedSchemaTestNode.from_dict(node_dict)
        except ValidationError as err:
            message = validator_error_message(err)
            # an error node is built here purely so the exception message
            # can point at the offending file
            error_node = self._create_error_node(
                name=target.name,
                path=path,
                original_file_path=target.original_file_path,
                raw_sql=raw_sql,
            )
            raise CompilationException(message, node=error_node) from err
Exemple #11
0
 def _create_parsetime_node(
     self,
     block: ConfiguredBlockType,
     path: str,
     config: ContextConfig,
     fqn: List[str],
     name=None,
     **kwargs,
 ) -> IntermediateNode:
     """Create the node that will be passed in to the parser context for
     "rendering". Some information may be partial, as it'll be updated by
     config() and any ref()/source() calls discovered during rendering.

     :param block: The block supplying the name, contents, file path and
         checksum.
     :param path: The path stored on the node.
     :param config: The context config, converted via ``config_dict``.
     :param fqn: The fully-qualified name stored on the node.
     :param name: The node name; defaults to ``block.name`` when None.
     :param kwargs: Extra entries merged into the node dictionary,
         overriding the defaults built here.
     :returns: The node produced by ``parse_from_dict``.
     :raises CompilationException: If the node dictionary fails validation.
     """
     if name is None:
         name = block.name
     dct = {
         'alias': name,
         'schema': self.default_schema,
         'database': self.default_database,
         'fqn': fqn,
         'name': name,
         'root_path': self.project.project_root,
         'resource_type': self.resource_type,
         'path': path,
         'original_file_path': block.path.original_file_path,
         'package_name': self.project.project_name,
         'raw_sql': block.contents,
         'unique_id': self.generate_unique_id(name),
         'config': self.config_dict(config),
         'checksum': block.file.checksum.to_dict(omit_none=True),
     }
     dct.update(kwargs)
     try:
         return self.parse_from_dict(dct, validate=True)
     except ValidationError as exc:
         msg = validator_error_message(exc)
         # this is a bit silly, but build an UnparsedNode just for error
         # message reasons
         node = self._create_error_node(
             name=block.name,
             path=path,
             original_file_path=block.path.original_file_path,
             raw_sql=block.contents,
         )
         # chain the validation error so its traceback is kept as the cause
         raise CompilationException(msg, node=node) from exc
Exemple #12
0
 def render_test_update(self, node, config, builder):
     """Update a test node, rendering it unless it uses a built-in macro.

     For ``macro.dbt.test_not_null`` and ``macro.dbt.test_unique`` the node
     is updated directly, the builder's severity/enabled settings are
     applied when not None, and the target is recorded as a source or a
     ref. For any other macro a test context is generated and the raw sql
     is rendered before the node is updated.

     :raises CompilationException: If a ValidationError is raised on the
         rendering path.
     """
     macro_unique_id = self.macro_resolver.get_macro_id(
         node.package_name, 'test_' + builder.name)
     # register the dependency first so that rendering can limit the
     # macros added to the context
     node.depends_on.add_macro(macro_unique_id)

     builtin_macros = ('macro.dbt.test_not_null', 'macro.dbt.test_unique')
     if macro_unique_id not in builtin_macros:
         try:
             # base context, without the magic kwargs field
             context = generate_test_context(
                 node,
                 self.root_project,
                 self.manifest,
                 config,
                 self.macro_resolver,
             )
             # add the rendered test kwargs (this collects any refs)
             add_rendered_test_kwargs(context, node, capture_macros=True)
             # the parsed node is not rendered in the native context.
             get_rendered(node.raw_sql, context, node, capture_macros=True)
             self.update_parsed_node(node, config)
         except ValidationError as err:
             # a ValidationError here most likely means bad types in
             # config()
             raise CompilationException(
                 validator_error_message(err), node=node
             ) from err
         return

     self.update_parsed_node(node, config)
     if builder.severity() is not None:
         node.unrendered_config['severity'] = builder.severity()
         node.config['severity'] = builder.severity()
     if builder.enabled() is not None:
         node.config['enabled'] = builder.enabled()
     # source node tests are processed at patch_source time
     if isinstance(builder.target, UnpatchedSourceDefinition):
         source_ref = [builder.target.fqn[-2], builder.target.fqn[-1]]
         node.sources.append(source_ref)
     else:
         # every other kind of target is recorded as a ref
         node.refs.append([builder.target.name])
Exemple #13
0
 def validate(self):
     """Validate the project config dictionary against ProjectContract.

     :raises DbtProjectError: If a ValidationError is raised.
     """
     try:
         project_config = self.to_project_config()
         ProjectContract.from_dict(project_config)
     except ValidationError as exc:
         message = validator_error_message(exc)
         raise DbtProjectError(message) from exc
Exemple #14
0
    def from_project_config(cls, project_dict, packages_dict=None):
        """Create a project from its project and package configuration, as read
        by yaml.safe_load().

        :param project_dict dict: The dictionary as read from disk
        :param packages_dict Optional[dict]: If it exists, the packages file as
            read from disk.
        :raises DbtProjectError: If the project is missing or invalid, or if
            the packages file exists and is invalid.
        :returns Project: The project, with defaults populated.
        """
        try:
            project_dict = cls._preprocess(project_dict)
        except RecursionException as exc:
            # chain the recursion error explicitly, matching the other
            # handlers in this method
            raise DbtProjectError(
                'Cycle detected: Project input has a reference to itself',
                project=project_dict
            ) from exc
        # just for validation.
        try:
            ProjectContract.from_dict(project_dict)
        except ValidationError as e:
            raise DbtProjectError(validator_error_message(e)) from e

        # name/version are required in the Project definition, so we can assume
        # they are present
        name = project_dict['name']
        version = project_dict['version']
        # this is added at project_dict parse time and should always be here
        # once we see it.
        project_root = project_dict['project-root']
        # this is only optional in the sense that if it's not present, it needs
        # to have been a cli argument.
        profile_name = project_dict.get('profile')
        # these are optional
        source_paths = project_dict.get('source-paths', ['models'])
        macro_paths = project_dict.get('macro-paths', ['macros'])
        data_paths = project_dict.get('data-paths', ['data'])
        test_paths = project_dict.get('test-paths', ['test'])
        analysis_paths = project_dict.get('analysis-paths', [])
        # the default is a copy, so docs_paths never aliases source_paths
        docs_paths = project_dict.get('docs-paths', source_paths[:])
        target_path = project_dict.get('target-path', 'target')
        snapshot_paths = project_dict.get('snapshot-paths', ['snapshots'])
        # should this also include the modules path by default?
        clean_targets = project_dict.get('clean-targets', [target_path])
        log_path = project_dict.get('log-path', 'logs')
        modules_path = project_dict.get('modules-path', 'dbt_modules')
        # in the default case we'll populate this once we know the adapter type
        quoting = project_dict.get('quoting', {})

        models = project_dict.get('models', {})
        on_run_start = project_dict.get('on-run-start', [])
        on_run_end = project_dict.get('on-run-end', [])
        seeds = project_dict.get('seeds', {})
        snapshots = project_dict.get('snapshots', {})
        dbt_raw_version = project_dict.get('require-dbt-version', '>=0.0.0')
        query_comment = project_dict.get('query-comment', NoValue())

        try:
            dbt_version = _parse_versions(dbt_raw_version)
        except SemverException as e:
            raise DbtProjectError(str(e)) from e

        try:
            packages = package_config_from_data(packages_dict)
        except ValidationError as e:
            raise DbtProjectError(validator_error_message(e)) from e

        project = cls(
            project_name=name,
            version=version,
            project_root=project_root,
            profile_name=profile_name,
            source_paths=source_paths,
            macro_paths=macro_paths,
            data_paths=data_paths,
            test_paths=test_paths,
            analysis_paths=analysis_paths,
            docs_paths=docs_paths,
            target_path=target_path,
            snapshot_paths=snapshot_paths,
            clean_targets=clean_targets,
            log_path=log_path,
            modules_path=modules_path,
            quoting=quoting,
            models=models,
            on_run_start=on_run_start,
            on_run_end=on_run_end,
            seeds=seeds,
            snapshots=snapshots,
            dbt_version=dbt_version,
            packages=packages,
            query_comment=query_comment,
        )
        # sanity check - this means an internal issue
        project.validate()
        return project
Exemple #15
0
    def create_project(self, rendered: RenderComponents) -> 'Project':
        """Build a Project from rendered components.

        The rendered project dictionary is validated against
        ProjectContract, defaults are filled in for unset values, and the
        unrendered dictionaries held on ``self`` are attached to the result.

        :param rendered: The rendered project, packages and selectors
            dictionaries.
        :returns: The validated project.
        :raises DbtProjectError: If the rendered project dictionary fails
            validation or has no project root.
        """
        unrendered = RenderComponents(
            project_dict=self.project_dict,
            packages_dict=self.packages_dict,
            selectors_dict=self.selectors_dict,
        )
        dbt_version = _get_required_version(
            rendered.project_dict,
            verify_version=self.verify_version,
        )

        try:
            ProjectContract.validate(rendered.project_dict)
            cfg = ProjectContract.from_dict(rendered.project_dict)
        except ValidationError as exc:
            raise DbtProjectError(validator_error_message(exc)) from exc

        # name/version are required by the contract, so both are present
        name = cfg.name
        version = cfg.version
        # the project root is added at project_dict parse time, so it should
        # always be set by now
        if cfg.project_root is None:
            raise DbtProjectError('cfg must have a project root!')
        project_root = cfg.project_root
        # the profile may be absent here, in which case it needs to have
        # been a cli argument
        profile_name = cfg.profile

        # defaults for every path setting
        source_paths: List[str] = value_or(cfg.source_paths, ['models'])
        macro_paths: List[str] = value_or(cfg.macro_paths, ['macros'])
        data_paths: List[str] = value_or(cfg.data_paths, ['data'])
        test_paths: List[str] = value_or(cfg.test_paths, ['test'])
        analysis_paths: List[str] = value_or(cfg.analysis_paths, [])
        snapshot_paths: List[str] = value_or(cfg.snapshot_paths, ['snapshots'])

        all_source_paths: List[str] = _all_source_paths(
            source_paths,
            data_paths,
            snapshot_paths,
            analysis_paths,
            macro_paths,
        )

        docs_paths: List[str] = value_or(cfg.docs_paths, all_source_paths)
        asset_paths: List[str] = value_or(cfg.asset_paths, [])
        target_path: str = value_or(cfg.target_path, 'target')
        clean_targets: List[str] = value_or(cfg.clean_targets, [target_path])
        log_path: str = value_or(cfg.log_path, 'logs')
        modules_path: str = value_or(cfg.modules_path, 'dbt_modules')

        # quoting is passed along as a plain dict; in the default case it
        # gets populated once the adapter type is known
        quoting: Dict[str, Any] = {}
        if cfg.quoting is not None:
            quoting = cfg.quoting.to_dict(omit_none=True)

        models: Dict[str, Any] = cfg.models
        seeds: Dict[str, Any] = cfg.seeds
        snapshots: Dict[str, Any] = cfg.snapshots
        sources: Dict[str, Any] = cfg.sources
        vars_dict: Dict[str, Any] = {} if cfg.vars is None else cfg.vars
        vars_value: VarProvider = VarProvider(vars_dict)

        on_run_start: List[str] = value_or(cfg.on_run_start, [])
        on_run_end: List[str] = value_or(cfg.on_run_end, [])

        query_comment = _query_comment_from_cfg(cfg.query_comment)

        packages = package_config_from_data(rendered.packages_dict)
        selectors = selector_config_from_data(rendered.selectors_dict)

        manifest_selectors: Dict[str, Any] = {}
        selectors_dict = rendered.selectors_dict
        if selectors_dict and selectors_dict['selectors']:
            # a dict with a single key 'selectors' pointing to a list of
            # dicts
            manifest_selectors = SelectorDict.parse_from_selectors_list(
                selectors_dict['selectors'])

        project = Project(
            project_name=name,
            version=version,
            project_root=project_root,
            profile_name=profile_name,
            source_paths=source_paths,
            macro_paths=macro_paths,
            data_paths=data_paths,
            test_paths=test_paths,
            analysis_paths=analysis_paths,
            docs_paths=docs_paths,
            asset_paths=asset_paths,
            target_path=target_path,
            snapshot_paths=snapshot_paths,
            clean_targets=clean_targets,
            log_path=log_path,
            modules_path=modules_path,
            quoting=quoting,
            models=models,
            on_run_start=on_run_start,
            on_run_end=on_run_end,
            seeds=seeds,
            snapshots=snapshots,
            dbt_version=dbt_version,
            packages=packages,
            manifest_selectors=manifest_selectors,
            selectors=selectors,
            query_comment=query_comment,
            sources=sources,
            vars=vars_value,
            config_version=cfg.config_version,
            unrendered=unrendered,
        )
        # sanity check - a failure here means an internal issue
        project.validate()
        return project
Exemple #16
0
    def from_project_config(
        cls,
        project_dict: Dict[str, Any],
        packages_dict: Optional[Dict[str, Any]] = None,
        selectors_dict: Optional[Dict[str, Any]] = None,
        required_dbt_version: Optional[List[VersionSpecifier]] = None,
    ) -> 'Project':
        """Create a project from its project and package configuration, as read
        by yaml.safe_load().

        :param project_dict: The dictionary as read from disk
        :param packages_dict: If it exists, the packages file as
            read from disk.
        :param selectors_dict: The selectors data, passed to
            ``selector_config_from_data``.
        :param required_dbt_version: The required dbt version specifiers.
            When None, they are derived from ``project_dict``.
        :raises DbtProjectError: If the project is missing or invalid, or if
            the packages file exists and is invalid.
        :returns: The project, with defaults populated.
        """
        if required_dbt_version is None:
            dbt_version = cls._get_required_version(project_dict)
        else:
            dbt_version = required_dbt_version

        try:
            project_dict = cls._preprocess(project_dict)
        except RecursionException as exc:
            # chain the recursion error explicitly, matching the other
            # handlers in this method
            raise DbtProjectError(
                'Cycle detected: Project input has a reference to itself',
                project=project_dict
            ) from exc
        try:
            cfg = parse_project_config(project_dict)
        except ValidationError as e:
            raise DbtProjectError(validator_error_message(e)) from e

        # name/version are required in the Project definition, so we can assume
        # they are present
        name = cfg.name
        version = cfg.version
        # this is added at project_dict parse time and should always be here
        # once we see it.
        if cfg.project_root is None:
            raise DbtProjectError('cfg must have a project root!')
        else:
            project_root = cfg.project_root
        # this is only optional in the sense that if it's not present, it needs
        # to have been a cli argument.
        profile_name = cfg.profile
        # these are all the defaults
        source_paths: List[str] = value_or(cfg.source_paths, ['models'])
        macro_paths: List[str] = value_or(cfg.macro_paths, ['macros'])
        data_paths: List[str] = value_or(cfg.data_paths, ['data'])
        test_paths: List[str] = value_or(cfg.test_paths, ['test'])
        analysis_paths: List[str] = value_or(cfg.analysis_paths, [])
        snapshot_paths: List[str] = value_or(cfg.snapshot_paths, ['snapshots'])

        all_source_paths: List[str] = _all_source_paths(
            source_paths, data_paths, snapshot_paths, analysis_paths,
            macro_paths
        )

        docs_paths: List[str] = value_or(cfg.docs_paths, all_source_paths)
        asset_paths: List[str] = value_or(cfg.asset_paths, [])
        target_path: str = value_or(cfg.target_path, 'target')
        clean_targets: List[str] = value_or(cfg.clean_targets, [target_path])
        log_path: str = value_or(cfg.log_path, 'logs')
        modules_path: str = value_or(cfg.modules_path, 'dbt_modules')
        # in the default case we'll populate this once we know the adapter type
        # It would be nice to just pass along a Quoting here, but that would
        # break many things
        quoting: Dict[str, Any] = {}
        if cfg.quoting is not None:
            quoting = cfg.quoting.to_dict()

        models: Dict[str, Any]
        seeds: Dict[str, Any]
        snapshots: Dict[str, Any]
        sources: Dict[str, Any]
        vars_value: VarProvider

        if cfg.config_version == 1:
            assert isinstance(cfg, ProjectV1Contract)
            # extract everything named 'vars'
            models = cfg.models
            seeds = cfg.seeds
            snapshots = cfg.snapshots
            # version 1 configs get an empty sources dict
            sources = {}
            vars_value = V1VarProvider(
                models=models, seeds=seeds, snapshots=snapshots
            )
        elif cfg.config_version == 2:
            assert isinstance(cfg, ProjectV2Contract)
            models = cfg.models
            seeds = cfg.seeds
            snapshots = cfg.snapshots
            sources = cfg.sources
            if cfg.vars is None:
                vars_dict: Dict[str, Any] = {}
            else:
                vars_dict = cfg.vars
            vars_value = V2VarProvider(vars_dict)
        else:
            # NOTE(review): this raises ValidationError directly rather than
            # DbtProjectError, so it is not covered by the :raises: contract
            # above - confirm whether callers rely on that.
            raise ValidationError(
                f'Got unsupported config_version={cfg.config_version}'
            )

        on_run_start: List[str] = value_or(cfg.on_run_start, [])
        on_run_end: List[str] = value_or(cfg.on_run_end, [])

        query_comment = _query_comment_from_cfg(cfg.query_comment)

        try:
            packages = package_config_from_data(packages_dict)
        except ValidationError as e:
            raise DbtProjectError(validator_error_message(e)) from e

        try:
            selectors = selector_config_from_data(selectors_dict)
        except ValidationError as e:
            raise DbtProjectError(validator_error_message(e)) from e

        project = cls(
            project_name=name,
            version=version,
            project_root=project_root,
            profile_name=profile_name,
            source_paths=source_paths,
            macro_paths=macro_paths,
            data_paths=data_paths,
            test_paths=test_paths,
            analysis_paths=analysis_paths,
            docs_paths=docs_paths,
            asset_paths=asset_paths,
            target_path=target_path,
            snapshot_paths=snapshot_paths,
            clean_targets=clean_targets,
            log_path=log_path,
            modules_path=modules_path,
            quoting=quoting,
            models=models,
            on_run_start=on_run_start,
            on_run_end=on_run_end,
            seeds=seeds,
            snapshots=snapshots,
            dbt_version=dbt_version,
            packages=packages,
            selectors=selectors,
            query_comment=query_comment,
            sources=sources,
            vars=vars_value,
            config_version=cfg.config_version,
        )
        # sanity check - this means an internal issue
        project.validate()
        return project