def transform(self, node: IntermediateSnapshotNode) -> ParsedSnapshotNode:
    """Convert an intermediate snapshot node into a parsed snapshot node.

    The node is round-tripped through its dict form to build a
    ``ParsedSnapshotNode``, then ``set_snapshot_attributes`` is applied to
    the result.

    :param node: The intermediate node to convert.
    :raises CompilationException: If building the parsed node (or setting
        its snapshot attributes) raises a ``ValidationError``.
    :returns: The parsed snapshot node.
    """
    try:
        parsed_node = ParsedSnapshotNode.from_dict(node.to_dict())
        self.set_snapshot_attributes(parsed_node)
        return parsed_node
    except ValidationError as exc:
        # Chain explicitly so the traceback reports the validation failure
        # as the direct cause, matching the other handlers in this code.
        raise CompilationException(
            validator_error_message(exc), node
        ) from exc
def validate(self):
    """Validate the credentials (if any) and the serialized profile.

    :raises DbtProfileError: If either step raises a ``ValidationError``.
    """
    try:
        if self.credentials:
            # to_dict(validate=True) is called for its validation side
            # effect only; the resulting dict is discarded.
            self.credentials.to_dict(validate=True)
        ProfileConfig.from_dict(
            self.to_profile_info(serialize_credentials=True)
        )
    except ValidationError as exc:
        message = validator_error_message(exc)
        raise DbtProfileError(message) from exc
def validate(self):
    """Check the serialized configuration against the ``Configuration``
    contract.

    :raises DbtProjectError: If the configuration fails validation.
    """
    try:
        serialized = self.serialize()
        Configuration.validate(serialized)
    except ValidationError as e:
        message = validator_error_message(e)
        raise DbtProjectError(message) from e
def render_update(self, node: IntermediateNode, config: ContextConfig) -> None:
    """Render ``node`` with its context, then update it from ``config``.

    :param node: The node to render and update in place.
    :param config: The context config passed to both steps.
    :raises CompilationException: If either step raises a
        ``ValidationError``.
    """
    try:
        self.render_with_context(node, config)
        self.update_parsed_node(node, config)
    except ValidationError as exc:
        # A ValidationError here most likely means bad types were passed
        # to config().
        raise CompilationException(
            validator_error_message(exc), node=node
        ) from exc
def validate(self):
    """Validate the credentials (if any), then the serialized profile.

    :raises DbtProfileError: If either validation raises a
        ``ValidationError``.
    """
    try:
        if self.credentials:
            credentials_dict = self.credentials.to_dict(omit_none=True)
            self.credentials.validate(credentials_dict)
        profile_dict = self.to_profile_info(serialize_credentials=True)
        ProfileConfig.validate(profile_dict)
    except ValidationError as exc:
        message = validator_error_message(exc)
        raise DbtProfileError(message) from exc
def package_config_from_data(packages_data):
    """Build a ``PackageConfig`` from raw packages data.

    :param packages_data: The packages dictionary, or ``None``; ``None`` is
        treated as an empty package list.
    :raises DbtProjectError: If the data fails validation.
    :returns: The result of ``PackageConfig.from_dict``.
    """
    data = {'packages': []} if packages_data is None else packages_data

    try:
        return PackageConfig.from_dict(data)
    except ValidationError as e:
        reason = validator_error_message(e)
        raise DbtProjectError(
            'Invalid package config: {}'.format(reason)
        ) from e
def validate(self):
    """Check the serialized configuration against its contract, then
    optionally run the version check.

    ``validate_version`` is only called when ``self.args`` has a truthy
    ``version_check`` attribute.

    :raises DbtProjectError: If the configuration fails validation.
    """
    try:
        Configuration.from_dict(self.serialize())
    except ValidationError as e:
        message = validator_error_message(e)
        raise DbtProjectError(message) from e

    version_check_requested = getattr(self.args, 'version_check', False)
    if not version_check_requested:
        return
    self.validate_version()
def _yaml_from_file(self, source_file: SourceFile) -> Optional[Dict[str, Any]]:
    """Load the contents of ``source_file`` as yaml.

    :param source_file: The file whose ``contents`` are parsed.
    :raises CompilationException: If loading the yaml raises a
        ``ValidationException``; the message names the project and the
        file's relative path.
    :returns: Whatever ``load_yaml_text`` returns for the file contents.
    """
    path: str = source_file.path.relative_path
    try:
        return load_yaml_text(source_file.contents)
    except ValidationException as e:
        reason = validator_error_message(e)
        # Chain explicitly so the yaml failure is reported as the cause.
        raise CompilationException(
            'Error reading {}: {} - {}'.format(
                self.project.project_name, path, reason
            )
        ) from e
def error_context(
    path: str,
    key: str,
    data: Any,
    cause: Union[str, ValidationException, JSONValidationException],
) -> str:
    """Build a human-readable message describing a parsing error.

    :param path: The path of the file being parsed.
    :param key: The config key being parsed.
    :param data: The offending data, interpolated into the message.
    :param cause: Either a ready-made reason string, a ``ValidationError``
        (formatted via ``validator_error_message``), or any other object
        exposing a ``msg`` attribute.
    :returns: The formatted error message.
    """
    template = (
        'Invalid {key} config given in {path} @ {key}: {data} - {reason}'
    )

    if isinstance(cause, str):
        detail = cause
    elif isinstance(cause, ValidationError):
        detail = validator_error_message(cause)
    else:
        detail = cause.msg

    return template.format(key=key, path=path, data=data, reason=detail)
def create_test_node(
    self,
    target: Union[UnpatchedSourceDefinition, UnparsedNodeUpdate],
    path: str,
    config: ContextConfig,
    tags: List[str],
    fqn: List[str],
    name: str,
    raw_sql: str,
    test_metadata: Dict[str, Any],
    column_name: Optional[str],
) -> ParsedSchemaTestNode:
    """Assemble, validate and build a ``ParsedSchemaTestNode``.

    The node dictionary is populated from the arguments plus parser and
    project defaults (schema, database, root path, resource type, package
    name), and given an empty checksum.

    :raises CompilationException: If the assembled dictionary fails
        validation; the exception carries an error node built from
        ``target``.
    :returns: The parsed schema test node.
    """
    empty_checksum = FileHash.empty().to_dict(omit_none=True)
    node_dict = {
        'alias': name,
        'schema': self.default_schema,
        'database': self.default_database,
        'fqn': fqn,
        'name': name,
        'root_path': self.project.project_root,
        'resource_type': self.resource_type,
        'tags': tags,
        'path': path,
        'original_file_path': target.original_file_path,
        'package_name': self.project.project_name,
        'raw_sql': raw_sql,
        'unique_id': self.generate_unique_id(name),
        'config': self.config_dict(config),
        'test_metadata': test_metadata,
        'column_name': column_name,
        'checksum': empty_checksum,
    }

    try:
        ParsedSchemaTestNode.validate(node_dict)
        parsed = ParsedSchemaTestNode.from_dict(node_dict)
    except ValidationError as exc:
        message = validator_error_message(exc)
        # The error node exists purely so the exception can report where
        # the failing test came from.
        error_node = self._create_error_node(
            name=target.name,
            path=path,
            original_file_path=target.original_file_path,
            raw_sql=raw_sql,
        )
        raise CompilationException(message, node=error_node) from exc
    return parsed
def _create_parsetime_node(
    self,
    block: ConfiguredBlockType,
    path: str,
    config: ContextConfig,
    fqn: List[str],
    name=None,
    **kwargs,
) -> IntermediateNode:
    """Create the node that will be passed in to the parser context for
    "rendering". Some information may be partial, as it'll be updated by
    config() and any ref()/source() calls discovered during rendering.

    :param block: The block being parsed; supplies contents, file checksum
        and original file path.
    :param path: The path recorded on the node.
    :param config: The context config, converted via ``config_dict``.
    :param fqn: The fully-qualified name recorded on the node.
    :param name: The node name; defaults to ``block.name`` when ``None``.
    :param kwargs: Extra entries merged into the node dictionary, overriding
        the defaults built here.
    :raises CompilationException: If the assembled dictionary fails
        validation.
    :returns: The result of ``parse_from_dict`` for the node dictionary.
    """
    if name is None:
        name = block.name
    dct = {
        'alias': name,
        'schema': self.default_schema,
        'database': self.default_database,
        'fqn': fqn,
        'name': name,
        'root_path': self.project.project_root,
        'resource_type': self.resource_type,
        'path': path,
        'original_file_path': block.path.original_file_path,
        'package_name': self.project.project_name,
        'raw_sql': block.contents,
        'unique_id': self.generate_unique_id(name),
        'config': self.config_dict(config),
        'checksum': block.file.checksum.to_dict(omit_none=True),
    }
    dct.update(kwargs)
    try:
        return self.parse_from_dict(dct, validate=True)
    except ValidationError as exc:
        msg = validator_error_message(exc)
        # this is a bit silly, but build an UnparsedNode just for error
        # message reasons
        node = self._create_error_node(
            name=block.name,
            path=path,
            original_file_path=block.path.original_file_path,
            raw_sql=block.contents,
        )
        # Chain explicitly so the validation failure is reported as the
        # direct cause, consistent with create_test_node.
        raise CompilationException(msg, node=node) from exc
def render_test_update(self, node, config, builder):
    """Update a schema test node, rendering it only when necessary.

    The test macro is always recorded as a dependency of ``node``. For the
    built-in ``not_null`` and ``unique`` test macros the node is updated
    directly from ``config`` and ``builder`` without rendering; for every
    other macro a test context is generated and the node's raw SQL is
    rendered to capture macros before the node is updated.

    :raises CompilationException: If the rendering path raises a
        ``ValidationError``.
    """
    macro_unique_id = self.macro_resolver.get_macro_id(
        node.package_name, 'test_' + builder.name
    )
    # Record the dependency up front so rendering can limit the macros
    # added to the context.
    node.depends_on.add_macro(macro_unique_id)

    unrendered_macros = ['macro.dbt.test_not_null', 'macro.dbt.test_unique']
    if macro_unique_id not in unrendered_macros:
        try:
            # Base context without the magic kwargs field.
            context = generate_test_context(
                node, self.root_project, self.manifest, config,
                self.macro_resolver,
            )
            # Add the rendered test kwargs (which collects any refs).
            add_rendered_test_kwargs(context, node, capture_macros=True)
            # The parsed node is not rendered in the native context.
            get_rendered(node.raw_sql, context, node, capture_macros=True)
            self.update_parsed_node(node, config)
        except ValidationError as exc:
            # A ValidationError here most likely means bad types were
            # passed to config().
            raise CompilationException(
                validator_error_message(exc), node=node
            ) from exc
        return

    self.update_parsed_node(node, config)

    if builder.severity() is not None:
        node.unrendered_config['severity'] = builder.severity()
        node.config['severity'] = builder.severity()

    if builder.enabled() is not None:
        node.config['enabled'] = builder.enabled()

    if not isinstance(builder.target, UnpatchedSourceDefinition):
        # Every non-source target is recorded as a ref.
        node.refs.append([builder.target.name])
        return

    # Source node tests are processed at patch_source time.
    node.sources.append(
        [builder.target.fqn[-2], builder.target.fqn[-1]]
    )
def validate(self):
    """Check the project config against ``ProjectContract``.

    :raises DbtProjectError: If the project config fails validation.
    """
    try:
        project_config = self.to_project_config()
        ProjectContract.from_dict(project_config)
    except ValidationError as e:
        message = validator_error_message(e)
        raise DbtProjectError(message) from e
def from_project_config(cls, project_dict, packages_dict=None):
    """Create a project from its project and package configuration, as read
    by yaml.safe_load().

    :param project_dict dict: The dictionary as read from disk
    :param packages_dict Optional[dict]: If it exists, the packages file as
        read from disk.
    :raises DbtProjectError: If the project is missing or invalid, or if
        the packages file exists and is invalid.
    :returns Project: The project, with defaults populated.
    """
    try:
        project_dict = cls._preprocess(project_dict)
    except RecursionException as exc:
        # Chain explicitly, like every other handler in this function, so
        # the recursion failure is reported as the direct cause.
        raise DbtProjectError(
            'Cycle detected: Project input has a reference to itself',
            project=project_dict
        ) from exc
    # just for validation.
    try:
        ProjectContract.from_dict(project_dict)
    except ValidationError as e:
        raise DbtProjectError(validator_error_message(e)) from e

    # name/version are required in the Project definition, so we can assume
    # they are present
    name = project_dict['name']
    version = project_dict['version']
    # this is added at project_dict parse time and should always be here
    # once we see it.
    project_root = project_dict['project-root']
    # this is only optional in the sense that if it's not present, it needs
    # to have been a cli argument.
    profile_name = project_dict.get('profile')
    # these are optional
    source_paths = project_dict.get('source-paths', ['models'])
    macro_paths = project_dict.get('macro-paths', ['macros'])
    data_paths = project_dict.get('data-paths', ['data'])
    test_paths = project_dict.get('test-paths', ['test'])
    analysis_paths = project_dict.get('analysis-paths', [])
    # default to a copy so docs_paths never aliases source_paths
    docs_paths = project_dict.get('docs-paths', source_paths[:])
    target_path = project_dict.get('target-path', 'target')
    snapshot_paths = project_dict.get('snapshot-paths', ['snapshots'])
    # should this also include the modules path by default?
    clean_targets = project_dict.get('clean-targets', [target_path])
    log_path = project_dict.get('log-path', 'logs')
    modules_path = project_dict.get('modules-path', 'dbt_modules')
    # in the default case we'll populate this once we know the adapter type
    quoting = project_dict.get('quoting', {})

    models = project_dict.get('models', {})
    on_run_start = project_dict.get('on-run-start', [])
    on_run_end = project_dict.get('on-run-end', [])
    seeds = project_dict.get('seeds', {})
    snapshots = project_dict.get('snapshots', {})
    dbt_raw_version = project_dict.get('require-dbt-version', '>=0.0.0')
    query_comment = project_dict.get('query-comment', NoValue())

    try:
        dbt_version = _parse_versions(dbt_raw_version)
    except SemverException as e:
        raise DbtProjectError(str(e)) from e

    try:
        packages = package_config_from_data(packages_dict)
    except ValidationError as e:
        raise DbtProjectError(validator_error_message(e)) from e

    project = cls(
        project_name=name,
        version=version,
        project_root=project_root,
        profile_name=profile_name,
        source_paths=source_paths,
        macro_paths=macro_paths,
        data_paths=data_paths,
        test_paths=test_paths,
        analysis_paths=analysis_paths,
        docs_paths=docs_paths,
        target_path=target_path,
        snapshot_paths=snapshot_paths,
        clean_targets=clean_targets,
        log_path=log_path,
        modules_path=modules_path,
        quoting=quoting,
        models=models,
        on_run_start=on_run_start,
        on_run_end=on_run_end,
        seeds=seeds,
        snapshots=snapshots,
        dbt_version=dbt_version,
        packages=packages,
        query_comment=query_comment,
    )
    # sanity check - this means an internal issue
    project.validate()
    return project
def create_project(self, rendered: RenderComponents) -> 'Project':
    """Build a validated ``Project`` from rendered configuration.

    The unrendered dictionaries held on ``self`` are bundled and attached
    to the project alongside the values derived from ``rendered``.

    :param rendered: The rendered project, packages and selectors
        dictionaries.
    :raises DbtProjectError: If the rendered project dictionary fails
        validation, or if the parsed config has no project root.
    :returns: The project, with defaults populated.
    """
    unrendered = RenderComponents(
        project_dict=self.project_dict,
        packages_dict=self.packages_dict,
        selectors_dict=self.selectors_dict,
    )
    dbt_version = _get_required_version(
        rendered.project_dict,
        verify_version=self.verify_version,
    )

    try:
        ProjectContract.validate(rendered.project_dict)
        cfg = ProjectContract.from_dict(rendered.project_dict)
    except ValidationError as e:
        raise DbtProjectError(validator_error_message(e)) from e

    # The project root is added at project_dict parse time, so it should
    # always be present by now.
    if cfg.project_root is None:
        raise DbtProjectError('cfg must have a project root!')
    project_root = cfg.project_root

    # Path settings, each falling back to its default.
    source_paths: List[str] = value_or(cfg.source_paths, ['models'])
    macro_paths: List[str] = value_or(cfg.macro_paths, ['macros'])
    data_paths: List[str] = value_or(cfg.data_paths, ['data'])
    test_paths: List[str] = value_or(cfg.test_paths, ['test'])
    analysis_paths: List[str] = value_or(cfg.analysis_paths, [])
    snapshot_paths: List[str] = value_or(cfg.snapshot_paths, ['snapshots'])

    all_source_paths: List[str] = _all_source_paths(
        source_paths, data_paths, snapshot_paths, analysis_paths,
        macro_paths,
    )

    docs_paths: List[str] = value_or(cfg.docs_paths, all_source_paths)
    asset_paths: List[str] = value_or(cfg.asset_paths, [])
    target_path: str = value_or(cfg.target_path, 'target')
    clean_targets: List[str] = value_or(cfg.clean_targets, [target_path])
    log_path: str = value_or(cfg.log_path, 'logs')
    modules_path: str = value_or(cfg.modules_path, 'dbt_modules')

    # In the default case quoting is populated once the adapter type is
    # known. It would be nice to pass along a Quoting object here, but that
    # would break many things, so a plain dict is used.
    quoting: Dict[str, Any] = (
        cfg.quoting.to_dict(omit_none=True)
        if cfg.quoting is not None
        else {}
    )

    vars_dict: Dict[str, Any] = {} if cfg.vars is None else cfg.vars
    vars_value: VarProvider = VarProvider(vars_dict)

    on_run_start: List[str] = value_or(cfg.on_run_start, [])
    on_run_end: List[str] = value_or(cfg.on_run_end, [])

    query_comment = _query_comment_from_cfg(cfg.query_comment)

    packages = package_config_from_data(rendered.packages_dict)
    selectors = selector_config_from_data(rendered.selectors_dict)

    manifest_selectors: Dict[str, Any] = {}
    selectors_dict = rendered.selectors_dict
    if selectors_dict and selectors_dict['selectors']:
        # selectors_dict has a single key 'selectors' pointing to a list
        # of dicts.
        manifest_selectors = SelectorDict.parse_from_selectors_list(
            selectors_dict['selectors']
        )

    project = Project(
        # name/version are required in the Project definition, so they can
        # be assumed present on cfg.
        project_name=cfg.name,
        version=cfg.version,
        project_root=project_root,
        # The profile is only optional in the sense that, if absent, it
        # must have been given as a cli argument.
        profile_name=cfg.profile,
        source_paths=source_paths,
        macro_paths=macro_paths,
        data_paths=data_paths,
        test_paths=test_paths,
        analysis_paths=analysis_paths,
        docs_paths=docs_paths,
        asset_paths=asset_paths,
        target_path=target_path,
        snapshot_paths=snapshot_paths,
        clean_targets=clean_targets,
        log_path=log_path,
        modules_path=modules_path,
        quoting=quoting,
        models=cfg.models,
        on_run_start=on_run_start,
        on_run_end=on_run_end,
        seeds=cfg.seeds,
        snapshots=cfg.snapshots,
        dbt_version=dbt_version,
        packages=packages,
        manifest_selectors=manifest_selectors,
        selectors=selectors,
        query_comment=query_comment,
        sources=cfg.sources,
        vars=vars_value,
        config_version=cfg.config_version,
        unrendered=unrendered,
    )
    # sanity check - a failure here means an internal issue
    project.validate()
    return project
def from_project_config(
    cls,
    project_dict: Dict[str, Any],
    packages_dict: Optional[Dict[str, Any]] = None,
    selectors_dict: Optional[Dict[str, Any]] = None,
    required_dbt_version: Optional[List[VersionSpecifier]] = None,
) -> 'Project':
    """Create a project from its project and package configuration, as read
    by yaml.safe_load().

    :param project_dict: The dictionary as read from disk
    :param packages_dict: If it exists, the packages file as read from
        disk.
    :param selectors_dict: If given, the selectors data passed to
        ``selector_config_from_data``.
    :param required_dbt_version: If given, used as the project's dbt
        version requirement; otherwise it is derived from ``project_dict``
        via ``_get_required_version``.
    :raises DbtProjectError: If the project is missing or invalid, or if
        the packages file exists and is invalid.
    :returns: The project, with defaults populated.
    """
    if required_dbt_version is None:
        dbt_version = cls._get_required_version(project_dict)
    else:
        dbt_version = required_dbt_version

    try:
        project_dict = cls._preprocess(project_dict)
    except RecursionException as exc:
        # Chain explicitly, like every other handler in this function, so
        # the recursion failure is reported as the direct cause.
        raise DbtProjectError(
            'Cycle detected: Project input has a reference to itself',
            project=project_dict
        ) from exc
    try:
        cfg = parse_project_config(project_dict)
    except ValidationError as e:
        raise DbtProjectError(validator_error_message(e)) from e

    # name/version are required in the Project definition, so we can assume
    # they are present
    name = cfg.name
    version = cfg.version
    # this is added at project_dict parse time and should always be here
    # once we see it.
    if cfg.project_root is None:
        raise DbtProjectError('cfg must have a project root!')
    else:
        project_root = cfg.project_root
    # this is only optional in the sense that if it's not present, it needs
    # to have been a cli argument.
    profile_name = cfg.profile
    # these are all the defaults
    source_paths: List[str] = value_or(cfg.source_paths, ['models'])
    macro_paths: List[str] = value_or(cfg.macro_paths, ['macros'])
    data_paths: List[str] = value_or(cfg.data_paths, ['data'])
    test_paths: List[str] = value_or(cfg.test_paths, ['test'])
    analysis_paths: List[str] = value_or(cfg.analysis_paths, [])
    snapshot_paths: List[str] = value_or(cfg.snapshot_paths, ['snapshots'])

    all_source_paths: List[str] = _all_source_paths(
        source_paths, data_paths, snapshot_paths, analysis_paths,
        macro_paths
    )

    docs_paths: List[str] = value_or(cfg.docs_paths, all_source_paths)
    asset_paths: List[str] = value_or(cfg.asset_paths, [])
    target_path: str = value_or(cfg.target_path, 'target')
    clean_targets: List[str] = value_or(cfg.clean_targets, [target_path])
    log_path: str = value_or(cfg.log_path, 'logs')
    modules_path: str = value_or(cfg.modules_path, 'dbt_modules')
    # in the default case we'll populate this once we know the adapter type
    # It would be nice to just pass along a Quoting here, but that would
    # break many things
    quoting: Dict[str, Any] = {}
    if cfg.quoting is not None:
        quoting = cfg.quoting.to_dict()

    models: Dict[str, Any]
    seeds: Dict[str, Any]
    snapshots: Dict[str, Any]
    sources: Dict[str, Any]
    vars_value: VarProvider

    if cfg.config_version == 1:
        assert isinstance(cfg, ProjectV1Contract)
        # extract everything named 'vars'
        models = cfg.models
        seeds = cfg.seeds
        snapshots = cfg.snapshots
        sources = {}
        vars_value = V1VarProvider(
            models=models, seeds=seeds, snapshots=snapshots
        )
    elif cfg.config_version == 2:
        assert isinstance(cfg, ProjectV2Contract)
        models = cfg.models
        seeds = cfg.seeds
        snapshots = cfg.snapshots
        sources = cfg.sources
        if cfg.vars is None:
            vars_dict: Dict[str, Any] = {}
        else:
            vars_dict = cfg.vars
        vars_value = V2VarProvider(vars_dict)
    else:
        # NOTE(review): this escapes as a bare ValidationError rather than
        # the documented DbtProjectError - confirm whether callers rely on
        # that before changing it.
        raise ValidationError(
            f'Got unsupported config_version={cfg.config_version}'
        )

    on_run_start: List[str] = value_or(cfg.on_run_start, [])
    on_run_end: List[str] = value_or(cfg.on_run_end, [])

    query_comment = _query_comment_from_cfg(cfg.query_comment)

    try:
        packages = package_config_from_data(packages_dict)
    except ValidationError as e:
        raise DbtProjectError(validator_error_message(e)) from e

    try:
        selectors = selector_config_from_data(selectors_dict)
    except ValidationError as e:
        raise DbtProjectError(validator_error_message(e)) from e

    project = cls(
        project_name=name,
        version=version,
        project_root=project_root,
        profile_name=profile_name,
        source_paths=source_paths,
        macro_paths=macro_paths,
        data_paths=data_paths,
        test_paths=test_paths,
        analysis_paths=analysis_paths,
        docs_paths=docs_paths,
        asset_paths=asset_paths,
        target_path=target_path,
        snapshot_paths=snapshot_paths,
        clean_targets=clean_targets,
        log_path=log_path,
        modules_path=modules_path,
        quoting=quoting,
        models=models,
        on_run_start=on_run_start,
        on_run_end=on_run_end,
        seeds=seeds,
        snapshots=snapshots,
        dbt_version=dbt_version,
        packages=packages,
        selectors=selectors,
        query_comment=query_comment,
        sources=sources,
        vars=vars_value,
        config_version=cfg.config_version,
    )
    # sanity check - this means an internal issue
    project.validate()
    return project