def validate_version(self):
    """Check that the installed dbt satisfies this package's version spec.

    :raises DbtProjectError: If ``versions_compatible`` rejects the
        specifiers in ``self.dbt_version`` on their own, or rejects them
        together with the installed version.
    """
    def _spec_strings():
        # Only evaluated on a failure path, to fill in the error message.
        return [spec.to_version_string() for spec in self.dbt_version]

    current = get_installed_version()

    # First make sure the requested specifiers agree with each other.
    if not versions_compatible(*self.dbt_version):
        raise DbtProjectError(
            IMPOSSIBLE_VERSION_ERROR.format(
                package=self.project_name,
                version_spec=_spec_strings(),
            )
        )

    # Then make sure the installed version fits within them.
    if versions_compatible(current, *self.dbt_version):
        return

    raise DbtProjectError(
        INVALID_VERSION_ERROR.format(
            package=self.project_name,
            installed=current.to_version_string(),
            version_spec=_spec_strings(),
        )
    )
def pick_profile_name(args_profile_name, project_profile_name=None):
    """Choose the profile name to use, preferring the command-line value.

    :param args_profile_name: Profile name given on the command line, or
        None if it was not given.
    :param project_profile_name: Profile name from the project, or None.
    :raises DbtProjectError: If both names are None.
    :returns: ``args_profile_name`` when it is not None, otherwise
        ``project_profile_name``.
    """
    if args_profile_name is not None:
        return args_profile_name
    if project_profile_name is not None:
        return project_profile_name
    raise DbtProjectError(NO_SUPPLIED_PROFILE_ERROR)
def render_data(self, data: Dict[str, Any]) -> Dict[str, Any]:
    """Render every entry of ``data`` with ``self.render_entry``.

    :param data: The dictionary to render; it is walked with ``deep_map``.
    :raises DbtProjectError: If ``deep_map`` raises a
        ``RecursionException``. The original exception is attached as the
        cause so the traceback shows where the cycle was detected.
    :returns: Whatever ``deep_map`` produces for ``data``.
    """
    try:
        return deep_map(self.render_entry, data)
    except RecursionException as exc:
        # Chain explicitly so the underlying recursion error is reported
        # as the direct cause rather than as an incidental context.
        raise DbtProjectError(
            f'Cycle detected: {self.name} input has a reference to itself',
            project=data
        ) from exc
def from_project_root(cls, project_root, cli_vars):
    """Build a project from the dbt_project.yml found under a directory.

    The project file is loaded and rendered with a ``ConfigRenderer``
    built from ``cli_vars``; package data is fetched for the same root via
    ``package_data_from_root``.

    :param project_root str: The path to the project root to load.
    :param cli_vars: The command-line variables; a string value is parsed
        with ``parse_cli_vars`` first.
    :raises DbtProjectError: If no dbt_project.yml exists under the root.
        Errors raised by the helpers called here propagate unchanged.
    :returns Project: The result of ``cls.from_project_config``.
    """
    root = os.path.normpath(project_root)
    yaml_path = os.path.join(root, 'dbt_project.yml')

    # the project file has to exist before anything else is attempted
    if not path_exists(yaml_path):
        message = 'no dbt_project.yml found at expected path {}'.format(
            yaml_path)
        raise DbtProjectError(message)

    # cli vars may arrive unparsed, as a string
    if isinstance(cli_vars, compat.basestring):
        cli_vars = parse_cli_vars(cli_vars)

    renderer = ConfigRenderer(cli_vars)
    rendered = renderer.render_project(_load_yaml(yaml_path))
    rendered['project-root'] = root

    return cls.from_project_config(rendered, package_data_from_root(root))
def _raw_project_from(project_root: str) -> Dict[str, Any]:
    """Load dbt_project.yml from ``project_root`` as a dictionary.

    :param project_root: Directory expected to contain dbt_project.yml.
    :raises DbtProjectError: If the file does not exist, or if its loaded
        contents are not a dict.
    :returns: The loaded project dictionary.
    """
    yaml_path = os.path.join(
        os.path.normpath(project_root), 'dbt_project.yml'
    )

    # bail out early when there is no project file at all
    if not path_exists(yaml_path):
        raise DbtProjectError(
            'no dbt_project.yml found at expected path {}'.format(yaml_path))

    contents = _load_yaml(yaml_path)
    if isinstance(contents, dict):
        return contents

    raise DbtProjectError('dbt_project.yml does not parse to a dictionary')
def render_project(self, as_parsed):
    """Apply ``self._render_project_entry`` across the parsed project data.

    :param as_parsed: The parsed project data, walked with ``deep_map``.
    :raises DbtProjectError: If ``deep_map`` raises a
        ``RecursionException``.
    :returns: Whatever ``deep_map`` produces for ``as_parsed``.
    """
    try:
        rendered = deep_map(self._render_project_entry, as_parsed)
    except RecursionException:
        raise DbtProjectError(
            'Cycle detected: Project input has a reference to itself',
            project=as_parsed)
    else:
        return rendered
def package_config_from_data(packages_data):
    """Build a ``PackageConfig`` from the packages data.

    :param packages_data: Keyword data for ``PackageConfig``, or None, in
        which case an empty package list is used.
    :raises DbtProjectError: If ``PackageConfig`` raises a
        ``ValidationException``.
    :returns: The constructed ``PackageConfig``.
    """
    data = {'packages': []} if packages_data is None else packages_data
    try:
        return PackageConfig(**data)
    except ValidationException as exc:
        raise DbtProjectError('Invalid package config: {}'.format(str(exc)))
def validate(self):
    """Check the serialized configuration with ``Configuration.validate``.

    :raises DbtProjectError: If a ``ValidationError`` is raised; the
        message comes from ``validator_error_message`` and the original
        error is attached as the cause.
    """
    try:
        serialized = self.serialize()
        Configuration.validate(serialized)
    except ValidationError as exc:
        message = validator_error_message(exc)
        raise DbtProjectError(message) from exc
def validate(self):
    """Check the configuration by constructing a ``Configuration`` from it.

    The constructed object is discarded; only the validation matters.

    :raises DbtProjectError: If a ``dbt.exceptions.ValidationException``
        is raised; its string form becomes the error message.
    """
    try:
        serialized = self.serialize()
        Configuration(**serialized)
    except dbt.exceptions.ValidationException as exc:
        raise DbtProjectError(str(exc))
def pick_profile_name(
    args_profile_name: Optional[str],
    project_profile_name: Optional[str] = None,
) -> str:
    """Choose the profile name to use, preferring the command-line value.

    :param args_profile_name: Profile name given on the command line, or
        None if it was not given.
    :param project_profile_name: Profile name from the project, or None.
    :raises DbtProjectError: If both names are None.
    :returns: ``args_profile_name`` when it is not None, otherwise
        ``project_profile_name``.
    """
    if args_profile_name is not None:
        return args_profile_name
    if project_profile_name is not None:
        return project_profile_name
    raise DbtProjectError(NO_SUPPLIED_PROFILE_ERROR)
def package_config_from_data(packages_data):
    """Build a ``PackageConfig`` from the packages data.

    :param packages_data: Dictionary for ``PackageConfig.from_dict``, or
        None, in which case an empty package list is used.
    :raises DbtProjectError: If ``from_dict`` raises a
        ``ValidationError``; the message is ``MALFORMED_PACKAGE_ERROR``
        filled in with the error's ``message``.
    :returns: The constructed ``PackageConfig``.
    """
    data = {'packages': []} if packages_data is None else packages_data
    try:
        return PackageConfig.from_dict(data)
    except ValidationError as exc:
        detail = str(exc.message)
        raise DbtProjectError(
            MALFORMED_PACKAGE_ERROR.format(error=detail)) from exc
def package_config_from_data(packages_data):
    """Build a ``PackageConfig`` from the packages data.

    :param packages_data: Dictionary for ``PackageConfig.from_dict``, or
        None, in which case an empty package list is used.
    :raises DbtProjectError: If ``from_dict`` raises a
        ``ValidationError``; the message embeds the output of
        ``validator_error_message``.
    :returns: The constructed ``PackageConfig``.
    """
    data = {'packages': []} if packages_data is None else packages_data
    try:
        return PackageConfig.from_dict(data)
    except ValidationError as exc:
        detail = validator_error_message(exc)
        raise DbtProjectError(
            'Invalid package config: {}'.format(detail)
        ) from exc
def load_projects(
    self,
    paths: Iterable[Path],
) -> Iterator[Tuple[str, 'RuntimeConfig']]:
    """Lazily load one project per path.

    :param paths: Locations handed, as strings, to ``self.new_project``.
    :raises DbtProjectError: If loading a path raises a
        ``DbtProjectError``; it is re-raised with the offending path
        included in the message.
    :returns: An iterator of ``(project_name, project)`` pairs.
    """
    for location in paths:
        try:
            loaded = self.new_project(str(location))
        except DbtProjectError as exc:
            message = 'Failed to read package at {}: {}'.format(location, exc)
            raise DbtProjectError(message) from exc
        # Only reached when loading succeeded; the handler above re-raises.
        yield loaded.project_name, loaded
def validate(self):
    """Check the serialized configuration with ``Configuration.from_dict``.

    When ``self.args`` has a truthy ``version_check`` attribute, the dbt
    version is checked afterwards with ``self.validate_version()``.

    :raises DbtProjectError: If a ``ValidationError`` is raised; the
        message comes from ``validator_error_message`` and the original
        error is attached as the cause.
    """
    try:
        serialized = self.serialize()
        Configuration.from_dict(serialized)
    except ValidationError as exc:
        raise DbtProjectError(validator_error_message(exc)) from exc

    # The version check is opt-in via the parsed arguments.
    wants_version_check = getattr(self.args, 'version_check', False)
    if wants_version_check:
        self.validate_version()
def validate(self):
    """Check the configuration by constructing a ``Configuration`` from it.

    When ``self.args`` has a truthy ``version_check`` attribute, the dbt
    version is checked afterwards with ``self.validate_version()``.

    :raises DbtProjectError: If a ``ValidationException`` is raised; its
        string form becomes the error message.
    """
    try:
        serialized = self.serialize()
        Configuration(**serialized)
    except ValidationException as exc:
        raise DbtProjectError(str(exc))

    # The version check is opt-in via the parsed arguments.
    wants_version_check = getattr(self.args, 'version_check', False)
    if wants_version_check:
        self.validate_version()
def _get_required_version(
    project_dict: Dict[str, Any],
    verify_version: bool,
) -> List[VersionSpecifier]:
    """Parse the project's ``require-dbt-version`` entry.

    :param project_dict: The project dictionary; ``require-dbt-version``
        is read from it, and ``'>=0.0.0'`` is used when the entry is
        absent or None.
    :param verify_version: When true, the parsed specifiers are also
        passed to ``validate_version`` together with the project's name.
    :raises DbtProjectError: If parsing raises a ``SemverException``, or
        if ``verify_version`` is true and the project has no ``name`` key.
    :returns: The parsed version specifiers.
    """
    required = project_dict.get('require-dbt-version')
    raw_spec: Union[List[str], str] = (
        '>=0.0.0' if required is None else required
    )

    try:
        parsed = _parse_versions(raw_spec)
    except SemverException as exc:
        raise DbtProjectError(str(exc)) from exc

    if not verify_version:
        return parsed

    # no name is also an error that we want to raise
    if 'name' not in project_dict:
        raise DbtProjectError(
            'Required "name" field not present in project',
        )
    validate_version(parsed, project_dict['name'])
    return parsed
def load_projects(
    self,
    paths: Iterable[Path],
) -> Iterator[Tuple[str, 'RuntimeConfig']]:
    """Lazily load one project per path.

    :param paths: Locations handed, as strings, to ``self.new_project``.
    :raises DbtProjectError: If loading a path raises a
        ``DbtProjectError``; it is re-raised with
        ``result_type='invalid_project'`` and the offending path attached.
    :returns: An iterator of ``(project_name, project)`` pairs.
    """
    for location in paths:
        try:
            loaded = self.new_project(str(location))
        except DbtProjectError as exc:
            raise DbtProjectError(
                f'Failed to read package: {exc}',
                result_type='invalid_project',
                path=location,
            ) from exc
        # Only reached when loading succeeded; the handler above re-raises.
        yield loaded.project_name, loaded
def validate_version(
    required: List[VersionSpecifier],
    project_name: str,
) -> None:
    """Check that the installed dbt satisfies a package's version spec.

    :param required: The version specifiers the package asks for.
    :param project_name: The package name, used in error messages.
    :raises DbtProjectError: If ``versions_compatible`` rejects the
        specifiers on their own, or rejects them together with the
        installed version.
    """
    def _spec_strings() -> List[str]:
        # Only evaluated on a failure path, to fill in the error message.
        return [spec.to_version_string() for spec in required]

    current = get_installed_version()

    # First make sure the requested specifiers agree with each other.
    if not versions_compatible(*required):
        raise DbtProjectError(
            IMPOSSIBLE_VERSION_ERROR.format(
                package=project_name,
                version_spec=_spec_strings(),
            )
        )

    # Then make sure the installed version fits within them.
    if versions_compatible(current, *required):
        return

    raise DbtProjectError(
        INVALID_VERSION_ERROR.format(
            package=project_name,
            installed=current.to_version_string(),
            version_spec=_spec_strings(),
        )
    )
def from_parts(cls, project, profile, args, allow_archive_configs=False):
    """Assemble a RuntimeConfig out of a project, a profile and CLI args.

    The quoting policy starts as a deep copy of the default
    ``quote_policy`` of the relation class for the profile's credentials
    type, and is then updated with the project's own quoting settings.

    :param project Project: A parsed dbt Project.
    :param profile Profile: A parsed dbt Profile.
    :param args argparse.Namespace: The parsed command-line arguments.
    :param allow_archive_configs bool: If True, a project with an
        ``archive`` section is accepted instead of rejected. This flag
        exists to enable archive migration.
    :raises DbtProjectError: If the project has a truthy ``archive`` and
        ``allow_archive_configs`` is false.
    :returns RuntimeConfig: The new configuration.
    """
    relation_cls = get_relation_class_by_name(profile.credentials.type)
    quoting = deepcopy(relation_cls.DEFAULTS['quote_policy'])
    quoting.update(project.quoting)

    if project.archive and not allow_archive_configs:
        # if the user has an `archive` section, raise an error
        raise DbtProjectError(_ARCHIVE_REMOVED_MESSAGE)

    return cls(
        # project-derived settings
        project_name=project.project_name,
        version=project.version,
        project_root=project.project_root,
        source_paths=project.source_paths,
        macro_paths=project.macro_paths,
        data_paths=project.data_paths,
        test_paths=project.test_paths,
        analysis_paths=project.analysis_paths,
        docs_paths=project.docs_paths,
        target_path=project.target_path,
        snapshot_paths=project.snapshot_paths,
        clean_targets=project.clean_targets,
        log_path=project.log_path,
        modules_path=project.modules_path,
        quoting=quoting,
        models=project.models,
        on_run_start=project.on_run_start,
        on_run_end=project.on_run_end,
        archive=project.archive,
        seeds=project.seeds,
        dbt_version=project.dbt_version,
        packages=project.packages,
        # profile-derived settings
        profile_name=profile.profile_name,
        target_name=profile.target_name,
        config=profile.config,
        threads=profile.threads,
        credentials=profile.credentials,
        # command-line arguments
        args=args,
    )
def from_raw_profiles(
    cls,
    raw_profiles: Dict[str, Any],
    profile_name: str,
    renderer: ProfileRenderer,
    target_override: Optional[str] = None,
    threads_override: Optional[int] = None,
) -> 'Profile':
    """Select one profile from the raw profiles data and build it.

    :param raw_profiles: The profile data, from disk as yaml.
    :param profile_name: The profile name to use.
    :param renderer: The config renderer.
    :param target_override: The target to use, if provided on the command
        line.
    :param threads_override: The thread count to use, if provided on the
        command line.
    :raises DbtProjectError: If ``profile_name`` is not a key of
        ``raw_profiles``.
    :raises DbtProfileError: If the selected profile entry is empty.
        Errors from ``cls.from_raw_profile_info`` propagate unchanged.
    :returns: The new Profile object.
    """
    if profile_name not in raw_profiles:
        not_found = "Could not find profile named '{}'".format(profile_name)
        raise DbtProjectError(not_found)

    # The profile name is already final and keys are not rendered, so the
    # entry can be looked up directly.
    selected = raw_profiles[profile_name]
    if not selected:
        detail = f'Profile {profile_name} in profiles.yml is empty'
        raise DbtProfileError(
            INVALID_PROFILE_MESSAGE.format(error_string=detail))

    return cls.from_raw_profile_info(
        raw_profile=selected,
        profile_name=profile_name,
        renderer=renderer,
        user_cfg=raw_profiles.get('config'),
        target_override=target_override,
        threads_override=threads_override,
    )
def from_project_root(cls, project_root: str, *,
                      verify_version: bool = False) -> 'PartialProject':
    """Build a PartialProject from the files under a project root.

    :param project_root: The project directory; it is normalized before
        use.
    :param verify_version: Passed through to ``cls.from_dicts``.
    :raises DbtProjectError: If the project's ``config-version`` (1 when
        absent) is not 2. Errors from the loading helpers propagate
        unchanged.
    :returns: The result of ``cls.from_dicts``.
    """
    root = os.path.normpath(project_root)
    raw_project = _raw_project_from(root)

    # Only config version 2 is accepted here; a missing key counts as 1.
    found_version = raw_project.get('config-version', 1)
    if found_version != 2:
        raise DbtProjectError(
            f'Invalid config version: {found_version}, expected 2',
            path=os.path.join(root, 'dbt_project.yml'))

    raw_packages = package_data_from_root(root)
    raw_selectors = selector_data_from_root(root)

    return cls.from_dicts(
        project_root=root,
        project_dict=raw_project,
        selectors_dict=raw_selectors,
        packages_dict=raw_packages,
        verify_version=verify_version,
    )
def from_raw_profiles(cls, raw_profiles, profile_name, cli_vars,
                      target_override=None, threads_override=None):
    """Select one profile from the raw profiles data and build it.

    :param raw_profiles dict: The profile data, from disk as yaml.
    :param profile_name str: The profile name to use.
    :param cli_vars dict: The command-line variables passed as arguments,
        as a dict.
    :param target_override Optional[str]: The target to use, if provided
        on the command line.
    :param threads_override Optional[str]: The thread count to use, if
        provided on the command line.
    :raises DbtProjectError: If ``profile_name`` is not a key of
        ``raw_profiles``. Errors from ``cls.from_raw_profile_info``
        propagate unchanged.
    :returns Profile: The new Profile object.
    """
    if profile_name not in raw_profiles:
        not_found = "Could not find profile named '{}'".format(profile_name)
        raise DbtProjectError(not_found)

    # The profile name is already final and keys are not rendered, so the
    # entry can be looked up directly.
    selected = raw_profiles[profile_name]

    return cls.from_raw_profile_info(
        raw_profile=selected,
        profile_name=profile_name,
        cli_vars=cli_vars,
        user_cfg=raw_profiles.get('config'),
        target_override=target_override,
        threads_override=threads_override,
    )
def create_project(self, rendered: RenderComponents) -> 'Project':
    """Build a validated ``Project`` from rendered components.

    The rendered project dictionary is checked against ``ProjectContract``
    and used to fill in every ``Project`` field, with defaults applied
    through ``value_or``. The unrendered dictionaries held on ``self`` are
    kept alongside it as ``unrendered``.

    :param rendered: The rendered project, packages and selectors
        dictionaries.
    :raises DbtProjectError: If the rendered project fails contract
        validation, or if the parsed config has no project root. Errors
        raised by the helpers called here propagate unchanged.
    :returns: The new ``Project``, after ``project.validate()`` has run.
    """
    # Keep the pre-render dictionaries so they travel with the project.
    unrendered = RenderComponents(
        project_dict=self.project_dict,
        packages_dict=self.packages_dict,
        selectors_dict=self.selectors_dict,
    )
    # Resolved before contract validation, from the rendered dictionary.
    dbt_version = _get_required_version(
        rendered.project_dict,
        verify_version=self.verify_version,
    )

    try:
        ProjectContract.validate(rendered.project_dict)
        cfg = ProjectContract.from_dict(rendered.project_dict)
    except ValidationError as e:
        raise DbtProjectError(validator_error_message(e)) from e
    # name/version are required in the Project definition, so we can assume
    # they are present
    name = cfg.name
    version = cfg.version
    # this is added at project_dict parse time and should always be here
    # once we see it.
    if cfg.project_root is None:
        raise DbtProjectError('cfg must have a project root!')
    else:
        project_root = cfg.project_root
    # this is only optional in the sense that if it's not present, it needs
    # to have been a cli argument.
    profile_name = cfg.profile
    # these are all the defaults
    source_paths: List[str] = value_or(cfg.source_paths, ['models'])
    macro_paths: List[str] = value_or(cfg.macro_paths, ['macros'])
    data_paths: List[str] = value_or(cfg.data_paths, ['data'])
    test_paths: List[str] = value_or(cfg.test_paths, ['test'])
    analysis_paths: List[str] = value_or(cfg.analysis_paths, [])
    snapshot_paths: List[str] = value_or(cfg.snapshot_paths, ['snapshots'])

    # The combined source paths are the fallback for docs_paths below.
    all_source_paths: List[str] = _all_source_paths(
        source_paths, data_paths, snapshot_paths, analysis_paths,
        macro_paths)

    docs_paths: List[str] = value_or(cfg.docs_paths, all_source_paths)
    asset_paths: List[str] = value_or(cfg.asset_paths, [])
    target_path: str = value_or(cfg.target_path, 'target')
    # clean_targets falls back to the (possibly defaulted) target path.
    clean_targets: List[str] = value_or(cfg.clean_targets, [target_path])
    log_path: str = value_or(cfg.log_path, 'logs')
    modules_path: str = value_or(cfg.modules_path, 'dbt_modules')
    # in the default case we'll populate this once we know the adapter type
    # It would be nice to just pass along a Quoting here, but that would
    # break many things
    quoting: Dict[str, Any] = {}
    if cfg.quoting is not None:
        quoting = cfg.quoting.to_dict(omit_none=True)

    models: Dict[str, Any]
    seeds: Dict[str, Any]
    snapshots: Dict[str, Any]
    sources: Dict[str, Any]
    vars_value: VarProvider

    models = cfg.models
    seeds = cfg.seeds
    snapshots = cfg.snapshots
    sources = cfg.sources
    # A missing vars section is treated as an empty dictionary.
    if cfg.vars is None:
        vars_dict: Dict[str, Any] = {}
    else:
        vars_dict = cfg.vars

    vars_value = VarProvider(vars_dict)

    on_run_start: List[str] = value_or(cfg.on_run_start, [])
    on_run_end: List[str] = value_or(cfg.on_run_end, [])

    query_comment = _query_comment_from_cfg(cfg.query_comment)

    packages = package_config_from_data(rendered.packages_dict)
    selectors = selector_config_from_data(rendered.selectors_dict)
    manifest_selectors: Dict[str, Any] = {}
    if rendered.selectors_dict and rendered.selectors_dict['selectors']:
        # this is a dict with a single key 'selectors' pointing to a list
        # of dicts.
        manifest_selectors = SelectorDict.parse_from_selectors_list(
            rendered.selectors_dict['selectors'])

    project = Project(
        project_name=name,
        version=version,
        project_root=project_root,
        profile_name=profile_name,
        source_paths=source_paths,
        macro_paths=macro_paths,
        data_paths=data_paths,
        test_paths=test_paths,
        analysis_paths=analysis_paths,
        docs_paths=docs_paths,
        asset_paths=asset_paths,
        target_path=target_path,
        snapshot_paths=snapshot_paths,
        clean_targets=clean_targets,
        log_path=log_path,
        modules_path=modules_path,
        quoting=quoting,
        models=models,
        on_run_start=on_run_start,
        on_run_end=on_run_end,
        seeds=seeds,
        snapshots=snapshots,
        dbt_version=dbt_version,
        packages=packages,
        manifest_selectors=manifest_selectors,
        selectors=selectors,
        query_comment=query_comment,
        sources=sources,
        vars=vars_value,
        config_version=cfg.config_version,
        unrendered=unrendered,
    )
    # sanity check - this means an internal issue
    project.validate()
    return project
def from_project_config(
    cls,
    project_dict: Dict[str, Any],
    packages_dict: Optional[Dict[str, Any]] = None,
    selectors_dict: Optional[Dict[str, Any]] = None,
    required_dbt_version: Optional[List[VersionSpecifier]] = None,
) -> 'Project':
    """Create a project from its project and package configuration, as
    read by yaml.safe_load().

    :param project_dict: The dictionary as read from disk
    :param packages_dict: If it exists, the packages file as read from
        disk.
    :param selectors_dict: If given, the selectors data, passed to
        ``selector_config_from_data``.
    :param required_dbt_version: If given, used as the project's dbt
        version specifiers; otherwise they are taken from
        ``cls._get_required_version(project_dict)``.
    :raises DbtProjectError: If the project is missing or invalid, or if
        the packages file exists and is invalid.
    :returns: The project, with defaults populated.
    """
    if required_dbt_version is None:
        dbt_version = cls._get_required_version(project_dict)
    else:
        dbt_version = required_dbt_version

    try:
        project_dict = cls._preprocess(project_dict)
    except RecursionException:
        raise DbtProjectError(
            'Cycle detected: Project input has a reference to itself',
            project=project_dict
        )
    try:
        cfg = parse_project_config(project_dict)
    except ValidationError as e:
        raise DbtProjectError(validator_error_message(e)) from e

    # name/version are required in the Project definition, so we can assume
    # they are present
    name = cfg.name
    version = cfg.version
    # this is added at project_dict parse time and should always be here
    # once we see it.
    if cfg.project_root is None:
        raise DbtProjectError('cfg must have a project root!')
    else:
        project_root = cfg.project_root
    # this is only optional in the sense that if it's not present, it needs
    # to have been a cli argument.
    profile_name = cfg.profile
    # these are all the defaults
    source_paths: List[str] = value_or(cfg.source_paths, ['models'])
    macro_paths: List[str] = value_or(cfg.macro_paths, ['macros'])
    data_paths: List[str] = value_or(cfg.data_paths, ['data'])
    test_paths: List[str] = value_or(cfg.test_paths, ['test'])
    analysis_paths: List[str] = value_or(cfg.analysis_paths, [])
    snapshot_paths: List[str] = value_or(cfg.snapshot_paths, ['snapshots'])

    # The combined source paths are the fallback for docs_paths below.
    all_source_paths: List[str] = _all_source_paths(
        source_paths, data_paths, snapshot_paths, analysis_paths,
        macro_paths
    )

    docs_paths: List[str] = value_or(cfg.docs_paths, all_source_paths)
    asset_paths: List[str] = value_or(cfg.asset_paths, [])
    target_path: str = value_or(cfg.target_path, 'target')
    # clean_targets falls back to the (possibly defaulted) target path.
    clean_targets: List[str] = value_or(cfg.clean_targets, [target_path])
    log_path: str = value_or(cfg.log_path, 'logs')
    modules_path: str = value_or(cfg.modules_path, 'dbt_modules')
    # in the default case we'll populate this once we know the adapter type
    # It would be nice to just pass along a Quoting here, but that would
    # break many things
    quoting: Dict[str, Any] = {}
    if cfg.quoting is not None:
        quoting = cfg.quoting.to_dict()

    models: Dict[str, Any]
    seeds: Dict[str, Any]
    snapshots: Dict[str, Any]
    sources: Dict[str, Any]
    vars_value: VarProvider

    # The two supported config versions build their var providers
    # differently; anything else is rejected below.
    if cfg.config_version == 1:
        assert isinstance(cfg, ProjectV1Contract)
        # extract everything named 'vars'
        models = cfg.models
        seeds = cfg.seeds
        snapshots = cfg.snapshots
        sources = {}
        vars_value = V1VarProvider(
            models=models, seeds=seeds, snapshots=snapshots
        )
    elif cfg.config_version == 2:
        assert isinstance(cfg, ProjectV2Contract)
        models = cfg.models
        seeds = cfg.seeds
        snapshots = cfg.snapshots
        sources = cfg.sources
        # A missing vars section is treated as an empty dictionary.
        if cfg.vars is None:
            vars_dict: Dict[str, Any] = {}
        else:
            vars_dict = cfg.vars
        vars_value = V2VarProvider(vars_dict)
    else:
        # NOTE(review): this is raised outside any try block in this
        # function, so it is not converted into a DbtProjectError here.
        raise ValidationError(
            f'Got unsupported config_version={cfg.config_version}'
        )

    on_run_start: List[str] = value_or(cfg.on_run_start, [])
    on_run_end: List[str] = value_or(cfg.on_run_end, [])

    query_comment = _query_comment_from_cfg(cfg.query_comment)

    try:
        packages = package_config_from_data(packages_dict)
    except ValidationError as e:
        raise DbtProjectError(validator_error_message(e)) from e

    try:
        selectors = selector_config_from_data(selectors_dict)
    except ValidationError as e:
        raise DbtProjectError(validator_error_message(e)) from e

    project = cls(
        project_name=name,
        version=version,
        project_root=project_root,
        profile_name=profile_name,
        source_paths=source_paths,
        macro_paths=macro_paths,
        data_paths=data_paths,
        test_paths=test_paths,
        analysis_paths=analysis_paths,
        docs_paths=docs_paths,
        asset_paths=asset_paths,
        target_path=target_path,
        snapshot_paths=snapshot_paths,
        clean_targets=clean_targets,
        log_path=log_path,
        modules_path=modules_path,
        quoting=quoting,
        models=models,
        on_run_start=on_run_start,
        on_run_end=on_run_end,
        seeds=seeds,
        snapshots=snapshots,
        dbt_version=dbt_version,
        packages=packages,
        selectors=selectors,
        query_comment=query_comment,
        sources=sources,
        vars=vars_value,
        config_version=cfg.config_version,
    )
    # sanity check - this means an internal issue
    project.validate()
    return project
def validate(self):
    """Check this project by constructing a ``ProjectContract`` from it.

    The constructed object is discarded; only the validation matters.

    :raises DbtProjectError: If a ``ValidationException`` is raised; its
        string form becomes the error message.
    """
    try:
        as_config = self.to_project_config()
        ProjectContract(**as_config)
    except ValidationException as err:
        raise DbtProjectError(str(err))
def from_project_config(cls, project_dict, packages_dict=None):
    """Create a project from its project and package configuration, as
    read by yaml.safe_load().

    The dictionary is preprocessed with ``cls._preprocess`` and checked
    against ``ProjectContract`` before any values are read from it.

    :param project_dict dict: The dictionary as read from disk
    :param packages_dict Optional[dict]: If it exists, the packages file as
        read from disk.
    :raises DbtProjectError: If the project is missing or invalid, or if
        the packages file exists and is invalid.
    :returns Project: The project, with defaults populated.
    """
    try:
        project_dict = cls._preprocess(project_dict)
    except RecursionException:
        raise DbtProjectError(
            'Cycle detected: Project input has a reference to itself',
            project=project_dict)
    # just for validation.
    try:
        ProjectContract(**project_dict)
    except ValidationException as e:
        raise DbtProjectError(str(e))

    # name/version are required in the Project definition, so we can assume
    # they are present
    name = project_dict['name']
    version = project_dict['version']
    # this is added at project_dict parse time and should always be here
    # once we see it.
    project_root = project_dict['project-root']
    # this is only optional in the sense that if it's not present, it needs
    # to have been a cli argument.
    profile_name = project_dict.get('profile')
    # these are optional
    source_paths = project_dict.get('source-paths', ['models'])
    macro_paths = project_dict.get('macro-paths', ['macros'])
    data_paths = project_dict.get('data-paths', ['data'])
    test_paths = project_dict.get('test-paths', ['test'])
    analysis_paths = project_dict.get('analysis-paths', [])
    # docs paths default to a copy of the source paths
    docs_paths = project_dict.get('docs-paths', source_paths[:])
    target_path = project_dict.get('target-path', 'target')
    # should this also include the modules path by default?
    clean_targets = project_dict.get('clean-targets', [target_path])
    log_path = project_dict.get('log-path', 'logs')
    modules_path = project_dict.get('modules-path', 'dbt_modules')
    # in the default case we'll populate this once we know the adapter type
    quoting = project_dict.get('quoting', {})

    models = project_dict.get('models', {})
    on_run_start = project_dict.get('on-run-start', [])
    on_run_end = project_dict.get('on-run-end', [])
    archive = project_dict.get('archive', [])
    seeds = project_dict.get('seeds', {})
    # when no requirement is given, '>=0.0.0' is parsed instead
    dbt_raw_version = project_dict.get('require-dbt-version', '>=0.0.0')

    try:
        dbt_version = _parse_versions(dbt_raw_version)
    except SemverException as e:
        raise DbtProjectError(str(e))

    packages = package_config_from_data(packages_dict)

    project = cls(
        project_name=name,
        version=version,
        project_root=project_root,
        profile_name=profile_name,
        source_paths=source_paths,
        macro_paths=macro_paths,
        data_paths=data_paths,
        test_paths=test_paths,
        analysis_paths=analysis_paths,
        docs_paths=docs_paths,
        target_path=target_path,
        clean_targets=clean_targets,
        log_path=log_path,
        modules_path=modules_path,
        quoting=quoting,
        models=models,
        on_run_start=on_run_start,
        on_run_end=on_run_end,
        archive=archive,
        seeds=seeds,
        dbt_version=dbt_version,
        packages=packages)
    # sanity check - this means an internal issue
    project.validate()
    return project
def validate(self):
    """Check this project with ``ProjectContract.from_dict``.

    :raises DbtProjectError: If a ``ValidationError`` is raised; the
        message comes from ``validator_error_message`` and the original
        error is attached as the cause.
    """
    try:
        as_config = self.to_project_config()
        ProjectContract.from_dict(as_config)
    except ValidationError as err:
        message = validator_error_message(err)
        raise DbtProjectError(message) from err