def parse_lookml_file(lookml_file_name: str) -> dict:
    """Parse a LookML file into a dictionary with keys for each of its primary
    properties and a list of values."""
    logger.info("Parsing data from LookML file {}".format(lookml_file_name))
    with open(lookml_file_name, "r") as lookml_file_stream:
        lookml_data = lkml.load(lookml_file_stream)

    return lookml_data

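# Usage sketch (an assumption, not part of the original module): how
# parse_lookml_file might be called. The file path below is hypothetical and
# the `lkml` package must be installed.
def _example_parse_lookml_file() -> None:
    view_data = parse_lookml_file("views/orders.view.lkml")  # hypothetical path
    for view in view_data.get("views", []):
        print(view.get("name"))
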
def run_dbt_debug(self, *args, **kwargs) -> None:
    """Run `dbt debug` command to check if your dbt_project.yml and profiles.yml
    files are properly configured."""
    logger.info(
        "Confirming proper dbt project setup, profile and warehouse access..."
    )
    result = self._dbt_cli_runner(DBT_DEBUG, *args, **kwargs)
    logger.info(result)

def run_cli_command(
    command: Union[List[str], str],
    working_directory: str,
    use_shell: bool = False,
    output_as_text: bool = True,
    capture_output: bool = True,
    **kwargs,
):
    """Execute a command-line subprocess and return its stdout."""
    result = subprocess.run(
        command,
        shell=use_shell,
        cwd=working_directory,
        text=output_as_text,
        capture_output=capture_output,
        **kwargs,
    )
    if result.stderr:
        raise subprocess.CalledProcessError(
            returncode=result.returncode, cmd=result.args, stderr=result.stderr
        )
    if result.stdout:
        # dbt can report failures on stdout with a zero exit code, so scan
        # stdout for its error marker as well.
        if "Encountered an error" in result.stdout:
            logger.error("dbt Error: {}".format(result.stdout))
            raise subprocess.CalledProcessError(
                returncode=result.returncode, cmd=result.args, stderr=result.stdout
            )
        logger.debug("Command Result:\n{}".format(result.stdout))

    return result.stdout

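# Usage sketch (an assumption, not part of the original module): run a simple,
# illustrative command and print its captured stdout.
def _example_run_cli_command() -> None:
    stdout = run_cli_command(["git", "--version"], working_directory=".")
    print(stdout)
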
def assemble_view(
    cls,
    view_name: str,
    sql_table_name: str = None,
    derived_table: str = None,
    dimensions: List[dict] = None,
    dimension_groups: List[dict] = None,
    measures: List[dict] = None,
    sets: List[dict] = None,
    parameters: List[dict] = None,
    label: str = None,
    required_access_grants: list = None,
    extends: str = None,
    extension_is_required: bool = False,
    include_suggestions: bool = True,
):
    """Assemble a LookML view from its component properties and return it as a
    LookML string."""
    assembled_view_dict = {"view": {"name": view_name}}
    logger.info("Creating LookML View: {}".format(view_name))

    # Validate inputs
    if not sql_table_name and not derived_table and not extends:
        raise DbteaException(
            name="missing-lookml-view-properties",
            title="Missing Necessary LookML View Properties",
            detail="Created LookML Views must specify either a `sql_table_name`, `derived_table` or `extends` "
            "in order to properly specify the view source",
        )

    # Add optional view options as needed
    if label:
        assembled_view_dict["view"]["label"] = label
    if extends:
        assembled_view_dict["view"]["extends"] = extends
    if extension_is_required:
        assembled_view_dict["view"]["extension"] = "required"
    if sql_table_name:
        assembled_view_dict["view"]["sql_table_name"] = sql_table_name
    if derived_table:
        assembled_view_dict["view"]["derived_table"] = derived_table
    if required_access_grants:
        assembled_view_dict["view"]["required_access_grants"] = required_access_grants
    if not include_suggestions:
        assembled_view_dict["view"]["suggestions"] = "no"

    # Add body of View
    if parameters:
        assembled_view_dict["view"]["parameters"] = parameters
    if dimensions:
        assembled_view_dict["view"]["dimensions"] = dimensions
    if dimension_groups:
        assembled_view_dict["view"]["dimension_groups"] = dimension_groups
    if measures:
        assembled_view_dict["view"]["measures"] = measures
    if sets:
        assembled_view_dict["view"]["sets"] = sets

    return lkml.dump(assembled_view_dict)

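# Usage sketch (an assumption, not part of the original module): assemble a
# minimal view string. The class name `LookmlView` is hypothetical (the owning
# class is not shown in this excerpt), as are the view and table names.
def _example_assemble_view() -> None:
    view_lookml = LookmlView.assemble_view(  # class name is an assumption
        view_name="orders",
        sql_table_name="analytics.orders",
        dimensions=[
            {"name": "order_id", "type": "string", "sql": "${TABLE}.order_id"}
        ],
    )
    print(view_lookml)
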
def dbt_model_schemas_to_lookml_views(dbt_models_data: List[dict]) -> dict:
    """Convert dbt model schema data into LookML view dictionaries, mapping model
    columns to LookML dimensions and dimension groups and dropping any properties
    which are not valid in LookML."""
    lookml_views_dbt_data = dbt_models_data
    for model_data in lookml_views_dbt_data:
        if "columns" in model_data:
            model_data["dimensions"] = list()
            model_data["dimension_groups"] = list()
            for column_data in model_data["columns"]:
                # Collect invalid properties per column, then delete them after
                # iteration so the dict is not mutated while being read.
                invalid_column_properties = [
                    column_property_key
                    for column_property_key in column_data.keys()
                    if column_property_key not in VALID_LOOKML_DIMENSION_PROPERTIES
                ]
                for invalid_column_property in invalid_column_properties:
                    logger.debug(
                        "Removing property invalid for LookML for dimension {}: {}".format(
                            model_data.get("name"), invalid_column_property
                        )
                    )
                    del column_data[invalid_column_property]
                column_data.update({"sql": "${TABLE}." + column_data.get("name")})
                if column_data.get("type") in LOOKML_DIMENSION_GROUP_TYPES:
                    model_data["dimension_groups"].append(column_data)
                else:
                    model_data["dimensions"].append(column_data)
            del model_data["columns"]

        if "alias" in model_data:
            model_data["sql_table_name"] = model_data["alias"]
        else:
            model_data["sql_table_name"] = model_data.get(
                "name"
            )  # TODO: Needs to be fully qualified name

        # Process model-level meta fields: keys prefixed looker_ / lookml_ are
        # promoted to top-level view properties with the prefix stripped.
        if "meta" in model_data:
            for meta_key, meta_value in model_data["meta"].items():
                if meta_key.startswith(("looker_", "lookml_")):
                    model_data[meta_key[7:]] = meta_value

        invalid_properties = [
            model_property
            for model_property in model_data.keys()
            if model_property not in VALID_LOOKML_VIEW_PROPERTIES
        ]
        for invalid_property in invalid_properties:
            logger.debug(
                "Removing property invalid for LookML for view {}: {}".format(
                    model_data.get("name"), invalid_property
                )
            )
            del model_data[invalid_property]

    return {"views": lookml_views_dbt_data}

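# Usage sketch (an assumption, not part of the original module): a minimal,
# hypothetical model-schema payload run through the converter. The exact output
# depends on the VALID_LOOKML_* and LOOKML_DIMENSION_GROUP_TYPES constants
# defined elsewhere in this module.
def _example_schemas_to_views() -> None:
    models = [
        {
            "name": "orders",
            "columns": [
                {"name": "order_id", "type": "string"},
                {"name": "created_at", "type": "time"},
            ],
        }
    ]
    print(dbt_model_schemas_to_lookml_views(models))
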
def write_data_to_file(self, replace_if_exists: bool = True) -> None:
    """Write dbtea config data out to a YAML config file, skipping the write if
    the file and config data already exist and replace_if_exists is False."""
    if (
        self._config_file_exists()
        and self._config_data_exists()
        and not replace_if_exists
    ):
        logger.warning(
            "Dbtea config file already exists, ignoring write config data to file step"
        )
    else:
        logger.info(
            "Creating {} file at path: {}".format(self.file_name, self.config_dir)
        )
        utils.write_to_yaml_file(self.config_data, self.config_name_and_path)

def run_dbt_run_operation(
    self, macro_name: str, macro_args: dict = None, *args, **kwargs
) -> None:
    """Run `dbt run-operation` command to run dbt macros."""
    logger.info("Executing dbt macro operation {}...".format(macro_name))
    operation_with_macro = DBT_RUN_OPERATION.copy()
    operation_with_macro.append(macro_name)
    if macro_args:
        operation_with_macro.append(f"--args '{macro_args}'")
    result = self._dbt_cli_runner(operation_with_macro, *args, **kwargs)
    logger.info(result)

def write_looker_config(self):
    """Write Looker SDK settings from the dbtea config data out to a Looker
    config (ini) file."""
    logger.info(
        "Writing Looker config file at path: {}".format(self.looker_config_path)
    )
    self.looker_config.add_section(self.looker_config_section)
    for key, value in self.config_data.get(self.dbt_project, {}).items():
        if key.startswith("looker_sdk_"):
            self.looker_config.set(
                self.looker_config_section,
                str(key.replace("looker_sdk_", "")),
                str(value),
            )
    with open(self.looker_config_path, "w") as config_stream:
        self.looker_config.write(config_stream)

def timed_function(*args, **kwargs):
    start_time = timeit.default_timer()
    try:
        result = fn(*args, **kwargs)
    finally:
        elapsed_time = timeit.default_timer() - start_time
        elapsed_formatted = human_readable(elapsed_time)
        message_detail = get_detail(fn.__name__)
        logger.info(
            f"Completed {message_detail}operation in {elapsed_formatted}.\n"
        )
    return result

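# Context sketch (an assumption, not confirmed by this excerpt): timed_function
# reads as the inner wrapper of a timing decorator shaped like the one below,
# with `fn`, `human_readable`, and `get_detail` closed over from the enclosing
# scope. Assumes `functools` is imported at module level.
def timed(fn):
    @functools.wraps(fn)
    def _timed(*args, **kwargs):
        start_time = timeit.default_timer()
        try:
            result = fn(*args, **kwargs)
        finally:
            elapsed_formatted = human_readable(timeit.default_timer() - start_time)
            logger.info(
                f"Completed {get_detail(fn.__name__)}operation in {elapsed_formatted}.\n"
            )
        return result

    return _timed
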
def read_data_from_file(self, local_lookml_project_path: str) -> dict:
    """Parse a LookML file into a dictionary with keys for each of its primary
    properties and a list of values."""
    logger.info(
        "Parsing data from local LookML file {}".format(
            self.lookml_file_name_and_path
        )
    )
    with open(
        utils.assemble_path(
            local_lookml_project_path, self.lookml_file_name_and_path
        ),
        "r",
    ) as lookml_file:
        return lkml.load(lookml_file)

def _parse_artifact(self, artifact_file: str):
    """Parse a dbt artifact JSON file from the project target directory."""
    if artifact_file not in ARTIFACT_DATA_FILES.values():
        logger.warning(
            "You have specified an artifact file which is not in the list of known dbt artifacts"
        )
    artifact_path = utils.assemble_path(
        self.project_root, self.target_path, artifact_file
    )
    if not utils.file_exists(artifact_path):
        raise DbteaException(
            name="artifact-file-missing",
            title="Artifact file {} is missing".format(artifact_file),
            detail="There is no artifact {} at path {}. You may not yet have generated this artifact and "
            "need to run models, source freshness or docs generation".format(
                artifact_file, artifact_path
            ),
        )

    return utils.parse_json_file(artifact_path)

def fetch_dbt_project_directory(custom_project_directory: str = None) -> str:
    """Return the path to the base of the closest dbt project by traversing upward
    from the current working directory until a dbt_project.yml file is found.

    If an optional custom project directory is specified (which should be the full
    path to the base of a dbt project), validate and return that directory instead.
    """
    project_directory = os.getcwd()
    root_path = os.path.abspath(os.sep)

    if custom_project_directory:
        custom_directory_project_file = assemble_path(
            custom_project_directory, DBT_PROJECT_FILE
        )
        if os.path.exists(custom_directory_project_file):
            return custom_project_directory
        else:
            raise DbteaException(
                name="invalid-custom-dbt-project-directory",
                title="No dbt project found at supplied custom directory",
                detail="No dbt_project.yml file found at supplied custom project directory {}, confirm your "
                "custom project directory is valid".format(custom_project_directory),
            )

    while project_directory != root_path:
        dbt_project_file = assemble_path(project_directory, DBT_PROJECT_FILE)
        if os.path.exists(dbt_project_file):
            logger.info(
                "Running dbtea against dbt project at path: {}".format(
                    project_directory
                )
            )
            return project_directory
        project_directory = os.path.dirname(project_directory)

    raise DbteaException(
        name="missing-dbt-project",
        title="No dbt project found",
        detail="No dbt_project.yml file found in the current or any parent directory. You need to run dbtea "
        "from within a dbt project in order to use its tooling, or supply a custom project directory",
    )

def create_pull_request(
    organization_name: str,
    repository_name: str,
    git_token: str,
    head_branch: str,
    base_branch: str = "main",
    title: str = "dbtea updates",
    description: str = "dbtea metadata refresh",
):
    """Create a pull request for the head_branch against the base_branch."""
    github_pulls_url = utils.assemble_path(
        GITHUB_API_URL, "repos", organization_name, repository_name, "pulls"
    )
    headers = {
        "Authorization": "token {}".format(git_token),
        "Content-Type": "application/json",
    }
    payload = {
        "title": title,
        "body": description,
        "head": head_branch,
        "base": base_branch,
    }
    response = requests.post(
        github_pulls_url, headers=headers, data=json.dumps(payload)
    )

    if response.status_code >= 400:
        raise GitException(
            name="pull-request-create-fail",
            provider="github",
            title="Error Creating GitHub Pull Request via API",
            status=response.status_code,
            detail=response.json().get("errors"),
            response=response,
        )
    logger.info(
        "Created pull request for branch {} at URL: {}".format(
            head_branch, response.json().get("html_url")
        )
    )

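# Usage sketch (an assumption, not part of the original module): open a pull
# request from a feature branch; every identifier below is a hypothetical
# placeholder.
def _example_create_pull_request() -> None:
    create_pull_request(
        organization_name="my-org",
        repository_name="my-repo",
        git_token="<personal-access-token>",
        head_branch="dbtea-refresh",
        base_branch="main",
    )
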
def wrapper(*args, **kwargs):
    try:
        return function(*args, **kwargs)
    except DbteaException as error:
        logger.error(
            f"\n{error}\n\n"
            + "For support, please create an issue at https://github.com/spectacles-ci/spectacles/issues"
            + "\n"
        )
        sys.exit(error.exit_code)
    except KeyboardInterrupt as error:
        logger.debug(error, exc_info=True)
        logger.info("Spectacles was manually interrupted.")
        sys.exit(1)
    except Exception as error:
        logger.debug(error, exc_info=True)
        logger.error(
            f'\nEncountered unexpected {error.__class__.__name__}: "{error}"\n'
            f"Full error traceback logged to file.\n\n"
            + "For support, please create an issue at https://github.com/spectacles-ci/spectacles/issues"
            + "\n"
        )
        sys.exit(1)

def convert_to_lookml_data_type(
    field_name: str, field_type: str, include_postal_code: bool = False
) -> str:
    """Map a warehouse column data type to its corresponding LookML data type."""
    if include_postal_code and field_name in LOOKML_ZIPCODE_FIELD_NAMES:
        lookml_data_type = LOOKML_TYPE_ZIP
    elif field_type in LOOKML_YESNO_DATA_TYPES:
        lookml_data_type = LOOKML_TYPE_BOOL
    elif field_type in LOOKML_TIME_DATA_TYPES:
        lookml_data_type = LOOKML_TYPE_DATETIME
    elif field_type in LOOKML_DATE_DATA_TYPES:
        lookml_data_type = LOOKML_TYPE_DATE
    elif field_type in LOOKML_NUMBER_DATA_TYPES:
        lookml_data_type = LOOKML_TYPE_NUMBER
    else:
        lookml_data_type = LOOKML_TYPE_STRING

    logger.debug(
        f"Field: {field_name} with data type: {field_type} was mapped to LookML type: {lookml_data_type}"
    )
    return lookml_data_type

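# Usage sketch (an assumption, not part of the original module): which LookML
# type comes back depends on this module's LOOKML_*_DATA_TYPES constants; a
# warehouse "timestamp" type would typically map to the datetime LookML type.
def _example_convert_type() -> None:
    print(convert_to_lookml_data_type("created_at", "timestamp"))
    print(convert_to_lookml_data_type("zip_code", "string", include_postal_code=True))
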
def _dbt_cli_runner(self, input_command_as_list: list, *args, **kwargs):
    """Run dbt CLI command based on input options."""
    arg_flags = list()
    kwarg_flags = list()
    if args:
        for flag in args:
            arg_flags.append(f"--{flag}")
        input_command_as_list.extend(arg_flags)
    if kwargs:
        for flag, flag_value in kwargs.items():
            kwarg_flags.append(f"--{flag} '{flag_value}'")
        input_command_as_list.extend(kwarg_flags)

    input_command = " ".join(input_command_as_list)
    logger.info("Running dbt command: {}".format(input_command))
    return utils.run_cli_command(
        input_command,
        working_directory=self.project_root,
        use_shell=True,
        output_as_text=True,
        capture_output=True,
    )

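# Behavior sketch (an assumption, not part of the original module): `runner` is
# an instance of the class these methods belong to (its name is not shown in
# this excerpt). Positional args become bare `--flag` switches and kwargs
# become quoted `--flag 'value'` pairs.
def _example_dbt_cli_flags(runner) -> None:
    # Would execute: dbt run --full-refresh --select 'my_model'
    runner.run_dbt_run("full-refresh", select="my_model")
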
def run_dbt_deps(self, require_codegen: bool = False, *args, **kwargs) -> None:
    """Run `dbt deps` command to install dbt project dependencies, optionally
    requiring that the `codegen` package is included in the project."""
    project_packages_file = utils.assemble_path(self.project_root, "packages.yml")
    if require_codegen:
        if not utils.file_exists(project_packages_file):
            raise FileNotFoundError(
                "You must have a packages.yml file specified in your project"
            )
        package_data = utils.parse_yaml_file(project_packages_file)
        package_list = [
            entry.get("package") for entry in package_data.get("packages", [])
        ]
        if "fishtown-analytics/codegen" not in package_list:
            raise ValueError(
                "You have not brought the codegen dbt package into your project! You must include the "
                "package 'fishtown-analytics/codegen' in your `packages.yml` file to codegen in bulk."
            )

    logger.info("Fetching dbt project package dependencies...")
    result = self._dbt_cli_runner(DBT_DEPS, *args, **kwargs)
    logger.info(result)

def run_dbt_parse(self, *args, **kwargs) -> None:
    """Run `dbt parse` command to provide information on performance."""
    logger.info("Parsing dbt project for performance details...")
    result = self._dbt_cli_runner(DBT_PARSE, *args, **kwargs)
    logger.info(result)

def run_dbt_list(self, *args, **kwargs) -> None:
    """Run `dbt list` (ls) command to list all resources within the dbt project."""
    logger.info("Listing dbt project resources...")
    result = self._dbt_cli_runner(DBT_LIST, *args, **kwargs)
    logger.info(result)

def run_dbt_run(self, *args, **kwargs) -> None:
    """Run `dbt run` command to run dbt models."""
    logger.info("Running dbt models...")
    result = self._dbt_cli_runner(DBT_RUN, *args, **kwargs)
    logger.info(result)

def run_dbt_rpc(self, *args, **kwargs) -> None:
    """Run `dbt rpc` command to spin up an RPC server."""
    logger.info("Starting dbt RPC server...")
    result = self._dbt_cli_runner(DBT_RPC, *args, **kwargs)
    logger.info(result)

def run_dbt_snapshot(self, *args, **kwargs) -> None:
    """Run `dbt snapshot` command to execute dbt snapshots."""
    logger.info("Running dbt snapshots...")
    result = self._dbt_cli_runner(DBT_SNAPSHOT, *args, **kwargs)
    logger.info(result)

def run_dbt_seed(self, *args, **kwargs) -> None:
    """Run `dbt seed` command to upload seed data."""
    logger.info("Uploading dbt seed data files to data warehouse...")
    result = self._dbt_cli_runner(DBT_SEED, *args, **kwargs)
    logger.info(result)

def run_dbt_source_snapshot_freshness(self, *args, **kwargs) -> None:
    """Run `dbt source snapshot-freshness` command to get freshness of data sources."""
    logger.info("Checking freshness of data source tables...")
    result = self._dbt_cli_runner(DBT_SOURCE_SNAPSHOT_FRESHNESS, *args, **kwargs)
    logger.info(result)

def create_lookml_model(
    model_name: str,
    output_to: str = "stdout",
    connection: str = None,
    label: str = None,
    includes: list = None,
    explores: List[dict] = None,
    access_grants: List[dict] = None,
    tests: List[dict] = None,
    datagroups: List[dict] = None,
    map_layers: List[dict] = None,
    named_value_formats: List[dict] = None,
    fiscal_month_offset: int = None,
    persist_for: str = None,
    persist_with: str = None,
    week_start_day: str = None,
    case_sensitive: bool = True,
    output_directory: str = None,
) -> Optional[str]:
    """Create a LookML model and either return it as a string (stdout) or write
    it to a .model.lkml file in the given output directory."""
    assembled_model_dict = dict()
    logger.info("Creating LookML Model: {}".format(model_name))

    # Validate inputs
    if output_to not in OUTPUT_TO_OPTIONS:
        raise DbteaException(
            name="invalid-lookml-model-properties",
            title="Invalid LookML Model Properties",
            detail="You must choose a valid output_to option from the following: {}".format(
                OUTPUT_TO_OPTIONS
            ),
        )
    if output_to == "file" and not output_directory:
        raise DbteaException(
            name="missing-output-directory",
            title="No Model Output Directory Specified",
            detail="You must include an output_directory param if outputting model to a file",
        )

    # Add optional model options
    if connection:
        assembled_model_dict["connection"] = connection
    if label:
        assembled_model_dict["label"] = label
    if includes:
        assembled_model_dict["includes"] = includes
    if persist_for:
        assembled_model_dict["persist_for"] = persist_for
    if persist_with:
        assembled_model_dict["persist_with"] = persist_with
    if fiscal_month_offset:
        assembled_model_dict["fiscal_month_offset"] = fiscal_month_offset
    if week_start_day:
        assembled_model_dict["week_start_day"] = week_start_day
    if not case_sensitive:
        assembled_model_dict["case_sensitive"] = "no"

    # Add body of Model
    if datagroups:
        assembled_model_dict["datagroups"] = datagroups
    if access_grants:
        assembled_model_dict["access_grants"] = access_grants
    if explores:
        assembled_model_dict["explores"] = explores
    if named_value_formats:
        assembled_model_dict["named_value_formats"] = named_value_formats
    if map_layers:
        assembled_model_dict["map_layers"] = map_layers
    if tests:
        assembled_model_dict["tests"] = tests

    if output_to == "stdout":
        return lkml.dump(assembled_model_dict)
    else:
        model_file_name = utils.assemble_path(
            output_directory, model_name + ".model.lkml"
        )
        with open(model_file_name, "w") as output_stream:
            output_stream.write(lkml.dump(assembled_model_dict))

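# Usage sketch (an assumption, not part of the original module): emit a minimal
# model to stdout; the connection name and include paths are hypothetical.
def _example_create_lookml_model() -> None:
    model_lookml = create_lookml_model(
        "analytics",
        connection="warehouse_connection",
        includes=["/views/*.view.lkml"],
    )
    print(model_lookml)
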
def assemble_model(
    cls,
    model_name: str,
    directory_path: str,
    connection: str = None,
    label: str = None,
    includes: list = None,
    explores: List[dict] = None,
    access_grants: List[dict] = None,
    tests: List[dict] = None,
    datagroups: List[dict] = None,
    map_layers: List[dict] = None,
    named_value_formats: List[dict] = None,
    fiscal_month_offset: int = None,
    persist_for: str = None,
    persist_with: str = None,
    week_start_day: str = None,
    case_sensitive: bool = True,
):
    """Assemble a LookML model from its component properties."""
    assembled_model_dict = dict()
    logger.info("Creating LookML Model: {}".format(model_name))

    # Add optional model options
    if connection:
        assembled_model_dict["connection"] = connection
    if label:
        assembled_model_dict["label"] = label
    if includes:
        assembled_model_dict["includes"] = includes
    if persist_for:
        assembled_model_dict["persist_for"] = persist_for
    if persist_with:
        assembled_model_dict["persist_with"] = persist_with
    if fiscal_month_offset:
        assembled_model_dict["fiscal_month_offset"] = fiscal_month_offset
    if week_start_day:
        assembled_model_dict["week_start_day"] = week_start_day
    if not case_sensitive:
        assembled_model_dict["case_sensitive"] = "no"

    # Add body of Model
    if datagroups:
        assembled_model_dict["datagroups"] = datagroups
    if access_grants:
        assembled_model_dict["access_grants"] = access_grants
    if explores:
        assembled_model_dict["explores"] = explores
    if named_value_formats:
        assembled_model_dict["named_value_formats"] = named_value_formats
    if map_layers:
        assembled_model_dict["map_layers"] = map_layers
    if tests:
        assembled_model_dict["tests"] = tests

    return cls(
        model_name,
        "model",
        directory_path=directory_path,
        lookml_data=assembled_model_dict,
    )

def run_dbt_test(self, *args, **kwargs) -> None:
    """Run `dbt test` command to run tests against dbt models."""
    logger.info("Running dbt tests to validate models...")
    result = self._dbt_cli_runner(DBT_TEST, *args, **kwargs)
    logger.info(result)

def run_dbt_clean(self, *args, **kwargs) -> None:
    """Run `dbt clean` command to remove clean target folders (usually dbt_modules,
    target) from the dbt project."""
    logger.info("Removing dbt clean target folders from dbt project...")
    result = self._dbt_cli_runner(DBT_CLEAN, *args, **kwargs)
    logger.info(result)

def run_dbt_compile(self, *args, **kwargs) -> None:
    """Run `dbt compile` command to compile dbt models."""
    logger.info("Compiling dbt models...")
    result = self._dbt_cli_runner(DBT_COMPILE, *args, **kwargs)
    logger.info(result)

def run_dbt_init(self, *args, **kwargs) -> None:
    """Run `dbt init` command to create a base dbt project."""
    logger.info("Initializing a base dbt project...")
    result = self._dbt_cli_runner(DBT_INIT, *args, **kwargs)
    logger.info(result)