Example #1
def parse_lookml_file(lookml_file_name: str) -> dict:
    """Parse a LookML file into a dictionary with keys for each of its primary properties and a list of values."""
    logger.info("Parsing data from LookML file {}".format(lookml_file_name))
    with open(lookml_file_name, "r") as lookml_file_stream:
        lookml_data = lkml.load(lookml_file_stream)

    return lookml_data
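A brief usage sketch (the file name is illustrative, and this assumes the `lkml` package and a module-level `logger` are available as in the snippet above):

# Hypothetical call: parse a view file and inspect its top-level LookML properties.
lookml_dict = parse_lookml_file("orders.view.lkml")
print(list(lookml_dict.keys()))  # e.g. ["views"], depending on the file contents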
Example #2
    def run_dbt_debug(self, *args, **kwargs) -> None:
        """Run `dbt debug` command to check if your dbt_project.yml and profiles.yml files are properly configured."""
        logger.info(
            "Confirming proper dbt project setup, profile and warehouse access..."
        )
        result = self._dbt_cli_runner(DBT_DEBUG, *args, **kwargs)
        logger.info(result)
Example #3
def run_cli_command(
    command: Union[List[str], str],
    working_directory: str,
    use_shell: bool = False,
    output_as_text: bool = True,
    capture_output: bool = True,
    **kwargs,
):
    """Execute command line subprocess"""
    result = subprocess.run(
        command,
        shell=use_shell,
        cwd=working_directory,
        text=output_as_text,
        capture_output=capture_output,
        **kwargs,
    )

    if result.stderr:
        raise subprocess.CalledProcessError(returncode=result.returncode,
                                            cmd=result.args,
                                            stderr=result.stderr)
    if result.stdout:
        if "Encountered an error" in result.stdout:  # Handle for dbt stdout errors
            logger.error("dbt Error: {}".format(result.stdout))
            raise subprocess.CalledProcessError(returncode=result.returncode,
                                                cmd=result.args,
                                                stderr=result.stdout)

        logger.debug("Command Result:\n{}".format(result.stdout))

    return result.stdout
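A minimal usage sketch, assuming a Unix-like environment; the command and working directory are placeholders only:

# Hypothetical call: list files in the current directory and capture stdout.
# Any output on stderr (or a dbt error string on stdout) raises CalledProcessError.
listing = run_cli_command(["ls", "-la"], working_directory=".")
print(listing)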
Example #4
    def assemble_view(
        cls,
        view_name: str,
        sql_table_name: str = None,
        derived_table: str = None,
        dimensions: List[dict] = None,
        dimension_groups: List[dict] = None,
        measures: List[dict] = None,
        sets: List[dict] = None,
        parameters: List[dict] = None,
        label: str = None,
        required_access_grants: list = None,
        extends: str = None,
        extension_is_required: bool = False,
        include_suggestions: bool = True,
    ):
        assembled_view_dict = {"view": {"name": view_name}}
        logger.info("Creating LookML View: {}".format(view_name))

        # Validate inputs
        if not sql_table_name and not derived_table and not extends:
            raise DbteaException(
                name="missing-lookml-view-properties",
                title="Missing Necessary LookML View Properties",
                detail="Created LookML Views must specify either a `sql_table_name`, `derived_table` or `extends` in order "
                "to properly specify the view source",
            )

        # Add optional view options as needed
        if label:
            assembled_view_dict["view"]["label"] = label
        if extends:
            assembled_view_dict["view"]["extends"] = extends
        if extension_is_required:
            assembled_view_dict["view"]["extension"] = "required"
        if sql_table_name:
            assembled_view_dict["view"]["sql_table_name"] = sql_table_name
        if derived_table:
            assembled_view_dict["view"]["derived_table"] = derived_table
        if required_access_grants:
            assembled_view_dict["view"][
                "required_access_grants"
            ] = required_access_grants
        if not include_suggestions:
            assembled_view_dict["view"]["suggestions"] = "no"

        # Add body of View
        if parameters:
            assembled_view_dict["view"]["parameters"] = parameters
        if dimensions:
            assembled_view_dict["view"]["dimensions"] = dimensions
        if dimension_groups:
            assembled_view_dict["view"]["dimension_groups"] = dimension_groups
        if measures:
            assembled_view_dict["view"]["measures"] = measures
        if sets:
            assembled_view_dict["view"]["sets"] = sets

        return lkml.dump(assembled_view_dict)
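A hedged usage sketch for this class method; the class name `LookML` and the field definitions are assumptions for illustration only:

# Hypothetical call producing serialized LookML for a simple orders view.
orders_view = LookML.assemble_view(
    view_name="orders",
    sql_table_name="analytics.orders",
    dimensions=[{"name": "order_id", "type": "string", "sql": "${TABLE}.order_id"}],
    measures=[{"name": "order_count", "type": "count"}],
)
print(orders_view)  # string output from lkml.dump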
Example #5
def dbt_model_schemas_to_lookml_views(dbt_models_data: List[dict]) -> dict:
    """"""
    lookml_views_dbt_data = dbt_models_data

    for model_data in lookml_views_dbt_data:
        if "columns" in model_data:
            model_data["dimensions"] = list()
            model_data["dimension_groups"] = list()
            invalid_column_properties = list()

            for index, column_data in enumerate(model_data["columns"]):
                for column_property_key in column_data.keys():
                    if column_property_key not in VALID_LOOKML_DIMENSION_PROPERTIES:
                        invalid_column_properties.append(column_property_key)
                for invalid_column_property in invalid_column_properties:
                    logger.debug(
                        "Removing property invalid for LookML for dimension {}: {}".format(
                            model_data.get("name"), invalid_column_property
                        )
                    )
                    if model_data["columns"][index].get(invalid_column_property):
                        del model_data["columns"][index][invalid_column_property]

                if column_data.get("type") in LOOKML_DIMENSION_GROUP_TYPES:
                    column_data.update({"sql": "${TABLE}." + column_data.get("name")})
                    model_data["dimension_groups"].append(column_data)
                else:
                    column_data.update({"sql": "${TABLE}." + column_data.get("name")})
                    model_data["dimensions"].append(column_data)
                # model_data["dimensions"][index]["sql"] = "${TABLE}." + column_data.get("name")

            del model_data["columns"]

        if "alias" in model_data:
            model_data["sql_table_name"] = model_data["alias"]
        else:
            model_data["sql_table_name"] = model_data.get(
                "name"
            )  # TODO: Needs to be fully qualified name]

        # Process model-level meta fields
        if "meta" in model_data:
            for meta_key, meta_value in model_data["meta"].items():
                if meta_key[:7] == "looker_" or meta_key[:7] == "lookml_":
                    model_data[meta_key[7:]] = model_data["meta"][meta_key]

        invalid_properties = list()
        for model_property in model_data.keys():
            if model_property not in VALID_LOOKML_VIEW_PROPERTIES:
                invalid_properties.append(model_property)
        for invalid_property in invalid_properties:
            logger.debug(
                "Removing property invalid for LookML for view {}: {}".format(
                    model_data.get("name"), invalid_property
                )
            )
            del model_data[invalid_property]

    return {"views": lookml_views_dbt_data}
Example #6
    def write_data_to_file(self, replace_if_exists: bool = True) -> None:
        """Write config data to the dbtea config file, unless it already exists and replace_if_exists is False."""
        if (self._config_file_exists() and self._config_data_exists()
                and not replace_if_exists):
            logger.warning(
                "Dbtea config file already exists, ignoring write config data to file step"
            )
        else:
            logger.info("Creating {} file at path: {}".format(
                self.file_name, self.config_dir))
            utils.write_to_yaml_file(self.config_data, self.config_name_and_path)
Example #7
    def run_dbt_run_operation(
        self, macro_name: str, macro_args: dict = None, *args, **kwargs
    ) -> None:
        """Run `dbt run-operation` command to run dbt macros."""
        logger.info("Executing dbt macro operation {}...".format(macro_name))
        operation_with_macro = DBT_RUN_OPERATION.copy()
        operation_with_macro.append(macro_name)
        if macro_args:
            operation_with_macro.append(f"--args '{macro_args}'")

        result = self._dbt_cli_runner(operation_with_macro, *args, **kwargs)
        logger.info(result)
Example #8
    def write_looker_config(self):
        """"""
        logger.info("Writing Looker config file at path: {}".format(
            self.looker_config_path))
        self.looker_config.add_section(self.looker_config_section)
        for key, value in self.config_data.get(self.dbt_project, {}).items():
            if key.startswith("looker_sdk_"):
                self.looker_config.set(self.looker_config_section,
                                       str(key.replace("looker_sdk_", "")),
                                       str(value))

        with open(self.looker_config_path, "w") as config_stream:
            self.looker_config.write(config_stream)
Example #9
    def timed_function(*args, **kwargs):
        start_time = timeit.default_timer()
        try:
            result = fn(*args, **kwargs)
        finally:
            elapsed_time = timeit.default_timer() - start_time
            elapsed_formatted = human_readable(elapsed_time)
            message_detail = get_detail(fn.__name__)

            logger.info(
                f"Completed {message_detail}operation in {elapsed_formatted}.\n"
            )
        return result
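`timed_function` reads like the inner wrapper of a timing decorator; below is a minimal sketch of the enclosing decorator it likely belongs to, assuming `human_readable`, `get_detail` and `logger` exist in the same module (the decorator name `log_duration` is an assumption):

import functools
import timeit

def log_duration(fn):
    """Hypothetical decorator wrapping fn with the timed_function body shown above."""
    @functools.wraps(fn)
    def timed_function(*args, **kwargs):
        start_time = timeit.default_timer()
        try:
            result = fn(*args, **kwargs)
        finally:
            # Log elapsed time even if fn raises.
            elapsed_formatted = human_readable(timeit.default_timer() - start_time)
            message_detail = get_detail(fn.__name__)
            logger.info(f"Completed {message_detail}operation in {elapsed_formatted}.\n")
        return result

    return timed_function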
Example #10
    def read_data_from_file(self, local_lookml_project_path: str) -> dict:
        """Parse a LookML file into a dictionary with keys for each of its primary properties and a list of values."""
        logger.info(
            "Parsing data from local LookML file {}".format(
                self.lookml_file_name_and_path
            )
        )
        with open(
            utils.assemble_path(
                local_lookml_project_path, self.lookml_file_name_and_path
            ),
            "r",
        ) as lookml_file:
            return lkml.load(lookml_file)
Example #11
    def _parse_artifact(self, artifact_file: str):
        """Parse a dbt artifact JSON file from the project target directory and return its contents."""
        if artifact_file not in ARTIFACT_DATA_FILES.values():
            logger.warning(
                "You have specified an artifact file which is not in the list of known dbt artifacts"
            )
        artifact_path = utils.assemble_path(
            self.project_root, self.target_path, artifact_file
        )
        if not utils.file_exists(artifact_path):
            raise DbteaException(
                name="artifact-file-missing",
                title="Artifact file {} is missing".format(artifact_file),
                detail="There is no artifact {} at path {}. You may not yet have generated this artifact; "
                "run models, source freshness checks or docs generation first".format(
                    artifact_file, artifact_path
                ),
            )
        return utils.parse_json_file(artifact_path)
Example #12
def fetch_dbt_project_directory(custom_project_directory: str = None) -> str:
    """Return path to the base of the closest dbt project by traversing from current working directory backwards in
    order to find a dbt_project.yml file.

    If an optional custom project path is specified (which should be a full path to the base project path of a dbt
    project), return that directory instead.
    """
    project_directory = os.getcwd()
    root_path = os.path.abspath(os.sep)

    if custom_project_directory:
        custom_directory_project_file = assemble_path(custom_project_directory,
                                                      DBT_PROJECT_FILE)
        if os.path.exists(custom_directory_project_file):
            return custom_project_directory
        else:
            raise DbteaException(
                name="invalid-custom-dbt-project-directory",
                title="No dbt project found at supplied custom directory",
                detail=
                "No dbt_project.yml file found at supplied custom project directory {}, confirm your "
                "custom project directory is valid".format(
                    custom_project_directory),
            )

    while project_directory != root_path:
        dbt_project_file = assemble_path(project_directory, DBT_PROJECT_FILE)
        if os.path.exists(dbt_project_file):
            logger.info("Running dbtea against dbt project at path: {}".format(
                project_directory))
            return project_directory

        project_directory = os.path.dirname(project_directory)

    raise DbteaException(
        name="missing-dbt-project",
        title="No dbt project found",
        detail="No dbt_project.yml file found in the current directory or any of its parent paths. You need to "
        "run dbtea from within a dbt project in order to use its tooling, or supply a custom project directory",
    )
Example #13
def create_pull_request(
    organization_name: str,
    repository_name: str,
    git_token: str,
    head_branch: str,
    base_branch: str = "main",
    title: str = "dbtea updates",
    description: str = "dbtea metadata refresh",
):
    """Creates the pull request for the head_branch against the base_branch"""
    github_pulls_url = utils.assemble_path(GITHUB_API_URL, "repos",
                                           organization_name, repository_name,
                                           "pulls")
    headers = {
        "Authorization": "token {}".format(git_token),
        "Content-Type": "application/json",
    }

    payload = {
        "title": title,
        "body": description,
        "head": head_branch,
        "base": base_branch,
    }

    response = requests.post(github_pulls_url,
                             headers=headers,
                             data=json.dumps(payload))
    if response.status_code >= 400:
        raise GitException(
            name="pull-request-create-fail",
            provider="github",
            title="Error Creating GitHub Pull Request via API",
            status=response.status_code,
            detail=response.json().get("errors"),
            response=response,
        )

    logger.info("Created pull request for branch {} at URL: {}".format(
        head_branch,
        response.json().get("html_url")))
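A hedged invocation sketch; the organization, repository, token environment variable and branch name are placeholders:

import os

# Hypothetical call opening a PR from a dbtea branch against main.
create_pull_request(
    organization_name="my-org",
    repository_name="looker-project",
    git_token=os.environ["GITHUB_TOKEN"],
    head_branch="dbtea/metadata-refresh",
)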
Example #14
    def wrapper(*args, **kwargs):
        try:
            return function(*args, **kwargs)
        except DbteaException as error:
            logger.error(
                f"\n{error}\n\n" +
                "For support, please create an issue at https://github.com/spectacles-ci/spectacles/issues"
                + "\n")
            sys.exit(error.exit_code)
        except KeyboardInterrupt as error:
            logger.debug(error, exc_info=True)
            logger.info("Spectacles was manually interrupted.")
            sys.exit(1)
        except Exception as error:
            logger.debug(error, exc_info=True)
            logger.error(
                f'\nEncountered unexpected {error.__class__.__name__}: "{error}"\n'
                f"Full error traceback logged to file.\n\n" +
                "For support, please create an issue at https://github.com/spectacles-ci/spectacles/issues"
                + "\n")
            sys.exit(1)
Example #15
def convert_to_lookml_data_type(field_name: str,
                                field_type: str,
                                include_postal_code: bool = False) -> str:
    """"""
    if include_postal_code and field_name in LOOKML_ZIPCODE_FIELD_NAMES:
        lookml_data_type = LOOKML_TYPE_ZIP
    elif field_type in LOOKML_YESNO_DATA_TYPES:
        lookml_data_type = LOOKML_TYPE_BOOL
    elif field_type in LOOKML_TIME_DATA_TYPES:
        lookml_data_type = LOOKML_TYPE_DATETIME
    elif field_type in LOOKML_DATE_DATA_TYPES:
        lookml_data_type = LOOKML_TYPE_DATE
    elif field_type in LOOKML_NUMBER_DATA_TYPES:
        lookml_data_type = LOOKML_TYPE_NUMBER
    else:
        lookml_data_type = LOOKML_TYPE_STRING

    logger.debug(
        f"Field: {field_name} with data type: {field_type} was mapped to LookML type: {lookml_data_type}"
    )
    return lookml_data_type
Example #16
    def _dbt_cli_runner(self, input_command_as_list: list, *args, **kwargs):
        """Run dbt CLI command based on input options."""
        arg_flags = list()
        kwarg_flags = list()
        if args:
            for flag in args:
                arg_flags.append(f"--{flag}")
            input_command_as_list.extend(arg_flags)
        if kwargs:
            for flag, flag_value in kwargs.items():
                kwarg_flags.append(f"--{flag} '{flag_value}'")
            input_command_as_list.extend(kwarg_flags)

        input_command = " ".join(input_command_as_list)

        logger.info("Running dbt command: ".format(" ".join(input_command_as_list)))
        return utils.run_cli_command(
            input_command,
            working_directory=self.project_root,
            use_shell=True,
            output_as_text=True,
            capture_output=True,
        )
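To illustrate how positional and keyword arguments become CLI flags (the instance name `dbt_project` and the flag values below are assumptions):

# Hypothetical call: "full-refresh" is appended as "--full-refresh" and
# models="staging.*" as "--models 'staging.*'", then the list is joined into a
# single shell command string and run from the project root.
dbt_project._dbt_cli_runner(["dbt", "run"], "full-refresh", models="staging.*")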
Example #17
    def run_dbt_deps(self, require_codegen: bool = False, *args, **kwargs) -> None:
        """Run `dbt deps` command to install dbt project dependencies; the `codegen` package must be included."""
        project_packages_file = utils.assemble_path(self.project_root, "packages.yml")

        if require_codegen:
            if not utils.file_exists(project_packages_file):
                raise FileNotFoundError(
                    "You must have a packages.yml file specified in your project"
                )

            package_data = utils.parse_yaml_file(project_packages_file)

            package_list = [
                entry.get("package") for entry in package_data.get("packages", [])
            ]
            if "fishtown-analytics/codegen" not in package_list:
                raise ValueError(
                    "You have not brought the codegen dbt package into your project! You must include the "
                    "package 'fishtown-analytics/codegen' in your `packages.yml` file to run codegen in bulk."
                )

        logger.info("Fetching dbt project package dependencies...")
        result = self._dbt_cli_runner(DBT_DEPS, *args, **kwargs)
        logger.info(result)
Example #18
    def run_dbt_parse(self, *args, **kwargs) -> None:
        """Run `dbt parse` command to provide information on performance."""
        logger.info("Parsing dbt project for performance details...")
        result = self._dbt_cli_runner(DBT_PARSE, *args, **kwargs)
        logger.info(result)
Example #19
    def run_dbt_list(self, *args, **kwargs) -> None:
        """Run `dbt list` (ls) command to list all resources within the dbt project."""
        logger.info("Listing dbt project resources...")
        result = self._dbt_cli_runner(DBT_LIST, *args, **kwargs)
        logger.info(result)
Example #20
    def run_dbt_run(self, *args, **kwargs) -> None:
        """Run `dbt run` command to run dbt models."""
        logger.info("Running dbt models...")
        result = self._dbt_cli_runner(DBT_RUN, *args, **kwargs)
        logger.info(result)
Example #21
    def run_dbt_rpc(self, *args, **kwargs) -> None:
        """Run `dbt rpc` command to spin up an RPC server."""
        logger.info("Starting dbt RPC server...")
        result = self._dbt_cli_runner(DBT_RPC, *args, **kwargs)
        logger.info(result)
Example #22
    def run_dbt_snapshot(self, *args, **kwargs) -> None:
        """Run `dbt snapshot` command to execute dbt snapshots."""
        logger.info("Running dbt snapshots...")
        result = self._dbt_cli_runner(DBT_SNAPSHOT, *args, **kwargs)
        logger.info(result)
Example #23
    def run_dbt_seed(self, *args, **kwargs) -> None:
        """Run `dbt seed` command to upload seed data."""
        logger.info("Uploading dbt seed data files to data warehouse...")
        result = self._dbt_cli_runner(DBT_SEED, *args, **kwargs)
        logger.info(result)
Example #24
    def run_dbt_source_snapshot_freshness(self, *args, **kwargs) -> None:
        """Run `dbt source snapshot-freshness` command to get freshness of data sources."""
        logger.info("Checking freshness of data source tables...")
        result = self._dbt_cli_runner(DBT_SOURCE_SNAPSHOT_FRESHNESS, *args, **kwargs)
        logger.info(result)
Example #25
def create_lookml_model(
    model_name: str,
    output_to: str = "stdout",
    connection: str = None,
    label: str = None,
    includes: list = None,
    explores: List[dict] = None,
    access_grants: List[dict] = None,
    tests: List[dict] = None,
    datagroups: List[dict] = None,
    map_layers: List[dict] = None,
    named_value_formats: List[dict] = None,
    fiscal_month_offset: int = None,
    persist_for: str = None,
    persist_with: str = None,
    week_start_day: str = None,
    case_sensitive: bool = True,
    output_directory: str = None,
) -> Optional[str]:
    """"""
    assembled_model_dict = dict()
    logger.info("Creating LookML Model: {}".format(model_name))

    # Validate inputs
    if output_to not in OUTPUT_TO_OPTIONS:
        raise DbteaException(
            name="invalid-lookml-model-properties",
            title="Invalid LookML Model Properties",
            detail="You must choose a valid output_to option from the following: {}".format(
                OUTPUT_TO_OPTIONS
            ),
        )
    if output_to == "file" and not output_directory:
        raise DbteaException(
            name="missing-output-directory",
            title="No Model Output Directory Specified",
            detail="You must include an output_directory param if outputting model to a file",
        )

    # Add optional model options
    if connection:
        assembled_model_dict["connection"] = connection
    if label:
        assembled_model_dict["label"] = label
    if includes:
        assembled_model_dict["includes"] = includes
    if persist_for:
        assembled_model_dict["persist_for"] = persist_for
    if persist_with:
        assembled_model_dict["persist_with"] = persist_with
    if fiscal_month_offset:
        assembled_model_dict["fiscal_month_offset"] = fiscal_month_offset
    if week_start_day:
        assembled_model_dict["week_start_day"] = week_start_day
    if not case_sensitive:
        assembled_model_dict["case_sensitive"] = "no"

    # Add body of Model
    if datagroups:
        assembled_model_dict["datagroups"] = datagroups
    if access_grants:
        assembled_model_dict["access_grants"] = access_grants
    if explores:
        assembled_model_dict["explores"] = explores
    if named_value_formats:
        assembled_model_dict["named_value_formats"] = named_value_formats
    if map_layers:
        assembled_model_dict["map_layers"] = map_layers
    if tests:
        assembled_model_dict["tests"] = tests

    if output_to == "stdout":
        return lkml.dump(assembled_model_dict)
    else:
        model_file_name = utils.assemble_path(
            output_directory, model_name + ".model.lkml"
        )
        with open(model_file_name, "w") as output_stream:
            output_stream.write(lkml.dump(assembled_model_dict))
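A hedged usage sketch; the connection name, includes and output directory are placeholders:

# Hypothetical call writing an analytics.model.lkml file into the lookml/ directory.
create_lookml_model(
    model_name="analytics",
    output_to="file",
    connection="snowflake_prod",
    includes=["/views/*.view.lkml"],
    explores=[{"name": "orders"}],
    output_directory="lookml",
)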
Example #26
    def assemble_model(
        cls,
        model_name: str,
        directory_path: str,
        connection: str = None,
        label: str = None,
        includes: list = None,
        explores: List[dict] = None,
        access_grants: List[dict] = None,
        tests: List[dict] = None,
        datagroups: List[dict] = None,
        map_layers: List[dict] = None,
        named_value_formats: List[dict] = None,
        fiscal_month_offset: int = None,
        persist_for: str = None,
        persist_with: str = None,
        week_start_day: str = None,
        case_sensitive: bool = True,
    ):
        """"""
        assembled_model_dict = dict()
        logger.info("Creating LookML Model: {}".format(model_name))

        # Add optional model options
        if connection:
            assembled_model_dict["connection"] = connection
        if label:
            assembled_model_dict["label"] = label
        if includes:
            assembled_model_dict["includes"] = includes
        if persist_for:
            assembled_model_dict["persist_for"] = persist_for
        if persist_with:
            assembled_model_dict["persist_with"] = persist_with
        if fiscal_month_offset:
            assembled_model_dict["fiscal_month_offset"] = fiscal_month_offset
        if week_start_day:
            assembled_model_dict["week_start_day"] = week_start_day
        if not case_sensitive:
            assembled_model_dict["case_sensitive"] = "no"

        # Add body of Model
        if datagroups:
            assembled_model_dict["datagroups"] = datagroups
        if access_grants:
            assembled_model_dict["access_grants"] = access_grants
        if explores:
            assembled_model_dict["explores"] = explores
        if named_value_formats:
            assembled_model_dict["named_value_formats"] = named_value_formats
        if map_layers:
            assembled_model_dict["map_layers"] = map_layers
        if tests:
            assembled_model_dict["tests"] = tests

        return super().__init__(
            model_name,
            "model",
            directory_path=directory_path,
            lookml_data=assembled_model_dict,
        )
Example #27
    def run_dbt_test(self, *args, **kwargs) -> None:
        """Run `dbt test` command to run tests against dbt models."""
        logger.info("Running dbt tests to validate models...")
        result = self._dbt_cli_runner(DBT_TEST, *args, **kwargs)
        logger.info(result)
Example #28
    def run_dbt_clean(self, *args, **kwargs) -> None:
        """Run `dbt clean` command to remove clean-target folders (usually dbt_modules and target) from the dbt project."""
        logger.info("Removing dbt clean target folders from dbt project...")
        result = self._dbt_cli_runner(DBT_CLEAN, *args, **kwargs)
        logger.info(result)
Example #29
    def run_dbt_compile(self, *args, **kwargs) -> None:
        """Run `dbt compile` command to compile dbt models."""
        logger.info("Compiling dbt models...")
        result = self._dbt_cli_runner(DBT_COMPILE, *args, **kwargs)
        logger.info(result)
Example #30
    def run_dbt_init(self, *args, **kwargs) -> None:
        """Run `dbt init` command to create a base dbt project."""
        logger.info("Initializing a base dbt project...")
        result = self._dbt_cli_runner(DBT_INIT, *args, **kwargs)
        logger.info(result)