Exemple #1
0
 def _get_query_results(self,
                        query_task_ids: List[str]) -> List[QueryResult]:
     """Fetch the current state of the given query tasks.

     Args:
         query_task_ids: IDs of the query tasks to look up.

     Returns:
         One QueryResult per entry returned by the client, holding the task
         ID and status. For tasks whose status is "error", the extracted
         error details are attached as ``error``.

     Raises:
         SpectaclesException: If a status outside the known set is returned,
             or if the details of a failed query cannot be extracted.
     """
     known_statuses = ("complete", "error", "running", "added", "expired")
     collected = []
     response = self.client.get_query_task_multi_results(query_task_ids)
     for task_id, payload in response.items():
         status = payload["status"]
         if status not in known_statuses:
             raise SpectaclesException(
                 name="unexpected-query-result-status",
                 title="Encountered an unexpected query result status.",
                 detail=(f"Query result status '{status}' was returned "
                         "by the Looker API."),
             )
         logger.debug(f"Query task {task_id} status is: {status}")
         entry = QueryResult(task_id, status)
         if status == "error":
             try:
                 details = self._extract_error_details(payload)
             except (KeyError, TypeError, IndexError) as error:
                 # The raw payload only goes to the debug log; the raised
                 # exception just points the user at that log entry.
                 logger.debug(
                     f"Exiting because of unexpected query result format: {payload}"
                 )
                 raise SpectaclesException(
                     name="unexpected-query-result-format",
                     title="Encountered an unexpected query result format.",
                     detail=("Unable to extract error details. "
                             "The unexpected result has been logged."),
                 ) from error
             entry.error = details
         collected.append(entry)
     return collected
Exemple #2
0
    def parse_selectors(selectors: List[str]) -> DefaultDict[str, set]:
        """Group 'model_name.explore_name' selectors by model.

        Args:
            selectors: Selector strings in 'model_name.explore_name' format.
                The '*' wildcard is kept verbatim, so 'model_name.*' records
                '*' as the explore name for 'model_name'.

        Returns:
            DefaultDict[str, set]: Model names mapped to the set of explore
                names requested for each model.

        Raises:
            SpectaclesException: If a selector does not split into exactly
                two parts on '.'.

        """
        explores_by_model: DefaultDict = defaultdict(set)
        for selector in selectors:
            try:
                model_name, explore_name = selector.split(".")
            except ValueError:
                # Too few or too many dot-separated parts.
                message = (
                    f"Explore selector '{selector}' is not valid.\n"
                    "Instead, use the format 'model_name.explore_name'. "
                    "Use 'model_name.*' to select all explores in a model.")
                raise SpectaclesException(message)
            explores_by_model[model_name].add(explore_name)
        return explores_by_model
Exemple #3
0
    def __call__(self, parser, namespace, values, option_string):
        """Apply settings from the config file to the parser and namespace.

        Args:
            parser: Parent argparse parser that is calling the action.
            namespace: Object where parsed values will be set.
            values: Path of the config file, as parsed from the command line.
            option_string: Argument string, e.g. "--optional".

        Raises:
            SpectaclesException: If the config file contains a key that does
                not match the ``dest`` of any action registered on the parser.

        """
        config = self.parse_config(path=values)
        for dest, value in config.items():
            # First registered action whose destination matches the config key.
            action = next(
                (candidate for candidate in parser._actions
                 if dest == candidate.dest),
                None,
            )
            if action is None:
                raise SpectaclesException(
                    f"'{dest}' in {values} is not a valid configuration parameter."
                )

            # Required actions that are fulfilled by config are no longer
            # required from the command line.
            action.required = False

            # Override default if not previously set by an environment variable.
            set_by_env_var = isinstance(
                action, EnvVarAction) and os.environ.get(action.env_var)
            if not set_by_env_var:
                setattr(namespace, dest, value)
        parser.set_defaults(**config)
Exemple #4
0
 def _create_and_run(self, mode: QueryMode = "batch") -> None:
     """Create the queries for ``mode`` and run them.

     If the run is interrupted from the keyboard, the running query tasks are
     looked up and passed to ``_cancel_queries`` before the interruption is
     reported.

     Raises:
         SpectaclesException: If a KeyboardInterrupt occurs while creating or
             running queries.
     """
     try:
         self._run_queries(self._create_queries(mode))
     except KeyboardInterrupt:
         logger.info(
             "\n\nPlease wait, asking Looker to cancel any running queries..."
         )
         running = self.get_running_query_tasks()
         self._cancel_queries(running)
         if not running:
             detail = (
                 "No queries were running at the time so nothing was cancelled."
             )
         else:
             noun = "query" if len(running) == 1 else "queries"
             detail = f"Attempted to cancel {len(running)} running {noun}."
         raise SpectaclesException(
             name="validation-keyboard-interrupt",
             title="SQL validation was manually interrupted.",
             detail=detail,
         )
Exemple #5
0
    def __init__(
        self,
        base_url: str,
        client_id: str,
        client_secret: str,
        port: int = 19999,
        api_version: float = 3.1,
    ):
        """Store the connection settings and authenticate.

        Args:
            base_url: Base URL of the Looker instance; trailing slashes are
                stripped.
            client_id: Stored on the instance as ``client_id``.
            client_secret: Stored on the instance as ``client_secret``.
            port: Port used when building the API URL.
            api_version: Looker API version; must be a supported version.

        Raises:
            SpectaclesException: If ``api_version`` is not supported.
        """
        supported_api_versions = [3.1]
        if api_version not in supported_api_versions:
            supported_listing = ", ".join(
                str(ver) for ver in sorted(supported_api_versions))
            raise SpectaclesException(
                name="unsupported-api-version",
                title="Specified API version is not supported.",
                detail=(
                    f"Version '{api_version}' is not supported. "
                    "Please use one of these supported versions instead: "
                    f"{supported_listing}"
                ),
            )

        trimmed_url = base_url.rstrip("/")
        self.base_url: str = trimmed_url
        self.api_url: str = f"{trimmed_url}:{port}/api/{api_version}/"
        self.client_id: str = client_id
        self.client_secret: str = client_secret
        self.api_version: float = api_version
        # No token is held until authenticate() runs below.
        self.access_token: Optional[AccessToken] = None
        self.session: requests.Session = requests.Session()

        self.authenticate()
Exemple #6
0
 def __init__(self, client: LookerClient, project: str):
     """Set up the validator for one project.

     Args:
         client: Looker client passed on to the parent initializer.
         project: Name of the project; wrapped in a Project with no models.

     Raises:
         SpectaclesException: If the client reports that the Looker release
             is older than ``MIN_LOOKER_VERSION``.
     """
     super().__init__(client)
     version_ok = self.client.validate_looker_release_version(
         required_version=self.MIN_LOOKER_VERSION)
     if version_ok:
         self.project = Project(project, models=[])
         self.query_tasks: dict = {}
         return
     raise SpectaclesException(
         "SQL validation requires version "
         f"{self.MIN_LOOKER_VERSION} of Looker or higher.")
Exemple #7
0
    def validate(self) -> Dict[str, Any]:
        """Run the project's data tests for the selected explores.

        Tests whose explore cannot be found on the project are skipped. Every
        remaining test is run through the client, its explore is marked as
        queried, and each reported error is appended to that explore.

        Returns:
            The project's results for the "data_test" validator.

        Raises:
            SpectaclesException: If no test is associated with a selected
                explore.
        """
        chosen_tests = []
        # The error objects don't contain the explore, so keep a lookup from
        # test name to the explore object it belongs to.
        explore_by_test_name = {}
        for candidate in self.client.all_lookml_tests(self.project.name):
            matched: Optional[Explore] = self.project.get_explore(
                model=candidate["model_name"], name=candidate["explore_name"])
            if matched is not None:
                chosen_tests.append(candidate)
                explore_by_test_name[candidate["name"]] = matched

        if not chosen_tests:
            raise SpectaclesException(
                name="no-data-tests-found",
                title="No data tests found.",
                detail=
                ("If you're using --explores or --exclude, make sure your project "
                 "has data tests that reference those models or explores."),
            )

        for candidate in chosen_tests:
            response = self.client.run_lookml_test(
                self.project.name,
                model=candidate["model_name"],
                test=candidate["name"],
            )
            explore = explore_by_test_name[candidate["name"]]
            explore.queried = True
            outcome = response[0]  # For a single test, list with length 1

            for failure in outcome["errors"]:
                # Drop the leading path segment; only the remainder goes
                # into the file URL.
                _, file_path = failure["file_path"].split("/", 1)
                lookml_url = (
                    f"{self.client.base_url}/projects/{self.project.name}"
                    f"/files/{file_path}?line={failure['line_number']}")
                data_test_error = DataTestError(
                    model=failure["model_id"],
                    explore=failure["explore"],
                    message=failure["message"],
                    test_name=outcome["test_name"],
                    lookml_url=lookml_url,
                )
                explore.errors.append(data_test_error)

        return self.project.get_results(validator="data_test")
Exemple #8
0
 def _select(self, choices: Sequence[str],
             select_from: Sequence) -> Sequence:
     unique_choices = set(choices)
     select_from_names = set(each.name for each in select_from)
     difference = unique_choices.difference(select_from_names)
     if difference:
         raise SpectaclesException(
             f"{select_from[0].__class__.__name__}"
             f'{"" if len(difference) == 1 else "s"} ' +
             ", ".join(difference) +
             f" not found in LookML under project '{self.project.name}'")
     return [each for each in select_from if each.name in unique_choices]
Exemple #9
0
    def _get_query_results(
            self,
            query_task_ids: List[str]) -> Tuple[List[str], List[SqlError]]:
        """Check the given query tasks and collect any SQL errors.

        Args:
            query_task_ids: IDs of the query tasks to look up.

        Returns:
            A tuple of the task IDs whose status is "running", "added" or
            "expired", and the SqlError objects built for tasks whose status
            is "error".

        Raises:
            SpectaclesException: If an unknown status is returned, or if the
                details of a failed query cannot be extracted.
        """
        unfinished_statuses = ("running", "added", "expired")
        finished_statuses = ("complete", "error")

        response = self.client.get_query_task_multi_results(query_task_ids)
        still_running = []
        errors = []

        for task_id, payload in response.items():
            status = payload["status"]
            logger.debug("Query task %s status is %s", task_id, status)

            if status in unfinished_statuses:
                still_running.append(task_id)
                continue
            if status not in finished_statuses:
                raise SpectaclesException(
                    f'Unexpected query result status "{status}" '
                    "returned by the Looker API")

            # Finished either way: flag the LookML object behind this task.
            lookml_object = self.query_tasks[task_id]
            lookml_object.queried = True
            if status != "error":
                continue

            try:
                details = self._extract_error_details(payload)
            except (KeyError, TypeError, IndexError) as error:
                raise SpectaclesException(
                    "Encountered an unexpected API query result format, "
                    "unable to extract error details. "
                    f"The query result was: {payload}") from error
            sql_error = SqlError(
                path=lookml_object.name,
                url=getattr(lookml_object, "url", None),
                **details,
            )
            lookml_object.error = sql_error
            errors.append(sql_error)

        return still_running, errors
Exemple #10
0
    def __init__(
        self,
        base_url: str,
        client_id: str,
        client_secret: str,
        port: Optional[int] = None,
        api_version: float = 3.1,
    ):
        """Store the connection settings, build the API URL and authenticate.

        Args:
            base_url: Base URL of the Looker instance; trailing slashes are
                stripped.
            client_id: Stored on the instance as ``client_id``.
            client_secret: Stored on the instance as ``client_secret``.
            port: Port for the API URL. When omitted, no port is added for
                URLs ending in "cloud.looker.com"; otherwise 19999 is used.
            api_version: Looker API version; must be a supported version.

        Raises:
            SpectaclesException: If ``api_version`` is not supported, or if a
                "cloud.looker.com" URL without an explicit port does not
                start with "https".
        """
        supported_api_versions = [3.1]
        if api_version not in supported_api_versions:
            supported_listing = ", ".join(
                str(ver) for ver in sorted(supported_api_versions))
            raise SpectaclesException(
                name="unsupported-api-version",
                title="Specified API version is not supported.",
                detail=(f"Version '{api_version}' is not supported. "
                        "Please use one of these supported versions instead: "
                        f"{supported_listing}"),
            )

        self.base_url: str = base_url.rstrip("/")
        portless_cloud_host = (port is None
                               and self.base_url.endswith("cloud.looker.com"))
        if not portless_cloud_host:
            self.api_url: str = (
                f"{self.base_url}:{port or 19999}/api/{api_version}/")
        elif self.base_url.startswith("https"):
            # GCP-hosted instance, so use default port of 443 with HTTPS
            self.api_url = f"{self.base_url}/api/{api_version}/"
        else:
            raise SpectaclesException(
                name="invalid-base-url",
                title="Looker instance base URL is not valid.",
                detail="The URL must be an HTTPS URL.",
            )
        self.client_id: str = client_id
        self.client_secret: str = client_secret
        self.api_version: float = api_version
        self.access_token: Optional[AccessToken] = None
        self.session: requests.Session = requests.Session()

        self.authenticate()
Exemple #11
0
    async def _query(self, mode: str = "batch") -> List[SqlError]:
        """Schedule queries for the project and wait for their results.

        One task is created per explore when ``mode`` is "batch", or when it
        is "hybrid" and the explore has not been queried. Otherwise, one task
        per dimension is created when ``mode`` is "single", or when it is
        "hybrid" and the explore is flagged as errored. These tasks are
        awaited together with a ``_check_for_results`` task.

        Args:
            mode: One of "batch", "single" or "hybrid". Explores matching
                neither branch for the given mode get no task.

        Returns:
            The value produced by ``_check_for_results`` (annotated as a list
            of SqlError).

        Raises:
            SpectaclesException: If the wait is cancelled. Before raising,
                every ID left in ``running_query_tasks`` is sent to the
                client's ``cancel_query_task``.
        """
        # Dedicated aiohttp session that reuses the headers of the client's
        # existing session; closed in the `finally` clause below.
        session = aiohttp.ClientSession(headers=self.client.session.headers,
                                        timeout=self.timeout)

        query_tasks = []
        for model in self.project.models:
            for explore in model.explores:
                if mode == "batch" or (mode == "hybrid"
                                       and not explore.queried):
                    # Explore-level query.
                    task = asyncio.create_task(
                        self._query_explore(session, model, explore))
                    query_tasks.append(task)
                elif mode == "single" or (mode == "hybrid"
                                          and explore.errored):
                    # Dimension-level queries, one task per dimension.
                    for dimension in explore.dimensions:
                        task = asyncio.create_task(
                            self._query_dimension(session, model, explore,
                                                  dimension))
                        query_tasks.append(task)

        # `queries` aggregates the query tasks; `query_results` is a separate
        # task that receives the same task list.
        queries = asyncio.gather(*query_tasks)
        query_results = asyncio.create_task(
            self._check_for_results(session, query_tasks))
        try:
            results = await asyncio.gather(queries, query_results)
        except asyncio.CancelledError:
            # Drain the queue of running query task IDs so each one can be
            # cancelled through the client.
            query_task_ids = []
            while not self.running_query_tasks.empty():
                query_task_ids.append(await self.running_query_tasks.get())
            cancel_query_tasks = []
            for query_task_id in query_task_ids:
                task = asyncio.create_task(
                    self.client.cancel_query_task(session, query_task_id))
                cancel_query_tasks.append(task)

            # Wait for all cancellation requests before reporting.
            await asyncio.gather(*cancel_query_tasks)

            message = "Spectacles was manually interrupted. "
            if query_task_ids:
                message += (
                    "Spectacles attempted to cancel "
                    f"{len(query_task_ids)} running "
                    f"{'query' if len(query_task_ids) == 1 else 'queries'}.")
            else:
                message += "No queries were running at the time."
            raise SpectaclesException(message)
        else:
            errors = results[1]  # Ignore the results from creating the queries
            return errors
        finally:
            # Runs on success, cancellation and any other exception.
            await session.close()
Exemple #12
0
def selector_to_pattern(selector: str) -> str:
    """Convert a 'model_name/explore_name' selector into an anchored regex.

    Every '*' in the selector is replaced with the non-greedy pattern '.+?'
    and the result is wrapped in '^' and '$'.

    Args:
        selector: Selector with exactly one '/' separating a non-empty model
            part from a non-empty explore part.

    Returns:
        The regular expression pattern as a string.

    Raises:
        SpectaclesException: If the selector does not consist of exactly two
            non-empty parts separated by '/'.
    """
    parts = selector.split("/")
    # Checked explicitly rather than with `assert`: assertions are removed
    # when Python runs with -O, which would let selectors such as 'model/'
    # or '/explore' through unvalidated.
    if len(parts) != 2 or not all(parts):
        raise SpectaclesException(
            name="invalid-selector-format",
            title="Specified explore selector is invalid.",
            detail=(f"'{selector}' is not a valid format. "
                    "Instead, use the format 'model_name/explore_name'. "
                    "Use 'model_name/*' to select all explores in a model."),
        )
    return f"^{selector.replace('*', '.+?')}$"
Exemple #13
0
 def __init__(self, env_var, required=False, default=False, **kwargs):
     """Take the default for a boolean option from an environment variable.

     Args:
         env_var: Name of the environment variable to read.
         required: Whether the argument is required; reset to False when
             the resulting default is truthy.
         default: Default used when the environment variable is not set.
         **kwargs: Passed through to the parent initializer.

     Raises:
         SpectaclesException: If the variable is set to anything other than
             'true' or 'false' (compared case-insensitively).
     """
     self.env_var = env_var
     raw_value = os.environ.get(env_var)
     if raw_value is not None:
         value = raw_value.lower()
         if value not in ("true", "false"):
             raise SpectaclesException(
                 name="invalid-env-var-value",
                 title="Invalid value for environment variable.",
                 detail=(
                     f"Allowed values for {env_var} are 'true' or 'false' "
                     f"(case-insensitive), received '{value}'"),
             )
         default = value == "true"
     # A truthy default already satisfies a required argument.
     if required and default:
         required = False
     super().__init__(default=default, required=required, **kwargs)
Exemple #14
0
    def __init__(
        self,
        base_url: str,
        client_id: str,
        client_secret: str,
        port: int = 19999,
        api_version: float = 3.1,
    ):
        """Build the API URL, open an HTTP session and authenticate.

        Args:
            base_url: Base URL of the Looker instance; trailing slashes are
                stripped.
            client_id: Passed to ``authenticate``.
            client_secret: Passed to ``authenticate``.
            port: Port used when building the API URL.
            api_version: Looker API version; must be a supported version.

        Raises:
            SpectaclesException: If ``api_version`` is not supported.
        """
        supported_api_versions = [3.1]
        if api_version not in supported_api_versions:
            supported_listing = ", ".join(
                str(ver) for ver in sorted(supported_api_versions))
            raise SpectaclesException(
                f"API version {api_version} is not supported. "
                "Please use one of these supported versions instead: "
                f"{supported_listing}"
            )

        trimmed_url = base_url.rstrip("/")
        self.base_url: str = trimmed_url
        self.api_url: str = f"{trimmed_url}:{port}/api/{api_version}/"
        self.session: requests.Session = requests.Session()

        self.authenticate(client_id, client_secret, api_version)
Exemple #15
0
    def validate(
        self,
        selectors: Optional[List[str]] = None,
        exclusions: Optional[List[str]] = None,
    ) -> Dict[str, Any]:
        """Run the selected data tests and summarise the outcome per explore.

        Args:
            selectors: Selectors passed to ``is_selected``; defaults to
                ``["*/*"]``.
            exclusions: Exclusions passed to ``is_selected``; defaults to an
                empty list.

        Returns:
            A dict with the keys "validator", "status", "tested" and
            "errors". "tested" holds one entry per (model, explore) pair and
            "errors" holds the attribute dicts of the DataTestError objects.

        Raises:
            SpectaclesException: If no test matches the selection.
        """
        selectors = ["*/*"] if selectors is None else selectors
        exclusions = [] if exclusions is None else exclusions

        selected_tests = [
            candidate
            for candidate in self.client.all_lookml_tests(self.project)
            if is_selected(candidate["model_name"], candidate["explore_name"],
                           selectors, exclusions)
        ]
        # The error objects don't contain the name of the explore, so keep a
        # lookup from test name to explore name.
        explore_by_test_name = {
            candidate["name"]: candidate["explore_name"]
            for candidate in selected_tests
        }

        if not selected_tests:
            raise SpectaclesException(
                name="no-data-tests-found",
                title="No data tests found.",
                detail=
                ("If you're using --explores or --exclude, make sure your project "
                 "has data tests that reference those models or explores."),
            )

        test_count = len(selected_tests)
        noun = "test" if test_count == 1 else "tests"
        printer.print_header(f"Running {test_count} {noun}")

        test_results: List[Dict[str, Any]] = []
        for candidate in selected_tests:
            test_results.extend(
                self.client.run_lookml_test(self.project,
                                            model=candidate["model_name"],
                                            test=candidate["name"]))

        outcomes = []
        errors = []
        for result in test_results:
            explore_name = explore_by_test_name[result["test_name"]]
            outcomes.append({
                "model": result["model_name"],
                "explore": explore_name,
                "passed": result["success"],
            })
            if result["success"]:
                continue
            for failure in result["errors"]:
                # Drop the leading path segment; only the remainder goes
                # into the file URL.
                _, file_path = failure["file_path"].split("/", 1)
                lookml_url = (
                    f"{self.client.base_url}/projects/{self.project}"
                    f"/files/{file_path}?line={failure['line_number']}")
                data_test_error = DataTestError(
                    model=failure["model_id"],
                    explore=failure["explore"],
                    message=failure["message"],
                    test_name=result["test_name"],
                    lookml_url=lookml_url,
                )
                errors.append(data_test_error.__dict__)

        # Aggregate individual test results to get pass/fail by explore:
        # collect the distinct `passed` values per (model, explore) pair in
        # first-seen order, then reduce each set with min().
        flags_by_explore = OrderedDict()
        for outcome in outcomes:
            pair = (outcome["model"], outcome["explore"])
            flags_by_explore.setdefault(pair, set()).add(outcome["passed"])
        tested = [{
            "model": model_name,
            "explore": explore_name,
            "passed": min(flags),
        } for (model_name, explore_name), flags in flags_by_explore.items()]

        for entry in tested:
            printer.print_validation_result(
                passed=entry["passed"],
                source=f"{entry['model']}.{entry['explore']}")

        passed = min((entry["passed"] for entry in tested), default=True)
        status = "passed" if passed else "failed"
        return {
            "validator": "data_test",
            "status": status,
            "tested": tested,
            "errors": errors,
        }
Exemple #16
0
def main():
    """Entry point: parse the command line and run the requested command.

    Checks the Python version, configures log handlers, optionally tracks the
    start and end of the invocation, and dispatches to the runner for the
    "connect", "sql", "assert" or "content" command.

    Raises:
        SpectaclesException: If the interpreter is older than Python 3.7.
    """
    if sys.version_info < (3, 7):
        raise SpectaclesException(
            name="insufficient-python-version",
            title="Spectacles requires Python 3.7 or higher.",
            detail="The current Python version is %s." %
            platform.python_version(),
        )

    args = create_parser().parse_args()
    for handler in logger.handlers:
        handler.setLevel(args.log_level)

    set_file_handler(args.log_dir)

    # The helpers below are evaluated lazily, inside the branch that needs
    # them, so attributes are only read for commands that use them.
    def connection_args():
        """Positional Looker connection settings shared by every command."""
        return (
            args.base_url,
            args.client_id,
            args.client_secret,
            args.port,
            args.api_version,
        )

    def target_args():
        """Project, branch and explore selection shared by validators."""
        return (args.project, args.branch, args.explores, args.exclude)

    def tracked_project():
        """Project reported to tracking; None for the connect command."""
        return args.project if args.command != "connect" else None

    if not args.do_not_track:
        invocation_id = tracking.track_invocation_start(
            args.base_url,
            args.command,
            project=tracked_project(),
        )

    command = args.command
    if command == "connect":
        run_connect(*connection_args())
    elif command == "sql":
        run_sql(
            args.log_dir,
            *target_args(),
            *connection_args(),
            args.mode,
            args.remote_reset,
            args.import_projects,
            args.concurrency,
            args.commit_ref,
        )
    elif command == "assert":
        run_assert(
            *target_args(),
            *connection_args(),
            args.remote_reset,
            args.import_projects,
            args.commit_ref,
        )
    elif command == "content":
        run_content(
            *target_args(),
            *connection_args(),
            args.remote_reset,
            args.import_projects,
            args.commit_ref,
            args.incremental,
            args.exclude_personal,
        )

    if not args.do_not_track:
        tracking.track_invocation_end(
            args.base_url,
            args.command,
            invocation_id,
            tracked_project(),
        )
Exemple #17
0
    async def _get_query_results(
            self, session: aiohttp.ClientSession) -> List[SqlError]:
        """Fetch results for a batch of running query tasks.

        Up to 250 task IDs are taken off ``running_query_tasks`` and looked
        up through the client. Tasks that are still "running", "added" or
        "expired" are put back on the queue. For tasks that are "complete"
        or "error", a query slot is released and the LookML object is marked
        as queried; errored tasks also get a SqlError attached.

        Args:
            session: aiohttp session passed on to the client.

        Returns:
            The SqlError objects built for the tasks in this batch whose
            status is "error".

        Raises:
            SpectaclesException: If an unknown status is returned, or if the
                details of a failed query cannot be extracted.
            asyncio.CancelledError: Re-raised after the task IDs still held
                by this call have been put back on the queue.
        """
        max_batch_size = 250
        logger.debug("%d queries running", self.running_query_tasks.qsize())
        # Bound before the `try` so the cancellation handler can always see it.
        query_task_ids: List[str] = []
        try:
            # Empty the queue (up to 250) to get all running query tasks.
            # Strictly less-than, so the batch never grows to 251 entries.
            while (not self.running_query_tasks.empty()
                   and len(query_task_ids) < max_batch_size):
                query_task_ids.append(await self.running_query_tasks.get())

            logger.debug("Getting results for %d query tasks",
                         len(query_task_ids))
            results = await self.client.get_query_task_multi_results(
                session, query_task_ids)
            errors = []

            for query_task_id, query_result in results.items():
                query_status = query_result["status"]
                logger.debug("Query task %s status is %s", query_task_id,
                             query_status)
                if query_status in ("running", "added", "expired"):
                    # Put the running query tasks back in the queue
                    await self.running_query_tasks.put(query_task_id)
                    # Handled: no longer needs restoring on cancellation.
                    query_task_ids.remove(query_task_id)
                    continue
                elif query_status in ("complete", "error"):
                    query_task_ids.remove(query_task_id)
                    # We can release a query slot for each completed query
                    self.query_slots.release()
                    lookml_object = self.query_tasks[query_task_id]
                    lookml_object.queried = True

                    if query_status == "error":
                        try:
                            details = self._extract_error_details(query_result)
                        except (KeyError, TypeError, IndexError) as error:
                            raise SpectaclesException(
                                "Encountered an unexpected API query result format, "
                                "unable to extract error details. "
                                f"The query result was: {query_result}"
                            ) from error
                        sql_error = SqlError(
                            path=lookml_object.name,
                            url=getattr(lookml_object, "url", None),
                            **details,
                        )
                        lookml_object.error = sql_error
                        errors.append(sql_error)
                else:
                    raise SpectaclesException(
                        f'Unexpected query result status "{query_status}" '
                        "returned by the Looker API")
        except asyncio.CancelledError:
            # Report how many IDs this call is handing back, not the size of
            # the queue itself.
            logger.debug(
                "Cancelled result fetching, putting "
                "%d query task IDs back in the queue", len(query_task_ids))
            for query_task_id in query_task_ids:
                await self.running_query_tasks.put(query_task_id)
            logger.debug("Restored query task IDs to queue")
            raise

        return errors