Exemple #1
0
    def _build_dataset_mce(self,
                           looker_view: LookerView) -> MetadataChangeEvent:
        """
        Assemble the MetadataChangeEvent that describes one Looker view as a dataset.

        The snapshot's aspects are appended in this order: browse paths, a
        ``Status(removed=False)`` marker, upstream lineage (only when
        ``_get_upstream_lineage`` returns something), schema metadata (only when
        ``LookerUtil._get_schema`` returns something), and finally whatever
        ``_get_custom_properties`` produces for the view.
        """
        view_id = looker_view.id
        logger.debug(f"looker_view = {view_id}")

        config = self.source_config
        snapshot = DatasetSnapshot(urn=view_id.get_urn(config), aspects=[])
        # The snapshot starts out empty; every aspect below is added to this list.
        aspects = snapshot.aspects

        aspects.append(BrowsePaths(paths=[view_id.get_browse_path(config)]))
        aspects.append(Status(removed=False))

        # Lineage and schema are optional: skip them when the helper returns None.
        lineage = self._get_upstream_lineage(looker_view)
        if lineage is not None:
            aspects.append(lineage)

        schema = LookerUtil._get_schema(
            config.platform_name,
            view_id.view_name,
            looker_view.fields,
            self.reporter,
        )
        if schema is not None:
            aspects.append(schema)

        aspects.append(self._get_custom_properties(looker_view))

        return MetadataChangeEvent(proposedSnapshot=snapshot)
Exemple #2
0
    def get_workunits(self) -> Iterable[MetadataWorkUnit]:
        """
        Yield work units for Looker dashboards and charts, then explores, then tags.

        Dashboard ids come from ``client.all_dashboards`` plus, when
        ``source_config.include_deleted`` is set, the deleted dashboards returned
        by ``client.search_dashboards``. Each dashboard is fetched one at a time;
        a dashboard is skipped (and reported) when it is rejected by
        ``dashboard_pattern``, when fetching it raises ``SDKError``, or when
        ``skip_personal_folders`` is set and it lives in a personal folder.

        After the dashboards, the events from ``_make_explore_metadata_events``
        are emitted, followed by the tag MCEs when
        ``tag_measures_and_dimensions`` is enabled and at least one explore
        event was produced.

        Every work unit is passed to ``reporter.report_workunit`` before it is
        yielded.

        Raises:
            AssertionError: if a listed dashboard has no id.
            Exception: if an explore event is neither a MetadataChangeEvent nor
                a MetadataChangeProposalWrapper.
        """
        dashboards = self.client.all_dashboards(fields="id")
        deleted_dashboards = (
            self.client.search_dashboards(fields="id", deleted="true")
            if self.source_config.include_deleted
            else []
        )
        if deleted_dashboards:
            logger.debug(f"Deleted Dashboards = {deleted_dashboards}")

        dashboard_ids = [dashboard_base.id for dashboard_base in dashboards]
        dashboard_ids.extend(
            deleted_dashboard.id for deleted_dashboard in deleted_dashboards
        )

        # The set of requested fields is the same for every dashboard, so the
        # comma-separated field string is built once instead of per iteration.
        dashboard_fields = ",".join(
            [
                "id",
                "title",
                "dashboard_elements",
                "dashboard_filters",
                "deleted",
                "description",
                "folder",
                "user_id",
            ]
        )

        for dashboard_id in dashboard_ids:
            assert dashboard_id is not None
            self.reporter.report_dashboards_scanned()
            if not self.source_config.dashboard_pattern.allowed(dashboard_id):
                self.reporter.report_dashboards_dropped(dashboard_id)
                continue
            try:
                dashboard_object = self.client.dashboard(
                    dashboard_id=dashboard_id, fields=dashboard_fields
                )
            except SDKError:
                # A looker dashboard could be deleted in between the list and the get
                self.reporter.report_warning(
                    dashboard_id,
                    f"Error occurred while loading dashboard {dashboard_id}. Skipping.",
                )
                continue

            if (
                self.source_config.skip_personal_folders
                and dashboard_object.folder is not None
                and (
                    dashboard_object.folder.is_personal
                    or dashboard_object.folder.is_personal_descendant
                )
            ):
                self.reporter.report_warning(
                    dashboard_id, "Dropped due to being a personal folder"
                )
                self.reporter.report_dashboards_dropped(dashboard_id)
                continue

            looker_dashboard = self._get_looker_dashboard(dashboard_object, self.client)
            for mce in self._make_dashboard_and_chart_mces(looker_dashboard):
                workunit = MetadataWorkUnit(
                    id=f"looker-{mce.proposedSnapshot.urn}", mce=mce
                )
                self.reporter.report_workunit(workunit)
                yield workunit

        if (
            self.source_config.extract_owners
            and self.resolved_user_ids > 0
            and self.email_ids_missing == self.resolved_user_ids
        ):
            # Looks like we tried to extract owners and could not find their email addresses. This is likely a permissions issue
            self.reporter.report_warning(
                "api",
                "Failed to extract owners emails for any dashboards. Please enable the see_users permission for your Looker API key",
            )

        explore_events = self._make_explore_metadata_events()
        for event in explore_events:
            if isinstance(event, MetadataChangeEvent):
                workunit = MetadataWorkUnit(
                    id=f"looker-{event.proposedSnapshot.urn}", mce=event
                )
            elif isinstance(event, MetadataChangeProposalWrapper):
                # We want to treat subtype aspects as optional, so failures in
                # that aspect are downgraded to warnings rather than failures.
                workunit = MetadataWorkUnit(
                    id=f"looker-{event.entityUrn}-{event.aspectName}",
                    mcp=event,
                    treat_errors_as_warnings=event.aspectName == "subTypes",
                )
            else:
                raise Exception(f"Unexpected type of event {event}")

            self.reporter.report_workunit(workunit)
            yield workunit

        if self.source_config.tag_measures_and_dimensions and explore_events:
            # Emit tag MCEs for measures and dimensions if we produced any explores:
            for tag_mce in LookerUtil.get_tag_mces():
                workunit = MetadataWorkUnit(
                    id=f"tag-{tag_mce.proposedSnapshot.urn}",
                    mce=tag_mce,
                )
                self.reporter.report_workunit(workunit)
                yield workunit
Exemple #3
0
    def get_workunits(self) -> Iterable[MetadataWorkUnit]:  # noqa: C901
        """
        Yield work units for the views included by the LookML models found under
        ``source_config.base_folder``.

        Each ``*.model.lkml`` file (searched recursively, visited in sorted order)
        is loaded; models rejected by ``model_pattern``, models that fail to load
        and models whose connection cannot be resolved are reported and skipped.
        For every include of a model, each view in the loaded view file is turned
        into a dataset MCE work unit followed by its MCP work units, provided the
        view name passes ``view_pattern``. Finally the tag MCEs are emitted when
        ``tag_measures_and_dimensions`` is set and the reporter has recorded at
        least one produced work unit.

        Raises:
            AssertionError: if a loaded model has no connection.
        """
        config = self.source_config
        reporter = self.reporter
        loader = LookerViewFileLoader(str(config.base_folder), reporter)

        # A view file can be named by several 'include' statements; includes that
        # have already produced a work unit are remembered here and skipped later.
        emitted_includes: Set[str] = set()

        # "**" means this directory and all subdirectories.
        for file_path in sorted(config.base_folder.glob("**/*.model.lkml")):
            reporter.report_models_scanned()
            # The stem still ends in ".model" (only ".lkml" is stripped); cut it off.
            model_name = file_path.stem[0:-len(".model")]

            if not config.model_pattern.allowed(model_name):
                reporter.report_models_dropped(model_name)
                continue

            try:
                logger.debug(f"Attempting to load model: {file_path}")
                model = self._load_model(str(file_path))
            except Exception as e:
                reporter.report_warning(
                    model_name,
                    f"unable to load Looker model at {file_path}: {repr(e)}",
                )
                continue

            assert model.connection is not None
            connection_def = self._get_connection_def_based_on_connection_string(
                model.connection
            )
            if connection_def is None:
                reporter.report_warning(
                    f"model-{model_name}",
                    f"Failed to load connection {model.connection}. Check your API key permissions.",
                )
                reporter.report_models_dropped(model_name)
                continue

            project_name = self.get_project_name(model_name)

            for include in model.resolved_includes:
                logger.debug(f"Considering {include} for model {model_name}")
                if include in emitted_includes:
                    logger.debug(
                        f"view '{include}' already processed, skipping it")
                    continue

                logger.debug(f"Attempting to load view file: {include}")
                viewfile = loader.load_viewfile(include, connection_def, reporter)
                if viewfile is None:
                    continue

                for raw_view in viewfile.views:
                    reporter.report_views_scanned()
                    try:
                        maybe_looker_view = LookerView.from_looker_dict(
                            project_name,
                            model_name,
                            raw_view,
                            connection_def,
                            viewfile,
                            loader,
                            reporter,
                            config.parse_table_names_from_sql,
                            config.sql_parser,
                        )
                    except Exception as e:
                        reporter.report_warning(
                            include,
                            f"unable to load Looker view {raw_view}: {repr(e)}",
                        )
                        continue

                    # Nothing usable came back for this raw view.
                    if not maybe_looker_view:
                        continue

                    if not config.view_pattern.allowed(
                            maybe_looker_view.id.view_name):
                        reporter.report_views_dropped(str(maybe_looker_view.id))
                        continue

                    mce = self._build_dataset_mce(maybe_looker_view)
                    view_workunit = MetadataWorkUnit(
                        id=f"lookml-view-{maybe_looker_view.id}",
                        mce=mce,
                    )
                    reporter.report_workunit(view_workunit)
                    emitted_includes.add(include)
                    yield view_workunit

                    for mcp in self._build_dataset_mcps(maybe_looker_view):
                        # MCP aspects are treated as optional: failures in them
                        # are surfaced as warnings rather than failures.
                        mcp_workunit = MetadataWorkUnit(
                            id=f"lookml-view-{mcp.aspectName}-{maybe_looker_view.id}",
                            mcp=mcp,
                            treat_errors_as_warnings=True,
                        )
                        reporter.report_workunit(mcp_workunit)
                        yield mcp_workunit

        wants_tags = config.tag_measures_and_dimensions
        if wants_tags and reporter.workunits_produced != 0:
            # Emit tag MCEs for measures and dimensions:
            for tag_mce in LookerUtil.get_tag_mces():
                tag_workunit = MetadataWorkUnit(
                    id=f"tag-{tag_mce.proposedSnapshot.urn}", mce=tag_mce)
                reporter.report_workunit(tag_workunit)
                yield tag_workunit
Exemple #4
0
    def get_workunits(self) -> Iterable[MetadataWorkUnit]:
        """
        Yield work units for Looker dashboards (processed on a thread pool),
        then explores, then tags.

        Dashboard ids come from ``client.all_dashboards`` plus, when
        ``source_config.include_deleted`` is set, the deleted dashboards returned
        by ``client.search_dashboards``. Each id is submitted to
        ``process_dashboard`` on a ``ThreadPoolExecutor`` sized by
        ``source_config.max_threads``. Results are consumed with
        ``as_completed``, so dashboards are emitted in completion order rather
        than submission order. Each result is unpacked as
        ``(work_units, dashboard_id, start_time, end_time)``.

        After the dashboards, the events from ``_make_explore_metadata_events``
        are emitted, followed by the tag MCEs when
        ``tag_measures_and_dimensions`` is enabled and at least one explore
        event was produced.

        Every work unit is passed to ``reporter.report_workunit`` before it is
        yielded.

        Raises:
            Exception: if an explore event is neither a MetadataChangeEvent nor
                a MetadataChangeProposalWrapper. Exceptions raised inside
                ``process_dashboard`` also surface here via ``Future.result()``.
        """
        dashboards = self.client.all_dashboards(fields="id")
        deleted_dashboards = (
            self.client.search_dashboards(fields="id", deleted="true")
            if self.source_config.include_deleted
            else []
        )
        if deleted_dashboards:
            logger.debug(f"Deleted Dashboards = {deleted_dashboards}")

        dashboard_ids = [dashboard_base.id for dashboard_base in dashboards]
        dashboard_ids.extend(
            deleted_dashboard.id for deleted_dashboard in deleted_dashboards
        )

        with concurrent.futures.ThreadPoolExecutor(
            max_workers=self.source_config.max_threads
        ) as async_executor:
            async_workunits = [
                async_executor.submit(self.process_dashboard, dashboard_id)
                for dashboard_id in dashboard_ids
            ]
            for async_workunit in concurrent.futures.as_completed(async_workunits):
                (
                    work_units,
                    dashboard_id,
                    start_time,
                    end_time,
                ) = async_workunit.result()
                logger.info(
                    f"Running time of process_dashboard for {dashboard_id} = {(end_time-start_time).total_seconds()}"
                )
                self.reporter.report_upstream_latency(start_time, end_time)
                for mwu in work_units:
                    # Report before yielding: code after a `yield` only runs
                    # when the consumer asks for the next item, so reporting
                    # afterwards would be skipped if iteration stops here.
                    self.reporter.report_workunit(mwu)
                    yield mwu

        if (
            self.source_config.extract_owners
            and self.resolved_user_ids > 0
            and self.email_ids_missing == self.resolved_user_ids
        ):
            # Looks like we tried to extract owners and could not find their email addresses. This is likely a permissions issue
            self.reporter.report_warning(
                "api",
                "Failed to extract owners emails for any dashboards. Please enable the see_users permission for your Looker API key",
            )

        explore_events = self._make_explore_metadata_events()
        for event in explore_events:
            if isinstance(event, MetadataChangeEvent):
                workunit = MetadataWorkUnit(
                    id=f"looker-{event.proposedSnapshot.urn}", mce=event
                )
            elif isinstance(event, MetadataChangeProposalWrapper):
                # We want to treat subtype aspects as optional, so failures in
                # that aspect are downgraded to warnings rather than failures.
                workunit = MetadataWorkUnit(
                    id=f"looker-{event.entityUrn}-{event.aspectName}",
                    mcp=event,
                    treat_errors_as_warnings=event.aspectName == "subTypes",
                )
            else:
                raise Exception(f"Unexpected type of event {event}")

            self.reporter.report_workunit(workunit)
            yield workunit

        if self.source_config.tag_measures_and_dimensions and explore_events:
            # Emit tag MCEs for measures and dimensions if we produced any explores:
            for tag_mce in LookerUtil.get_tag_mces():
                workunit = MetadataWorkUnit(
                    id=f"tag-{tag_mce.proposedSnapshot.urn}",
                    mce=tag_mce,
                )
                self.reporter.report_workunit(workunit)
                yield workunit