コード例 #1
0
    def _make_chart_mce(
        self, dashboard_element: LookerDashboardElement, dashboard: LookerDashboard
    ) -> MetadataChangeEvent:
        chart_urn = builder.make_chart_urn(
            self.source_config.platform_name, dashboard_element.get_urn_element_id()
        )
        chart_snapshot = ChartSnapshot(
            urn=chart_urn,
            aspects=[],
        )

        chart_type = self._get_chart_type(dashboard_element)

        chart_info = ChartInfoClass(
            type=chart_type,
            description=dashboard_element.description or "",
            title=dashboard_element.title or "",
            lastModified=ChangeAuditStamps(),
            chartUrl=dashboard_element.url(self.source_config.base_url),
            inputs=dashboard_element.get_view_urns(self.source_config),
            customProperties={
                "upstream_fields": ",".join(
                    sorted(set(dashboard_element.upstream_fields))
                )
                if dashboard_element.upstream_fields
                else ""
            },
        )
        chart_snapshot.aspects.append(chart_info)

        ownership = self.get_ownership(dashboard)
        if ownership is not None:
            chart_snapshot.aspects.append(ownership)

        return MetadataChangeEvent(proposedSnapshot=chart_snapshot)
コード例 #2
0
ファイル: looker.py プロジェクト: frsann/datahub
    def _make_chart_mce(
            self,
            dashboard_element: LookerDashboardElement) -> MetadataChangeEvent:
        chart_urn = builder.make_chart_urn(
            self.source_config.platform_name,
            dashboard_element.get_urn_element_id())
        chart_snapshot = ChartSnapshot(
            urn=chart_urn,
            aspects=[],
        )

        chart_type = self._get_chart_type(dashboard_element)

        chart_info = ChartInfoClass(
            type=chart_type,
            description=dashboard_element.description or "",
            title=dashboard_element.title or "",
            lastModified=ChangeAuditStamps(),
            chartUrl=dashboard_element.url(self.source_config.base_url),
            inputs=dashboard_element.get_view_urns(
                self.source_config.platform_name, self.source_config.env),
        )
        chart_snapshot.aspects.append(chart_info)

        return MetadataChangeEvent(proposedSnapshot=chart_snapshot)
コード例 #3
0
ファイル: metabase.py プロジェクト: hsheth2/datahub
    def construct_dashboard_from_api_data(
            self, dashboard_info: dict) -> Optional[DashboardSnapshot]:

        dashboard_id = dashboard_info.get("id", "")
        dashboard_url = f"{self.config.connect_uri}/api/dashboard/{dashboard_id}"
        try:
            dashboard_response = self.session.get(dashboard_url)
            dashboard_response.raise_for_status()
            dashboard_details = dashboard_response.json()
        except HTTPError as http_error:
            self.report.report_failure(
                key=f"metabase-dashboard-{dashboard_id}",
                reason=f"Unable to retrieve dashboard. "
                f"Reason: {str(http_error)}",
            )
            return None

        dashboard_urn = builder.make_dashboard_urn(
            self.platform, dashboard_details.get("id", ""))
        dashboard_snapshot = DashboardSnapshot(
            urn=dashboard_urn,
            aspects=[],
        )
        last_edit_by = dashboard_details.get("last-edit-info") or {}
        modified_actor = builder.make_user_urn(
            last_edit_by.get("email", "unknown"))
        modified_ts = self.get_timestamp_millis_from_ts_string(
            f"{last_edit_by.get('timestamp')}")
        title = dashboard_details.get("name", "") or ""
        description = dashboard_details.get("description", "") or ""
        last_modified = ChangeAuditStamps(
            created=AuditStamp(time=modified_ts, actor=modified_actor),
            lastModified=AuditStamp(time=modified_ts, actor=modified_actor),
        )

        chart_urns = []
        cards_data = dashboard_details.get("ordered_cards", "{}")
        for card_info in cards_data:
            chart_urn = builder.make_chart_urn(self.platform,
                                               card_info.get("id", ""))
            chart_urns.append(chart_urn)

        dashboard_info_class = DashboardInfoClass(
            description=description,
            title=title,
            charts=chart_urns,
            lastModified=last_modified,
            dashboardUrl=f"{self.config.connect_uri}/dashboard/{dashboard_id}",
            customProperties={},
        )
        dashboard_snapshot.aspects.append(dashboard_info_class)

        # Ownership
        ownership = self._get_ownership(dashboard_details.get(
            "creator_id", ""))
        if ownership is not None:
            dashboard_snapshot.aspects.append(ownership)

        return dashboard_snapshot
コード例 #4
0
    def emit_dashboards(self, workbook: Dict) -> Iterable[MetadataWorkUnit]:
        for dashboard in workbook.get("dashboards", []):
            dashboard_snapshot = DashboardSnapshot(
                urn=builder.make_dashboard_urn(self.platform, dashboard["id"]),
                aspects=[],
            )

            creator = workbook.get("owner", {}).get("username", "")
            created_at = dashboard.get("createdAt", datetime.now())
            updated_at = dashboard.get("updatedAt", datetime.now())
            last_modified = self.get_last_modified(creator, created_at, updated_at)

            site_part = f"/site/{self.config.site}" if self.config.site else ""
            dashboard_external_url = f"{self.config.connect_uri}/#{site_part}/views/{dashboard.get('path', '')}"
            title = (
                dashboard["name"].replace("/", REPLACE_SLASH_CHAR)
                if dashboard.get("name")
                else ""
            )
            chart_urns = [
                builder.make_chart_urn(self.platform, sheet.get("id"))
                for sheet in dashboard.get("sheets", [])
            ]
            dashboard_info_class = DashboardInfoClass(
                description="",
                title=title,
                charts=chart_urns,
                lastModified=last_modified,
                dashboardUrl=dashboard_external_url,
                customProperties={},
            )
            dashboard_snapshot.aspects.append(dashboard_info_class)

            if workbook.get("projectName") and workbook.get("name"):
                dashboard_name = title if title else dashboard["id"]
                # browse path
                browse_paths = BrowsePathsClass(
                    paths=[
                        f"/{self.platform}/{workbook['projectName'].replace('/', REPLACE_SLASH_CHAR)}"
                        f"/{workbook['name'].replace('/', REPLACE_SLASH_CHAR)}"
                        f"/{dashboard_name}"
                    ]
                )
                dashboard_snapshot.aspects.append(browse_paths)
            else:
                logger.debug(f"Browse path not set for dashboard {dashboard['id']}")

            # Ownership
            owner = self._get_ownership(creator)
            if owner is not None:
                dashboard_snapshot.aspects.append(owner)

            yield self.get_metadata_change_event(dashboard_snapshot)

            yield from add_entity_to_container(
                self.gen_workbook_key(workbook), "dashboard", dashboard_snapshot.urn
            )
コード例 #5
0
ファイル: mode.py プロジェクト: taufiqibrahim/datahub
    def _get_chart_urns(self, report_token: str) -> list:
        chart_urns = []
        queries = self._get_queries(report_token)
        for query in queries:
            charts = self._get_charts(report_token, query.get("token", ""))
            # build chart urns
            for chart in charts:
                chart_urn = builder.make_chart_urn(self.platform,
                                                   chart.get("token", ""))
                chart_urns.append(chart_urn)

        return chart_urns
コード例 #6
0
ファイル: metabase.py プロジェクト: hsheth2/datahub
    def construct_card_from_api_data(
            self, card_data: dict) -> Optional[ChartSnapshot]:
        card_id = card_data.get("id", "")
        card_url = f"{self.config.connect_uri}/api/card/{card_id}"
        try:
            card_response = self.session.get(card_url)
            card_response.raise_for_status()
            card_details = card_response.json()
        except HTTPError as http_error:
            self.report.report_failure(
                key=f"metabase-card-{card_id}",
                reason=f"Unable to retrieve Card info. "
                f"Reason: {str(http_error)}",
            )
            return None

        chart_urn = builder.make_chart_urn(self.platform, card_id)
        chart_snapshot = ChartSnapshot(
            urn=chart_urn,
            aspects=[],
        )

        last_edit_by = card_details.get("last-edit-info") or {}
        modified_actor = builder.make_user_urn(
            last_edit_by.get("email", "unknown"))
        modified_ts = self.get_timestamp_millis_from_ts_string(
            f"{last_edit_by.get('timestamp')}")
        last_modified = ChangeAuditStamps(
            created=AuditStamp(time=modified_ts, actor=modified_actor),
            lastModified=AuditStamp(time=modified_ts, actor=modified_actor),
        )

        chart_type = self._get_chart_type(card_details.get("id", ""),
                                          card_details.get("display"))
        description = card_details.get("description") or ""
        title = card_details.get("name") or ""
        datasource_urn = self.get_datasource_urn(card_details)
        custom_properties = self.construct_card_custom_properties(card_details)

        chart_info = ChartInfoClass(
            type=chart_type,
            description=description,
            title=title,
            lastModified=last_modified,
            chartUrl=f"{self.config.connect_uri}/card/{card_id}",
            inputs=datasource_urn,
            customProperties=custom_properties,
        )
        chart_snapshot.aspects.append(chart_info)

        if card_details.get("query_type", "") == "native":
            raw_query = (card_details.get("dataset_query",
                                          {}).get("native",
                                                  {}).get("query", ""))
            chart_query_native = ChartQueryClass(
                rawQuery=raw_query,
                type=ChartQueryTypeClass.SQL,
            )
            chart_snapshot.aspects.append(chart_query_native)

        # Ownership
        ownership = self._get_ownership(card_details.get("creator_id", ""))
        if ownership is not None:
            chart_snapshot.aspects.append(ownership)

        return chart_snapshot
コード例 #7
0
ファイル: mode.py プロジェクト: taufiqibrahim/datahub
    def construct_chart_from_api_data(self, chart_data: dict, query: dict,
                                      path: str) -> ChartSnapshot:
        chart_urn = builder.make_chart_urn(self.platform,
                                           chart_data.get("token", ""))
        chart_snapshot = ChartSnapshot(
            urn=chart_urn,
            aspects=[],
        )

        last_modified = ChangeAuditStamps()
        creator = self._get_creator(
            chart_data.get("_links", {}).get("creator", {}).get("href", ""))
        if creator is not None:
            modified_actor = builder.make_user_urn(creator)
            created_ts = int(
                dp.parse(chart_data.get("created_at", "now")).timestamp() *
                1000)
            modified_ts = int(
                dp.parse(chart_data.get("updated_at", "now")).timestamp() *
                1000)
            last_modified = ChangeAuditStamps(
                created=AuditStamp(time=created_ts, actor=modified_actor),
                lastModified=AuditStamp(time=modified_ts,
                                        actor=modified_actor),
            )

        chart_detail = (chart_data.get("view", {})
                        if len(chart_data.get("view", {})) != 0 else
                        chart_data.get("view_vegas", {}))

        mode_chart_type = chart_detail.get(
            "chartType", "") or chart_detail.get("selectedChart", "")
        chart_type = self._get_chart_type(chart_data.get("token", ""),
                                          mode_chart_type)
        description = (chart_detail.get("description")
                       or chart_detail.get("chartDescription") or "")
        title = chart_detail.get("title") or chart_detail.get(
            "chartTitle") or ""

        # create datasource urn
        platform, db_name = self._get_platform_and_dbname(
            query.get("data_source_id"))
        source_tables = self._get_source_from_query(query.get("raw_query"))
        datasource_urn = self._get_datasource_urn(platform, db_name,
                                                  source_tables)
        custom_properties = self.construct_chart_custom_properties(
            chart_detail, mode_chart_type)

        # Chart Info
        chart_info = ChartInfoClass(
            type=chart_type,
            description=description,
            title=title,
            lastModified=last_modified,
            chartUrl=f"{self.config.connect_uri}"
            f"{chart_data.get('_links', {}).get('report_viz_web', {}).get('href', '')}",
            inputs=datasource_urn,
            customProperties=custom_properties,
        )
        chart_snapshot.aspects.append(chart_info)

        # Browse Path
        browse_path = BrowsePathsClass(paths=[path])
        chart_snapshot.aspects.append(browse_path)

        # Query
        chart_query = ChartQueryClass(
            rawQuery=query.get("raw_query", ""),
            type=ChartQueryTypeClass.SQL,
        )
        chart_snapshot.aspects.append(chart_query)

        # Ownership
        ownership = self._get_ownership(
            self._get_creator(
                chart_data.get("_links", {}).get("creator",
                                                 {}).get("href", "")))
        if ownership is not None:
            chart_snapshot.aspects.append(ownership)

        return chart_snapshot
コード例 #8
0
    def emit_sheets_as_charts(self,
                              workbook: Dict) -> Iterable[MetadataWorkUnit]:
        sheet_upstream_datasources = self.get_sheetwise_upstream_datasources(
            workbook)
        for sheet in workbook.get("sheets", []):
            chart_snapshot = ChartSnapshot(
                urn=builder.make_chart_urn(self.platform, sheet.get("id")),
                aspects=[],
            )

            creator = workbook.get("owner", {}).get("username", "")
            created_at = sheet.get("createdAt", datetime.now())
            updated_at = sheet.get("updatedAt", datetime.now())
            last_modified = self.get_last_modified(creator, created_at,
                                                   updated_at)

            if sheet.get("path"):
                site_part = f"/site/{self.config.site}" if self.config.site else ""
                sheet_external_url = (
                    f"{self.config.connect_uri}/#{site_part}/views/{sheet.get('path')}"
                )
            elif sheet.get("containedInDashboards"):
                # sheet contained in dashboard
                site_part = f"/t/{self.config.site}" if self.config.site else ""
                dashboard_path = sheet.get("containedInDashboards")[0].get(
                    "path", "")
                sheet_external_url = f"{self.config.connect_uri}{site_part}/authoring/{dashboard_path}/{sheet.get('name', '')}"
            else:
                # hidden or viz-in-tooltip sheet
                sheet_external_url = None
            fields = {}
            for field in sheet.get("datasourceFields", ""):
                description = make_description_from_params(
                    get_field_value_in_sheet(field, "description"),
                    get_field_value_in_sheet(field, "formula"),
                )
                fields[get_field_value_in_sheet(field, "name")] = description

            # datasource urn
            datasource_urn = []
            data_sources = sheet_upstream_datasources.get(
                sheet.get("id"), set())

            for ds_id in data_sources:
                if ds_id is None or not ds_id:
                    continue
                ds_urn = builder.make_dataset_urn(self.platform, ds_id,
                                                  self.config.env)
                datasource_urn.append(ds_urn)
                if ds_id not in self.datasource_ids_being_used:
                    self.datasource_ids_being_used.append(ds_id)

            # Chart Info
            chart_info = ChartInfoClass(
                description="",
                title=sheet.get("name", ""),
                lastModified=last_modified,
                externalUrl=sheet_external_url,
                inputs=sorted(datasource_urn),
                customProperties=fields,
            )
            chart_snapshot.aspects.append(chart_info)

            # Browse path
            browse_path = BrowsePathsClass(paths=[
                f"/{self.platform}/{workbook.get('projectName', '').replace('/', REPLACE_SLASH_CHAR)}"
                f"/{workbook.get('name', '')}"
                f"/{sheet.get('name', '').replace('/', REPLACE_SLASH_CHAR)}"
            ])
            chart_snapshot.aspects.append(browse_path)

            # Ownership
            owner = self._get_ownership(creator)
            if owner is not None:
                chart_snapshot.aspects.append(owner)

            #  Tags
            tag_list = sheet.get("tags", [])
            if tag_list and self.config.ingest_tags:
                tag_list_str = [
                    t.get("name", "").upper() for t in tag_list
                    if t is not None
                ]
                chart_snapshot.aspects.append(
                    builder.make_global_tag_aspect_with_tag_list(tag_list_str))

            yield self.get_metadata_change_event(chart_snapshot)

            yield from add_entity_to_container(self.gen_workbook_key(workbook),
                                               "chart", chart_snapshot.urn)
コード例 #9
0
ファイル: powerbi.py プロジェクト: hsheth2/datahub
    def __to_datahub_chart(
        self, tile: PowerBiAPI.Tile,
        ds_mcps: List[MetadataChangeProposalWrapper]
    ) -> List[MetadataChangeProposalWrapper]:
        """
        Map PowerBi tile to datahub chart
        """
        LOGGER.info("Converting tile {}(id={}) to chart".format(
            tile.title, tile.id))
        # Create an URN for chart
        chart_urn = builder.make_chart_urn(self.__config.platform_name,
                                           tile.get_urn_part())

        LOGGER.info("{}={}".format(Constant.CHART_URN, chart_urn))

        ds_input: List[str] = self.to_urn_set(ds_mcps)

        def tile_custom_properties(tile: PowerBiAPI.Tile) -> dict:
            custom_properties = {
                "datasetId":
                tile.dataset.id if tile.dataset else "",
                "reportId":
                tile.report.id if tile.report else "",
                "datasetWebUrl":
                tile.dataset.webUrl if tile.dataset is not None else "",
                "createdFrom":
                tile.createdFrom.value,
            }

            return custom_properties

        # Create chartInfo mcp
        # Set chartUrl only if tile is created from Report
        chart_info_instance = ChartInfoClass(
            title=tile.title or "",
            description=tile.title or "",
            lastModified=ChangeAuditStamps(),
            inputs=ds_input,
            externalUrl=tile.report.webUrl if tile.report else None,
            customProperties={**tile_custom_properties(tile)},
        )

        info_mcp = self.new_mcp(
            entity_type=Constant.CHART,
            entity_urn=chart_urn,
            aspect_name=Constant.CHART_INFO,
            aspect=chart_info_instance,
        )

        # removed status mcp
        status_mcp = self.new_mcp(
            entity_type=Constant.CHART,
            entity_urn=chart_urn,
            aspect_name=Constant.STATUS,
            aspect=StatusClass(removed=False),
        )

        # ChartKey status
        chart_key_instance = ChartKeyClass(
            dashboardTool=self.__config.platform_name,
            chartId=Constant.CHART_ID.format(tile.id),
        )

        chartkey_mcp = self.new_mcp(
            entity_type=Constant.CHART,
            entity_urn=chart_urn,
            aspect_name=Constant.CHART_KEY,
            aspect=chart_key_instance,
        )

        return [info_mcp, status_mcp, chartkey_mcp]
コード例 #10
0
# Construct the ChartInfo aspect with the input_datasets lineage.
input_datasets: List[str] = [
    builder.make_dataset_urn(platform="hdfs", name="dataset1", env="PROD"),
    builder.make_dataset_urn(platform="hdfs", name="dataset2", env="PROD"),
]

last_modified = ChangeAuditStampsClass()

chart_info = ChartInfoClass(
    title="Baz Chart 1",
    description="Sample Baz chart",
    lastModified=last_modified,
    inputs=input_datasets,
)

# Construct a MetadataChangeProposalWrapper object with the ChartInfo aspect.
# NOTE: This will overwrite all of the existing chartInfo aspect information associated with this chart.
chart_info_mcp = MetadataChangeProposalWrapper(
    entityType="chart",
    changeType=ChangeTypeClass.UPSERT,
    entityUrn=builder.make_chart_urn(platform="looker", name="my_chart_1"),
    aspectName="chartInfo",
    aspect=chart_info,
)

# Create an emitter to the GMS REST API.
emitter = DatahubRestEmitter("http://localhost:8080")

# Emit metadata!
emitter.emit_mcp(chart_info_mcp)