Example #1
0
    def _make_chart_mce(
            self,
            dashboard_element: LookerDashboardElement) -> MetadataChangeEvent:
        """Wrap one dashboard element in a chart MetadataChangeEvent.

        The chart URN is built from the configured platform name and the
        element's URN id. Both audit stamps (created / lastModified) carry the
        configured actor and the current system time, because no per-element
        timestamps are read here.

        Args:
            dashboard_element: The element to describe as a chart.

        Returns:
            A MetadataChangeEvent whose proposed snapshot holds a single
            ChartInfoClass aspect.
        """
        config = self.source_config
        stamp_actor = config.actor
        stamp_time = get_sys_time()

        snapshot = ChartSnapshot(
            urn=f"urn:li:chart:({config.platform_name},{dashboard_element.get_urn_element_id()})",
            aspects=[],
        )

        audit_stamps = ChangeAuditStamps(
            created=AuditStamp(time=stamp_time, actor=stamp_actor),
            lastModified=AuditStamp(time=stamp_time, actor=stamp_actor),
        )

        viz_type = self._get_chart_type(dashboard_element)

        # Missing description / title are emitted as empty strings.
        description = dashboard_element.description
        if description is None:
            description = ""
        title = dashboard_element.title
        if title is None:
            title = ""

        snapshot.aspects.append(
            ChartInfoClass(
                type=viz_type,
                description=description,
                title=title,
                lastModified=audit_stamps,
                chartUrl=dashboard_element.url(config.base_url),
                inputs=dashboard_element.get_view_urns(config.platform_name),
            ))

        return MetadataChangeEvent(proposedSnapshot=snapshot)
def test_get_unknown_viz_chart_snapshot(mocked_data_source):
    """
    Visualization with the unmapped type SANKEY: the expected snapshot
    carries chart type "TABLE".
    """
    mocked_data_source.return_value = mock_mysql_data_source_response

    stamp_actor = "urn:li:corpuser:unknown"
    stamp_millis = 1628882009571
    expected_info = ChartInfoClass(
        customProperties={},
        externalUrl=None,
        title="My Query Sankey",
        description="",
        lastModified=ChangeAuditStamps(
            created=AuditStamp(time=stamp_millis, actor=stamp_actor),
            lastModified=AuditStamp(time=stamp_millis, actor=stamp_actor),
        ),
        chartUrl="http://localhost:5000/queries/4#9",
        inputs=["urn:li:dataset:(urn:li:dataPlatform:mysql,Rfam,PROD)"],
        type="TABLE",
    )
    expected_snapshot = ChartSnapshot(
        urn="urn:li:chart:(redash,9)",
        aspects=[expected_info],
    )

    # The second visualization of the mocked chart response is the one under test.
    sankey_viz = mock_chart_response.get("visualizations", [])[1]
    actual_snapshot = redash_source()._get_chart_snapshot(
        mock_chart_response, sankey_viz)

    assert actual_snapshot == expected_snapshot
def test_get_chart_snapshot_parse_table_names_from_sql(mocked_data_source):
    """
    Snapshot built by the `redash_source_parse_table_names_from_sql` source:
    the expected inputs are three table-level dataset URNs.
    """
    mocked_data_source.return_value = mock_mysql_data_source_response

    stamp_actor = "urn:li:corpuser:unknown"
    stamp_millis = 1628882022544
    expected_inputs = [
        "urn:li:dataset:(urn:li:dataPlatform:mysql,Rfam.order_items,PROD)",
        "urn:li:dataset:(urn:li:dataPlatform:mysql,Rfam.orders,PROD)",
        "urn:li:dataset:(urn:li:dataPlatform:mysql,Rfam.staffs,PROD)",
    ]
    expected_info = ChartInfoClass(
        customProperties={},
        externalUrl=None,
        title="My Query Chart",
        description="",
        lastModified=ChangeAuditStamps(
            created=AuditStamp(time=stamp_millis, actor=stamp_actor),
            lastModified=AuditStamp(time=stamp_millis, actor=stamp_actor),
        ),
        chartUrl="http://localhost:5000/queries/4#10",
        inputs=expected_inputs,
        type="PIE",
    )
    expected_snapshot = ChartSnapshot(
        urn="urn:li:chart:(redash,10)",
        aspects=[expected_info],
    )

    # The third visualization of the mocked chart response is the one under test.
    pie_viz = mock_chart_response.get("visualizations", [])[2]
    source = redash_source_parse_table_names_from_sql()
    actual_snapshot = source._get_chart_snapshot(mock_chart_response, pie_viz)

    assert actual_snapshot == expected_snapshot
Example #4
0
    def _make_chart_mce(
        self, dashboard_element: LookerDashboardElement, dashboard: LookerDashboard
    ) -> MetadataChangeEvent:
        """Wrap one dashboard element in a chart MetadataChangeEvent.

        The snapshot always receives a ChartInfoClass aspect. An ownership
        aspect derived from the owning dashboard is appended when
        ``get_ownership`` returns one.

        Args:
            dashboard_element: The element to describe as a chart.
            dashboard: The dashboard the element belongs to; only used to
                look up ownership.

        Returns:
            A MetadataChangeEvent carrying the chart snapshot.
        """
        config = self.source_config
        snapshot = ChartSnapshot(
            urn=builder.make_chart_urn(
                config.platform_name, dashboard_element.get_urn_element_id()
            ),
            aspects=[],
        )

        viz_type = self._get_chart_type(dashboard_element)

        # De-duplicated, sorted, comma-joined field names; empty when there are none.
        upstream_fields = dashboard_element.upstream_fields
        if upstream_fields:
            joined_fields = ",".join(sorted(set(upstream_fields)))
        else:
            joined_fields = ""

        snapshot.aspects.append(
            ChartInfoClass(
                type=viz_type,
                description=dashboard_element.description or "",
                title=dashboard_element.title or "",
                lastModified=ChangeAuditStamps(),
                chartUrl=dashboard_element.url(config.base_url),
                inputs=dashboard_element.get_view_urns(config),
                customProperties={"upstream_fields": joined_fields},
            )
        )

        owners = self.get_ownership(dashboard)
        if owners is not None:
            snapshot.aspects.append(owners)

        return MetadataChangeEvent(proposedSnapshot=snapshot)
Example #5
0
    def construct_chart_from_chart_data(self, chart_data):
        """Build a ChartSnapshot from one chart record.

        Args:
            chart_data: Mapping describing the chart. Keys read here: ``id``
                (required), ``changed_by`` (optional mapping with
                ``username``), ``changed_on_utc``, ``slice_name``, ``url`` and
                ``datasource_id``.

        Returns:
            A ChartSnapshot holding a single ChartInfoClass aspect.

        Raises:
            KeyError: If ``chart_data`` has no ``id`` entry.
        """
        chart_urn = f"urn:li:chart:({self.platform},{chart_data['id']})"
        chart_snapshot = ChartSnapshot(
            urn=chart_urn,
            aspects=[],
        )

        # `changed_by` may be present but null, hence the `or {}`.
        modified_actor = f"urn:li:corpuser:{(chart_data.get('changed_by') or {}).get('username', 'unknown')}"
        # AuditStamp.time is expressed in epoch milliseconds, while
        # datetime.timestamp() yields seconds, so scale by 1000.
        # NOTE(review): the "now" fallback is handed to dp.parse unchanged;
        # confirm the parser actually accepts that literal.
        modified_ts = int(
            dp.parse(chart_data.get("changed_on_utc", "now")).timestamp() *
            1000)
        title = chart_data.get("slice_name", "")

        # note: the API does not currently supply created_by usernames due to a bug, but we are required to
        # provide a created AuditStamp to comply with ChangeAuditStamp model. For now, I sub in the last
        # modified actor urn
        last_modified = ChangeAuditStamps(
            created=AuditStamp(time=modified_ts, actor=modified_actor),
            lastModified=AuditStamp(time=modified_ts, actor=modified_actor),
        )
        # Drop only trailing slashes from the base URI; blindly slicing off the
        # last character would corrupt a URI configured without one.
        base_uri = self.config.connect_uri.rstrip("/")
        chart_url = f"{base_uri}{chart_data.get('url', '')}"

        datasource_id = chart_data.get("datasource_id")
        datasource_urn = self.get_datasource_urn_from_id(datasource_id)

        chart_info = ChartInfoClass(
            description="",
            title=title,
            lastModified=last_modified,
            chartUrl=chart_url,
            inputs=[datasource_urn] if datasource_urn else None,
        )
        chart_snapshot.aspects.append(chart_info)
        return chart_snapshot
Example #6
0
    def _make_chart_mce(
            self,
            dashboard_element: LookerDashboardElement) -> MetadataChangeEvent:
        """Wrap one dashboard element in a chart MetadataChangeEvent.

        Args:
            dashboard_element: The element to describe as a chart.

        Returns:
            A MetadataChangeEvent whose proposed snapshot holds a single
            ChartInfoClass aspect with default (empty) audit stamps.
        """
        config = self.source_config
        snapshot = ChartSnapshot(
            urn=builder.make_chart_urn(
                config.platform_name,
                dashboard_element.get_urn_element_id()),
            aspects=[],
        )

        viz_type = self._get_chart_type(dashboard_element)

        info_aspect = ChartInfoClass(
            type=viz_type,
            # Missing description / title are emitted as empty strings.
            description=dashboard_element.description or "",
            title=dashboard_element.title or "",
            lastModified=ChangeAuditStamps(),
            chartUrl=dashboard_element.url(config.base_url),
            inputs=dashboard_element.get_view_urns(config.platform_name,
                                                   config.env),
        )
        snapshot.aspects.append(info_aspect)

        return MetadataChangeEvent(proposedSnapshot=snapshot)
Example #7
0
    def construct_chart_from_chart_data(self, chart_data):
        """Build a ChartSnapshot from one chart record.

        Args:
            chart_data: Mapping describing the chart. Keys read here: ``id``
                (required), ``changed_by`` (optional mapping with
                ``username``), ``changed_on_utc``, ``slice_name``,
                ``viz_type``, ``url``, ``datasource_id`` and ``params`` (a
                JSON-encoded string).

        Returns:
            A ChartSnapshot holding a single ChartInfoClass aspect whose
            custom properties list the chart's metrics, filters and
            dimensions.

        Raises:
            KeyError: If ``chart_data`` has no ``id`` entry.
        """
        chart_urn = f"urn:li:chart:({self.platform},{chart_data['id']})"
        chart_snapshot = ChartSnapshot(
            urn=chart_urn,
            aspects=[],
        )

        # `changed_by` may be present but null, hence the `or {}`.
        modified_actor = f"urn:li:corpuser:{(chart_data.get('changed_by') or {}).get('username', 'unknown')}"
        # datetime.timestamp() is in seconds; the audit stamp is stored in milliseconds.
        # NOTE(review): the "now" fallback is handed to dp.parse unchanged;
        # confirm the parser actually accepts that literal.
        modified_ts = int(
            dp.parse(chart_data.get("changed_on_utc", "now")).timestamp() *
            1000)
        title = chart_data.get("slice_name", "")

        # note: the API does not currently supply created_by usernames due to a bug, but we are required to
        # provide a created AuditStamp to comply with ChangeAuditStamp model. For now, I sub in the last
        # modified actor urn
        last_modified = ChangeAuditStamps(
            created=AuditStamp(time=modified_ts, actor=modified_actor),
            lastModified=AuditStamp(time=modified_ts, actor=modified_actor),
        )
        chart_type = chart_type_from_viz_type.get(
            chart_data.get("viz_type", ""))
        chart_url = f"{self.config.connect_uri}{chart_data.get('url', '')}"

        datasource_id = chart_data.get("datasource_id")
        datasource_urn = self.get_datasource_urn_from_id(datasource_id)

        # A missing or null `params` would make json.loads raise TypeError;
        # treat it as an empty parameter object instead.
        params = json.loads(chart_data.get("params") or "{}")
        # Fall back to the singular `metric` entry when `metrics` is absent or empty.
        metrics = [
            get_metric_name(metric)
            for metric in (params.get("metrics", []) or [params.get("metric")])
        ]
        # `adhoc_filters` may be present but null; iterate an empty list then.
        filters = [
            get_filter_name(filter_obj)
            for filter_obj in (params.get("adhoc_filters") or [])
        ]
        # `groupby` may be a single column name rather than a list.
        group_bys = params.get("groupby", []) or []
        if isinstance(group_bys, str):
            group_bys = [group_bys]

        custom_properties = {
            "Metrics": ", ".join(metrics),
            "Filters": ", ".join(filters),
            "Dimensions": ", ".join(group_bys),
        }

        chart_info = ChartInfoClass(
            type=chart_type,
            description="",
            title=title,
            lastModified=last_modified,
            chartUrl=chart_url,
            inputs=[datasource_urn] if datasource_urn else None,
            customProperties=custom_properties,
        )
        chart_snapshot.aspects.append(chart_info)
        return chart_snapshot
Example #8
0
    def _get_chart_snapshot(self, query_data: Dict,
                            viz_data: Dict) -> ChartSnapshot:
        """Build a ChartSnapshot for one visualization of a query.

        Args:
            query_data: Mapping describing the query. Keys read here:
                ``name``, ``id`` and ``data_source_id``.
            viz_data: Mapping describing the visualization. Keys read here:
                ``id`` (required), ``changed_by``, ``updated_at``, ``name``
                and ``description``.

        Returns:
            A ChartSnapshot holding a single ChartInfoClass aspect. Its
            ``inputs`` is None (and a warning is reported) when no datasource
            URN could be derived for the query's data source.

        Raises:
            KeyError: If ``viz_data`` has no ``id`` entry.
        """
        viz_id = viz_data["id"]
        chart_urn = f"urn:li:chart:({self.platform},{viz_id})"
        chart_snapshot = ChartSnapshot(
            urn=chart_urn,
            aspects=[],
        )

        # `changed_by` may be present but null; `.get(key, {})` would return
        # that None and the chained lookup would raise AttributeError.
        modified_actor = f"urn:li:corpuser:{(viz_data.get('changed_by') or {}).get('username', 'unknown')}"
        # datetime.timestamp() is in seconds; the audit stamp is stored in milliseconds.
        # NOTE(review): the "now" fallback is handed to dp.parse unchanged;
        # confirm the parser actually accepts that literal.
        modified_ts = int(
            dp.parse(viz_data.get("updated_at", "now")).timestamp() * 1000)
        title = f"{query_data.get('name')} {viz_data.get('name', '')}"

        # No separate creation info is read, so both stamps share the same values.
        last_modified = ChangeAuditStamps(
            created=AuditStamp(time=modified_ts, actor=modified_actor),
            lastModified=AuditStamp(time=modified_ts, actor=modified_actor),
        )

        # Getting chart type
        chart_type = self._get_chart_type_from_viz_data(viz_data)
        chart_url = f"{self.config.connect_uri}/queries/{query_data.get('id')}#{viz_id}"
        # Missing, null and empty descriptions all collapse to "".
        description = viz_data.get("description") or ""
        data_source_id = query_data.get("data_source_id")
        data_source = self._get_chart_data_source(data_source_id)
        data_source_type = data_source.get("type")

        # TODO: Getting table lineage from SQL parsing
        # Currently we only get database level source from `data_source_id` which returns database name or Bigquery's projectId
        # query = query_data.get("query", "")
        datasource_urn = self._get_datasource_urn_from_data_source(data_source)

        if not datasource_urn:
            self.report.report_warning(
                key=f"redash-chart-{viz_id}",
                reason=
                f"data_source_type={data_source_type} not yet implemented. Setting inputs to None",
            )

        chart_info = ChartInfoClass(
            type=chart_type,
            description=description,
            title=title,
            lastModified=last_modified,
            chartUrl=chart_url,
            inputs=[
                datasource_urn,
            ] if datasource_urn else None,
        )
        chart_snapshot.aspects.append(chart_info)

        return chart_snapshot
Example #9
0
    def construct_card_from_api_data(
            self, card_data: dict) -> Optional[ChartSnapshot]:
        """Fetch a card's details over HTTP and turn them into a ChartSnapshot.

        Args:
            card_data: Mapping that identifies the card; only its ``id`` entry
                is read.

        Returns:
            The populated ChartSnapshot, or None when the detail request fails
            with an HTTPError (the failure is recorded on ``self.report``).
        """
        card_id = card_data.get("id", "")
        base_uri = self.config.connect_uri

        try:
            response = self.session.get(f"{base_uri}/api/card/{card_id}")
            response.raise_for_status()
            details = response.json()
        except HTTPError as http_error:
            self.report.report_failure(
                key=f"metabase-card-{card_id}",
                reason=f"Unable to retrieve Card info. Reason: {http_error!s}",
            )
            return None

        snapshot = ChartSnapshot(
            urn=builder.make_chart_urn(self.platform, card_id),
            aspects=[],
        )

        # Only last-edit information is read, so it feeds both audit stamps.
        edit_info = details.get("last-edit-info") or {}
        editor_urn = builder.make_user_urn(edit_info.get("email", "unknown"))
        edited_millis = self.get_timestamp_millis_from_ts_string(
            str(edit_info.get("timestamp")))
        audit_stamps = ChangeAuditStamps(
            created=AuditStamp(time=edited_millis, actor=editor_urn),
            lastModified=AuditStamp(time=edited_millis, actor=editor_urn),
        )

        viz_type = self._get_chart_type(details.get("id", ""),
                                        details.get("display"))
        card_description = details.get("description") or ""
        card_title = details.get("name") or ""
        input_urns = self.get_datasource_urn(details)
        properties = self.construct_card_custom_properties(details)

        snapshot.aspects.append(
            ChartInfoClass(
                type=viz_type,
                description=card_description,
                title=card_title,
                lastModified=audit_stamps,
                chartUrl=f"{base_uri}/card/{card_id}",
                inputs=input_urns,
                customProperties=properties,
            ))

        # Cards whose query type is "native" also expose their raw query text.
        if card_details_is_native := (details.get("query_type", "")
                                      == "native"):
            native_section = details.get("dataset_query", {}).get("native", {})
            snapshot.aspects.append(
                ChartQueryClass(
                    rawQuery=native_section.get("query", ""),
                    type=ChartQueryTypeClass.SQL,
                ))

        # Ownership
        owners = self._get_ownership(details.get("creator_id", ""))
        if owners is not None:
            snapshot.aspects.append(owners)

        return snapshot
Example #10
0
    def construct_chart_from_api_data(self, chart_data: dict, query: dict,
                                      path: str) -> ChartSnapshot:
        """Assemble the ChartSnapshot for one chart and the query behind it.

        Aspects are appended in this order: chart info, browse path, chart
        query, and ownership (only when ``_get_ownership`` returns one).

        Args:
            chart_data: Mapping describing the chart. Keys read here:
                ``token``, ``_links``, ``created_at``, ``updated_at``,
                ``view`` and ``view_vegas``.
            query: Mapping describing the query; ``data_source_id`` and
                ``raw_query`` are read.
            path: Browse path stored on the snapshot.

        Returns:
            The populated ChartSnapshot.
        """
        token = chart_data.get("token", "")
        creator_href = (chart_data.get("_links", {}).get("creator",
                                                         {}).get("href", ""))

        snapshot = ChartSnapshot(
            urn=builder.make_chart_urn(self.platform, token),
            aspects=[],
        )

        # Audit stamps stay at their defaults unless a creator is resolved.
        audit_stamps = ChangeAuditStamps()
        creator = self._get_creator(creator_href)
        if creator is not None:
            creator_urn = builder.make_user_urn(creator)
            # datetime.timestamp() is in seconds; stamps are stored in milliseconds.
            created_millis = int(
                dp.parse(chart_data.get("created_at", "now")).timestamp() *
                1000)
            updated_millis = int(
                dp.parse(chart_data.get("updated_at", "now")).timestamp() *
                1000)
            audit_stamps = ChangeAuditStamps(
                created=AuditStamp(time=created_millis, actor=creator_urn),
                lastModified=AuditStamp(time=updated_millis,
                                        actor=creator_urn),
            )

        # Prefer the non-empty `view` mapping, else fall back to `view_vegas`.
        view = chart_data.get("view", {})
        if len(view) != 0:
            detail = view
        else:
            detail = chart_data.get("view_vegas", {})

        mode_chart_type = detail.get("chartType", "") or detail.get(
            "selectedChart", "")
        viz_type = self._get_chart_type(token, mode_chart_type)
        chart_description = (detail.get("description")
                             or detail.get("chartDescription") or "")
        chart_title = detail.get("title") or detail.get("chartTitle") or ""

        # create datasource urn
        platform, db_name = self._get_platform_and_dbname(
            query.get("data_source_id"))
        source_tables = self._get_source_from_query(query.get("raw_query"))
        input_urns = self._get_datasource_urn(platform, db_name,
                                              source_tables)
        properties = self.construct_chart_custom_properties(
            detail, mode_chart_type)

        # Chart Info
        viz_href = (chart_data.get("_links", {}).get("report_viz_web",
                                                     {}).get("href", ""))
        snapshot.aspects.append(
            ChartInfoClass(
                type=viz_type,
                description=chart_description,
                title=chart_title,
                lastModified=audit_stamps,
                chartUrl=f"{self.config.connect_uri}{viz_href}",
                inputs=input_urns,
                customProperties=properties,
            ))

        # Browse Path
        snapshot.aspects.append(BrowsePathsClass(paths=[path]))

        # Query
        snapshot.aspects.append(
            ChartQueryClass(
                rawQuery=query.get("raw_query", ""),
                type=ChartQueryTypeClass.SQL,
            ))

        # Ownership (the creator is looked up again for this aspect)
        owners = self._get_ownership(self._get_creator(creator_href))
        if owners is not None:
            snapshot.aspects.append(owners)

        return snapshot
Example #11
0
    def emit_sheets_as_charts(self,
                              workbook: Dict) -> Iterable[MetadataWorkUnit]:
        """Yield the work units that describe every sheet of a workbook as a chart.

        For each entry of ``workbook["sheets"]`` this yields the metadata
        change event for the chart snapshot, followed by whatever
        ``add_entity_to_container`` yields for the workbook's container key.
        Upstream datasource ids met along the way are recorded on
        ``self.datasource_ids_being_used``.

        Args:
            workbook: Mapping describing the workbook. Keys read here:
                ``sheets``, ``owner``, ``projectName`` and ``name``.

        Yields:
            MetadataWorkUnit objects for each sheet, in sheet order.
        """
        upstream_by_sheet = self.get_sheetwise_upstream_datasources(workbook)

        for sheet in workbook.get("sheets", []):
            snapshot = ChartSnapshot(
                urn=builder.make_chart_urn(self.platform, sheet.get("id")),
                aspects=[],
            )

            owner_username = workbook.get("owner", {}).get("username", "")
            created_at = sheet.get("createdAt", datetime.now())
            updated_at = sheet.get("updatedAt", datetime.now())
            audit_stamps = self.get_last_modified(owner_username, created_at,
                                                  updated_at)

            # External URL: direct view path, else via the first containing
            # dashboard, else none at all.
            if sheet.get("path"):
                site_segment = ""
                if self.config.site:
                    site_segment = f"/site/{self.config.site}"
                external_url = (
                    f"{self.config.connect_uri}/#{site_segment}/views/{sheet.get('path')}"
                )
            elif sheet.get("containedInDashboards"):
                # sheet contained in dashboard
                site_segment = ""
                if self.config.site:
                    site_segment = f"/t/{self.config.site}"
                first_dashboard = sheet.get("containedInDashboards")[0]
                dashboard_path = first_dashboard.get("path", "")
                external_url = f"{self.config.connect_uri}{site_segment}/authoring/{dashboard_path}/{sheet.get('name', '')}"
            else:
                # hidden or viz-in-tooltip sheet
                external_url = None

            # Field name -> description, exposed as the chart's custom properties.
            field_descriptions = {}
            for ds_field in sheet.get("datasourceFields", ""):
                field_description = make_description_from_params(
                    get_field_value_in_sheet(ds_field, "description"),
                    get_field_value_in_sheet(ds_field, "formula"),
                )
                field_name = get_field_value_in_sheet(ds_field, "name")
                field_descriptions[field_name] = field_description

            # datasource urn
            input_urns = []
            upstream_ids = upstream_by_sheet.get(sheet.get("id"), set())
            for upstream_id in upstream_ids:
                # Skip None and other empty ids.
                if upstream_id:
                    input_urns.append(
                        builder.make_dataset_urn(self.platform, upstream_id,
                                                 self.config.env))
                    if upstream_id not in self.datasource_ids_being_used:
                        self.datasource_ids_being_used.append(upstream_id)

            # Chart Info
            snapshot.aspects.append(
                ChartInfoClass(
                    description="",
                    title=sheet.get("name", ""),
                    lastModified=audit_stamps,
                    externalUrl=external_url,
                    inputs=sorted(input_urns),
                    customProperties=field_descriptions,
                ))

            # Browse path: /<platform>/<project>/<workbook>/<sheet>, with
            # slashes in the project and sheet names replaced.
            project_segment = workbook.get("projectName",
                                           "").replace("/",
                                                       REPLACE_SLASH_CHAR)
            workbook_segment = workbook.get("name", "")
            sheet_segment = sheet.get("name",
                                      "").replace("/", REPLACE_SLASH_CHAR)
            snapshot.aspects.append(
                BrowsePathsClass(paths=[
                    f"/{self.platform}/{project_segment}/{workbook_segment}/{sheet_segment}"
                ]))

            # Ownership
            ownership = self._get_ownership(owner_username)
            if ownership is not None:
                snapshot.aspects.append(ownership)

            #  Tags
            sheet_tags = sheet.get("tags", [])
            if sheet_tags and self.config.ingest_tags:
                upper_tag_names = []
                for tag in sheet_tags:
                    if tag is not None:
                        upper_tag_names.append(tag.get("name", "").upper())
                snapshot.aspects.append(
                    builder.make_global_tag_aspect_with_tag_list(
                        upper_tag_names))

            yield self.get_metadata_change_event(snapshot)

            yield from add_entity_to_container(self.gen_workbook_key(workbook),
                                               "chart", snapshot.urn)
Example #12
0
    def __to_datahub_chart(
        self, tile: PowerBiAPI.Tile,
        ds_mcps: List[MetadataChangeProposalWrapper]
    ) -> List[MetadataChangeProposalWrapper]:
        """
        Map a PowerBi tile to the DataHub chart proposals.

        Args:
            tile: The tile to convert.
            ds_mcps: Dataset proposals whose URNs become the chart's inputs
                (via ``to_urn_set``).

        Returns:
            Three proposals for the chart URN, in this order: chart info,
            status (removed=False) and chart key.
        """
        LOGGER.info("Converting tile {}(id={}) to chart".format(
            tile.title, tile.id))

        platform_name = self.__config.platform_name
        # Create an URN for chart
        chart_urn = builder.make_chart_urn(platform_name, tile.get_urn_part())
        LOGGER.info("{}={}".format(Constant.CHART_URN, chart_urn))

        input_urns: List[str] = self.to_urn_set(ds_mcps)

        def chart_mcp(aspect_name, aspect):
            # Every proposal targets the same chart entity.
            return self.new_mcp(
                entity_type=Constant.CHART,
                entity_urn=chart_urn,
                aspect_name=aspect_name,
                aspect=aspect,
            )

        # chartInfo: the tile title doubles as the description, and the
        # external URL is only set when the tile has a report.
        info_aspect = ChartInfoClass(
            title=tile.title or "",
            description=tile.title or "",
            lastModified=ChangeAuditStamps(),
            inputs=input_urns,
            externalUrl=tile.report.webUrl if tile.report else None,
            customProperties={
                "datasetId": tile.dataset.id if tile.dataset else "",
                "reportId": tile.report.id if tile.report else "",
                "datasetWebUrl":
                tile.dataset.webUrl if tile.dataset is not None else "",
                "createdFrom": tile.createdFrom.value,
            },
        )
        info_mcp = chart_mcp(Constant.CHART_INFO, info_aspect)

        # Status: the chart is explicitly marked as not removed.
        status_mcp = chart_mcp(Constant.STATUS, StatusClass(removed=False))

        # ChartKey
        key_aspect = ChartKeyClass(
            dashboardTool=platform_name,
            chartId=Constant.CHART_ID.format(tile.id),
        )
        chartkey_mcp = chart_mcp(Constant.CHART_KEY, key_aspect)

        return [info_mcp, status_mcp, chartkey_mcp]