コード例 #1
0
    def emit_workbook_as_container(
            self, workbook: Dict) -> Iterable[MetadataWorkUnit]:
        """Yield the container work units that describe one workbook.

        The workbook's key comes from ``self.gen_workbook_key``. The owner,
        external URL and tags are optional extras that are only filled in when
        the corresponding data is present (and, for owner and tags, when
        ``self.config.ingest_owner`` / ``self.config.ingest_tags`` is set).

        Args:
            workbook: Mapping describing the workbook. The keys read here are
                ``owner`` (a mapping with ``username``), ``uri``, ``tags``
                (a list of mappings with ``name``), ``name`` and
                ``description``.

        Yields:
            Each work unit produced by ``gen_containers``, after it has been
            registered with ``self.report.report_workunit``.
        """
        container_key = self.gen_workbook_key(workbook)

        # ``owner`` can be present but null; treat that like a missing owner
        # instead of failing on ``None.get``.
        creator = (workbook.get("owner") or {}).get("username")
        owner_urn = None
        if creator and self.config.ingest_owner:
            owner_urn = builder.make_user_urn(creator)

        site_part = f"/site/{self.config.site}" if self.config.site else ""

        # Only the part of the URI starting at "/workbooks/" goes into the
        # external link. ``find`` (unlike ``index``) does not raise when the
        # segment is absent; in that case no external URL is emitted.
        workbook_uri = workbook.get("uri") or ""
        marker_pos = workbook_uri.find("/workbooks/")
        workbook_external_url = None
        if marker_pos >= 0:
            workbook_part = workbook_uri[marker_pos:]
            workbook_external_url = (
                f"{self.config.connect_uri}/#{site_part}{workbook_part}")

        tag_list = workbook.get("tags", [])
        tag_names = None
        if tag_list and self.config.ingest_tags:
            # Skip null entries and tolerate a null tag name.
            tag_names = [(tag.get("name") or "").upper()
                         for tag in tag_list if tag is not None]

        container_workunits = gen_containers(
            container_key=container_key,
            name=workbook.get("name", ""),
            sub_types=["Workbook"],
            description=workbook.get("description"),
            owner_urn=owner_urn,
            external_url=workbook_external_url,
            tags=tag_names,
        )

        for wu in container_workunits:
            self.report.report_workunit(wu)
            yield wu
コード例 #2
0
    def gen_schema_containers(self, schema: str,
                              db_name: str) -> Iterable[MetadataWorkUnit]:
        """Yield schema container work units for every schema row queried.

        The schemas are read by running a SQL statement through
        ``self._alchemy_client``; the incoming ``schema`` argument is not
        read. Each schema found is emitted as a container whose parent is the
        container of ``db_name``.

        Args:
            schema: Not read by this implementation.
            db_name: Database name used for both the schema key and the parent
                database key.

        Yields:
            Each work unit from ``gen_containers``, after reporting it to
            ``self.report``.
        """
        assert isinstance(self.config, PrestoOnHiveConfig)

        # Choose the statement template by scheme, then fill it in once.
        if "postgresql" in self.config.scheme:
            template = PrestoOnHiveSource._SCHEMAS_POSTGRES_SQL_STATEMENT
        else:
            template = PrestoOnHiveSource._SCHEMAS_SQL_STATEMENT
        statement: str = template.format(
            where_clause_suffix=self.config.schemas_where_clause_suffix)

        for row in self._alchemy_client.execute_query(statement):
            schema_name = row["schema"]
            schema_key: PlatformKey = self.gen_schema_key(db_name, schema_name)
            logger.debug("schema_container_key = {} ".format(schema_key))
            parent_key = self.gen_database_key(database=db_name)

            work_units: Iterable[MetadataWorkUnit] = gen_containers(
                schema_key,
                schema_name,
                [SqlContainerSubTypes.SCHEMA],
                parent_key,
            )
            for work_unit in work_units:
                self.report.report_workunit(work_unit)
                yield work_unit
コード例 #3
0
    def gen_schema_containers(
            self, schema: str,
            db_name: str) -> typing.Iterable[MetadataWorkUnit]:
        """Yield container work units for ``schema`` with sub type "Database".

        The schema name is passed to ``self.gen_database_key`` as the database
        name; ``db_name`` is not read.

        Args:
            schema: Name used both for the container key and as its name.
            db_name: Not read by this implementation.

        Yields:
            Each work unit from ``gen_containers``, after reporting it to
            ``self.report``.
        """
        container_key = self.gen_database_key(database=schema)
        for work_unit in gen_containers(container_key, schema, ["Database"]):
            self.report.report_workunit(work_unit)
            yield work_unit
コード例 #4
0
    def gen_database_containers(self,
                                database: str) -> Iterable[MetadataWorkUnit]:
        """Yield the container work units for ``database``.

        The container is built with sub type "Database" and with the domain
        URN returned by ``self._gen_domain_urn`` for the same name.

        Args:
            database: Name of the database; also used as the container name.

        Yields:
            Each work unit from ``gen_containers``, after reporting it to
            ``self.report``.
        """
        # Domain first, then key: same call order as before.
        domain = self._gen_domain_urn(database)
        key = self.gen_database_key(database)

        work_units = gen_containers(
            container_key=key,
            name=database,
            sub_types=["Database"],
            domain_urn=domain,
        )
        for work_unit in work_units:
            self.report.report_workunit(work_unit)
            yield work_unit
コード例 #5
0
ファイル: bigquery.py プロジェクト: swaroopjagadish/datahub
    def gen_schema_containers(self, schema: str,
                              db_name: str) -> Iterable[MetadataWorkUnit]:
        """Yield container work units for ``schema`` with sub type "Dataset".

        The container is parented to the container of ``db_name``.

        Args:
            schema: Schema name; used for the key and as the container name.
            db_name: Name of the database that owns the schema.

        Yields:
            Each work unit from ``gen_containers``, after reporting it to
            ``self.report``.
        """
        schema_key = self.gen_schema_key(db_name, schema)
        parent_key = self.gen_database_key(database=db_name)

        work_units = gen_containers(schema_key, schema, ["Dataset"], parent_key)
        for work_unit in work_units:
            self.report.report_workunit(work_unit)
            yield work_unit
コード例 #6
0
ファイル: sql_common.py プロジェクト: swaroopjagadish/datahub
    def gen_schema_containers(self, schema: str,
                              db_name: str) -> Iterable[MetadataWorkUnit]:
        """Yield the schema container work units for ``schema``.

        When ``db_name`` is not ``None`` the schema container is parented to
        that database's container; otherwise no parent key is passed.

        Args:
            schema: Schema name; used for the key and as the container name.
            db_name: Name of the owning database, or ``None``.

        Yields:
            Each work unit from ``gen_containers``, after reporting it to
            ``self.report``.
        """
        schema_key = self.gen_schema_key(db_name, schema)

        # A parent key is only generated when a database name was supplied.
        parent_key: Optional[PlatformKey] = (
            None if db_name is None
            else self.gen_database_key(database=db_name))

        work_units = gen_containers(
            schema_key,
            schema,
            [SqlContainerSubTypes.SCHEMA],
            parent_key,
        )
        for work_unit in work_units:
            self.report.report_workunit(work_unit)
            yield work_unit
コード例 #7
0
ファイル: __init__.py プロジェクト: arunvasudevan/datahub
 def create_emit_containers(
     self,
     container_key: KeyType,
     name: str,
     sub_types: List[str],
     parent_container_key: Optional[PlatformKey] = None,
     domain_urn: Optional[str] = None,
 ) -> Iterable[MetadataWorkUnit]:
     """Yield container work units once per distinct container key.

     A container whose ``guid()`` is already listed in
     ``self.processed_containers`` produces nothing. Otherwise the guid is
     recorded and the work units from ``gen_containers`` are reported to
     ``self.report`` and yielded.

     Args:
         container_key: Key identifying the container.
         name: Container name passed to ``gen_containers``.
         sub_types: Sub types passed to ``gen_containers``.
         parent_container_key: Optional key of the parent container.
         domain_urn: Optional domain URN passed to ``gen_containers``.

     Yields:
         Each work unit for a container that has not been processed yet.
     """
     # Guard clause: a container that was already handled emits nothing.
     if container_key.guid() in self.processed_containers:
         return

     work_units = gen_containers(
         container_key=container_key,
         name=name,
         sub_types=sub_types,
         parent_container_key=parent_container_key,
         domain_urn=domain_urn,
     )
     self.processed_containers.append(container_key.guid())
     logger.debug(f"Creating container with key: {container_key}")
     for work_unit in work_units:
         self.report.report_workunit(work_unit)
         yield work_unit