def emit_workbook_as_container(self, workbook: Dict) -> Iterable[MetadataWorkUnit]:
    """Yield the container work units that describe one workbook.

    Args:
        workbook: Mapping describing the workbook. The keys read here are
            ``owner`` (a mapping with ``username``), ``uri``, ``tags``
            (a list of mappings with ``name``), ``name`` and ``description``.

    Yields:
        Each work unit produced by ``gen_containers``, after it has been
        registered with ``self.report.report_workunit``.
    """
    workbook_container_key = self.gen_workbook_key(workbook)

    # "owner" can be present with a null value, in which case the
    # ``.get("owner", {})`` default would not apply; ``or {}`` covers both.
    creator = (workbook.get("owner") or {}).get("username")
    owner_urn = (
        builder.make_user_urn(creator)
        if (creator and self.config.ingest_owner)
        else None
    )

    site_part = f"/site/{self.config.site}" if self.config.site else ""

    # Keep only the "/workbooks/..." tail of the URI. ``find`` is used rather
    # than ``index`` so that a URI without the marker simply results in no
    # external URL instead of raising ValueError and aborting the workbook.
    workbook_uri = workbook.get("uri") or ""
    marker_pos = workbook_uri.find("/workbooks/")
    workbook_part = workbook_uri[marker_pos:] if marker_pos >= 0 else None
    workbook_external_url = (
        f"{self.config.connect_uri}/#{site_part}{workbook_part}"
        if workbook_part
        else None
    )

    tag_list = workbook.get("tags", [])
    # Tag names are upper-cased; null entries are skipped and a null or
    # missing name is treated as the empty string.
    tag_list_str = (
        [(t.get("name") or "").upper() for t in tag_list if t is not None]
        if (tag_list and self.config.ingest_tags)
        else None
    )

    container_workunits = gen_containers(
        container_key=workbook_container_key,
        name=workbook.get("name", ""),
        sub_types=["Workbook"],
        description=workbook.get("description"),
        owner_urn=owner_urn,
        external_url=workbook_external_url,
        tags=tag_list_str,
    )

    for wu in container_workunits:
        self.report.report_workunit(wu)
        yield wu
def gen_schema_containers(self, schema: str, db_name: str) -> Iterable[MetadataWorkUnit]:
    """Yield schema container work units for every schema returned by the schemas query.

    The incoming ``schema`` argument is not consulted: the schema names come
    from the ``"schema"`` column of each row returned by the query. Every
    schema container is parented to the container of ``db_name``.

    Args:
        schema: Unused by this implementation.
        db_name: Database whose container key is used as the parent.

    Yields:
        Each work unit from ``gen_containers``, after reporting it via
        ``self.report.report_workunit``.
    """
    assert isinstance(self.config, PrestoOnHiveConfig)

    # Choose the SQL template by connection scheme, then fill in the
    # configured WHERE-clause suffix.
    if "postgresql" in self.config.scheme:
        template = PrestoOnHiveSource._SCHEMAS_POSTGRES_SQL_STATEMENT
    else:
        template = PrestoOnHiveSource._SCHEMAS_SQL_STATEMENT
    statement: str = template.format(
        where_clause_suffix=self.config.schemas_where_clause_suffix
    )

    for row in self._alchemy_client.execute_query(statement):
        schema_name = row["schema"]
        schema_key: PlatformKey = self.gen_schema_key(db_name, schema_name)
        logger.debug("schema_container_key = {} ".format(schema_key))
        parent_key = self.gen_database_key(database=db_name)

        for wu in gen_containers(
            schema_key,
            schema_name,
            [SqlContainerSubTypes.SCHEMA],
            parent_key,
        ):
            self.report.report_workunit(wu)
            yield wu
def gen_schema_containers(
    self, schema: str, db_name: str
) -> typing.Iterable[MetadataWorkUnit]:
    """Yield container work units for ``schema``, emitted with the "Database" sub-type.

    The container key is obtained from ``gen_database_key`` using ``schema``
    as the database name; ``db_name`` is not used by this implementation.

    Yields:
        Each work unit from ``gen_containers``, after reporting it via
        ``self.report.report_workunit``.
    """
    key = self.gen_database_key(database=schema)
    for wu in gen_containers(key, schema, ["Database"]):
        self.report.report_workunit(wu)
        yield wu
def gen_database_containers(self, database: str) -> Iterable[MetadataWorkUnit]:
    """Yield the container work units for ``database``.

    The container is emitted with the "Database" sub-type and the domain URN
    returned by ``self._gen_domain_urn(database)``.

    Yields:
        Each work unit from ``gen_containers``, after reporting it via
        ``self.report.report_workunit``.
    """
    # The domain URN is resolved before the key, as in the original ordering.
    domain = self._gen_domain_urn(database)
    key = self.gen_database_key(database)

    for wu in gen_containers(
        container_key=key,
        name=database,
        sub_types=["Database"],
        domain_urn=domain,
    ):
        self.report.report_workunit(wu)
        yield wu
def gen_schema_containers(self, schema: str, db_name: str) -> Iterable[MetadataWorkUnit]:
    """Yield container work units for ``schema``, parented to ``db_name``'s container.

    The schema container is emitted with the "Dataset" sub-type.

    Yields:
        Each work unit from ``gen_containers``, after reporting it via
        ``self.report.report_workunit``.
    """
    key = self.gen_schema_key(db_name, schema)
    parent_key = self.gen_database_key(database=db_name)

    for wu in gen_containers(key, schema, ["Dataset"], parent_key):
        self.report.report_workunit(wu)
        yield wu
def gen_schema_containers(self, schema: str, db_name: str) -> Iterable[MetadataWorkUnit]:
    """Yield container work units for ``schema``, optionally parented to a database.

    When ``db_name`` is ``None`` no parent key is generated and ``None`` is
    passed to ``gen_containers`` in the parent position.

    Yields:
        Each work unit from ``gen_containers``, after reporting it via
        ``self.report.report_workunit``.
    """
    key = self.gen_schema_key(db_name, schema)
    parent_key: Optional[PlatformKey] = (
        self.gen_database_key(database=db_name) if db_name is not None else None
    )

    for wu in gen_containers(
        key,
        schema,
        [SqlContainerSubTypes.SCHEMA],
        parent_key,
    ):
        self.report.report_workunit(wu)
        yield wu
def create_emit_containers(
    self,
    container_key: KeyType,
    name: str,
    sub_types: List[str],
    parent_container_key: Optional[PlatformKey] = None,
    domain_urn: Optional[str] = None,
) -> Iterable[MetadataWorkUnit]:
    """Yield container work units for ``container_key`` unless it was already processed.

    A container counts as processed when its ``guid()`` is already in
    ``self.processed_containers``; in that case nothing is yielded.
    Otherwise the guid is appended to that list and the work units from
    ``gen_containers`` are reported and yielded.

    Args:
        container_key: Key identifying the container.
        name: Passed through to ``gen_containers``.
        sub_types: Passed through to ``gen_containers``.
        parent_container_key: Optional parent key, passed through.
        domain_urn: Optional domain URN, passed through.

    Yields:
        Each work unit from ``gen_containers``, after reporting it via
        ``self.report.report_workunit``.
    """
    # Guard clause: a container whose guid is already recorded is skipped.
    if container_key.guid() in self.processed_containers:
        return

    workunits = gen_containers(
        container_key=container_key,
        name=name,
        sub_types=sub_types,
        parent_container_key=parent_container_key,
        domain_urn=domain_urn,
    )
    # Record the guid before any work unit is handed to the caller.
    self.processed_containers.append(container_key.guid())
    logger.debug(f"Creating container with key: {container_key}")

    for wu in workunits:
        self.report.report_workunit(wu)
        yield wu