Code example #1 (score: 0)
 def _resolve_dataplex_entity_uri(
     self,
     entity_uri: dq_entity_uri.EntityUri,
     dataplex_client: clouddq_dataplex.CloudDqDataplexClient,
     bigquery_client: BigQueryClient,
 ) -> dq_entity.DqEntity:
     """Resolve a Dataplex entity_uri into a CloudDQ ``DqEntity``.

     Looks up the entity via the Dataplex Metadata API, converts it to a
     ``DqEntity``, and verifies that the backing BigQuery external table
     actually exists before returning.

     Args:
         entity_uri: Parsed entity URI carrying projects/locations/lakes/zones
             components and the entity id.
         dataplex_client: Client used to fetch the Dataplex entity metadata.
         bigquery_client: Client used to confirm the external table exists.

     Returns:
         The resolved ``DqEntity`` for the entity_uri.

     Raises:
         RuntimeError: If the BigQuery external table backing the Dataplex
             entity cannot be found.
     """
     dataplex_entity = dataplex_client.get_dataplex_entity(
         gcp_project_id=entity_uri.get_configs("projects"),
         location_id=entity_uri.get_configs("locations"),
         lake_name=entity_uri.get_configs("lakes"),
         zone_id=entity_uri.get_configs("zones"),
         entity_id=entity_uri.get_entity_id(),
     )
     clouddq_entity = dq_entity.DqEntity.from_dataplex_entity(
         entity_id=entity_uri.get_db_primary_key(),
         dataplex_entity=dataplex_entity,
     )
     entity_uri_primary_key = entity_uri.get_db_primary_key().upper()
     gcs_entity_external_table_name = clouddq_entity.get_table_name()
     logger.debug(
         f"GCS Entity External Table Name is {gcs_entity_external_table_name}"
     )
     bq_table_exists = bigquery_client.is_table_exists(
         table=gcs_entity_external_table_name,
         project_id=clouddq_entity.instance_name,
     )
     # Guard clause: fail fast if the external table is missing.
     # (Fixed a doubled space in the original error message.)
     if not bq_table_exists:
         raise RuntimeError(
             f"Unable to find Bigquery External Table {gcs_entity_external_table_name} "
             f"for Entity URI {entity_uri_primary_key}")
     logger.debug(
         f"The External Table {gcs_entity_external_table_name} for Entity URI "
         f"{entity_uri_primary_key} exists in Bigquery.")
     return clouddq_entity
Code example #2 (score: 0)
    def from_bq_entity_uri(self, entity_uri: EntityUri,
                           bigquery_client: BigQueryClient) -> DqEntity:
        """Construct a ``DqEntity`` directly from a BigQuery entity_uri.

        The table schema (columns and partition fields) is fetched live from
        the BigQuery API; Dataplex-specific metadata fields are left unset.

        Args:
            entity_uri: Parsed BigQuery entity URI (projects/datasets/tables).
            bigquery_client: Client used to read the table schema.

        Returns:
            A ``DqEntity`` built from the URI components and live schema.
        """
        uri_configs = entity_uri.configs_dict
        target_project = entity_uri.get_configs("projects")
        bq_table = entity_uri.get_table_name()
        resolved_entity_id = entity_uri.get_entity_id()
        # Pull the current column/partition layout from BigQuery.
        schema = bigquery_client.get_table_schema(table=bq_table,
                                                  project_id=target_project)
        dq_entity_kwargs = {
            "source_database": "BIGQUERY",
            "resource_type": "BIGQUERY",
            "table_name": uri_configs.get("tables"),
            "dataset_name": uri_configs.get("datasets"),
            "project_name": uri_configs.get("projects"),
            "columns": schema.get("columns"),
            "environment_override": {},
            "entity_id": resolved_entity_id,
            # Not a Dataplex-managed entity, so all Dataplex fields stay None.
            "dataplex_name": None,
            "dataplex_lake": None,
            "dataplex_zone": None,
            "dataplex_location": None,
            "dataplex_asset_id": None,
            "dataplex_createTime": None,
            "dataplex_updateTime": None,
            "partition_fields": schema.get("partition_fields"),
        }
        return DqEntity.from_dict(entity_id=resolved_entity_id.upper(),
                                  kwargs=dq_entity_kwargs)
Code example #3 (score: 0)
 def is_dataplex_entity(
     self,
     entity_uri: dq_entity_uri.EntityUri,
     dataplex_client: clouddq_dataplex.CloudDqDataplexClient,
 ):
     """Try to resolve entity_uri to exactly one Dataplex entity.

     Returns the resolved ``DqEntity`` when the Dataplex Metadata API yields
     exactly one match; returns ``False`` when any required URI component is
     missing or the lookup does not produce a single entity.
     """
     # All four components are needed to address the Metadata API;
     # keep the check order stable so the first missing one is reported.
     for required in ("projects", "lakes", "locations", "zones"):
         if not entity_uri.get_configs(required):
             logger.info(
                 f"Failed to retrieve default Dataplex '{required}' for "
                 f"entity_uri: {entity_uri.complete_uri_string}. \n"
                 f"'{required}' is a required argument to look-up metadata for the entity_uri "
                 "using Dataplex Metadata API.\n"
                 "Ensure the BigQuery dataset containing this table "
                 "is attached as an asset in Dataplex.\n"
                 "You can then specify the corresponding Dataplex "
                 "projects/locations/lakes/zones as part of the "
                 "metadata_default_registries YAML configs, e.g.\n"
                 f"{SAMPLE_DEFAULT_REGISTRIES_YAML}")
             return False
     matches = dataplex_client.list_dataplex_entities(
         gcp_project_id=entity_uri.get_configs("projects"),
         location_id=entity_uri.get_configs("locations"),
         lake_name=entity_uri.get_configs("lakes"),
         zone_id=entity_uri.get_configs("zones"),
         data_path=entity_uri.get_entity_id(),
     )
     logger.info(
         f"Retrieved Dataplex Entities:\n{pformat(matches)}"
     )
     # Anything other than a single match is treated as "not resolvable".
     if len(matches) == 1:
         return dq_entity.DqEntity.from_dataplex_entity(
             entity_id=entity_uri.get_db_primary_key(),
             dataplex_entity=matches[0],
         )
     # NOTE(review): json.dumps assumes the match list is JSON-serializable —
     # confirm list_dataplex_entities returns plain dicts.
     logger.info("Failed to retrieve Dataplex Metadata entry for "
                 f"entity_uri '{entity_uri.complete_uri_string}' "
                 f"with error:\n"
                 f"{pformat(json.dumps(matches))}\n\n"
                 f"Parsed entity_uri configs:\n"
                 f"{pformat(entity_uri.to_dict())}\n\n")
     return False
Code example #4 (score: 0)
    def _resolve_bigquery_entity_uri(
        self,
        entity_uri: dq_entity_uri.EntityUri,
        dataplex_client: clouddq_dataplex.CloudDqDataplexClient,
        bigquery_client: BigQueryClient,
    ) -> dq_entity.DqEntity:
        """Resolve a BigQuery entity_uri into a ``DqEntity``.

        Validates the required URI components, confirms the table exists in
        BigQuery, then prefers Dataplex metadata when the table is also
        registered there, falling back to a direct BigQuery schema lookup.

        Raises:
            RuntimeError: If a required URI component is missing or the
                table does not exist in BigQuery.
        """
        # Every BigQuery lookup needs all three address components.
        for required in ("projects", "datasets", "tables"):
            if not entity_uri.get_configs(required):
                raise RuntimeError(
                    f"Failed to retrieve default Bigquery '{required}' for "
                    f"entity_uri: {entity_uri.complete_uri_string}. \n"
                    f"'{required}' is a required argument to look-up metadata for the entity_uri "
                    "using Bigquery API.\n")

        project_id = entity_uri.get_configs("projects")
        table_name = entity_uri.get_table_name()
        # Guard clause: fail fast when the table is absent.
        if not bigquery_client.is_table_exists(table=table_name,
                                               project_id=project_id):
            raise RuntimeError(
                f"Bigquery Table '{table_name}' specified in the "
                f"entity uri '{entity_uri}' does not exist")
        logger.debug(f"The Table '{table_name}' in the "
                     f"specified entity_uri '{entity_uri}' "
                     f"exists in Bigquery.")
        # Dataplex metadata wins when the table is registered as an asset.
        resolved = self.is_dataplex_entity(
            entity_uri=entity_uri, dataplex_client=dataplex_client)
        if resolved:
            return resolved
        return dq_entity.DqEntity.from_bq_entity_uri(
            entity_uri=entity_uri,
            bigquery_client=bigquery_client,
        )