Ejemplo n.º 1
0
 def _resolve_dataplex_entity_uri(
     self,
     entity_uri: dq_entity_uri.EntityUri,
     dataplex_client: clouddq_dataplex.CloudDqDataplexClient,
     bigquery_client: BigQueryClient,
 ) -> dq_entity.DqEntity:
     """Resolve a Dataplex entity URI into a ``DqEntity`` and verify its table.

     The entity is fetched through ``dataplex_client`` using the
     projects/locations/lakes/zones configs and the entity id carried by
     ``entity_uri``, then converted with ``DqEntity.from_dataplex_entity``.
     The table name reported by the resulting entity is checked with
     ``bigquery_client.is_table_exists`` before the entity is returned.

     Args:
         entity_uri: Parsed entity URI supplying the lookup configs, the
             entity id and the DB primary key.
         dataplex_client: Client used to fetch the Dataplex entity.
         bigquery_client: Client used to check that the table exists.

     Returns:
         The ``DqEntity`` built from the fetched Dataplex entity.

     Raises:
         RuntimeError: If ``bigquery_client`` reports that the table does
             not exist.
     """
     project = entity_uri.get_configs("projects")
     location = entity_uri.get_configs("locations")
     lake = entity_uri.get_configs("lakes")
     zone = entity_uri.get_configs("zones")
     fetched_entity = dataplex_client.get_dataplex_entity(
         gcp_project_id=project,
         location_id=location,
         lake_name=lake,
         zone_id=zone,
         entity_id=entity_uri.get_entity_id(),
     )
     resolved_entity = dq_entity.DqEntity.from_dataplex_entity(
         entity_id=entity_uri.get_db_primary_key(),
         dataplex_entity=fetched_entity,
     )
     # The upper-cased primary key is only used in the messages below.
     uri_key = entity_uri.get_db_primary_key().upper()
     table_name = resolved_entity.get_table_name()
     logger.debug(f"GCS Entity External Table Name is {table_name}")
     table_found = bigquery_client.is_table_exists(
         table=table_name,
         project_id=resolved_entity.instance_name,
     )
     if not table_found:
         raise RuntimeError(
             f"Unable to find Bigquery External Table  {table_name} "
             f"for Entity URI {uri_key}"
         )
     logger.debug(
         f"The External Table {table_name} for Entity URI "
         f"{uri_key} exists in Bigquery."
     )
     return resolved_entity
Ejemplo n.º 2
0
 def is_dataplex_entity(
     self,
     entity_uri: dq_entity_uri.EntityUri,
     dataplex_client: clouddq_dataplex.CloudDqDataplexClient,
 ):
     """Look up ``entity_uri`` in Dataplex and convert the single match.

     Despite the ``is_`` prefix, the success value is not a plain boolean:
     the matching entity is returned so callers can use it directly.

     Args:
         entity_uri: Parsed entity URI. Its "projects", "lakes",
             "locations" and "zones" configs must all be non-empty.
         dataplex_client: Client used to list the Dataplex entities that
             match the URI's entity id.

     Returns:
         ``False`` when a required config is missing or when the listing
         does not yield exactly one entity; otherwise the ``DqEntity``
         built from the single matching Dataplex entity.
     """
     required_arguments = ["projects", "lakes", "locations", "zones"]
     # Keep the validated values so the lookup below reuses them instead of
     # querying the URI a second time.
     uri_configs = {}
     for argument in required_arguments:
         uri_argument = entity_uri.get_configs(argument)
         if not uri_argument:
             logger.info(
                 f"Failed to retrieve default Dataplex '{argument}' for "
                 f"entity_uri: {entity_uri.complete_uri_string}. \n"
                 f"'{argument}' is a required argument to look-up metadata for the entity_uri "
                 "using Dataplex Metadata API.\n"
                 "Ensure the BigQuery dataset containing this table "
                 "is attached as an asset in Dataplex.\n"
                 "You can then specify the corresponding Dataplex "
                 "projects/locations/lakes/zones as part of the "
                 "metadata_default_registries YAML configs, e.g.\n"
                 f"{SAMPLE_DEFAULT_REGISTRIES_YAML}")
             return False
         uri_configs[argument] = uri_argument
     dataplex_entities_match = dataplex_client.list_dataplex_entities(
         gcp_project_id=uri_configs["projects"],
         location_id=uri_configs["locations"],
         lake_name=uri_configs["lakes"],
         zone_id=uri_configs["zones"],
         data_path=entity_uri.get_entity_id(),
     )
     logger.info(
         f"Retrieved Dataplex Entities:\n{pformat(dataplex_entities_match)}"
     )
     if len(dataplex_entities_match) != 1:
         # Pretty-print the matches directly: json.dumps raises TypeError on
         # objects that are not JSON-serializable, which would turn this
         # diagnostic path into a crash instead of a ``False`` result.
         logger.info("Failed to retrieve Dataplex Metadata entry for "
                     f"entity_uri '{entity_uri.complete_uri_string}' "
                     "with error:\n"
                     f"{pformat(dataplex_entities_match)}\n\n"
                     "Parsed entity_uri configs:\n"
                     f"{pformat(entity_uri.to_dict())}\n\n")
         return False
     return dq_entity.DqEntity.from_dataplex_entity(
         entity_id=entity_uri.get_db_primary_key(),
         dataplex_entity=dataplex_entities_match[0],
     )