def _resolve_dataplex_entity_uri(
    self,
    entity_uri: dq_entity_uri.EntityUri,
    dataplex_client: clouddq_dataplex.CloudDqDataplexClient,
    bigquery_client: BigQueryClient,
) -> dq_entity.DqEntity:
    """Resolve a Dataplex entity URI into a ``DqEntity`` backed by a BigQuery table.

    The entity is fetched through ``dataplex_client`` using the
    projects/locations/lakes/zones configs and the entity id carried by
    ``entity_uri``, then converted with ``DqEntity.from_dataplex_entity``.
    Before returning, the table name reported by the converted entity is
    checked for existence in BigQuery, in the project given by the
    entity's ``instance_name``.

    Args:
        entity_uri: Parsed entity URI supplying the lookup configs,
            the entity id and the DB primary key.
        dataplex_client: Client used to fetch the Dataplex entity.
        bigquery_client: Client used to check that the table exists.

    Returns:
        The ``DqEntity`` built from the fetched Dataplex entity.

    Raises:
        RuntimeError: If ``bigquery_client.is_table_exists`` reports that
            the entity's table is not present.
    """
    # Gather the lookup arguments first; the URI getters are evaluated in
    # the same order as the keyword arguments they feed.
    lookup_kwargs = dict(
        gcp_project_id=entity_uri.get_configs("projects"),
        location_id=entity_uri.get_configs("locations"),
        lake_name=entity_uri.get_configs("lakes"),
        zone_id=entity_uri.get_configs("zones"),
        entity_id=entity_uri.get_entity_id(),
    )
    fetched_entity = dataplex_client.get_dataplex_entity(**lookup_kwargs)
    resolved_entity = dq_entity.DqEntity.from_dataplex_entity(
        entity_id=entity_uri.get_db_primary_key(),
        dataplex_entity=fetched_entity,
    )

    # Upper-cased primary key is only used to label the messages below.
    primary_key_label = entity_uri.get_db_primary_key().upper()
    external_table = resolved_entity.get_table_name()
    logger.debug(f"GCS Entity External Table Name is {external_table}")

    # Guard clause: without the backing BigQuery table the entity is unusable.
    if not bigquery_client.is_table_exists(
        table=external_table,
        project_id=resolved_entity.instance_name,
    ):
        raise RuntimeError(
            f"Unable to find Bigquery External Table {external_table} "
            f"for Entity URI {primary_key_label}"
        )

    logger.debug(
        f"The External Table {external_table} for Entity URI "
        f"{primary_key_label} exists in Bigquery."
    )
    return resolved_entity
def is_dataplex_entity(
    self,
    entity_uri: dq_entity_uri.EntityUri,
    dataplex_client: clouddq_dataplex.CloudDqDataplexClient,
):
    """Check whether ``entity_uri`` maps to exactly one Dataplex entity.

    The URI must provide non-empty ``projects``, ``lakes``, ``locations``
    and ``zones`` configs. When it does, Dataplex is queried for entities
    whose data path equals the URI's entity id.

    Args:
        entity_uri: Parsed entity URI supplying the lookup configs,
            the entity id and the DB primary key.
        dataplex_client: Client used to list matching Dataplex entities.

    Returns:
        ``False`` when a required config is missing or when the lookup
        does not yield exactly one entity; otherwise the ``DqEntity``
        built from the single match. Callers therefore receive either
        ``False`` or an entity object, not a strict boolean.
    """
    required_arguments = ["projects", "lakes", "locations", "zones"]
    for argument in required_arguments:
        # Stop at the first missing config: the lookup cannot be issued.
        if not entity_uri.get_configs(argument):
            logger.info(
                f"Failed to retrieve default Dataplex '{argument}' for "
                f"entity_uri: {entity_uri.complete_uri_string}. \n"
                f"'{argument}' is a required argument to look-up metadata for the entity_uri "
                "using Dataplex Metadata API.\n"
                "Ensure the BigQuery dataset containing this table "
                "is attached as an asset in Dataplex.\n"
                "You can then specify the corresponding Dataplex "
                "projects/locations/lakes/zones as part of the "
                "metadata_default_registries YAML configs, e.g.\n"
                f"{SAMPLE_DEFAULT_REGISTRIES_YAML}"
            )
            return False

    dataplex_entities_match = dataplex_client.list_dataplex_entities(
        gcp_project_id=entity_uri.get_configs("projects"),
        location_id=entity_uri.get_configs("locations"),
        lake_name=entity_uri.get_configs("lakes"),
        zone_id=entity_uri.get_configs("zones"),
        data_path=entity_uri.get_entity_id(),
    )
    logger.info(
        f"Retrieved Dataplex Entities:\n{pformat(dataplex_entities_match)}"
    )

    if len(dataplex_entities_match) != 1:
        # default=str keeps the output unchanged for JSON-serialisable
        # matches while preventing a TypeError when the matches are entity
        # objects (e.g. several matches), so this path still returns False.
        logger.info(
            "Failed to retrieve Dataplex Metadata entry for "
            f"entity_uri '{entity_uri.complete_uri_string}' "
            f"with error:\n"
            f"{pformat(json.dumps(dataplex_entities_match, default=str))}\n\n"
            f"Parsed entity_uri configs:\n"
            f"{pformat(entity_uri.to_dict())}\n\n"
        )
        return False

    # Exactly one match: convert it into the CloudDQ entity representation.
    return dq_entity.DqEntity.from_dataplex_entity(
        entity_id=entity_uri.get_db_primary_key(),
        dataplex_entity=dataplex_entities_match[0],
    )