def test_update_tag_template(self):
    """update_tag_template returns the stubbed TagTemplate and sends one request."""
    # Canned response the stub channel will hand back.
    expected_response = tags_pb2.TagTemplate(
        name="name3373707",
        display_name="displayName1615086568",
    )
    stub = ChannelStub(responses=[expected_response])

    # Build the client while channel creation is patched to return the stub.
    with mock.patch(
            "google.api_core.grpc_helpers.create_channel") as create_channel:
        create_channel.return_value = stub
        api = datacatalog_v1.DataCatalogClient()

    # Call under test.
    tag_template = {}
    result = api.update_tag_template(tag_template)
    assert expected_response == result

    # Exactly one request must have been recorded, built from our argument.
    assert len(stub.requests) == 1
    wanted = datacatalog_pb2.UpdateTagTemplateRequest(tag_template=tag_template)
    sent = stub.requests[0][1]
    assert wanted == sent
def test_create_entry_group(self):
    """create_entry_group returns the stubbed EntryGroup and sends one request."""
    # Canned response the stub channel will hand back.
    expected_response = datacatalog_pb2.EntryGroup(
        name="name3373707",
        display_name="displayName1615086568",
        description="description-1724546052",
    )
    stub = ChannelStub(responses=[expected_response])

    # Build the client while channel creation is patched to return the stub.
    with mock.patch(
            "google.api_core.grpc_helpers.create_channel") as create_channel:
        create_channel.return_value = stub
        api = datacatalog_v1.DataCatalogClient()

    # Call under test.
    parent = api.location_path("[PROJECT]", "[LOCATION]")
    entry_group_id = "entryGroupId-43122680"
    result = api.create_entry_group(parent, entry_group_id)
    assert expected_response == result

    # Exactly one request must have been recorded, built from our arguments.
    assert len(stub.requests) == 1
    wanted = datacatalog_pb2.CreateEntryGroupRequest(
        parent=parent, entry_group_id=entry_group_id)
    sent = stub.requests[0][1]
    assert wanted == sent
def test_update_entry_group(self):
    """update_entry_group returns the stubbed EntryGroup and sends one request."""
    # Canned response the stub channel will hand back.
    expected_response = datacatalog_pb2.EntryGroup(
        name="name3373707",
        display_name="displayName1615086568",
        description="description-1724546052",
    )
    stub = ChannelStub(responses=[expected_response])

    # Build the client while channel creation is patched to return the stub.
    with mock.patch(
            "google.api_core.grpc_helpers.create_channel") as create_channel:
        create_channel.return_value = stub
        api = datacatalog_v1.DataCatalogClient()

    # Call under test.
    entry_group = {}
    result = api.update_entry_group(entry_group)
    assert expected_response == result

    # Exactly one request must have been recorded, built from our argument.
    assert len(stub.requests) == 1
    wanted = datacatalog_pb2.UpdateEntryGroupRequest(entry_group=entry_group)
    sent = stub.requests[0][1]
    assert wanted == sent
def test_get_entry_group(self):
    """get_entry_group returns the stubbed EntryGroup and sends one request."""
    # Canned response the stub channel will hand back.
    expected_response = datacatalog_pb2.EntryGroup(
        name="name2-1052831874",
        display_name="displayName1615086568",
        description="description-1724546052",
    )
    stub = ChannelStub(responses=[expected_response])

    # Build the client while channel creation is patched to return the stub.
    with mock.patch(
            "google.api_core.grpc_helpers.create_channel") as create_channel:
        create_channel.return_value = stub
        api = datacatalog_v1.DataCatalogClient()

    # Call under test.
    name = api.entry_group_path("[PROJECT]", "[LOCATION]", "[ENTRY_GROUP]")
    result = api.get_entry_group(name)
    assert expected_response == result

    # Exactly one request must have been recorded, built from our argument.
    assert len(stub.requests) == 1
    wanted = datacatalog_pb2.GetEntryGroupRequest(name=name)
    sent = stub.requests[0][1]
    assert wanted == sent
def test_search_catalog(self):
    """search_catalog yields the single stubbed result and sends one request."""
    # Single-page canned response: empty page token, one (empty) result.
    expected_response = datacatalog_pb2.SearchCatalogResponse(
        next_page_token="",
        results=[{}],
    )
    stub = ChannelStub(responses=[expected_response])

    # Build the client while channel creation is patched to return the stub.
    with mock.patch(
            "google.api_core.grpc_helpers.create_channel") as create_channel:
        create_channel.return_value = stub
        api = datacatalog_v1.DataCatalogClient()

    # Call under test; the pager is drained into a list before asserting.
    scope = {}
    query = "query107944136"
    pager = api.search_catalog(scope, query)
    resources = list(pager)

    assert len(resources) == 1
    assert expected_response.results[0] == resources[0]

    # Exactly one request must have been recorded, built from our arguments.
    assert len(stub.requests) == 1
    wanted = datacatalog_pb2.SearchCatalogRequest(scope=scope, query=query)
    sent = stub.requests[0][1]
    assert wanted == sent
def search_assets(override_values):
    """Search Data Catalog for entries in a project and print each result.

    Args:
        override_values: Mapping of test-harness overrides. "project_id"
            replaces the placeholder project id; "tag_template_id", when
            present, replaces the query with ``name:<tag_template_id>``.
    """
    # [START data_catalog_search_assets]
    from google.cloud import datacatalog_v1

    datacatalog = datacatalog_v1.DataCatalogClient()

    # TODO: Set these values before running the sample.
    project_id = "project_id"

    # Set custom query.
    search_string = "type=dataset"
    # [END data_catalog_search_assets]

    # To facilitate testing, we replace values with alternatives
    # provided by the testing harness.
    project_id = override_values.get("project_id", project_id)
    # Only narrow the query when the harness actually supplies a template id.
    # Previously the custom query itself was used as the fallback id, which
    # turned "type=dataset" into the query "name:type=dataset".
    tag_template_id = override_values.get("tag_template_id")
    if tag_template_id is not None:
        search_string = f"name:{tag_template_id}"

    # [START data_catalog_search_assets]
    scope = datacatalog_v1.types.SearchCatalogRequest.Scope()
    scope.include_project_ids.append(project_id)

    # Alternatively, search using organization scopes.
    # scope.include_org_ids.append("my_organization_id")

    search_results = datacatalog.search_catalog(scope=scope, query=search_string)

    print("Results in project:")
    for result in search_results:
        print(result)
    # [END data_catalog_search_assets]
def test_get_tag_template(self):
    """get_tag_template returns the stubbed TagTemplate and sends one request."""
    # Canned response the stub channel will hand back.
    expected_response = tags_pb2.TagTemplate(
        name="name2-1052831874",
        display_name="displayName1615086568",
    )
    stub = ChannelStub(responses=[expected_response])

    # Build the client while channel creation is patched to return the stub.
    with mock.patch(
            "google.api_core.grpc_helpers.create_channel") as create_channel:
        create_channel.return_value = stub
        api = datacatalog_v1.DataCatalogClient()

    # Call under test.
    name = api.tag_template_path("[PROJECT]", "[LOCATION]", "[TAG_TEMPLATE]")
    result = api.get_tag_template(name)
    assert expected_response == result

    # Exactly one request must have been recorded, built from our argument.
    assert len(stub.requests) == 1
    wanted = datacatalog_pb2.GetTagTemplateRequest(name=name)
    sent = stub.requests[0][1]
    assert wanted == sent
def test_rename_tag_template_field(self):
    """rename_tag_template_field returns the stubbed field and sends one request."""
    # Canned response the stub channel will hand back.
    expected_response = tags_pb2.TagTemplateField(
        name="name2-1052831874",
        display_name="displayName1615086568",
        is_required=True,
        order=106006350,
    )
    stub = ChannelStub(responses=[expected_response])

    # Build the client while channel creation is patched to return the stub.
    with mock.patch(
            "google.api_core.grpc_helpers.create_channel") as create_channel:
        create_channel.return_value = stub
        api = datacatalog_v1.DataCatalogClient()

    # Call under test.
    name = api.tag_template_field_path(
        "[PROJECT]", "[LOCATION]", "[TAG_TEMPLATE]", "[FIELD]")
    new_tag_template_field_id = "newTagTemplateFieldId-1668354591"
    result = api.rename_tag_template_field(name, new_tag_template_field_id)
    assert expected_response == result

    # Exactly one request must have been recorded, built from our arguments.
    assert len(stub.requests) == 1
    wanted = datacatalog_pb2.RenameTagTemplateFieldRequest(
        name=name, new_tag_template_field_id=new_tag_template_field_id)
    sent = stub.requests[0][1]
    assert wanted == sent
def test_create_tag(self):
    """create_tag returns the stubbed Tag and sends one matching request."""
    # Canned response the stub channel will hand back.
    expected_response = tags_pb2.Tag(
        name="name3373707",
        template="template-1321546630",
        template_display_name="templateDisplayName-532252787",
        column="column-1354837162",
    )
    stub = ChannelStub(responses=[expected_response])

    # Build the client while channel creation is patched to return the stub.
    with mock.patch(
            "google.api_core.grpc_helpers.create_channel") as create_channel:
        create_channel.return_value = stub
        api = datacatalog_v1.DataCatalogClient()

    # Call under test.
    parent = api.tag_path(
        "[PROJECT]", "[LOCATION]", "[ENTRY_GROUP]", "[ENTRY]", "[TAG]")
    tag = {}
    result = api.create_tag(parent, tag)
    assert expected_response == result

    # Exactly one request must have been recorded, built from our arguments.
    assert len(stub.requests) == 1
    wanted = datacatalog_pb2.CreateTagRequest(parent=parent, tag=tag)
    sent = stub.requests[0][1]
    assert wanted == sent
def test_get_iam_policy(self):
    """get_iam_policy returns the stubbed Policy and sends one matching request."""
    # Canned response the stub channel will hand back.
    expected_response = policy_pb2.Policy(version=351608024, etag=b"21")
    stub = ChannelStub(responses=[expected_response])

    # Build the client while channel creation is patched to return the stub.
    with mock.patch(
            "google.api_core.grpc_helpers.create_channel") as create_channel:
        create_channel.return_value = stub
        api = datacatalog_v1.DataCatalogClient()

    # Call under test.
    resource = "resource-341064690"
    result = api.get_iam_policy(resource)
    assert expected_response == result

    # Exactly one request must have been recorded, built from our argument.
    assert len(stub.requests) == 1
    wanted = iam_policy_pb2.GetIamPolicyRequest(resource=resource)
    sent = stub.requests[0][1]
    assert wanted == sent
def test_test_iam_permissions(self):
    """test_iam_permissions returns the stubbed response and sends one request."""
    # Canned (empty) response the stub channel will hand back.
    expected_response = iam_policy_pb2.TestIamPermissionsResponse()
    stub = ChannelStub(responses=[expected_response])

    # Build the client while channel creation is patched to return the stub.
    with mock.patch(
            "google.api_core.grpc_helpers.create_channel") as create_channel:
        create_channel.return_value = stub
        api = datacatalog_v1.DataCatalogClient()

    # Call under test.
    resource = "resource-341064690"
    permissions = []
    result = api.test_iam_permissions(resource, permissions)
    assert expected_response == result

    # Exactly one request must have been recorded, built from our arguments.
    assert len(stub.requests) == 1
    wanted = iam_policy_pb2.TestIamPermissionsRequest(
        resource=resource, permissions=permissions)
    sent = stub.requests[0][1]
    assert wanted == sent
def test_create_tag_template(self):
    """create_tag_template returns the stubbed TagTemplate and sends one request."""
    # Canned response the stub channel will hand back.
    expected_response = tags_pb2.TagTemplate(
        name="name3373707",
        display_name="displayName1615086568",
    )
    stub = ChannelStub(responses=[expected_response])

    # Build the client while channel creation is patched to return the stub.
    with mock.patch(
            "google.api_core.grpc_helpers.create_channel") as create_channel:
        create_channel.return_value = stub
        api = datacatalog_v1.DataCatalogClient()

    # Call under test.
    parent = api.location_path("[PROJECT]", "[LOCATION]")
    tag_template_id = "tagTemplateId-2020335141"
    tag_template = {}
    result = api.create_tag_template(parent, tag_template_id, tag_template)
    assert expected_response == result

    # Exactly one request must have been recorded, built from our arguments.
    assert len(stub.requests) == 1
    wanted = datacatalog_pb2.CreateTagTemplateRequest(
        parent=parent,
        tag_template_id=tag_template_id,
        tag_template=tag_template,
    )
    sent = stub.requests[0][1]
    assert wanted == sent
def test_list_entries(self):
    """list_entries yields the single stubbed entry and sends one request."""
    # Single-page canned response: empty page token, one (empty) entry.
    expected_response = datacatalog_pb2.ListEntriesResponse(
        next_page_token="",
        entries=[{}],
    )
    stub = ChannelStub(responses=[expected_response])

    # Build the client while channel creation is patched to return the stub.
    with mock.patch(
            "google.api_core.grpc_helpers.create_channel") as create_channel:
        create_channel.return_value = stub
        api = datacatalog_v1.DataCatalogClient()

    # Call under test; the pager is drained into a list before asserting.
    parent = api.entry_group_path("[PROJECT]", "[LOCATION]", "[ENTRY_GROUP]")
    pager = api.list_entries(parent)
    resources = list(pager)

    assert len(resources) == 1
    assert expected_response.entries[0] == resources[0]

    # Exactly one request must have been recorded, built from our argument.
    assert len(stub.requests) == 1
    wanted = datacatalog_pb2.ListEntriesRequest(parent=parent)
    sent = stub.requests[0][1]
    assert wanted == sent
def test_lookup_entry(self):
    """lookup_entry returns the stubbed Entry and sends one default request."""
    # Canned response the stub channel will hand back.
    expected_response = datacatalog_pb2.Entry(
        name="name3373707",
        linked_resource="linkedResource1544625012",
        user_specified_type="userSpecifiedType-940364963",
        user_specified_system="userSpecifiedSystem-1776119406",
        display_name="displayName1615086568",
        description="description-1724546052",
    )
    stub = ChannelStub(responses=[expected_response])

    # Build the client while channel creation is patched to return the stub.
    with mock.patch(
            "google.api_core.grpc_helpers.create_channel") as create_channel:
        create_channel.return_value = stub
        api = datacatalog_v1.DataCatalogClient()

    # Call under test: no arguments, so a default request is expected.
    result = api.lookup_entry()
    assert expected_response == result

    assert len(stub.requests) == 1
    wanted = datacatalog_pb2.LookupEntryRequest()
    sent = stub.requests[0][1]
    assert wanted == sent
def create_custom_entry(override_values):
    """Create a custom entry under an entry group and print the created name.

    Args:
        override_values: Mapping of test-harness overrides, read with
            ``.get`` for "project_id", "entry_group_name" and "entry_id";
            missing keys fall back to the sample placeholders below.
    """
    # [START data_catalog_create_custom_entry]
    # Import required modules.
    from google.cloud import datacatalog_v1

    # Google Cloud Platform project.
    project_id = "my-project"
    # Entry group that will hold the new entry (passed as ``parent`` below).
    # For sample code demonstrating entry group creation, see quickstart:
    # https://cloud.google.com/data-catalog/docs/quickstart-tagging
    entry_group_name = "my_existing_entry_group"
    # Entry to be created.
    entry_id = "my_new_entry_id"
    # [END data_catalog_create_custom_entry]

    # To facilitate testing, we replace values with alternatives
    # provided by the testing harness.
    project_id = override_values.get("project_id", project_id)
    entry_group_name = override_values.get("entry_group_name", entry_group_name)
    entry_id = override_values.get("entry_id", entry_id)

    # [START data_catalog_create_custom_entry]
    datacatalog = datacatalog_v1.DataCatalogClient()

    # Create an Entry.
    entry = datacatalog_v1.types.Entry()
    entry.user_specified_system = "onprem_data_system"
    entry.user_specified_type = "onprem_data_asset"
    entry.display_name = "My awesome data asset"
    entry.description = "This data asset is managed by an external system."
    entry.linked_resource = "//my-onprem-server.com/dataAssets/my-awesome-data-asset"

    # Create the Schema, this is optional.
    column_specs = (
        ("first_column", "STRING"),
        ("second_column", "DOUBLE"),
    )
    for column_name, column_type in column_specs:
        entry.schema.columns.append(
            datacatalog_v1.types.ColumnSchema(
                column=column_name,
                type_=column_type,
                description="This columns consists of ....",
                mode=None,
            )
        )

    created_entry = datacatalog.create_entry(
        parent=entry_group_name, entry_id=entry_id, entry=entry
    )
    print("Created entry: {}".format(created_entry.name))
    # [END data_catalog_create_custom_entry]
def lookup_pubsub_topic(project_id, topic_id):
    """Return the Data Catalog entry for a Pub/Sub topic, looked up by linked resource.

    Args:
        project_id: Project id substituted into the linked-resource name.
        topic_id: Topic id substituted into the linked-resource name.
    """
    from google.cloud import datacatalog_v1

    client = datacatalog_v1.DataCatalogClient()

    linked_resource = '//pubsub.googleapis.com/projects/{}/topics/{}'.format(
        project_id, topic_id)

    # NOTE(review): other lookups in this file pass request={...}; confirm
    # which call style the pinned google-cloud-datacatalog version expects.
    return client.lookup_entry(linked_resource=linked_resource)
def test_lookup_entry_exception(self):
    """A CustomException queued on the stub channel propagates out of lookup_entry."""
    # The stub raises instead of returning a response.
    stub = ChannelStub(responses=[CustomException()])

    # Build the client while channel creation is patched to return the stub.
    with mock.patch(
            "google.api_core.grpc_helpers.create_channel") as create_channel:
        create_channel.return_value = stub
        api = datacatalog_v1.DataCatalogClient()

    with pytest.raises(CustomException):
        api.lookup_entry()
def lookup_bigquery_dataset(project_id, dataset_id):
    # [START datacatalog_lookup_dataset]
    """Return the Data Catalog entry for a BigQuery dataset, by linked resource.

    Args:
        project_id: Project id substituted into the linked-resource name.
        dataset_id: Dataset id substituted into the linked-resource name.
    """
    from google.cloud import datacatalog_v1

    client = datacatalog_v1.DataCatalogClient()

    linked_resource = '//bigquery.googleapis.com/projects/{}/datasets/{}'.format(
        project_id, dataset_id)

    lookup_request = {'linked_resource': linked_resource}
    return client.lookup_entry(request=lookup_request)
def lookup_bigquery_table(project_id, dataset_id, table_id):
    """Return the Data Catalog entry for a BigQuery table, by linked resource.

    Args:
        project_id: Project id substituted into the linked-resource name.
        dataset_id: Dataset id substituted into the linked-resource name.
        table_id: Table id substituted into the linked-resource name.
    """
    from google.cloud import datacatalog_v1

    client = datacatalog_v1.DataCatalogClient()

    # The two adjacent literals are joined before .format() is applied.
    linked_resource = (
        '//bigquery.googleapis.com/projects/{}/datasets/{}'
        '/tables/{}'
    ).format(project_id, dataset_id, table_id)

    # NOTE(review): other lookups in this file pass request={...}; confirm
    # which call style the pinned google-cloud-datacatalog version expects.
    return client.lookup_entry(linked_resource=linked_resource)
def lookup_bigquery_dataset_sql_resource(project_id, dataset_id):
    """Return the Data Catalog entry for a BigQuery dataset, by sql_resource.

    Args:
        project_id: Project id, back-quoted into the SQL resource name.
        dataset_id: Dataset id, back-quoted into the SQL resource name.
    """
    from google.cloud import datacatalog_v1

    client = datacatalog_v1.DataCatalogClient()

    sql_name = 'bigquery.dataset.`{}`.`{}`'.format(project_id, dataset_id)

    # NOTE(review): other lookups in this file pass request={...}; confirm
    # which call style the pinned google-cloud-datacatalog version expects.
    return client.lookup_entry(sql_resource=sql_name)
def lookup_pubsub_topic_sql_resource(project_id, topic_id):
    """Return the Data Catalog entry for a Pub/Sub topic, by sql_resource.

    Args:
        project_id: Project id, back-quoted into the SQL resource name.
        topic_id: Topic id, back-quoted into the SQL resource name.
    """
    from google.cloud import datacatalog_v1

    client = datacatalog_v1.DataCatalogClient()

    sql_name = 'pubsub.topic.`{}`.`{}`'.format(project_id, topic_id)

    # NOTE(review): other lookups in this file pass request={...}; confirm
    # which call style the pinned google-cloud-datacatalog version expects.
    return client.lookup_entry(sql_resource=sql_name)
def lookup_bigquery_table_sql_resource(project_id, dataset_id, table_id):
    """Return the Data Catalog entry for a BigQuery table, by sql_resource.

    Args:
        project_id: Project id, back-quoted into the SQL resource name.
        dataset_id: Dataset id, back-quoted into the SQL resource name.
        table_id: Table id, back-quoted into the SQL resource name.
    """
    from google.cloud import datacatalog_v1

    client = datacatalog_v1.DataCatalogClient()

    sql_name = 'bigquery.table.`{}`.`{}`.`{}`'.format(
        project_id, dataset_id, table_id)

    lookup_request = {'sql_resource': sql_name}
    return client.lookup_entry(request=lookup_request)
def run(argv=None, save_main_session=True):
    """Run the ingestion pipeline and then record metadata for the output table.

    The pipeline reads text lines from ``--input``, decodes each line as
    JSON, passes it through ``DataIngestion`` and writes the rows to the
    BigQuery table named by ``--output`` (truncating existing data). After
    the pipeline finishes, ``write_metadata`` is called with metadata built
    from the run arguments and the input object's storage metadata.

    Args:
        argv: Command-line arguments; ``None`` lets argparse read
            ``sys.argv``. Unrecognised arguments are forwarded to Beam as
            pipeline options.
        save_main_session: Value assigned to
            ``SetupOptions.save_main_session`` on the pipeline options.
    """
    logging.info("Starting {}".format(project_name))
    logging.info('argv={}'.format(argv))

    parser = argparse.ArgumentParser()
    parser.add_argument('--input',
                        dest='input',
                        default='gs://dataflow-sample',
                        help='Input file to process.')
    parser.add_argument('--output',
                        dest='output',
                        required=True,
                        help='Output file to write results to.')
    known_args, pipeline_args = parser.parse_known_args(argv)
    logging.info('known_args: {}'.format(known_args))
    logging.info('pipeline_args: {}'.format(pipeline_args))

    # Split "gs://<bucket>/<blob>" into its bucket and object path.
    bucket = known_args.input.split('/')[2]  # 'prod-bucket.renewalytics.io'
    # Slice from the front: the previous negative-offset slice returned a
    # garbage suffix (or the whole URI) when the input had no object path.
    blob = known_args.input[len('gs://') + len(bucket) + 1:]

    metadata = {
        **{
            'code_module': project_name,
            'input': known_args.input,
            'output': known_args.output,
            'updated': datetime.now()
        },
        **convert_storage_metadata_to_catalog(
            get_storage_metadata(storage.Client(), bucket, blob))
    }

    # We use the save_main_session option because one or more DoFn's in this
    # workflow rely on global context (e.g., a module imported at module level).
    pipeline_options = PipelineOptions(pipeline_args)
    pipeline_options.view_as(
        SetupOptions).save_main_session = save_main_session

    # Reuse the configured options; building a fresh PipelineOptions here
    # silently dropped the save_main_session setting made above.
    p = beam.Pipeline(options=pipeline_options)

    (p
     | 'Read from a File' >> beam.io.ReadFromText(known_args.input)
     | 'Load JSON' >> beam.Map(json.loads)
     | 'Custom Parse' >> beam.ParDo(DataIngestion())
     | 'Write to BigQuery' >> beam.io.Write(
         beam.io.WriteToBigQuery(
             known_args.output,
             schema=g_schema,
             create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED,
             # Deletes all data in the BigQuery table before writing.
             write_disposition=beam.io.BigQueryDisposition.WRITE_TRUNCATE)))

    p.run().wait_until_finish()

    # Record the metadata only after the pipeline has finished.
    write_metadata(dc=datacatalog_v1.DataCatalogClient(),
                   metadata=metadata,
                   table_id=known_args.output.split('.')[-1])
def sampleentry():
    """Look up a fixed public BigQuery table in Data Catalog and return it as text.

    The client is authenticated with credentials derived from the OAuth2
    token stored in ``session``; the entry is returned via ``str()``.
    """
    credentials = credentials_from_session(
        OAuth2Session(client_id, token=session[OAUTH2_TOKEN]))
    client = datacatalog_v1.DataCatalogClient(credentials=credentials)

    # The two adjacent literals are joined before .format() is applied.
    linked_resource = (
        '//bigquery.googleapis.com/projects/{}/datasets/{}'
        '/tables/{}'
    ).format('bigquery-public-data', 'covid19_usafacts', 'summary')

    found_entry = client.lookup_entry(
        request={'linked_resource': linked_resource})
    return str(found_entry)
def entry(project_id, dataset, table):
    """Look up a BigQuery table in Data Catalog and return the entry as text.

    The client is authenticated with credentials derived from the OAuth2
    token stored in ``session``; the entry is returned via ``str()``.

    Args:
        project_id: Project id substituted into the linked-resource name.
        dataset: Dataset id substituted into the linked-resource name.
        table: Table id substituted into the linked-resource name.
    """
    credentials = credentials_from_session(
        OAuth2Session(client_id, token=session[OAUTH2_TOKEN]))
    client = datacatalog_v1.DataCatalogClient(credentials=credentials)

    # The two adjacent literals are joined before .format() is applied.
    linked_resource = (
        '//bigquery.googleapis.com/projects/{}/datasets/{}'
        '/tables/{}'
    ).format(project_id, dataset, table)

    found_entry = client.lookup_entry(
        request={'linked_resource': linked_resource})
    return str(found_entry)
def test_update_tag_template_exception(self):
    """A CustomException queued on the stub propagates out of update_tag_template."""
    # The stub raises instead of returning a response.
    stub = ChannelStub(responses=[CustomException()])

    # Build the client while channel creation is patched to return the stub.
    with mock.patch(
            "google.api_core.grpc_helpers.create_channel") as create_channel:
        create_channel.return_value = stub
        api = datacatalog_v1.DataCatalogClient()

    # Request argument for the failing call.
    tag_template = {}

    with pytest.raises(CustomException):
        api.update_tag_template(tag_template)
def test_get_iam_policy_exception(self):
    """A CustomException queued on the stub propagates out of get_iam_policy."""
    # The stub raises instead of returning a response.
    stub = ChannelStub(responses=[CustomException()])

    # Build the client while channel creation is patched to return the stub.
    with mock.patch(
            "google.api_core.grpc_helpers.create_channel") as create_channel:
        create_channel.return_value = stub
        api = datacatalog_v1.DataCatalogClient()

    # Request argument for the failing call.
    resource = "resource-341064690"

    with pytest.raises(CustomException):
        api.get_iam_policy(resource)
def test_list_entries_exception(self):
    """A CustomException queued on the stub surfaces when the pager is consumed."""
    # The stub raises instead of returning a response.
    stub = ChannelStub(responses=[CustomException()])

    # Build the client while channel creation is patched to return the stub.
    with mock.patch(
            "google.api_core.grpc_helpers.create_channel") as create_channel:
        create_channel.return_value = stub
        api = datacatalog_v1.DataCatalogClient()

    # Obtaining the pager happens outside the raises-block; only draining it
    # is expected to raise.
    parent = api.entry_group_path("[PROJECT]", "[LOCATION]", "[ENTRY_GROUP]")
    pager = api.list_entries(parent)

    with pytest.raises(CustomException):
        list(pager)
def test_delete_entry_exception(self):
    """A CustomException queued on the stub propagates out of delete_entry."""
    # The stub raises instead of returning a response.
    stub = ChannelStub(responses=[CustomException()])

    # Build the client while channel creation is patched to return the stub.
    with mock.patch(
            "google.api_core.grpc_helpers.create_channel") as create_channel:
        create_channel.return_value = stub
        api = datacatalog_v1.DataCatalogClient()

    # Request argument for the failing call.
    name = api.entry_path(
        "[PROJECT]", "[LOCATION]", "[ENTRY_GROUP]", "[ENTRY]")

    with pytest.raises(CustomException):
        api.delete_entry(name)
def test_get_tag_template_exception(self):
    """A CustomException queued on the stub propagates out of get_tag_template."""
    # The stub raises instead of returning a response.
    stub = ChannelStub(responses=[CustomException()])

    # Build the client while channel creation is patched to return the stub.
    with mock.patch(
            "google.api_core.grpc_helpers.create_channel") as create_channel:
        create_channel.return_value = stub
        api = datacatalog_v1.DataCatalogClient()

    # Request argument for the failing call.
    name = api.tag_template_path("[PROJECT]", "[LOCATION]", "[TAG_TEMPLATE]")

    with pytest.raises(CustomException):
        api.get_tag_template(name)