def test_update_tag_template(self):
        """update_tag_template returns the stubbed TagTemplate and records one matching request."""
        # Canned message the stub channel will answer with.
        stub_reply = tags_pb2.TagTemplate(
            name="name3373707",
            display_name="displayName1615086568",
        )

        # Build the client while channel creation is patched to yield the stub.
        stub_channel = ChannelStub(responses=[stub_reply])
        with mock.patch(
                "google.api_core.grpc_helpers.create_channel") as create_channel:
            create_channel.return_value = stub_channel
            client = datacatalog_v1.DataCatalogClient()

        # Request argument.
        tag_template = {}

        reply = client.update_tag_template(tag_template)
        assert stub_reply == reply

        # Exactly one call should have been recorded on the stub channel.
        assert len(stub_channel.requests) == 1
        wanted_request = datacatalog_pb2.UpdateTagTemplateRequest(
            tag_template=tag_template)
        sent_request = stub_channel.requests[0][1]
        assert wanted_request == sent_request
    def test_create_entry_group(self):
        """create_entry_group returns the stubbed EntryGroup and records one matching request."""
        # Canned message the stub channel will answer with.
        stub_reply = datacatalog_pb2.EntryGroup(
            name="name3373707",
            display_name="displayName1615086568",
            description="description-1724546052",
        )

        # Build the client while channel creation is patched to yield the stub.
        stub_channel = ChannelStub(responses=[stub_reply])
        with mock.patch(
                "google.api_core.grpc_helpers.create_channel") as create_channel:
            create_channel.return_value = stub_channel
            client = datacatalog_v1.DataCatalogClient()

        # Request arguments.
        parent = client.location_path("[PROJECT]", "[LOCATION]")
        entry_group_id = "entryGroupId-43122680"

        reply = client.create_entry_group(parent, entry_group_id)
        assert stub_reply == reply

        # Exactly one call should have been recorded on the stub channel.
        assert len(stub_channel.requests) == 1
        wanted_request = datacatalog_pb2.CreateEntryGroupRequest(
            parent=parent, entry_group_id=entry_group_id)
        sent_request = stub_channel.requests[0][1]
        assert wanted_request == sent_request
    def test_update_entry_group(self):
        """update_entry_group returns the stubbed EntryGroup and records one matching request."""
        # Canned message the stub channel will answer with.
        stub_reply = datacatalog_pb2.EntryGroup(
            name="name3373707",
            display_name="displayName1615086568",
            description="description-1724546052",
        )

        # Build the client while channel creation is patched to yield the stub.
        stub_channel = ChannelStub(responses=[stub_reply])
        with mock.patch(
                "google.api_core.grpc_helpers.create_channel") as create_channel:
            create_channel.return_value = stub_channel
            client = datacatalog_v1.DataCatalogClient()

        # Request argument.
        entry_group = {}

        reply = client.update_entry_group(entry_group)
        assert stub_reply == reply

        # Exactly one call should have been recorded on the stub channel.
        assert len(stub_channel.requests) == 1
        wanted_request = datacatalog_pb2.UpdateEntryGroupRequest(
            entry_group=entry_group)
        sent_request = stub_channel.requests[0][1]
        assert wanted_request == sent_request
    def test_get_entry_group(self):
        """get_entry_group returns the stubbed EntryGroup and records one matching request."""
        # Canned message the stub channel will answer with.
        stub_reply = datacatalog_pb2.EntryGroup(
            name="name2-1052831874",
            display_name="displayName1615086568",
            description="description-1724546052",
        )

        # Build the client while channel creation is patched to yield the stub.
        stub_channel = ChannelStub(responses=[stub_reply])
        with mock.patch(
                "google.api_core.grpc_helpers.create_channel") as create_channel:
            create_channel.return_value = stub_channel
            client = datacatalog_v1.DataCatalogClient()

        # Request argument: resource path built by the client helper.
        name = client.entry_group_path(
            "[PROJECT]", "[LOCATION]", "[ENTRY_GROUP]")

        reply = client.get_entry_group(name)
        assert stub_reply == reply

        # Exactly one call should have been recorded on the stub channel.
        assert len(stub_channel.requests) == 1
        wanted_request = datacatalog_pb2.GetEntryGroupRequest(name=name)
        sent_request = stub_channel.requests[0][1]
        assert wanted_request == sent_request
    def test_search_catalog(self):
        """search_catalog yields the single stubbed result and records one matching request."""
        # Canned single-page response: one empty result, empty next-page token.
        stub_reply = datacatalog_pb2.SearchCatalogResponse(
            next_page_token="",
            results=[{}],
        )

        # Build the client while channel creation is patched to yield the stub.
        stub_channel = ChannelStub(responses=[stub_reply])
        with mock.patch(
                "google.api_core.grpc_helpers.create_channel") as create_channel:
            create_channel.return_value = stub_channel
            client = datacatalog_v1.DataCatalogClient()

        # Request arguments.
        scope = {}
        query = "query107944136"

        # Drain the pager returned by the client into a list.
        found = list(client.search_catalog(scope, query))
        assert len(found) == 1

        assert stub_reply.results[0] == found[0]

        # Exactly one call should have been recorded on the stub channel.
        assert len(stub_channel.requests) == 1
        wanted_request = datacatalog_pb2.SearchCatalogRequest(
            scope=scope, query=query)
        sent_request = stub_channel.requests[0][1]
        assert wanted_request == sent_request
def search_assets(override_values):
    """Searches Data Catalog entries for a given project.

    Args:
        override_values: Mapping of values supplied by the testing harness.
            ``project_id`` replaces the sample project id. ``tag_template_id``,
            when present, replaces the sample query with
            ``name:<tag_template_id>``; when absent the sample's default
            query is used unchanged.

    Prints every result returned by the search.
    """
    # [START data_catalog_search_assets]
    from google.cloud import datacatalog_v1

    datacatalog = datacatalog_v1.DataCatalogClient()

    # TODO: Set these values before running the sample.
    project_id = "project_id"

    # Set custom query.
    search_string = "type=dataset"
    # [END data_catalog_search_assets]

    # To facilitate testing, we replace values with alternatives
    # provided by the testing harness.
    project_id = override_values.get("project_id", project_id)
    # Only rewrite the query when the harness actually supplies a template id.
    # Falling back to the default query as the "id" would turn it into
    # "name:type=dataset", which is not the query the sample documents.
    tag_template_id = override_values.get("tag_template_id")
    if tag_template_id is not None:
        search_string = f"name:{tag_template_id}"

    # [START data_catalog_search_assets]
    scope = datacatalog_v1.types.SearchCatalogRequest.Scope()
    scope.include_project_ids.append(project_id)

    # Alternatively, search using organization scopes.
    # scope.include_org_ids.append("my_organization_id")

    search_results = datacatalog.search_catalog(scope=scope, query=search_string)

    print("Results in project:")
    for result in search_results:
        print(result)
    def test_get_tag_template(self):
        """get_tag_template returns the stubbed TagTemplate and records one matching request."""
        # Canned message the stub channel will answer with.
        stub_reply = tags_pb2.TagTemplate(
            name="name2-1052831874",
            display_name="displayName1615086568",
        )

        # Build the client while channel creation is patched to yield the stub.
        stub_channel = ChannelStub(responses=[stub_reply])
        with mock.patch(
                "google.api_core.grpc_helpers.create_channel") as create_channel:
            create_channel.return_value = stub_channel
            client = datacatalog_v1.DataCatalogClient()

        # Request argument: resource path built by the client helper.
        name = client.tag_template_path(
            "[PROJECT]", "[LOCATION]", "[TAG_TEMPLATE]")

        reply = client.get_tag_template(name)
        assert stub_reply == reply

        # Exactly one call should have been recorded on the stub channel.
        assert len(stub_channel.requests) == 1
        wanted_request = datacatalog_pb2.GetTagTemplateRequest(name=name)
        sent_request = stub_channel.requests[0][1]
        assert wanted_request == sent_request
    def test_rename_tag_template_field(self):
        """rename_tag_template_field returns the stubbed field and records one matching request."""
        # Canned message the stub channel will answer with.
        stub_reply = tags_pb2.TagTemplateField(
            name="name2-1052831874",
            display_name="displayName1615086568",
            is_required=True,
            order=106006350,
        )

        # Build the client while channel creation is patched to yield the stub.
        stub_channel = ChannelStub(responses=[stub_reply])
        with mock.patch(
                "google.api_core.grpc_helpers.create_channel") as create_channel:
            create_channel.return_value = stub_channel
            client = datacatalog_v1.DataCatalogClient()

        # Request arguments.
        name = client.tag_template_field_path(
            "[PROJECT]", "[LOCATION]", "[TAG_TEMPLATE]", "[FIELD]")
        new_tag_template_field_id = "newTagTemplateFieldId-1668354591"

        reply = client.rename_tag_template_field(
            name, new_tag_template_field_id)
        assert stub_reply == reply

        # Exactly one call should have been recorded on the stub channel.
        assert len(stub_channel.requests) == 1
        wanted_request = datacatalog_pb2.RenameTagTemplateFieldRequest(
            name=name, new_tag_template_field_id=new_tag_template_field_id)
        sent_request = stub_channel.requests[0][1]
        assert wanted_request == sent_request
    def test_create_tag(self):
        """create_tag returns the stubbed Tag and records one matching request."""
        # Canned message the stub channel will answer with.
        stub_reply = tags_pb2.Tag(
            name="name3373707",
            template="template-1321546630",
            template_display_name="templateDisplayName-532252787",
            column="column-1354837162",
        )

        # Build the client while channel creation is patched to yield the stub.
        stub_channel = ChannelStub(responses=[stub_reply])
        with mock.patch(
                "google.api_core.grpc_helpers.create_channel") as create_channel:
            create_channel.return_value = stub_channel
            client = datacatalog_v1.DataCatalogClient()

        # Request arguments.
        parent = client.tag_path(
            "[PROJECT]", "[LOCATION]", "[ENTRY_GROUP]", "[ENTRY]", "[TAG]")
        tag = {}

        reply = client.create_tag(parent, tag)
        assert stub_reply == reply

        # Exactly one call should have been recorded on the stub channel.
        assert len(stub_channel.requests) == 1
        wanted_request = datacatalog_pb2.CreateTagRequest(
            parent=parent, tag=tag)
        sent_request = stub_channel.requests[0][1]
        assert wanted_request == sent_request
    def test_get_iam_policy(self):
        """get_iam_policy returns the stubbed Policy and records one matching request."""
        # Canned message the stub channel will answer with.
        stub_reply = policy_pb2.Policy(version=351608024, etag=b"21")

        # Build the client while channel creation is patched to yield the stub.
        stub_channel = ChannelStub(responses=[stub_reply])
        with mock.patch(
                "google.api_core.grpc_helpers.create_channel") as create_channel:
            create_channel.return_value = stub_channel
            client = datacatalog_v1.DataCatalogClient()

        # Request argument.
        resource = "resource-341064690"

        reply = client.get_iam_policy(resource)
        assert stub_reply == reply

        # Exactly one call should have been recorded on the stub channel.
        assert len(stub_channel.requests) == 1
        wanted_request = iam_policy_pb2.GetIamPolicyRequest(
            resource=resource)
        sent_request = stub_channel.requests[0][1]
        assert wanted_request == sent_request
    def test_test_iam_permissions(self):
        """test_iam_permissions returns the stubbed response and records one matching request."""
        # Canned (empty) message the stub channel will answer with.
        stub_reply = iam_policy_pb2.TestIamPermissionsResponse()

        # Build the client while channel creation is patched to yield the stub.
        stub_channel = ChannelStub(responses=[stub_reply])
        with mock.patch(
                "google.api_core.grpc_helpers.create_channel") as create_channel:
            create_channel.return_value = stub_channel
            client = datacatalog_v1.DataCatalogClient()

        # Request arguments.
        resource = "resource-341064690"
        permissions = []

        reply = client.test_iam_permissions(resource, permissions)
        assert stub_reply == reply

        # Exactly one call should have been recorded on the stub channel.
        assert len(stub_channel.requests) == 1
        wanted_request = iam_policy_pb2.TestIamPermissionsRequest(
            resource=resource, permissions=permissions)
        sent_request = stub_channel.requests[0][1]
        assert wanted_request == sent_request
    def test_create_tag_template(self):
        """create_tag_template returns the stubbed TagTemplate and records one matching request."""
        # Canned message the stub channel will answer with.
        stub_reply = tags_pb2.TagTemplate(
            name="name3373707",
            display_name="displayName1615086568",
        )

        # Build the client while channel creation is patched to yield the stub.
        stub_channel = ChannelStub(responses=[stub_reply])
        with mock.patch(
                "google.api_core.grpc_helpers.create_channel") as create_channel:
            create_channel.return_value = stub_channel
            client = datacatalog_v1.DataCatalogClient()

        # Request arguments.
        parent = client.location_path("[PROJECT]", "[LOCATION]")
        tag_template_id = "tagTemplateId-2020335141"
        tag_template = {}

        reply = client.create_tag_template(
            parent, tag_template_id, tag_template)
        assert stub_reply == reply

        # Exactly one call should have been recorded on the stub channel.
        assert len(stub_channel.requests) == 1
        wanted_request = datacatalog_pb2.CreateTagTemplateRequest(
            parent=parent,
            tag_template_id=tag_template_id,
            tag_template=tag_template,
        )
        sent_request = stub_channel.requests[0][1]
        assert wanted_request == sent_request
    def test_list_entries(self):
        """list_entries yields the single stubbed entry and records one matching request."""
        # Canned single-page response: one empty entry, empty next-page token.
        stub_reply = datacatalog_pb2.ListEntriesResponse(
            next_page_token="",
            entries=[{}],
        )

        # Build the client while channel creation is patched to yield the stub.
        stub_channel = ChannelStub(responses=[stub_reply])
        with mock.patch(
                "google.api_core.grpc_helpers.create_channel") as create_channel:
            create_channel.return_value = stub_channel
            client = datacatalog_v1.DataCatalogClient()

        # Request argument: resource path built by the client helper.
        parent = client.entry_group_path(
            "[PROJECT]", "[LOCATION]", "[ENTRY_GROUP]")

        # Drain the pager returned by the client into a list.
        found = list(client.list_entries(parent))
        assert len(found) == 1

        assert stub_reply.entries[0] == found[0]

        # Exactly one call should have been recorded on the stub channel.
        assert len(stub_channel.requests) == 1
        wanted_request = datacatalog_pb2.ListEntriesRequest(parent=parent)
        sent_request = stub_channel.requests[0][1]
        assert wanted_request == sent_request
    def test_lookup_entry(self):
        """lookup_entry returns the stubbed Entry and records one default request."""
        # Canned message the stub channel will answer with.
        stub_reply = datacatalog_pb2.Entry(
            name="name3373707",
            linked_resource="linkedResource1544625012",
            user_specified_type="userSpecifiedType-940364963",
            user_specified_system="userSpecifiedSystem-1776119406",
            display_name="displayName1615086568",
            description="description-1724546052",
        )

        # Build the client while channel creation is patched to yield the stub.
        stub_channel = ChannelStub(responses=[stub_reply])
        with mock.patch(
                "google.api_core.grpc_helpers.create_channel") as create_channel:
            create_channel.return_value = stub_channel
            client = datacatalog_v1.DataCatalogClient()

        # Called without arguments, so the expected request is a default one.
        reply = client.lookup_entry()
        assert stub_reply == reply

        # Exactly one call should have been recorded on the stub channel.
        assert len(stub_channel.requests) == 1
        wanted_request = datacatalog_pb2.LookupEntryRequest()
        sent_request = stub_channel.requests[0][1]
        assert wanted_request == sent_request
def create_custom_entry(override_values):
    """Creates a custom entry within an entry group and prints its name.

    Args:
        override_values: Mapping supplied by the testing harness. The keys
            ``project_id``, ``entry_group_name`` and ``entry_id`` replace the
            corresponding sample defaults when present.
    """
    # [START data_catalog_create_custom_entry]
    # Import required modules.
    from google.cloud import datacatalog_v1

    # Google Cloud Platform project.
    project_id = "my-project"
    # Entry group that will be passed as the parent of the new entry.
    # For sample code demonstrating entry group creation, see quickstart:
    # https://cloud.google.com/data-catalog/docs/quickstart-tagging
    entry_group_name = "my_existing_entry_group"
    # Identifier of the entry to be created.
    entry_id = "my_new_entry_id"

    # [END data_catalog_create_custom_entry]

    # The testing harness may substitute its own values for the defaults.
    project_id = override_values.get("project_id", project_id)
    entry_group_name = override_values.get(
        "entry_group_name", entry_group_name)
    entry_id = override_values.get("entry_id", entry_id)

    # [START data_catalog_create_custom_entry]
    datacatalog = datacatalog_v1.DataCatalogClient()

    # Describe the entry to create.
    entry = datacatalog_v1.types.Entry()
    entry.user_specified_system = "onprem_data_system"
    entry.user_specified_type = "onprem_data_asset"
    entry.display_name = "My awesome data asset"
    entry.description = "This data asset is managed by an external system."
    entry.linked_resource = "//my-onprem-server.com/dataAssets/my-awesome-data-asset"

    # Attach a two-column schema; this part is optional.
    for column_name, column_type in (
            ("first_column", "STRING"),
            ("second_column", "DOUBLE"),
    ):
        entry.schema.columns.append(
            datacatalog_v1.types.ColumnSchema(
                column=column_name,
                type_=column_type,
                description="This columns consists of ....",
                mode=None,
            ))

    entry = datacatalog.create_entry(
        parent=entry_group_name, entry_id=entry_id, entry=entry)
    print("Created entry: {}".format(entry.name))
Ejemplo n.º 16
0
def lookup_pubsub_topic(project_id, topic_id):
    """Look up the Data Catalog entry linked to a Pub/Sub topic.

    Args:
        project_id: Project id placed in the linked-resource name.
        topic_id: Topic id placed in the linked-resource name.

    Returns:
        The result of ``DataCatalogClient.lookup_entry`` for the topic's
        linked-resource name.
    """
    from google.cloud import datacatalog_v1

    client = datacatalog_v1.DataCatalogClient()

    linked_resource_template = '//pubsub.googleapis.com/projects/{}/topics/{}'
    linked_resource = linked_resource_template.format(project_id, topic_id)

    return client.lookup_entry(linked_resource=linked_resource)
    def test_lookup_entry_exception(self):
        """lookup_entry raises the CustomException queued on the stub channel."""
        # The stub channel's only queued response is an exception instance.
        failing_channel = ChannelStub(responses=[CustomException()])
        with mock.patch(
                "google.api_core.grpc_helpers.create_channel") as create_channel:
            create_channel.return_value = failing_channel
            client = datacatalog_v1.DataCatalogClient()

        with pytest.raises(CustomException):
            client.lookup_entry()
Ejemplo n.º 18
0
def lookup_bigquery_dataset(project_id, dataset_id):
    # [START datacatalog_lookup_dataset]
    """Look up the Data Catalog entry linked to a BigQuery dataset.

    Args:
        project_id: Project id placed in the linked-resource name.
        dataset_id: Dataset id placed in the linked-resource name.

    Returns:
        The result of ``DataCatalogClient.lookup_entry`` for the dataset's
        linked-resource name.
    """
    from google.cloud import datacatalog_v1

    client = datacatalog_v1.DataCatalogClient()

    linked_resource_template = (
        '//bigquery.googleapis.com/projects/{}/datasets/{}')
    linked_resource = linked_resource_template.format(project_id, dataset_id)

    lookup_request = {'linked_resource': linked_resource}
    return client.lookup_entry(request=lookup_request)
Ejemplo n.º 19
0
def lookup_bigquery_table(project_id, dataset_id, table_id):
    """Look up the Data Catalog entry linked to a BigQuery table.

    Args:
        project_id: Project id placed in the linked-resource name.
        dataset_id: Dataset id placed in the linked-resource name.
        table_id: Table id placed in the linked-resource name.

    Returns:
        The result of ``DataCatalogClient.lookup_entry`` for the table's
        linked-resource name.
    """
    from google.cloud import datacatalog_v1

    client = datacatalog_v1.DataCatalogClient()

    linked_resource_template = (
        '//bigquery.googleapis.com/projects/{}/datasets/{}'
        '/tables/{}')
    linked_resource = linked_resource_template.format(
        project_id, dataset_id, table_id)

    return client.lookup_entry(linked_resource=linked_resource)
Ejemplo n.º 20
0
def lookup_bigquery_dataset_sql_resource(project_id, dataset_id):
    """Look up the Data Catalog entry for a BigQuery dataset by sql_resource.

    Args:
        project_id: Project id placed in the sql_resource string.
        dataset_id: Dataset id placed in the sql_resource string.

    Returns:
        The result of ``DataCatalogClient.lookup_entry`` for the dataset's
        sql_resource string.
    """
    from google.cloud import datacatalog_v1

    client = datacatalog_v1.DataCatalogClient()

    sql_resource_template = 'bigquery.dataset.`{}`.`{}`'
    dataset_sql_resource = sql_resource_template.format(
        project_id, dataset_id)

    return client.lookup_entry(sql_resource=dataset_sql_resource)
Ejemplo n.º 21
0
def lookup_pubsub_topic_sql_resource(project_id, topic_id):
    """Look up the Data Catalog entry for a Pub/Sub topic by sql_resource.

    Args:
        project_id: Project id placed in the sql_resource string.
        topic_id: Topic id placed in the sql_resource string.

    Returns:
        The result of ``DataCatalogClient.lookup_entry`` for the topic's
        sql_resource string.
    """
    from google.cloud import datacatalog_v1

    client = datacatalog_v1.DataCatalogClient()

    sql_resource_template = 'pubsub.topic.`{}`.`{}`'
    topic_sql_resource = sql_resource_template.format(project_id, topic_id)

    return client.lookup_entry(sql_resource=topic_sql_resource)
Ejemplo n.º 22
0
def lookup_bigquery_table_sql_resource(project_id, dataset_id, table_id):
    """Look up the Data Catalog entry for a BigQuery table by sql_resource.

    Args:
        project_id: Project id placed in the sql_resource string.
        dataset_id: Dataset id placed in the sql_resource string.
        table_id: Table id placed in the sql_resource string.

    Returns:
        The result of ``DataCatalogClient.lookup_entry`` for the table's
        sql_resource string.
    """
    from google.cloud import datacatalog_v1

    client = datacatalog_v1.DataCatalogClient()

    sql_resource_template = 'bigquery.table.`{}`.`{}`.`{}`'
    table_sql_resource = sql_resource_template.format(
        project_id, dataset_id, table_id)

    lookup_request = {'sql_resource': table_sql_resource}
    return client.lookup_entry(request=lookup_request)
Ejemplo n.º 23
0
def run(argv=None, save_main_session=True):
    """Run the ingestion pipeline, then record metadata for the output table.

    The pipeline reads ``--input`` as text, parses each line as JSON, applies
    ``DataIngestion`` and writes the result to the BigQuery table named by
    ``--output`` (created if needed, truncated before writing). After the
    pipeline finishes, ``write_metadata`` is called with metadata gathered
    before the run.

    Args:
        argv: Command-line arguments handed to ``parse_known_args``; arguments
            the parser does not recognise are forwarded as pipeline options.
        save_main_session: Value assigned to ``SetupOptions.save_main_session``
            on the pipeline options.
    """
    logging.info("Starting {}".format(project_name))
    logging.info('argv={}'.format(argv))
    parser = argparse.ArgumentParser()
    parser.add_argument('--input',
                        dest='input',
                        default='gs://dataflow-sample',
                        help='Input file to process.')
    parser.add_argument('--output',
                        dest='output',
                        required=True,
                        help='Output file to write results to.')
    known_args, pipeline_args = parser.parse_known_args(argv)
    logging.info('known_args: {}'.format(known_args))
    logging.info('pipeline_args: {}'.format(pipeline_args))

    bucket = known_args.input.split('/')[2]  # 'prod-bucket.renewalytics.io'
    # Everything after "gs://<bucket>/". A plain positive start index is used
    # so an input without an object path yields '' rather than wrapping
    # around to a slice of the whole URL.
    blob = known_args.input[len('gs://') + len(bucket) + 1:]
    metadata = {
        **{
            'code_module': project_name,
            'input': known_args.input,
            'output': known_args.output,
            'updated': datetime.now()
        },
        **convert_storage_metadata_to_catalog(
            get_storage_metadata(storage.Client(), bucket, blob))
    }

    # We use the save_main_session option because one or more DoFn's in this
    # workflow rely on global context (e.g., a module imported at module level).
    pipeline_options = PipelineOptions(pipeline_args)
    pipeline_options.view_as(
        SetupOptions).save_main_session = save_main_session

    # Build the pipeline from the options object configured above so the
    # save_main_session setting is actually applied.
    p = beam.Pipeline(options=pipeline_options)
    (p
     | 'Read from a File' >> beam.io.ReadFromText(known_args.input)
     | 'Load JSON' >> beam.Map(json.loads)
     | 'Custom Parse' >> beam.ParDo(DataIngestion())
     | 'Write to BigQuery' >> beam.io.Write(
         beam.io.WriteToBigQuery(
             known_args.output,
             schema=g_schema,
             create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED,
             # Deletes all data in the BigQuery table before writing.
             write_disposition=beam.io.BigQueryDisposition.WRITE_TRUNCATE)))

    p.run().wait_until_finish()
    # The table id is the last dot-separated component of --output.
    write_metadata(dc=datacatalog_v1.DataCatalogClient(),
                   metadata=metadata,
                   table_id=known_args.output.split('.')[-1])
Ejemplo n.º 24
0
def sampleentry():
    """Look up a fixed public BigQuery table in Data Catalog.

    Credentials are derived from the OAuth2 token stored in ``session``.

    Returns:
        ``str()`` of the entry returned by ``lookup_entry`` for the
        ``bigquery-public-data.covid19_usafacts.summary`` table.
    """
    credentials = credentials_from_session(
        OAuth2Session(client_id, token=session[OAUTH2_TOKEN]))
    client = datacatalog_v1.DataCatalogClient(credentials=credentials)

    linked_resource_template = (
        '//bigquery.googleapis.com/projects/{}/datasets/{}'
        '/tables/{}')
    linked_resource = linked_resource_template.format(
        'bigquery-public-data', 'covid19_usafacts', 'summary')

    found_entry = client.lookup_entry(
        request={'linked_resource': linked_resource})
    return str(found_entry)
Ejemplo n.º 25
0
def entry(project_id, dataset, table):
    """Look up a BigQuery table in Data Catalog.

    Credentials are derived from the OAuth2 token stored in ``session``.

    Args:
        project_id: Project id placed in the linked-resource name.
        dataset: Dataset id placed in the linked-resource name.
        table: Table id placed in the linked-resource name.

    Returns:
        ``str()`` of the entry returned by ``lookup_entry``.
    """
    credentials = credentials_from_session(
        OAuth2Session(client_id, token=session[OAUTH2_TOKEN]))
    client = datacatalog_v1.DataCatalogClient(credentials=credentials)

    linked_resource_template = (
        '//bigquery.googleapis.com/projects/{}/datasets/{}'
        '/tables/{}')
    linked_resource = linked_resource_template.format(
        project_id, dataset, table)

    found_entry = client.lookup_entry(
        request={'linked_resource': linked_resource})
    return str(found_entry)
    def test_update_tag_template_exception(self):
        """update_tag_template raises the CustomException queued on the stub channel."""
        # The stub channel's only queued response is an exception instance.
        failing_channel = ChannelStub(responses=[CustomException()])
        with mock.patch(
                "google.api_core.grpc_helpers.create_channel") as create_channel:
            create_channel.return_value = failing_channel
            client = datacatalog_v1.DataCatalogClient()

        # Request argument.
        tag_template = {}

        with pytest.raises(CustomException):
            client.update_tag_template(tag_template)
    def test_get_iam_policy_exception(self):
        """get_iam_policy raises the CustomException queued on the stub channel."""
        # The stub channel's only queued response is an exception instance.
        failing_channel = ChannelStub(responses=[CustomException()])
        with mock.patch(
                "google.api_core.grpc_helpers.create_channel") as create_channel:
            create_channel.return_value = failing_channel
            client = datacatalog_v1.DataCatalogClient()

        # Request argument.
        resource = "resource-341064690"

        with pytest.raises(CustomException):
            client.get_iam_policy(resource)
    def test_list_entries_exception(self):
        """Iterating list_entries raises the CustomException queued on the stub channel."""
        # The stub channel's only queued response is an exception instance.
        failing_channel = ChannelStub(responses=[CustomException()])
        with mock.patch(
                "google.api_core.grpc_helpers.create_channel") as create_channel:
            create_channel.return_value = failing_channel
            client = datacatalog_v1.DataCatalogClient()

        # Request argument: resource path built by the client helper.
        parent = client.entry_group_path(
            "[PROJECT]", "[LOCATION]", "[ENTRY_GROUP]")

        # The pager is obtained outside the raises block; only consuming it
        # is expected to raise.
        pager = client.list_entries(parent)
        with pytest.raises(CustomException):
            list(pager)
    def test_delete_entry_exception(self):
        """delete_entry raises the CustomException queued on the stub channel."""
        # The stub channel's only queued response is an exception instance.
        failing_channel = ChannelStub(responses=[CustomException()])
        with mock.patch(
                "google.api_core.grpc_helpers.create_channel") as create_channel:
            create_channel.return_value = failing_channel
            client = datacatalog_v1.DataCatalogClient()

        # Request argument: resource path built by the client helper.
        name = client.entry_path(
            "[PROJECT]", "[LOCATION]", "[ENTRY_GROUP]", "[ENTRY]")

        with pytest.raises(CustomException):
            client.delete_entry(name)
    def test_get_tag_template_exception(self):
        """get_tag_template raises the CustomException queued on the stub channel."""
        # The stub channel's only queued response is an exception instance.
        failing_channel = ChannelStub(responses=[CustomException()])
        with mock.patch(
                "google.api_core.grpc_helpers.create_channel") as create_channel:
            create_channel.return_value = failing_channel
            client = datacatalog_v1.DataCatalogClient()

        # Request argument: resource path built by the client helper.
        name = client.tag_template_path(
            "[PROJECT]", "[LOCATION]", "[TAG_TEMPLATE]")

        with pytest.raises(CustomException):
            client.get_tag_template(name)