Exemple #1
0
def create_instruction(project_id, data_type, instruction_gcs_uri):
    """Create a PDF-based data labeling instruction in a project.

    The instruction refers to its PDF by Cloud Storage URI, so the file
    should already be uploaded to Google Cloud Storage.

    Args:
        project_id: ID of the Google Cloud project to create the
            instruction in.
        data_type: Value stored in the instruction's ``data_type`` field.
        instruction_gcs_uri: Cloud Storage URI of the instruction PDF.

    Returns:
        The value of ``result()`` on the operation returned by the create
        call. Its main fields are printed as well.
    """
    from google.cloud import datalabeling_v1beta1 as datalabeling
    client = datalabeling.DataLabelingServiceClient()
    # [END datalabeling_create_instruction_beta]
    # A configured test endpoint replaces the default client so that tests
    # on this snippet never trigger any action by a real human.
    if 'DATALABELING_ENDPOINT' in os.environ:
        endpoint_options = ClientOptions(
            api_endpoint=os.getenv('DATALABELING_ENDPOINT'))
        client = datalabeling.DataLabelingServiceClient(
            client_options=endpoint_options)
    # [START datalabeling_create_instruction_beta]

    parent = client.project_path(project_id)

    new_instruction = datalabeling.types.Instruction(
        display_name='YOUR_INSTRUCTION_DISPLAY_NAME',
        description='YOUR_DESCRIPTION',
        data_type=data_type,
        pdf_instruction=datalabeling.types.PdfInstruction(
            gcs_file_uri=instruction_gcs_uri))

    created = client.create_instruction(parent, new_instruction).result()

    # The format of the resource name:
    # project_id/{project_id}/instruction/{instruction_id}
    print('The instruction resource name: {}'.format(created.name))
    print('Display name: {}'.format(created.display_name))
    print('Description: {}'.format(created.description))
    print('Create time:')
    timestamp = created.create_time
    print('\tseconds: {}'.format(timestamp.seconds))
    print('\tnanos: {}'.format(timestamp.nanos))
    data_type_name = datalabeling.enums.DataType(created.data_type).name
    print('Data type: {}'.format(data_type_name))
    print('Pdf instruction:')
    pdf_uri = created.pdf_instruction.gcs_file_uri
    print('\tGcs file uri: {}\n'.format(pdf_uri))

    return created
def test_create_instruction(capsys):
    """Check that create_instruction prints the new resource name.

    The instruction created on the service is deleted in a ``finally``
    clause, so a failing assertion does not leave it behind in the project.
    """
    result = create_instruction.create_instruction(PROJECT_ID, 'IMAGE',
                                                   INSTRUCTION_GCS_URI)
    try:
        out, _ = capsys.readouterr()
        assert 'The instruction resource name: ' in out
    finally:
        # Delete the created instruction whether or not the check passed.
        client = datalabeling.DataLabelingServiceClient()
        client.delete_instruction(result.name)
Exemple #3
0
def label_image(dataset_resource_name, instruction_resource_name,
                annotation_spec_set_resource_name):
    """Start an image classification labeling task on a dataset.

    Args:
        dataset_resource_name: Resource name of the dataset, sent as the
            request's ``parent``.
        instruction_resource_name: Resource name of the instruction placed
            in the human annotation config.
        annotation_spec_set_resource_name: Resource name of an annotation
            spec set that already exists.

    Returns:
        The object returned by ``client.label_image``. Its operation name
        is printed as well.
    """
    from google.cloud import datalabeling_v1beta1 as datalabeling

    client = datalabeling.DataLabelingServiceClient()
    # [END datalabeling_label_image_beta]
    # A configured test endpoint replaces the default client so that tests
    # on this snippet never trigger any action by a real human.
    if "DATALABELING_ENDPOINT" in os.environ:
        endpoint_options = ClientOptions(
            api_endpoint=os.getenv("DATALABELING_ENDPOINT"))
        client = datalabeling.DataLabelingServiceClient(
            client_options=endpoint_options)
    # [START datalabeling_label_image_beta]

    human_config = datalabeling.HumanAnnotationConfig(
        instruction=instruction_resource_name,
        annotated_dataset_display_name="YOUR_ANNOTATED_DATASET_DISPLAY_NAME",
        label_group="YOUR_LABEL_GROUP",
        replica_count=1,
    )

    # annotation_spec_set_resource_name needs to be created beforehand.
    # See the examples in the following:
    # https://cloud.google.com/ai-platform/data-labeling/docs/label-sets
    classification_config = datalabeling.ImageClassificationConfig(
        annotation_spec_set=annotation_spec_set_resource_name,
        allow_multi_label=False,
        answer_aggregation_type=(
            datalabeling.StringAggregationType.MAJORITY_VOTE),
    )

    labeling_request = {
        "parent": dataset_resource_name,
        "basic_config": human_config,
        "feature": datalabeling.LabelImageRequest.Feature.CLASSIFICATION,
        "image_classification_config": classification_config,
    }
    labeling_operation = client.label_image(request=labeling_request)

    print("Label_image operation name: {}".format(
        labeling_operation.operation.name))
    return labeling_operation
Exemple #4
0
def annotation_spec_set():
    """Yield a temporary annotation spec set, then delete it.

    NOTE(review): presumably registered as a pytest fixture; the decorator
    is not visible in this part of the file.
    """
    # set up: create the annotation spec set in the test project
    spec_set = create_annotation_spec_set.create_annotation_spec_set(
        PROJECT_ID)

    yield spec_set

    # tear down: remove the spec set created above
    cleanup_client = datalabeling.DataLabelingServiceClient()
    cleanup_client.delete_annotation_spec_set(spec_set.name)
    def test_delete_annotated_dataset_exception(self):
        """delete_annotated_dataset raises the error queued on the stub."""
        # Mock the API response: the stub's only response is an exception.
        failing_channel = ChannelStub(responses=[CustomException()])
        create_channel_patch = mock.patch(
            "google.api_core.grpc_helpers.create_channel")
        with create_channel_patch as create_channel:
            # The client is built while channel creation is patched, so it
            # receives the stub channel.
            create_channel.return_value = failing_channel
            client = datalabeling_v1beta1.DataLabelingServiceClient()

        with pytest.raises(CustomException):
            client.delete_annotated_dataset()
def test_create_annotation_spec_set(capsys):
    """Check that create_annotation_spec_set prints the new resource name.

    The annotation spec set created on the service is deleted in a
    ``finally`` clause, so a failing check does not leave it behind.
    """
    response = create_annotation_spec_set.create_annotation_spec_set(
        PROJECT_ID)
    try:
        out, _ = capsys.readouterr()
        if 'The annotation_spec_set resource name:' not in out:
            raise AssertionError
    finally:
        # Delete the created annotation spec set whether or not the check
        # passed.
        client = datalabeling.DataLabelingServiceClient()
        client.delete_annotation_spec_set(response.name)
Exemple #7
0
def instruction():
    """Yield a temporary TEXT instruction, then delete it.

    NOTE(review): presumably registered as a pytest fixture; the decorator
    is not visible in this part of the file.
    """
    # set up: create the instruction from the shared sample PDF
    temporary_instruction = create_instruction.create_instruction(
        PROJECT_ID,
        'TEXT',
        'gs://cloud-samples-data/datalabeling/instruction/test.pdf',
    )

    yield temporary_instruction

    # tear down: remove the instruction created above
    cleanup_client = datalabeling.DataLabelingServiceClient()
    cleanup_client.delete_instruction(temporary_instruction.name)
Exemple #8
0
def get_dataset(dataset_resource_name):
    """Fetch a dataset by resource name and print its main fields.

    Args:
        dataset_resource_name: Resource name of the dataset to fetch.
    """
    from google.cloud import datalabeling_v1beta1 as datalabeling
    client = datalabeling.DataLabelingServiceClient()
    # [END datalabeling_get_dataset_beta]
    # A configured test endpoint replaces the default client so that tests
    # on this snippet never trigger any action by a real human.
    if 'DATALABELING_ENDPOINT' in os.environ:
        endpoint_options = ClientOptions(
            api_endpoint=os.getenv('DATALABELING_ENDPOINT'))
        client = datalabeling.DataLabelingServiceClient(
            client_options=endpoint_options)
    # [START datalabeling_get_dataset_beta]

    dataset = client.get_dataset(dataset_resource_name)

    print('The dataset resource name: {}\n'.format(dataset.name))
    print('Display name: {}'.format(dataset.display_name))
    print('Description: {}'.format(dataset.description))
    print('Create time:')
    timestamp = dataset.create_time
    print('\tseconds: {}'.format(timestamp.seconds))
    print('\tnanos: {}'.format(timestamp.nanos))
Exemple #9
0
def create_annotation_spec_set(project_id):
    """Create a two-label annotation spec set in a Google Cloud project.

    Args:
        project_id: ID of the project to create the annotation spec set in.

    Returns:
        The annotation spec set returned by the create call. Its fields
        are printed as well.
    """
    from google.cloud import datalabeling_v1beta1 as datalabeling
    client = datalabeling.DataLabelingServiceClient()
    # [END datalabeling_create_annotation_spec_set_beta]
    # A configured test endpoint replaces the default client so that tests
    # on this snippet never trigger any action by a real human.
    if 'DATALABELING_ENDPOINT' in os.environ:
        endpoint_options = ClientOptions(
            api_endpoint=os.getenv('DATALABELING_ENDPOINT'))
        client = datalabeling.DataLabelingServiceClient(
            client_options=endpoint_options)
    # [START datalabeling_create_annotation_spec_set_beta]

    parent = client.project_path(project_id)

    labels = [
        datalabeling.types.AnnotationSpec(
            display_name='label_1', description='label_description_1'),
        datalabeling.types.AnnotationSpec(
            display_name='label_2', description='label_description_2'),
    ]

    spec_set = datalabeling.types.AnnotationSpecSet(
        display_name='YOUR_ANNOTATION_SPEC_SET_DISPLAY_NAME',
        description='YOUR_DESCRIPTION',
        annotation_specs=labels)

    created = client.create_annotation_spec_set(parent, spec_set)

    # The format of the resource name:
    # project_id/{project_id}/annotationSpecSets/{annotationSpecSets_id}
    print('The annotation_spec_set resource name: {}'.format(created.name))
    print('Display name: {}'.format(created.display_name))
    print('Description: {}'.format(created.description))
    print('Annotation specs:')
    for spec in created.annotation_specs:
        print('\tDisplay name: {}'.format(spec.display_name))
        print('\tDescription: {}\n'.format(spec.description))

    return created
    def test_get_data_item_exception(self):
        """get_data_item raises the error queued on the stub channel."""
        # Mock the API response
        failing_channel = ChannelStub(responses=[CustomException()])
        create_channel_patch = mock.patch(
            "google.api_core.grpc_helpers.create_channel")
        with create_channel_patch as create_channel:
            create_channel.return_value = failing_channel
            client = datalabeling_v1beta1.DataLabelingServiceClient()

        # Setup request
        data_item_name = client.data_item_path(
            "[PROJECT]", "[DATASET]", "[DATA_ITEM]")

        with pytest.raises(CustomException):
            client.get_data_item(data_item_name)
def get_dataset(dataset_resource_name):
    """Fetch a dataset by resource name and print its main fields.

    Args:
        dataset_resource_name: Resource name of the dataset to fetch.
    """
    from google.cloud import datalabeling_v1beta1 as datalabeling
    client = datalabeling.DataLabelingServiceClient()

    dataset = client.get_dataset(dataset_resource_name)

    print('The dataset resource name: {}\n'.format(dataset.name))
    print('Display name: {}'.format(dataset.display_name))
    print('Description: {}'.format(dataset.description))
    print('Create time:')
    timestamp = dataset.create_time
    print('\tseconds: {}'.format(timestamp.seconds))
    print('\tnanos: {}'.format(timestamp.nanos))
Exemple #12
0
def label_video(
    dataset_resource_name, instruction_resource_name, annotation_spec_set_resource_name
):
    """Start an object tracking labeling task on a video dataset.

    Args:
        dataset_resource_name: Resource name of the dataset, sent as the
            request's ``parent``.
        instruction_resource_name: Resource name of the instruction placed
            in the human annotation config.
        annotation_spec_set_resource_name: Resource name of the annotation
            spec set used by the object tracking config.

    Returns:
        The object returned by ``client.label_video``. Its operation name
        is printed as well.
    """
    from google.cloud import datalabeling_v1beta1 as datalabeling

    client = datalabeling.DataLabelingServiceClient()
    # [END datalabeling_label_video_beta]
    # A configured test endpoint replaces the default client so that tests
    # on this snippet never trigger any action by a real human.
    if "DATALABELING_ENDPOINT" in os.environ:
        endpoint_options = ClientOptions(
            api_endpoint=os.getenv("DATALABELING_ENDPOINT"))
        client = datalabeling.DataLabelingServiceClient(
            client_options=endpoint_options)
    # [START datalabeling_label_video_beta]

    human_config = datalabeling.HumanAnnotationConfig(
        instruction=instruction_resource_name,
        annotated_dataset_display_name="YOUR_ANNOTATED_DATASET_DISPLAY_NAME",
        label_group="YOUR_LABEL_GROUP",
        replica_count=1,
    )

    tracking_config = datalabeling.ObjectTrackingConfig(
        annotation_spec_set=annotation_spec_set_resource_name
    )

    labeling_request = {
        "parent": dataset_resource_name,
        "basic_config": human_config,
        "feature": datalabeling.LabelVideoRequest.Feature.OBJECT_TRACKING,
        "object_tracking_config": tracking_config,
    }
    labeling_operation = client.label_video(request=labeling_request)

    print("Label_video operation name: {}".format(
        labeling_operation.operation.name))
    return labeling_operation
Exemple #13
0
def test_label_image(capsys, annotation_spec_set, instruction, dataset):
    """Start an image labeling task, check its output, and cancel it.

    The cancel request sent through the operations client is issued from a
    ``finally`` clause, so a failed assertion cannot leave the labeling
    operation running.
    """
    # Start labeling.
    response = label_image.label_image(dataset.name, instruction.name,
                                       annotation_spec_set.name)
    # Captured before any assertion so the cleanup below can always use it.
    operation_name = response.operation.name

    try:
        out, _ = capsys.readouterr()
        assert 'Label_image operation name: ' in out

        # Cancels the labeling operation.
        response.cancel()
        assert response.cancelled() is True
    finally:
        client = datalabeling.DataLabelingServiceClient()

        # If provided, use a provided test endpoint - this will prevent
        # tests on this snippet from triggering any action by a real human
        if 'DATALABELING_ENDPOINT' in os.environ:
            opts = ClientOptions(
                api_endpoint=os.getenv('DATALABELING_ENDPOINT'))
            client = datalabeling.DataLabelingServiceClient(
                client_options=opts)

        client.transport._operations_client.cancel_operation(operation_name)
    def test_delete_instruction_exception(self):
        """delete_instruction raises the error queued on the stub channel."""
        # Mock the API response
        failing_channel = ChannelStub(responses=[CustomException()])
        create_channel_patch = mock.patch(
            "google.api_core.grpc_helpers.create_channel")
        with create_channel_patch as create_channel:
            create_channel.return_value = failing_channel
            client = datalabeling_v1beta1.DataLabelingServiceClient()

        # Setup request
        instruction_name = client.instruction_path(
            "[PROJECT]", "[INSTRUCTION]")

        with pytest.raises(CustomException):
            client.delete_instruction(instruction_name)
    def test_list_annotated_datasets_exception(self):
        """Iterating list_annotated_datasets raises the stubbed error."""
        failing_channel = ChannelStub(responses=[CustomException()])
        create_channel_patch = mock.patch(
            "google.api_core.grpc_helpers.create_channel")
        with create_channel_patch as create_channel:
            create_channel.return_value = failing_channel
            client = datalabeling_v1beta1.DataLabelingServiceClient()

        # Setup request
        dataset_name = client.dataset_path("[PROJECT]", "[DATASET]")

        # The call itself sits outside pytest.raises; only consuming the
        # paged response is expected to raise.
        pager = client.list_annotated_datasets(dataset_name)
        with pytest.raises(CustomException):
            list(pager)
    def test_get_annotation_spec_set_exception(self):
        """get_annotation_spec_set raises the error queued on the stub."""
        # Mock the API response
        failing_channel = ChannelStub(responses=[CustomException()])
        create_channel_patch = mock.patch(
            "google.api_core.grpc_helpers.create_channel")
        with create_channel_patch as create_channel:
            create_channel.return_value = failing_channel
            client = datalabeling_v1beta1.DataLabelingServiceClient()

        # Setup request
        spec_set_name = client.annotation_spec_set_path(
            "[PROJECT]", "[ANNOTATION_SPEC_SET]")

        with pytest.raises(CustomException):
            client.get_annotation_spec_set(spec_set_name)
    def test_create_annotation_spec_set_exception(self):
        """create_annotation_spec_set raises the error queued on the stub."""
        # Mock the API response
        failing_channel = ChannelStub(responses=[CustomException()])
        create_channel_patch = mock.patch(
            "google.api_core.grpc_helpers.create_channel")
        with create_channel_patch as create_channel:
            create_channel.return_value = failing_channel
            client = datalabeling_v1beta1.DataLabelingServiceClient()

        # Setup request
        project_name = client.project_path("[PROJECT]")
        empty_spec_set = {}

        with pytest.raises(CustomException):
            client.create_annotation_spec_set(project_name, empty_spec_set)
    def test_delete_annotated_dataset(self):
        """delete_annotated_dataset sends exactly one default request."""
        stub_channel = ChannelStub()
        create_channel_patch = mock.patch(
            "google.api_core.grpc_helpers.create_channel")
        with create_channel_patch as create_channel:
            create_channel.return_value = stub_channel
            client = datalabeling_v1beta1.DataLabelingServiceClient()

        client.delete_annotated_dataset()

        # The stub records each request; index 1 of an entry is the message.
        assert len(stub_channel.requests) == 1
        wanted = data_labeling_service_pb2.DeleteAnnotatedDatasetRequest()
        sent = stub_channel.requests[0][1]
        assert wanted == sent
Exemple #19
0
def test_label_text(capsys, annotation_spec_set, instruction, dataset):
    """Start a text labeling task, check its output, and cancel it.

    The cancel request sent through the operations client is issued from a
    ``finally`` clause, so a failed assertion cannot leave the labeling
    operation running.
    """
    # Start labeling.
    response = label_text.label_text(dataset.name, instruction.name,
                                     annotation_spec_set.name)
    # Captured before any assertion so the cleanup below can always use it.
    operation_name = response.operation.name

    try:
        out, _ = capsys.readouterr()
        assert 'Label_text operation name: ' in out

        # Cancels the labeling operation.
        response.cancel()
        assert response.cancelled() is True
    finally:
        client = datalabeling.DataLabelingServiceClient()
        client.transport._operations_client.cancel_operation(operation_name)
Exemple #20
0
def list_datasets(project_id):
    """Print the main fields of every dataset listed for a project.

    Args:
        project_id: ID of the Google Cloud project whose datasets are
            listed.
    """
    from google.cloud import datalabeling_v1beta1 as datalabeling
    client = datalabeling.DataLabelingServiceClient()
    # [END datalabeling_list_datasets_beta]
    # A configured test endpoint replaces the default client so that tests
    # on this snippet never trigger any action by a real human.
    if 'DATALABELING_ENDPOINT' in os.environ:
        endpoint_options = ClientOptions(
            api_endpoint=os.getenv('DATALABELING_ENDPOINT'))
        client = datalabeling.DataLabelingServiceClient(
            client_options=endpoint_options)
    # [START datalabeling_list_datasets_beta]

    parent = client.project_path(project_id)

    for dataset in client.list_datasets(parent):
        # The format of resource name:
        # project_id/{project_id}/datasets/{dataset_id}
        print('The dataset resource name: {}\n'.format(dataset.name))
        print('Display name: {}'.format(dataset.display_name))
        print('Description: {}'.format(dataset.description))
        print('Create time:')
        timestamp = dataset.create_time
        print('\tseconds: {}'.format(timestamp.seconds))
        print('\tnanos: {}'.format(timestamp.nanos))
def list_datasets(project_id):
    """Print the main fields of every dataset listed for a project.

    Args:
        project_id: ID of the Google Cloud project whose datasets are
            listed.
    """
    from google.cloud import datalabeling_v1beta1 as datalabeling
    client = datalabeling.DataLabelingServiceClient()

    parent = client.project_path(project_id)

    for dataset in client.list_datasets(parent):
        # The format of resource name:
        # project_id/{project_id}/datasets/{dataset_id}
        print('The dataset resource name: {}\n'.format(dataset.name))
        print('Display name: {}'.format(dataset.display_name))
        print('Description: {}'.format(dataset.description))
        print('Create time:')
        timestamp = dataset.create_time
        print('\tseconds: {}'.format(timestamp.seconds))
        print('\tnanos: {}'.format(timestamp.nanos))
    def test_delete_dataset(self):
        """delete_dataset sends exactly one request naming the dataset."""
        stub_channel = ChannelStub()
        create_channel_patch = mock.patch(
            "google.api_core.grpc_helpers.create_channel")
        with create_channel_patch as create_channel:
            create_channel.return_value = stub_channel
            client = datalabeling_v1beta1.DataLabelingServiceClient()

        # Setup Request
        dataset_name = client.dataset_path("[PROJECT]", "[DATASET]")

        client.delete_dataset(dataset_name)

        # The stub records each request; index 1 of an entry is the message.
        assert len(stub_channel.requests) == 1
        wanted = data_labeling_service_pb2.DeleteDatasetRequest(
            name=dataset_name)
        sent = stub_channel.requests[0][1]
        assert wanted == sent
Exemple #23
0
def import_data(dataset_resource_name, data_type, input_gcs_uri):
    """Import data described by a CSV file in Cloud Storage into a dataset.

    Args:
        dataset_resource_name: Resource name of the target dataset.
        data_type: Value stored in the input config's ``data_type`` field.
        input_gcs_uri: Cloud Storage URI of the input, declared with MIME
            type ``text/csv``.

    Returns:
        The value of ``result()`` on the operation returned by the import
        call. Its ``dataset`` field is printed as well.
    """
    from google.cloud import datalabeling_v1beta1 as datalabeling
    client = datalabeling.DataLabelingServiceClient()

    input_config = datalabeling.types.InputConfig(
        data_type=data_type,
        gcs_source=datalabeling.types.GcsSource(
            input_uri=input_gcs_uri, mime_type='text/csv'))

    import_operation = client.import_data(dataset_resource_name,
                                          input_config)

    imported = import_operation.result()

    # The format of resource name:
    # project_id/{project_id}/datasets/{dataset_id}
    print('Dataset resource name: {}\n'.format(imported.dataset))

    return imported
Exemple #24
0
def export_data(dataset_resource_name, annotated_dataset_resource_name,
                export_gcs_uri):
    """Export an annotated dataset to a CSV destination in Cloud Storage.

    Args:
        dataset_resource_name: Resource name of the dataset to export from.
        annotated_dataset_resource_name: Resource name of the annotated
            dataset to export.
        export_gcs_uri: Cloud Storage URI to write to, declared with MIME
            type ``text/csv``.

    Returns:
        None. The exported dataset and output URI are printed.
    """
    from google.cloud import datalabeling_v1beta1 as datalabeling
    client = datalabeling.DataLabelingServiceClient()

    gcs_destination = datalabeling.types.GcsDestination(
        output_uri=export_gcs_uri, mime_type='text/csv')

    output_config = datalabeling.types.OutputConfig(
        gcs_destination=gcs_destination)

    response = client.export_data(dataset_resource_name,
                                  annotated_dataset_resource_name,
                                  output_config)

    # Resolve the operation once and reuse the result for both printed
    # fields instead of calling result() for each of them.
    result = response.result()

    print('Dataset ID: {}\n'.format(result.dataset))
    print('Output config:')
    print('\tGcs destination:')
    print('\t\tOutput URI: {}\n'.format(
        result.output_config.gcs_destination.output_uri))
    def test_import_data_exception(self):
        """import_data exposes the operation's error via exception()."""
        # Setup Response: a finished operation carrying an error status.
        status = status_pb2.Status()
        failed_operation = longrunning_operations_pb2.Operation(
            name="operations/test_import_data_exception", done=True)
        failed_operation.error.CopyFrom(status)

        # Mock the API response
        stub_channel = ChannelStub(responses=[failed_operation])
        create_channel_patch = mock.patch(
            "google.api_core.grpc_helpers.create_channel")
        with create_channel_patch as create_channel:
            create_channel.return_value = stub_channel
            client = datalabeling_v1beta1.DataLabelingServiceClient()

        # Setup Request
        dataset_name = client.dataset_path("[PROJECT]", "[DATASET]")
        empty_input_config = {}

        future = client.import_data(dataset_name, empty_input_config)
        raised = future.exception()
        assert raised.errors[0] == status
    def test_label_audio(self):
        """label_audio returns the packed result and sends one request."""
        # Setup Expected Response
        expected_response = dataset_pb2.AnnotatedDataset(
            name="name3373707",
            display_name="displayName1615086568",
            description="description-1724546052",
            example_count=1517063674,
            completed_example_count=612567290,
        )
        finished_operation = longrunning_operations_pb2.Operation(
            name="operations/test_label_audio", done=True)
        finished_operation.response.Pack(expected_response)

        # Mock the API response
        stub_channel = ChannelStub(responses=[finished_operation])
        create_channel_patch = mock.patch(
            "google.api_core.grpc_helpers.create_channel")
        with create_channel_patch as create_channel:
            create_channel.return_value = stub_channel
            client = datalabeling_v1beta1.DataLabelingServiceClient()

        # Setup Request
        parent = client.dataset_path("[PROJECT]", "[DATASET]")
        basic_config = {}
        feature = enums.LabelAudioRequest.Feature.FEATURE_UNSPECIFIED

        future = client.label_audio(parent, basic_config, feature)
        outcome = future.result()
        assert expected_response == outcome

        # The stub records each request; index 1 of an entry is the message.
        assert len(stub_channel.requests) == 1
        wanted = data_labeling_service_pb2.LabelAudioRequest(
            parent=parent, basic_config=basic_config, feature=feature)
        sent = stub_channel.requests[0][1]
        assert wanted == sent
    def test_label_audio_exception(self):
        """label_audio exposes the operation's error via exception()."""
        # Setup Response: a finished operation carrying an error status.
        status = status_pb2.Status()
        failed_operation = longrunning_operations_pb2.Operation(
            name="operations/test_label_audio_exception", done=True)
        failed_operation.error.CopyFrom(status)

        # Mock the API response
        stub_channel = ChannelStub(responses=[failed_operation])
        create_channel_patch = mock.patch(
            "google.api_core.grpc_helpers.create_channel")
        with create_channel_patch as create_channel:
            create_channel.return_value = stub_channel
            client = datalabeling_v1beta1.DataLabelingServiceClient()

        # Setup Request
        parent = client.dataset_path("[PROJECT]", "[DATASET]")
        basic_config = {}
        feature = enums.LabelAudioRequest.Feature.FEATURE_UNSPECIFIED

        future = client.label_audio(parent, basic_config, feature)
        raised = future.exception()
        assert raised.errors[0] == status
    def test_export_data(self):
        """export_data returns the packed result and sends one request."""
        # Setup Expected Response
        expected_response = proto_operations_pb2.ExportDataOperationResponse(
            dataset="dataset1443214456",
            total_count=407761836,
            export_count=529256252,
        )
        finished_operation = longrunning_operations_pb2.Operation(
            name="operations/test_export_data", done=True)
        finished_operation.response.Pack(expected_response)

        # Mock the API response
        stub_channel = ChannelStub(responses=[finished_operation])
        create_channel_patch = mock.patch(
            "google.api_core.grpc_helpers.create_channel")
        with create_channel_patch as create_channel:
            create_channel.return_value = stub_channel
            client = datalabeling_v1beta1.DataLabelingServiceClient()

        # Setup Request
        dataset_name = client.dataset_path("[PROJECT]", "[DATASET]")
        annotated_dataset = "annotatedDataset-1407812655"
        output_config = {}

        future = client.export_data(dataset_name, annotated_dataset,
                                    output_config)
        outcome = future.result()
        assert expected_response == outcome

        # The stub records each request; index 1 of an entry is the message.
        assert len(stub_channel.requests) == 1
        wanted = data_labeling_service_pb2.ExportDataRequest(
            name=dataset_name,
            annotated_dataset=annotated_dataset,
            output_config=output_config)
        sent = stub_channel.requests[0][1]
        assert wanted == sent
Exemple #29
0
def create_annotation_spec_set(project_id):
    """Create the 'UCF-101 Full Label Set' annotation spec set in a project.

    Each label name in the embedded JSON list becomes one annotation spec
    whose display name and description are both that label.

    Args:
        project_id: ID of the project to create the annotation spec set in.

    Returns:
        The annotation spec set returned by the create call. Its fields
        are printed as well.
    """
    from google.cloud import datalabeling_v1beta1 as datalabeling
    client = datalabeling.DataLabelingServiceClient()

    parent = client.project_path(project_id)

    label_names = json.loads(
        '["Apply Eye Makeup", "Apply Lipstick", "Archery", "Baby Crawling", "Balance Beam", "Band Marching", "Baseball Pitch", "Basketball Shooting", "Basketball Dunk", "Bench Press", "Biking", "Billiards Shot", "Blow Dry Hair", "Blowing Candles", "Body Weight Squats", "Bowling", "Boxing Punching Bag", "Boxing Speed Bag", "Breaststroke", "Brushing Teeth", "Clean and Jerk", "Cliff Diving", "Cricket Bowling", "Cricket Shot", "Cutting In Kitchen", "Diving", "Drumming", "Fencing", "Field Hockey Penalty", "Floor Gymnastics", "Frisbee Catch", "Front Crawl", "Golf Swing", "Haircut", "Hammer Throw", "Hammering", "Handstand Pushups", "Handstand Walking", "Head Massage", "High Jump", "Horse Race", "Horse Riding", "Hula Hoop", "Ice Dancing", "Javelin Throw", "Juggling Balls", "Jump Rope", "Jumping Jack", "Kayaking", "Knitting", "Long Jump", "Lunges", "Military Parade", "Mixing Batter", "Mopping Floor", "Nun chucks", "Parallel Bars", "Pizza Tossing", "Playing Guitar", "Playing Piano", "Playing Tabla", "Playing Violin", "Playing Cello", "Playing Daf", "Playing Dhol", "Playing Flute", "Playing Sitar", "Pole Vault", "Pommel Horse", "Pull Ups", "Punch", "Push Ups", "Rafting", "Rock Climbing Indoor", "Rope Climbing", "Rowing", "Salsa Spins", "Shaving Beard", "Shotput", "Skate Boarding", "Skiing", "Skijet", "Sky Diving", "Soccer Juggling", "Soccer Penalty", "Still Rings", "Sumo Wrestling", "Surfing", "Swing", "Table Tennis Shot", "Tai Chi", "Tennis Swing", "Throw Discus", "Trampoline Jumping", "Typing", "Uneven Bars", "Volleyball Spiking", "Walking with a dog", "Wall Pushups", "Writing On Board", "Yo Yo"]'
    )

    spec_set = datalabeling.types.AnnotationSpecSet(
        display_name='UCF-101 Full Label Set',
        description='Labels for the UCF-101 dataset',
        annotation_specs=[
            datalabeling.types.AnnotationSpec(
                display_name=label,
                description=label,
            ) for label in label_names
        ])

    created = client.create_annotation_spec_set(parent, spec_set)

    # The format of the resource name:
    # project_id/{project_id}/annotationSpecSets/{annotationSpecSets_id}
    print('The annotation_spec_set resource name: {}'.format(created.name))
    print('Display name: {}'.format(created.display_name))
    print('Description: {}'.format(created.description))
    print('Annotation specs:')
    for spec in created.annotation_specs:
        print('\tDisplay name: {}'.format(spec.display_name))
        print('\tDescription: {}\n'.format(spec.description))

    return created
def create_dataset(project_id):
    """Create a dataset in a Google Cloud project and print its fields.

    Args:
        project_id: ID of the project to create the dataset in.

    Returns:
        The dataset returned by the create call.
    """
    from google.cloud import datalabeling_v1beta1 as datalabeling
    client = datalabeling.DataLabelingServiceClient()

    parent = client.project_path(project_id)

    new_dataset = datalabeling.types.Dataset(
        display_name='YOUR_ANNOTATION_SPEC_SET_DISPLAY_NAME',
        description='YOUR_DESCRIPTION')

    created = client.create_dataset(parent, new_dataset)

    # The format of resource name:
    # project_id/{project_id}/datasets/{dataset_id}
    print('The dataset resource name: {}\n'.format(created.name))
    print('Display name: {}'.format(created.display_name))
    print('Description: {}'.format(created.description))
    print('Create time:')
    timestamp = created.create_time
    print('\tseconds: {}'.format(timestamp.seconds))
    print('\tnanos: {}'.format(timestamp.nanos))

    return created