Example #1
    def test_annotate_video(self):
        # Setup Expected Response
        expected_response = {}
        expected_response = video_intelligence_pb2.AnnotateVideoResponse(
            **expected_response)
        operation = operations_pb2.Operation(
            name='operations/test_annotate_video', done=True)
        operation.response.Pack(expected_response)

        # Mock the API response
        channel = ChannelStub(responses=[operation])
        client = videointelligence_v1p1beta1.VideoIntelligenceServiceClient(
            channel=channel)

        # Setup Request
        input_uri = 'gs://demomaker/cat.mp4'
        features_element = enums.Feature.LABEL_DETECTION
        features = [features_element]

        response = client.annotate_video(
            input_uri=input_uri, features=features)
        result = response.result()
        assert expected_response == result

        assert len(channel.requests) == 1
        expected_request = video_intelligence_pb2.AnnotateVideoRequest(
            input_uri=input_uri, features=features)
        actual_request = channel.requests[0][1]
        assert expected_request == actual_request
Example #2
def analyze_all(path):
    video_client = videointelligence.VideoIntelligenceServiceClient()

    features = [
        videointelligence.enums.Feature.SPEECH_TRANSCRIPTION,
        videointelligence.enums.Feature.EXPLICIT_CONTENT_DETECTION,
        videointelligence.enums.Feature.LABEL_DETECTION,
        videointelligence.enums.Feature.SHOT_CHANGE_DETECTION
    ]

    config = videointelligence.types.SpeechTranscriptionConfig(
        language_code='en-US', enable_automatic_punctuation=True)
    video_context = videointelligence.types.VideoContext(
        speech_transcription_config=config)

    operation = video_client.annotate_video(path,
                                            features=features,
                                            video_context=video_context)

    print('\nProcessing video for all features')

    result = operation.result(timeout=600)

    # There is only one annotation_result since only
    # one video is processed.
    with open('C:\\temp\\cloud-intelligence-results-otezla.json', 'w') as out:
        for ar in result.annotation_results:
            out.write(str(ar))
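A note on the output above: str(ar) produces the protobuf text format rather than JSON, so the .json extension is misleading. A minimal, hedged sketch of a JSON-writing variant (assuming the pre-2.0, protobuf-based client used above; the helper name and output path are placeholders):

import json

from google.protobuf.json_format import MessageToJson


def write_annotation_results_json(result, out_path='results.json'):
    """Serialize every annotation result of an AnnotateVideoResponse as JSON."""
    with open(out_path, 'w') as out:
        # MessageToJson works on plain protobuf messages, which is what the
        # pre-2.0 client returns for each annotation result.
        json.dump(
            [json.loads(MessageToJson(ar)) for ar in result.annotation_results],
            out,
            indent=2)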
Example #3
    def test_annotate_video(self):
        # Setup Expected Response
        expected_response = {}
        expected_response = video_intelligence_pb2.AnnotateVideoResponse(
            **expected_response)
        operation = operations_pb2.Operation(
            name="operations/test_annotate_video", done=True)
        operation.response.Pack(expected_response)

        # Mock the API response
        channel = ChannelStub(responses=[operation])
        patch = mock.patch("google.api_core.grpc_helpers.create_channel")
        with patch as create_channel:
            create_channel.return_value = channel
            client = videointelligence_v1p1beta1.VideoIntelligenceServiceClient(
            )

        # Setup Request
        features_element = enums.Feature.LABEL_DETECTION
        features = [features_element]
        input_uri = "gs://cloud-samples-data/video/cat.mp4"

        response = client.annotate_video(features, input_uri=input_uri)
        result = response.result()
        assert expected_response == result

        assert len(channel.requests) == 1
        expected_request = video_intelligence_pb2.AnnotateVideoRequest(
            features=features, input_uri=input_uri)
        actual_request = channel.requests[0][1]
        assert expected_request == actual_request
Example #4
def speech_transcription(input_uri):
    """Transcribe speech from a video stored on GCS."""
    video_client = videointelligence.VideoIntelligenceServiceClient()

    features = [videointelligence.enums.Feature.SPEECH_TRANSCRIPTION]

    config = videointelligence.types.SpeechTranscriptionConfig(
        language_code='en-US', enable_automatic_punctuation=True)
    video_context = videointelligence.types.VideoContext(
        speech_transcription_config=config)

    operation = video_client.annotate_video(input_uri,
                                            features=features,
                                            video_context=video_context)

    print('\nProcessing video for speech transcription.')

    result = operation.result(timeout=300)

    # There is only one annotation_result since only
    # one video is processed.
    annotation_results = result.annotation_results[0]
    speech_transcription = annotation_results.speech_transcriptions[0]
    alternative = speech_transcription.alternatives[0]

    return alternative
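A minimal usage sketch for the helper above (the GCS URI is a placeholder); the returned alternative exposes the transcript and its confidence, as the other transcription samples below show:

if __name__ == '__main__':
    # Hypothetical invocation; replace with a real GCS video URI.
    alternative = speech_transcription('gs://your-bucket/your-video.mp4')
    print('Transcript: {}'.format(alternative.transcript))
    print('Confidence: {}'.format(alternative.confidence))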
Example #5
def track_objects(path):
    # [START video_object_tracking_beta]
    """Object Tracking."""
    import io

    from google.cloud import videointelligence_v1p2beta1 as videointelligence

    video_client = videointelligence.VideoIntelligenceServiceClient()
    features = [videointelligence.Feature.OBJECT_TRACKING]

    with io.open(path, "rb") as file:
        input_content = file.read()

    # It is recommended to use location_id as 'us-east1' for the best latency
    # due to different types of processors used in this region and others.
    operation = video_client.annotate_video(
        request={
            "features": features,
            "input_content": input_content,
            "location_id": "us-east1",
        })
    print("\nProcessing video for object annotations.")

    result = operation.result(timeout=500)
    print("\nFinished processing.\n")

    # The first result is retrieved because a single video was processed.
    object_annotations = result.annotation_results[0].object_annotations

    # Get only the first annotation for demo purposes.
    object_annotation = object_annotations[0]
    # description is in Unicode
    print(u"Entity description: {}".format(
        object_annotation.entity.description))
    if object_annotation.entity.entity_id:
        print("Entity id: {}".format(object_annotation.entity.entity_id))

    print("Segment: {}s to {}s".format(
        object_annotation.segment.start_time_offset.seconds +
        object_annotation.segment.start_time_offset.microseconds / 1e6,
        object_annotation.segment.end_time_offset.seconds +
        object_annotation.segment.end_time_offset.microseconds / 1e6,
    ))

    print("Confidence: {}".format(object_annotation.confidence))

    # Here we print only the bounding box of the first frame in this segment
    frame = object_annotation.frames[0]
    box = frame.normalized_bounding_box
    print("Time offset of the first frame: {}s".format(
        frame.time_offset.seconds + frame.time_offset.microseconds / 1e6))
    print("Bounding box position:")
    print("\tleft  : {}".format(box.left))
    print("\ttop   : {}".format(box.top))
    print("\tright : {}".format(box.right))
    print("\tbottom: {}".format(box.bottom))
    print("\n")
    # [END video_object_tracking_beta]
    return object_annotations
Example #6
def track_objects(path):
    # [START video_object_tracking_beta]
    """Object Tracking."""
    import io

    from google.cloud import videointelligence_v1p2beta1 as videointelligence

    video_client = videointelligence.VideoIntelligenceServiceClient()
    features = [videointelligence.enums.Feature.OBJECT_TRACKING]

    with io.open(path, 'rb') as file:
        input_content = file.read()

    # It is recommended to use location_id as 'us-east1' for the best latency
    # due to different types of processors used in this region and others.
    operation = video_client.annotate_video(input_content=input_content,
                                            features=features,
                                            location_id='us-east1')
    print('\nProcessing video for object annotations.')

    result = operation.result(timeout=300)
    print('\nFinished processing.\n')

    # The first result is retrieved because a single video was processed.
    object_annotations = result.annotation_results[0].object_annotations

    # Get only the first annotation for demo purposes.
    object_annotation = object_annotations[0]
    print('Entity description: {}'.format(
        object_annotation.entity.description))
    if object_annotation.entity.entity_id:
        print('Entity id: {}'.format(object_annotation.entity.entity_id))

    print('Segment: {}s to {}s'.format(
        object_annotation.segment.start_time_offset.seconds +
        object_annotation.segment.start_time_offset.nanos / 1e9,
        object_annotation.segment.end_time_offset.seconds +
        object_annotation.segment.end_time_offset.nanos / 1e9))

    print('Confidence: {}'.format(object_annotation.confidence))

    # Here we print only the bounding box of the first frame in this segment
    frame = object_annotation.frames[0]
    box = frame.normalized_bounding_box
    print('Time offset of the first frame: {}s'.format(
        frame.time_offset.seconds + frame.time_offset.nanos / 1e9))
    print('Bounding box position:')
    print('\tleft  : {}'.format(box.left))
    print('\ttop   : {}'.format(box.top))
    print('\tright : {}'.format(box.right))
    print('\tbottom: {}'.format(box.bottom))
    print('\n')
    # [END video_object_tracking_beta]
    return object_annotations
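Because track_objects returns the full annotation list while printing only the first entry, a caller could summarize every tracked object with a short loop like this (the local path is a placeholder):

object_annotations = track_objects('path/to/video.mp4')  # hypothetical local file
for annotation in object_annotations:
    # Each annotation carries the detected entity plus an overall confidence.
    print('{} (confidence: {:.2f})'.format(
        annotation.entity.description, annotation.confidence))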
Example #7
def video_detect_text(path):
    # [START video_detect_text_beta]
    """Detect text in a local video."""
    import io

    from google.cloud import videointelligence_v1p2beta1 as videointelligence

    video_client = videointelligence.VideoIntelligenceServiceClient()
    features = [videointelligence.Feature.TEXT_DETECTION]
    video_context = videointelligence.VideoContext()

    with io.open(path, "rb") as file:
        input_content = file.read()

    operation = video_client.annotate_video(
        request={
            "features": features,
            "input_content": input_content,
            "video_context": video_context,
        })

    print("\nProcessing video for text detection.")
    result = operation.result(timeout=300)

    # The first result is retrieved because a single video was processed.
    annotation_result = result.annotation_results[0]

    # Get only the first result
    text_annotation = annotation_result.text_annotations[0]
    print("\nText: {}".format(text_annotation.text))

    # Get the first text segment
    text_segment = text_annotation.segments[0]
    start_time = text_segment.segment.start_time_offset
    end_time = text_segment.segment.end_time_offset
    print("start_time: {}, end_time: {}".format(
        start_time.seconds + start_time.microseconds * 1e-6,
        end_time.seconds + end_time.microseconds * 1e-6,
    ))

    print("Confidence: {}".format(text_segment.confidence))

    # Show the result for the first frame in this segment.
    frame = text_segment.frames[0]
    time_offset = frame.time_offset
    print(
        "Time offset for the first frame: {}".format(time_offset.seconds +
                                                     time_offset.microseconds *
                                                     1e-6))
    print("Rotated Bounding Box Vertices:")
    for vertex in frame.rotated_bounding_box.vertices:
        print("\tVertex.x: {}, Vertex.y: {}".format(vertex.x, vertex.y))
    # [END video_detect_text_beta]
    return annotation_result.text_annotations
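Since the function returns every text annotation, a caller can list all detected strings instead of only the first one; a small sketch (the local path is a placeholder):

text_annotations = video_detect_text('path/to/video.mp4')  # hypothetical local file
for annotation in text_annotations:
    # Each annotation holds the detected string and its timed segments.
    print('{!r}: {} segment(s)'.format(annotation.text, len(annotation.segments)))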
Example #8
def speech_transcription(input_uri, timeout=180):
    # [START video_speech_transcription_gcs_beta]
    """Transcribe speech from a video stored on GCS."""
    from google.cloud import videointelligence_v1p1beta1 as videointelligence

    video_client = videointelligence.VideoIntelligenceServiceClient()

    features = [videointelligence.Feature.SPEECH_TRANSCRIPTION]

    config = videointelligence.SpeechTranscriptionConfig(
        language_code="en-US", enable_automatic_punctuation=True)
    video_context = videointelligence.VideoContext(
        speech_transcription_config=config)

    operation = video_client.annotate_video(
        request={
            "features": features,
            "input_uri": input_uri,
            "video_context": video_context,
        })

    print("\nProcessing video for speech transcription.")

    result = operation.result(timeout)

    # There is only one annotation_result since only
    # one video is processed.
    annotation_results = result.annotation_results[0]
    for speech_transcription in annotation_results.speech_transcriptions:

        # The number of alternatives for each transcription is limited by
        # SpeechTranscriptionConfig.max_alternatives.
        # Each alternative is a different possible transcription
        # and has its own confidence score.
        for alternative in speech_transcription.alternatives:
            print("Alternative level information:")

            print("Transcript: {}".format(alternative.transcript))
            print("Confidence: {}\n".format(alternative.confidence))

            print("Word level information:")
            for word_info in alternative.words:
                word = word_info.word
                start_time = word_info.start_time
                end_time = word_info.end_time
                print("\t{}s - {}s: {}".format(
                    start_time.seconds + start_time.microseconds * 1e-6,
                    end_time.seconds + end_time.microseconds * 1e-6,
                    word,
                ))
Example #9
def video_detect_text(path):
    # [START video_detect_text_beta]
    """Detect text in a local video."""
    import io

    from google.cloud import videointelligence_v1p2beta1 as videointelligence

    video_client = videointelligence.VideoIntelligenceServiceClient()
    features = [videointelligence.enums.Feature.TEXT_DETECTION]
    video_context = videointelligence.types.VideoContext()

    with io.open(path, 'rb') as file:
        input_content = file.read()

    operation = video_client.annotate_video(
        input_content=input_content,  # the bytes of the video file
        features=features,
        video_context=video_context)

    print('\nProcessing video for text detection.')
    result = operation.result(timeout=300)

    # The first result is retrieved because a single video was processed.
    annotation_result = result.annotation_results[0]

    # Get only the first result
    text_annotation = annotation_result.text_annotations[0]
    print('\nText: {}'.format(text_annotation.text))

    # Get the first text segment
    text_segment = text_annotation.segments[0]
    start_time = text_segment.segment.start_time_offset
    end_time = text_segment.segment.end_time_offset
    print('start_time: {}, end_time: {}'.format(
        start_time.seconds + start_time.nanos * 1e-9,
        end_time.seconds + end_time.nanos * 1e-9))

    print('Confidence: {}'.format(text_segment.confidence))

    # Show the result for the first frame in this segment.
    frame = text_segment.frames[0]
    time_offset = frame.time_offset
    print(
        'Time offset for the first frame: {}'.format(time_offset.seconds +
                                                     time_offset.nanos * 1e-9))
    print('Rotated Bounding Box Vertices:')
    for vertex in frame.rotated_bounding_box.vertices:
        print('\tVertex.x: {}, Vertex.y: {}'.format(vertex.x, vertex.y))
    # [END video_detect_text_beta]
    return annotation_result.text_annotations
Example #10
def analyze_shots(path):
    # [START video_analyze_shots]
    """ Detects camera shot changes. """
    video_client = videointelligence.VideoIntelligenceServiceClient()
    features = [videointelligence.enums.Feature.SHOT_CHANGE_DETECTION]
    operation = video_client.annotate_video(path, features=features)
    print('\nProcessing video for shot change annotations:')

    result = operation.result(timeout=90)
    print('\nFinished processing.')

    # first result is retrieved because a single video was processed
    for i, shot in enumerate(result.annotation_results[0].shot_annotations):
        start_time = (shot.start_time_offset.seconds +
                      shot.start_time_offset.nanos / 1e9)
        end_time = (shot.end_time_offset.seconds +
                    shot.end_time_offset.nanos / 1e9)
        print('\tShot {}: {} to {}'.format(i, start_time, end_time))
Example #11
def process_videoaudio_in_gcs(gcs_filepath):
    print('[ INFO ] Transcribing video audio from {}'.format(gcs_filepath))

    video_client = videointelligence.VideoIntelligenceServiceClient()
    features = [videointelligence.enums.Feature.SPEECH_TRANSCRIPTION]

    config = videointelligence.types.SpeechTranscriptionConfig(
        language_code='en-US',
        # max_alternatives=1,
        # filter_profanity=False,
        # speech_contexts=...,
        # audio_tracks=[0],
        enable_automatic_punctuation=True)

    video_context = videointelligence.types.VideoContext(
        speech_transcription_config=config)

    operation = video_client.annotate_video(gcs_filepath,
                                            features=features,
                                            video_context=video_context)

    result = operation.result(timeout=180)

    # There is only one annotation_result since only one video is processed.
    annotation_results = result.annotation_results[0]
    speech_transcription = annotation_results.speech_transcriptions[0]
    alternatives = speech_transcription.alternatives

    text_blob = ''
    for alternative in alternatives:
        print('Transcript: {}'.format(alternative.transcript))
        print('Confidence: {}\n'.format(alternative.confidence))
        print('Word level information:')
        for word_info in alternative.words:
            word = word_info.word
            start_time = word_info.start_time
            end_time = word_info.end_time
            print('\t{}s - {}s: {}'.format(
                start_time.seconds + start_time.nanos * 1e-9,
                end_time.seconds + end_time.nanos * 1e-9, word))

        text_blob = text_blob + ' ' + alternative.transcript
    return annotation_results, text_blob
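A minimal usage sketch for the helper above (bucket name and output file are placeholders); it unpacks the returned tuple and stores the concatenated transcript:

annotation_results, text_blob = process_videoaudio_in_gcs(
    'gs://your-bucket/your-video.mp4')  # hypothetical GCS path
with open('transcript.txt', 'w') as f:  # hypothetical output file
    f.write(text_blob.strip())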
Example #12
def video_detect_text_gcs(input_uri):
    # [START video_detect_text_gcs_beta]
    """Detect text in a video stored on GCS."""
    from google.cloud import videointelligence_v1p2beta1 as videointelligence

    video_client = videointelligence.VideoIntelligenceServiceClient()
    features = [videointelligence.enums.Feature.TEXT_DETECTION]

    operation = video_client.annotate_video(input_uri=input_uri,
                                            features=features)

    print("\nProcessing video for text detection.")
    result = operation.result(timeout=300)

    # The first result is retrieved because a single video was processed.
    annotation_result = result.annotation_results[0]

    # Get only the first result
    text_annotation = annotation_result.text_annotations[0]
    print("\nText: {}".format(text_annotation.text))

    # Get the first text segment
    text_segment = text_annotation.segments[0]
    start_time = text_segment.segment.start_time_offset
    end_time = text_segment.segment.end_time_offset
    print("start_time: {}, end_time: {}".format(
        start_time.seconds + start_time.nanos * 1e-9,
        end_time.seconds + end_time.nanos * 1e-9,
    ))

    print("Confidence: {}".format(text_segment.confidence))

    # Show the result for the first frame in this segment.
    frame = text_segment.frames[0]
    time_offset = frame.time_offset
    print(
        "Time offset for the first frame: {}".format(time_offset.seconds +
                                                     time_offset.nanos * 1e-9))
    print("Rotated Bounding Box Vertices:")
    for vertex in frame.rotated_bounding_box.vertices:
        print("\tVertex.x: {}, Vertex.y: {}".format(vertex.x, vertex.y))
    # [END video_detect_text_gcs_beta]
    return annotation_result.text_annotations
Example #13
def analyze_explicit_content(path):
    # [START video_analyze_explicit_content]
    """ Detects explicit content from the GCS path to a video. """
    video_client = videointelligence.VideoIntelligenceServiceClient()
    features = [videointelligence.enums.Feature.EXPLICIT_CONTENT_DETECTION]

    operation = video_client.annotate_video(path, features=features)
    print('\nProcessing video for explicit content annotations:')

    result = operation.result(timeout=90)
    print('\nFinished processing.')

    # first result is retrieved because a single video was processed
    for frame in result.annotation_results[0].explicit_annotation.frames:
        likelihood = videointelligence.enums.Likelihood(
            frame.pornography_likelihood)
        frame_time = frame.time_offset.seconds + frame.time_offset.nanos / 1e9
        print('Time: {}s'.format(frame_time))
        print('\tpornography: {}'.format(likelihood.name))
Example #14
    def test_annotate_video_exception(self):
        # Setup Response
        error = status_pb2.Status()
        operation = operations_pb2.Operation(
            name='operations/test_annotate_video_exception', done=True)
        operation.error.CopyFrom(error)

        # Mock the API response
        channel = ChannelStub(responses=[operation])
        client = videointelligence_v1p1beta1.VideoIntelligenceServiceClient(
            channel=channel)

        # Setup Request
        input_uri = 'gs://demomaker/cat.mp4'
        features_element = enums.Feature.LABEL_DETECTION
        features = [features_element]

        response = client.annotate_video(
            input_uri=input_uri, features=features)
        exception = response.exception()
        assert exception.errors[0] == error
Example #15
def speech_transcription(input_uri):
    # [START video_speech_transcription_gcs_beta]
    """Transcribe speech from a video stored on GCS."""
    from google.cloud import videointelligence_v1p1beta1 as videointelligence

    video_client = videointelligence.VideoIntelligenceServiceClient()

    features = [videointelligence.enums.Feature.SPEECH_TRANSCRIPTION]

    config = videointelligence.types.SpeechTranscriptionConfig(
        language_code='en-US', enable_automatic_punctuation=True)
    video_context = videointelligence.types.VideoContext(
        speech_transcription_config=config)

    operation = video_client.annotate_video(input_uri,
                                            features=features,
                                            video_context=video_context)

    print('\nProcessing video for speech transcription.')

    result = operation.result(timeout=180)

    # There is only one annotation_result since only
    # one video is processed.
    annotation_results = result.annotation_results[0]
    speech_transcription = annotation_results.speech_transcriptions[0]
    alternative = speech_transcription.alternatives[0]

    print('Transcript: {}'.format(alternative.transcript))
    print('Confidence: {}\n'.format(alternative.confidence))

    print('Word level information:')
    for word_info in alternative.words:
        word = word_info.word
        start_time = word_info.start_time
        end_time = word_info.end_time
        print('\t{}s - {}s: {}'.format(
            start_time.seconds + start_time.nanos * 1e-9,
            end_time.seconds + end_time.nanos * 1e-9, word))
Example #16
    def test_annotate_video_exception(self):
        # Setup Response
        error = status_pb2.Status()
        operation = operations_pb2.Operation(
            name="operations/test_annotate_video_exception", done=True)
        operation.error.CopyFrom(error)

        # Mock the API response
        channel = ChannelStub(responses=[operation])
        patch = mock.patch("google.api_core.grpc_helpers.create_channel")
        with patch as create_channel:
            create_channel.return_value = channel
            client = videointelligence_v1p1beta1.VideoIntelligenceServiceClient(
            )

        # Setup Request
        features_element = enums.Feature.LABEL_DETECTION
        features = [features_element]
        input_uri = "gs://cloud-samples-data/video/cat.mp4"

        response = client.annotate_video(features, input_uri=input_uri)
        exception = response.exception()
        assert exception.errors[0] == error
Example #17
def analyze_labels_file(path):
    # [START video_analyze_labels]
    """Detect labels given a file path."""
    video_client = videointelligence.VideoIntelligenceServiceClient()
    features = [videointelligence.enums.Feature.LABEL_DETECTION]

    with io.open(path, 'rb') as movie:
        input_content = movie.read()

    operation = video_client.annotate_video(features=features,
                                            input_content=input_content)
    print('\nProcessing video for label annotations:')

    result = operation.result(timeout=90)
    print('\nFinished processing.')

    # Process video/segment level label annotations
    segment_labels = result.annotation_results[0].segment_label_annotations
    for i, segment_label in enumerate(segment_labels):
        print('Video label description: {}'.format(
            segment_label.entity.description))
        for category_entity in segment_label.category_entities:
            print('\tLabel category description: {}'.format(
                category_entity.description))

        for i, segment in enumerate(segment_label.segments):
            start_time = (segment.segment.start_time_offset.seconds +
                          segment.segment.start_time_offset.nanos / 1e9)
            end_time = (segment.segment.end_time_offset.seconds +
                        segment.segment.end_time_offset.nanos / 1e9)
            positions = '{}s to {}s'.format(start_time, end_time)
            confidence = segment.confidence
            print('\tSegment {}: {}'.format(i, positions))
            print('\tConfidence: {}'.format(confidence))
        print('\n')

    # Process shot level label annotations
    shot_labels = result.annotation_results[0].shot_label_annotations
    for i, shot_label in enumerate(shot_labels):
        print('Shot label description: {}'.format(
            shot_label.entity.description))
        for category_entity in shot_label.category_entities:
            print('\tLabel category description: {}'.format(
                category_entity.description))

        for i, shot in enumerate(shot_label.segments):
            start_time = (shot.segment.start_time_offset.seconds +
                          shot.segment.start_time_offset.nanos / 1e9)
            end_time = (shot.segment.end_time_offset.seconds +
                        shot.segment.end_time_offset.nanos / 1e9)
            positions = '{}s to {}s'.format(start_time, end_time)
            confidence = shot.confidence
            print('\tSegment {}: {}'.format(i, positions))
            print('\tConfidence: {}'.format(confidence))
        print('\n')

    # Process frame level label annotations
    frame_labels = result.annotation_results[0].frame_label_annotations
    for i, frame_label in enumerate(frame_labels):
        print('Frame label description: {}'.format(
            frame_label.entity.description))
        for category_entity in frame_label.category_entities:
            print('\tLabel category description: {}'.format(
                category_entity.description))

        # Each frame_label_annotation has many frames,
        # here we print information only about the first frame.
        frame = frame_label.frames[0]
        time_offset = frame.time_offset.seconds + frame.time_offset.nanos / 1e9
        print('\tFirst frame time offset: {}s'.format(time_offset))
        print('\tFirst frame confidence: {}'.format(frame.confidence))
        print('\n')
Example #18
def analyze_labels(path):
    # [START video_analyze_labels_gcs]
    """ Detects labels given a GCS path. """
    video_client = videointelligence.VideoIntelligenceServiceClient()
    features = [videointelligence.enums.Feature.LABEL_DETECTION]

    mode = videointelligence.enums.LabelDetectionMode.SHOT_AND_FRAME_MODE
    config = videointelligence.types.LabelDetectionConfig(
        label_detection_mode=mode)
    context = videointelligence.types.VideoContext(
        label_detection_config=config)

    operation = video_client.annotate_video(path,
                                            features=features,
                                            video_context=context)
    print('\nProcessing video for label annotations:')

    result = operation.result(timeout=90)
    print('\nFinished processing.')

    # Process video/segment level label annotations
    segment_labels = result.annotation_results[0].segment_label_annotations
    for i, segment_label in enumerate(segment_labels):
        print('Video label description: {}'.format(
            segment_label.entity.description))
        for category_entity in segment_label.category_entities:
            print('\tLabel category description: {}'.format(
                category_entity.description))

        for i, segment in enumerate(segment_label.segments):
            start_time = (segment.segment.start_time_offset.seconds +
                          segment.segment.start_time_offset.nanos / 1e9)
            end_time = (segment.segment.end_time_offset.seconds +
                        segment.segment.end_time_offset.nanos / 1e9)
            positions = '{}s to {}s'.format(start_time, end_time)
            confidence = segment.confidence
            print('\tSegment {}: {}'.format(i, positions))
            print('\tConfidence: {}'.format(confidence))
        print('\n')

    # Process shot level label annotations
    shot_labels = result.annotation_results[0].shot_label_annotations
    for i, shot_label in enumerate(shot_labels):
        print('Shot label description: {}'.format(
            shot_label.entity.description))
        for category_entity in shot_label.category_entities:
            print('\tLabel category description: {}'.format(
                category_entity.description))

        for i, shot in enumerate(shot_label.segments):
            start_time = (shot.segment.start_time_offset.seconds +
                          shot.segment.start_time_offset.nanos / 1e9)
            end_time = (shot.segment.end_time_offset.seconds +
                        shot.segment.end_time_offset.nanos / 1e9)
            positions = '{}s to {}s'.format(start_time, end_time)
            confidence = shot.confidence
            print('\tSegment {}: {}'.format(i, positions))
            print('\tConfidence: {}'.format(confidence))
        print('\n')

    # Process frame level label annotations
    frame_labels = result.annotation_results[0].frame_label_annotations
    for i, frame_label in enumerate(frame_labels):
        print('Frame label description: {}'.format(
            frame_label.entity.description))
        for category_entity in frame_label.category_entities:
            print('\tLabel category description: {}'.format(
                category_entity.description))

        # Each frame_label_annotation has many frames,
        # here we print information only about the first frame.
        frame = frame_label.frames[0]
        time_offset = (frame.time_offset.seconds +
                       frame.time_offset.nanos / 1e9)
        print('\tFirst frame time offset: {}s'.format(time_offset))
        print('\tFirst frame confidence: {}'.format(frame.confidence))
        print('\n')
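These label-detection helpers are usually driven from a small command-line wrapper; a hedged sketch of such a driver (flag names are illustrative, not taken from the samples above):

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Label detection with the Video Intelligence API.')
    parser.add_argument('path', help='gs:// URI or local video file')
    parser.add_argument('--local', action='store_true',
                        help='treat PATH as a local file and send its bytes')
    args = parser.parse_args()

    if args.local:
        analyze_labels_file(args.path)
    else:
        analyze_labels(args.path)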