Example 1
    def test_annotate_video(self):
        # Setup Expected Response
        expected_response = {}
        expected_response = video_intelligence_pb2.AnnotateVideoResponse(
            **expected_response)
        operation = operations_pb2.Operation(
            name="operations/test_annotate_video", done=True)
        operation.response.Pack(expected_response)

        # Mock the API response
        channel = ChannelStub(responses=[operation])
        patch = mock.patch("google.api_core.grpc_helpers.create_channel")
        with patch as create_channel:
            create_channel.return_value = channel
            client = videointelligence_v1p2beta1.VideoIntelligenceServiceClient()

        # Setup Request
        input_uri = "gs://demomaker/cat.mp4"
        features_element = enums.Feature.LABEL_DETECTION
        features = [features_element]

        response = client.annotate_video(input_uri=input_uri,
                                         features=features)
        result = response.result()
        assert expected_response == result

        assert len(channel.requests) == 1
        expected_request = video_intelligence_pb2.AnnotateVideoRequest(
            input_uri=input_uri, features=features)
        actual_request = channel.requests[0][1]
        assert expected_request == actual_request
Example 2
def recognize_by_google_ocr(video):
    storage_uri = f'gs://{settings.GS_BUCKET_NAME}/{video.name}'
    video_client = videointelligence.VideoIntelligenceServiceClient()
    features = [videointelligence.enums.Feature.TEXT_DETECTION]

    operation = video_client.annotate_video(input_uri=storage_uri,
                                            features=features)
    result = operation.result()
    return result
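A minimal usage sketch for the helper above; it assumes the same `video` object the function expects (only a `name` attribute is needed) and reads the detected text out of the response the same way the other examples do:

# Hedged usage sketch for recognize_by_google_ocr; `video` is assumed to be
# the caller's file/model object with a `name` attribute.
result = recognize_by_google_ocr(video)
for text_annotation in result.annotation_results[0].text_annotations:
    print(text_annotation.text)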
Example 3
import io

from google.cloud import videointelligence


def textDetection(path):

    with io.open(path, 'rb') as file:
        input_content = file.read()
    ResultList = []

    video_client = videointelligence.VideoIntelligenceServiceClient()
    features = [videointelligence.enums.Feature.TEXT_DETECTION]
    video_context = videointelligence.types.VideoContext()

    operation = video_client.annotate_video(
        input_content=input_content,  # the bytes of the video file
        features=features,
        video_context=video_context)

    print('\nProcessing video for text detection (timeout: 300s).')
    result = operation.result(timeout=300)

    # TODO: batch process is possible
    # The first result is retrieved because a single video was processed.
    annotation_result = result.annotation_results[0]

    #extract results from data structure annotation_result
    for text_annotation in annotation_result.text_annotations:
        text_segment = text_annotation.segments[0]
        start_time = text_segment.segment.start_time_offset

        # discard results whose text-detection confidence is below 0.5
        if text_segment.confidence < 0.5:
            continue

        # Data item: [text, time, confidence]
        # TODO: verify that all text is valid UTF-8
        d = Data(text_annotation.text.encode('utf-8'),
                 start_time.seconds + start_time.nanos * 1e-9,
                 text_segment.confidence)
        ResultList.append(d)

    # ResultList.sort(key=setSortedKey)
    ResultList.sort(key=lambda x: x[1])
    return ResultList
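The `Data` record used above is not shown in the snippet; one definition consistent with the positional call `Data(text, time, confidence)` and the sort key `x[1]` is a simple namedtuple (an assumption, not the original class):

from collections import namedtuple

# Assumed shape of the Data record: (text, time, confidence), so that
# x[1] in the sort key above refers to the start time.
Data = namedtuple('Data', ['text', 'time', 'confidence'])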
Example 4
    def test_annotate_video_exception(self):
        # Setup Response
        error = status_pb2.Status()
        operation = operations_pb2.Operation(
            name="operations/test_annotate_video_exception", done=True)
        operation.error.CopyFrom(error)

        # Mock the API response
        channel = ChannelStub(responses=[operation])
        patch = mock.patch("google.api_core.grpc_helpers.create_channel")
        with patch as create_channel:
            create_channel.return_value = channel
            client = videointelligence_v1p2beta1.VideoIntelligenceServiceClient()

        # Setup Request
        features_element = enums.Feature.LABEL_DETECTION
        features = [features_element]
        input_uri = "gs://cloud-samples-data/video/cat.mp4"

        response = client.annotate_video(input_uri=input_uri,
                                         features=features)
        exception = response.exception()
        assert exception.errors[0] == error
Example 5
    def test_annotate_video_exception(self):
        # Setup Response
        error = status_pb2.Status()
        operation = operations_pb2.Operation(
            name='operations/test_annotate_video_exception', done=True)
        operation.error.CopyFrom(error)

        # Mock the API response
        channel = ChannelStub(responses=[operation])
        patch = mock.patch('google.api_core.grpc_helpers.create_channel')
        with patch as create_channel:
            create_channel.return_value = channel
            client = videointelligence_v1p2beta1.VideoIntelligenceServiceClient()

        # Setup Request
        input_uri = 'gs://demomaker/cat.mp4'
        features_element = enums.Feature.LABEL_DETECTION
        features = [features_element]

        response = client.annotate_video(input_uri=input_uri,
                                         features=features)
        exception = response.exception()
        assert exception.errors[0] == error
Example 6
"""
Recognize text.

Detection performs Optical Character Recognition (OCR). It detects and extracts
text within an input video.

Text detection is available for all of the languages supported by the Cloud
Vision API.
"""
"""Detect text in a video stored on GCS."""
from google.cloud import videointelligence_v1p2beta1 as videointelligence

video_client = videointelligence.VideoIntelligenceServiceClient()
features = [videointelligence.enums.Feature.TEXT_DETECTION]

operation = video_client.annotate_video(
    input_uri='gs://deeplens-videos/my_desk.mp4',
    features=features)

print('\nProcessing video for text detection.')
result = operation.result(timeout=300)

# The first result is retrieved because a single video was processed.
annotation_result = result.annotation_results[0]

# Get only the first result
text_annotation = annotation_result.text_annotations[0]
print('\nText: {}'.format(text_annotation.text))

# Get the first text segment
text_segment = text_annotation.segments[0]
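The sample stops after grabbing the first segment; a possible continuation, sketched from the v1p2beta1 TextSegment/TextFrame fields, prints the segment timing, confidence, and per-frame boxes:

# Continuation sketch (not part of the original sample).
start = text_segment.segment.start_time_offset
end = text_segment.segment.end_time_offset
print('Segment: {:.2f}s to {:.2f}s'.format(
    start.seconds + start.nanos / 1e9,
    end.seconds + end.nanos / 1e9))
print('Confidence: {}'.format(text_segment.confidence))

for frame in text_segment.frames:
    time_offset = frame.time_offset
    print('Frame at {:.2f}s: {} bounding-box vertices'.format(
        time_offset.seconds + time_offset.nanos / 1e9,
        len(frame.rotated_bounding_box.vertices)))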
Example 7
def track_objects_gcs(gcs_uri):
    # [START video_object_tracking_gcs_beta]
    """Object Tracking."""
    import json

    from google.cloud import videointelligence_v1p2beta1 as videointelligence

    # It is recommended to use location_id as 'us-east1' for the best latency
    # due to different types of processors used in this region and others.
    video_client = videointelligence.VideoIntelligenceServiceClient()
    features = [videointelligence.enums.Feature.OBJECT_TRACKING]
    operation = video_client.annotate_video(input_uri=gcs_uri,
                                            features=features,
                                            location_id='us-east1')
    print('\nProcessing video for object annotations.')

    result = operation.result(timeout=300)
    print('\nFinished processing.\n')

    # The first result is retrieved because a single video was processed.
    object_annotations = result.annotation_results[0].object_annotations

    # Per-frame bounding-box data collected as JSON
    frameData = {"frames": []}

    # Default to the first annotation; overwritten when a ball is found.
    object_annotation = object_annotations[0]
    for o in object_annotations:
        print('Entity description: {}'.format(o.entity.description))
        print(o.entity.description == 'ball'
              or o.entity.description == 'basketball')
        if o.entity.description == 'ball' or o.entity.description == 'basketball':
            object_annotation = o
            # description is in Unicode
            print('Entity description: {}'.format(
                object_annotation.entity.description))
            if object_annotation.entity.entity_id:
                print('Entity id: {}'.format(
                    object_annotation.entity.entity_id))

            print('Segment: {}s to {}s'.format(
                object_annotation.segment.start_time_offset.seconds +
                object_annotation.segment.start_time_offset.nanos / 1e9,
                object_annotation.segment.end_time_offset.seconds +
                object_annotation.segment.end_time_offset.nanos / 1e9))

            print('Confidence: {}'.format(object_annotation.confidence))
            #video = cv2.VideoWriter('video.avi',-1,1,(1920,1080))

            # Record the bounding box for each frame in this segment
            for frame in object_annotation.frames:
                box = frame.normalized_bounding_box
                time = frame.time_offset.seconds + frame.time_offset.nanos / 1e9
                print('Time offset of this frame: {}s'.format(time))
                """print('Bounding box position:')
                print('\tleft  : {}'.format(box.left))
                print('\ttop   : {}'.format(box.top))
                print('\tright : {}'.format(box.right))
                print('\tbottom: {}'.format(box.bottom))
                print('\n')"""

                frameData["frames"].append({
                    "time": time,
                    "left": box.left,
                    "top": box.top,
                    "right": box.right,
                    "bottom": box.bottom
                })

        # [END video_object_tracking_gcs_beta]

    with open("frameData.json", "w") as write_file:
        json.dump(frameData, write_file)
    return object_annotations
Example 8
def track_objects_gcs_all(gcs_uri):
    # [START video_object_tracking_gcs_beta]
    """Object Tracking."""
    import json

    from google.cloud import videointelligence_v1p2beta1 as videointelligence

    # It is recommended to use location_id as 'us-east1' for the best latency
    # due to different types of processors used in this region and others.
    video_client = videointelligence.VideoIntelligenceServiceClient()
    features = [videointelligence.enums.Feature.OBJECT_TRACKING]
    operation = video_client.annotate_video(input_uri=gcs_uri,
                                            features=features,
                                            location_id='us-east1')
    print('\nProcessing video for object annotations.')

    result = operation.result(timeout=300)
    print('\nFinished processing.\n')
    # The first result is retrieved because a single video was processed.
    object_annotations = result.annotation_results[0].object_annotations

    # Per-frame data grouped by object description
    objectsData = {}

    # Iterate over every annotation in the result.
    for object_annotation in object_annotations:
        # description is in Unicode
        print('Entity description: {}'.format(
            object_annotation.entity.description))
        if object_annotation.entity.entity_id:
            print('Entity id: {}'.format(object_annotation.entity.entity_id))

        print('Segment: {}s to {}s'.format(
            object_annotation.segment.start_time_offset.seconds +
            object_annotation.segment.start_time_offset.nanos / 1e9,
            object_annotation.segment.end_time_offset.seconds +
            object_annotation.segment.end_time_offset.nanos / 1e9))

        print('Confidence: {}'.format(object_annotation.confidence))
        if object_annotation.entity.description not in objectsData:
            objectsData[object_annotation.entity.description] = []
        # Record the bounding box for each frame in this segment
        for frame in object_annotation.frames:
            box = frame.normalized_bounding_box
            time = frame.time_offset.seconds + frame.time_offset.nanos / 1e9
            print('Time offset of this frame: {}s'.format(time))
            objectsData[object_annotation.entity.description].append({
                "time": time,
                "left": box.left,
                "top": box.top,
                "right": box.right,
                "bottom": box.bottom
            })

    # [END video_object_tracking_gcs_beta]
    for obj in objectsData.keys():
        objectsData[obj] = sorted(
            objectsData[obj],
            key=lambda i: i['time'])  #sort by time for each object
    print(objectsData.keys())  # Descriptions for all objects
    with open("objectsData.json", "w") as write_file:
        json.dump(objectsData, write_file)

    return object_annotations
Example 9
def track_objects_all(path):
    # [START video_object_tracking_beta]
    """Object Tracking."""
    """Creates JSON with keys being the object descriptions and values being an array of frames"""
    from google.cloud import videointelligence_v1p2beta1 as videointelligence

    video_client = videointelligence.VideoIntelligenceServiceClient()
    features = [videointelligence.enums.Feature.OBJECT_TRACKING]
    with io.open(path, 'rb') as file:
        input_content = file.read()
    try:
        os.makedirs('data')
    except OSError:
        print('Error: could not create the data directory')

    # It is recommended to use location_id as 'us-east1' for the best latency
    # due to different types of processors used in this region and others.
    operation = video_client.annotate_video(input_content=input_content,
                                            features=features,
                                            location_id='us-east1')
    print('\nProcessing video for object annotations.')

    result = operation.result(timeout=300)
    print('\nFinished processing.\n')
    # The first result is retrieved because a single video was processed.
    object_annotations = result.annotation_results[0].object_annotations

    # Per-frame data grouped by object description
    objectsData = {}

    # Iterate over every annotation in the result.
    for object_annotation in object_annotations:
        # description is in Unicode
        print('Entity description: {}'.format(
            object_annotation.entity.description))
        if object_annotation.entity.entity_id:
            print('Entity id: {}'.format(object_annotation.entity.entity_id))

        print('Segment: {}s to {}s'.format(
            object_annotation.segment.start_time_offset.seconds +
            object_annotation.segment.start_time_offset.nanos / 1e9,
            object_annotation.segment.end_time_offset.seconds +
            object_annotation.segment.end_time_offset.nanos / 1e9))

        print('Confidence: {}'.format(object_annotation.confidence))
        if object_annotation.entity.description not in objectsData:
            objectsData[object_annotation.entity.description] = []
        # Record the bounding box for each frame in this segment
        for frame in object_annotation.frames:
            box = frame.normalized_bounding_box
            time = frame.time_offset.seconds + frame.time_offset.nanos / 1e9
            print('Time offset of this frame: {}s'.format(time))
            objectsData[object_annotation.entity.description].append({
                "time": time,
                "left": box.left,
                "top": box.top,
                "right": box.right,
                "bottom": box.bottom
            })

    # [END video_object_tracking_beta]
    for obj in objectsData.keys():
        objectsData[obj] = sorted(objectsData[obj], key=lambda i: i['time'])
    print(objectsData.keys())
    with open("objectsData.json", "w") as write_file:
        json.dump(objectsData, write_file)

    return object_annotations
Example 10
def track_objects(path):
    # [START video_object_tracking_beta]
    """Object Tracking."""
    import io
    import json
    import os

    import cv2
    from google.cloud import videointelligence_v1p2beta1 as videointelligence

    video_client = videointelligence.VideoIntelligenceServiceClient()
    features = [videointelligence.enums.Feature.OBJECT_TRACKING]

    # Write each source frame three times to produce a 3x slowed copy of
    # the video before sending it to the API.
    vid = cv2.VideoCapture(path)
    success, image = vid.read()
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    v = cv2.VideoWriter('slowed_video.mp4', fourcc, vid.get(cv2.CAP_PROP_FPS),
                        (len(image[0]), len(image)), True)
    while success:
        print('Read a new frame: ', success)
        for _ in range(3):
            v.write(image)
        success, image = vid.read()
    v.release()
    with io.open('slowed_video.mp4', 'rb') as file:
        input_content = file.read()
    cap = cv2.VideoCapture('slowed_video.mp4')
    numFrames = cap.get(cv2.CAP_PROP_FRAME_COUNT)  #count_frames(cap)
    cap.set(cv2.CAP_PROP_POS_AVI_RATIO, 1)
    length = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000
    fps = cap.get(cv2.CAP_PROP_FPS)  # alternatively: numFrames / length
    print(fps)
    print(length)
    try:
        os.makedirs('data')
    except OSError:
        print('Error: could not create the data directory')

    # It is recommended to use location_id as 'us-east1' for the best latency
    # due to different types of processors used in this region and others.
    operation = video_client.annotate_video(input_content=input_content,
                                            features=features,
                                            location_id='us-east1')
    print('\nProcessing video for object annotations.')

    result = operation.result(timeout=300)
    print('\nFinished processing.\n')

    cap.set(cv2.CAP_PROP_POS_FRAMES, 1)
    ret1, img1 = cap.read()
    #print(img1)
    y, x = len(img1), len(img1[0])
    print(str(x) + "x" + str(y))

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    #cv2.VideoWriter_fourcc(*'avc1')
    video = cv2.VideoWriter(
        'tracked_object.mp4', fourcc, 10, (x, y), True
    )  # MUST CHANGE HARD-CODED 10 INTO WHATEVER GOOGLE'S FRAME PROCESSING EXACT RATE IS
    # The first result is retrieved because a single video was processed.
    object_annotations = result.annotation_results[0].object_annotations

    #Array of JSON data by frame
    frameData = {"frames": []}

    # Default to the first annotation; overwritten when a matching object is found.
    object_annotation = object_annotations[0]
    for o in object_annotations:
        print('Entity description: {}'.format(o.entity.description))
        print(o.entity.description == 'ball'
              or o.entity.description == 'basketball')
        if o.entity.description in [
                'ball', 'basketball', 'orange', 'fruit', 'lemon', 'food'
        ]:
            object_annotation = o
            # description is in Unicode
            print('Entity description: {}'.format(
                object_annotation.entity.description))
            if object_annotation.entity.entity_id:
                print('Entity id: {}'.format(
                    object_annotation.entity.entity_id))

            print('Segment: {}s to {}s'.format(
                object_annotation.segment.start_time_offset.seconds +
                object_annotation.segment.start_time_offset.nanos / 1e9,
                object_annotation.segment.end_time_offset.seconds +
                object_annotation.segment.end_time_offset.nanos / 1e9))

            print('Confidence: {}'.format(object_annotation.confidence))
            #video = cv2.VideoWriter('video.avi',-1,1,(1920,1080))

            # Record the bounding box for each frame in this segment
            for frame in object_annotation.frames:
                box = frame.normalized_bounding_box
                time = frame.time_offset.seconds + frame.time_offset.nanos / 1e9
                print('Time offset of this frame: {}s'.format(time))
                """print('Bounding box position:')
                print('\tleft  : {}'.format(box.left))
                print('\ttop   : {}'.format(box.top))
                print('\tright : {}'.format(box.right))
                print('\tbottom: {}'.format(box.bottom))
                print('\n')"""

                frameData["frames"].append({
                    "time": time,
                    "left": box.left,
                    "top": box.top,
                    "right": box.right,
                    "bottom": box.bottom
                })
                frame_no = round(fps * time)
                print(str(fps) + " " + str(time) + " " + str(frame_no))
                total_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
                cap.set(cv2.CAP_PROP_POS_FRAMES, frame_no)
                ret, img = cap.read()

                name = './data/frame' + str(frame_no) + '.jpg'
                print('Creating...' + name)
                print((box.left, box.top))
                cv2.rectangle(img, (int(box.left * x), int(box.top * y)),
                              (int(box.right * x), int(box.bottom * y)),
                              (0, 255, 0), 3)
                #print(img)
                #cv2.imwrite(name, img)
                video.write(img)

        # [END video_object_tracking_beta]

    with open("frameData.json", "w") as write_file:
        json.dump(frameData, write_file)

    cap.release()
    cv2.destroyAllWindows()
    video.release()
    vid.release()
    return object_annotations
Example 11
"""Object Tracking."""
from google.cloud import videointelligence_v1p2beta1 as videointelligence
from google.oauth2 import service_account
import io

path = 'Priya-Help-3da0ec6b2c1e.json'

creds = service_account.Credentials.from_service_account_file(path)
video_client = videointelligence.VideoIntelligenceServiceClient(
    credentials=creds)
features = [videointelligence.enums.Feature.OBJECT_TRACKING]

path = "Tikal.mp4"

with io.open(path, 'rb') as file:
    input_content = file.read()

# It is recommended to use location_id as 'us-east1' for the best latency
# due to different types of processors used in this region and others.
operation = video_client.annotate_video(input_content=input_content,
                                        features=features,
                                        location_id='us-east1')
print('\nProcessing video for object annotations.')

result = operation.result(timeout=300)
print('\nFinished processing.\n')

# The first result is retrieved because a single video was processed.
object_annotations = result.annotation_results[0].object_annotations

# Get only the first annotation for demo purposes.
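The snippet is cut off after the comment above; a possible continuation, using the same fields as the earlier object-tracking examples, would inspect the first annotation:

# Continuation sketch (not part of the original snippet).
object_annotation = object_annotations[0]
print('Entity description: {}'.format(object_annotation.entity.description))
print('Segment: {}s to {}s'.format(
    object_annotation.segment.start_time_offset.seconds +
    object_annotation.segment.start_time_offset.nanos / 1e9,
    object_annotation.segment.end_time_offset.seconds +
    object_annotation.segment.end_time_offset.nanos / 1e9))
print('Confidence: {}'.format(object_annotation.confidence))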