Python ComputerVisionClient.read_in_stream 예제들, azure.cognitiveservices.vision.computervision.ComputerVisionClient.read_in_stream Python 예제들

예제 #1

0

파일 보기

파일: ocr-extract.py 프로젝트: PranavViswanathan/Azure-Cognitive-Services-with-Python---MLSA

def azure_get_data():
    subscription_key = "ACCESS KEY"
    endpoint = "ENDPOINT URL"
    computervision_client = ComputerVisionClient(
        endpoint, CognitiveServicesCredentials(subscription_key))
    #remote_image_url = "https://raw.githubusercontent.com/MicrosoftDocs/azure-docs/master/articles/cognitive-services/Computer-vision/Images/readsample.jpg"

    # Provide the image path
    local_image_handwritten_path = "sample4.jpg"
    local_image_handwritten = open(local_image_handwritten_path, "rb")
    recognize_handwriting_results = computervision_client.read_in_stream(
        local_image_handwritten, raw=True)
    operation_location_remote = recognize_handwriting_results.headers[
        "Operation-Location"]
    # Get Operation ID
    operation_id = operation_location_remote.split("/")[-1]

    # Repeat if Operation ID result status is 'notStarted' or 'running'
    while True:
        get_handw_text_results = computervision_client.get_read_result(
            operation_id)
        if get_handw_text_results.status not in ['notStarted', 'running']:
            break
        time.sleep(1)

    # Print the detected text, line by line
    if get_handw_text_results.status == OperationStatusCodes.succeeded:
        print(type(get_handw_text_results.analyze_result.read_results))
        for text_result in get_handw_text_results.analyze_result.read_results:
            for line in text_result.lines:
                print(line.text)

예제 #2

0

파일 보기

    def image_to_text(self, imagestream):

        result = []
        computervision_client = ComputerVisionClient(
            self.endpoint, CognitiveServicesCredentials(self.subscription_key))
        # Get an image with text
        recognize_handw_results = computervision_client.read_in_stream(
            imagestream, raw=True)
        # Get the operation location (URL with an ID at the end) from the response
        operation_location_remote = recognize_handw_results.headers[
            "Operation-Location"]
        # Grab the ID from the URL
        operation_id = operation_location_remote.split("/")[-1]
        while True:
            get_handw_text_results = computervision_client.get_read_result(
                operation_id)
            if get_handw_text_results.status not in ['notStarted', 'running']:
                break
            time.sleep(1)
        if get_handw_text_results.status == OperationStatusCodes.succeeded:
            for text_result in get_handw_text_results.analyze_result.read_results:
                for line in text_result.lines:
                    result.append(line.text)

        return result

예제 #3

0

파일 보기

파일: convert.py 프로젝트: Bhavna288/ComputerVision

def extractFromHandwritten(file_name):
    # Read the image file
    image_path = os.path.join('static', 'uploads', file_name)
    image_stream = open(image_path, "rb")
    resultString = ""

    # Get a client for the computer vision service
    computervision_client = ComputerVisionClient(
        cog_endpoint, CognitiveServicesCredentials(cog_key))

    # Submit a request to read printed text in the image and get the operation ID
    read_operation = computervision_client.read_in_stream(image_stream,
                                                          raw=True)
    operation_location = read_operation.headers["Operation-Location"]
    operation_id = operation_location.split("/")[-1]

    # Wait for the asynchronous operation to complete
    while True:
        read_results = computervision_client.get_read_result(operation_id)
        if read_results.status not in [OperationStatusCodes.running]:
            break
        time.sleep(1)

    # If the operation was successfuly, process the text line by line
    if read_results.status == OperationStatusCodes.succeeded:
        for result in read_results.analyze_result.read_results:
            for line in result.lines:
                resultString += line.text + "\n"
                print(line.text)

    return resultString

예제 #4

0

파일 보기

파일: ocr_file_img.py 프로젝트: BayernMuller/Examples

from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from azure.cognitiveservices.vision.computervision.models import OperationStatusCodes
from msrest.authentication import CognitiveServicesCredentials
from time import sleep
import cv2

subscription_key = "7a2607ca622b4e449763a5417111f329"
endpoint = "https://bayern.cognitiveservices.azure.com/"
img_path = 'C:\\Users\\cjjun\\Desktop\\img2.jpg'

client = ComputerVisionClient(endpoint,
                              CognitiveServicesCredentials(subscription_key))

img = open(img_path, 'rb')
recognize_results = client.read_in_stream(img, raw=True)
img.close()

location_remote = recognize_results.headers["Operation-Location"]
operation_id = location_remote.split("/")[-1]

while True:
    result = client.get_read_result(operation_id)
    if result.status not in ['notStarted', 'running']:
        break
    sleep(0.5)

img_text = cv2.imread(img_path)
if result.status == OperationStatusCodes.succeeded:
    for text_result in result.analyze_result.read_results:
        for line in text_result.lines:
            box = [int(i) for i in line.bounding_box]

예제 #5

0

파일 보기

END - Read File - remote
'''

'''
OCR: Read File using the Read API, extract text - local
This example extracts text from a local image, then prints results.
This API call can also recognize remote image text (shown in next example, Read File - remote).
'''
print("===== Read File - local =====")
# Get image path
read_image_path = os.path.join (images_folder, "printed_text.jpg")
# Open the image
read_image = open(read_image_path, "rb")

# Call API with image and raw response (allows you to get the operation location)
read_response = computervision_client.read_in_stream(read_image, raw=True)
# Get the operation location (URL with ID as last appendage)
read_operation_location = read_response.headers["Operation-Location"]
# Take the ID off and use to get results
operation_id = read_operation_location.split("/")[-1]

# Call the "GET" API and wait for the retrieval of the results
while True:
    read_result = computervision_client.get_read_result(operation_id)
    if read_result.status.lower () not in ['notstarted', 'running']:
        break
    print ('Waiting for result...')
    time.sleep(10)

# Print results, line by line
if read_result.status == OperationStatusCodes.succeeded:

예제 #6

0

파일 보기

def main():
    tello = Tello()
    tello.connect()
    tello.streamon()

    frame_read = tello.get_frame_read()

    tello.takeoff()
    time.sleep(4)
    tello.send_rc_control(0, 0, 70, 0)
    time.sleep(1.5)
    tello.send_rc_control(0, 0, 0, 0)

    try:
        # Configure Azure Computer vision
        computervision_client = ComputerVisionClient(
            COGNITIVESVC_ENDPOINT,
            CognitiveServicesCredentials(SUBSCRIPTION_KEY))

        # Configure MediaPipe hands recognizer
        mp_drawing = mp.solutions.drawing_utils
        mp_hands = mp.solutions.hands
        hands = mp_hands.Hands(max_num_hands=2,
                               min_detection_confidence=0.8,
                               min_tracking_confidence=0.5)

        while True:
            # Get frame
            original_frame = frame_read.frame

            frame = cv2.flip(original_frame, 1)
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame.flags.writeable = False  # Enabled pass by reference and improves performance

            num_of_fingers = -1
            results = hands.process(frame)
            if results.multi_hand_landmarks:
                for hand_landmarks in results.multi_hand_landmarks:
                    mp_drawing.draw_landmarks(frame, hand_landmarks,
                                              mp_hands.HAND_CONNECTIONS)

                    num_of_fingers = num_of_fingers + count_finger(
                        hand_landmarks, mp_hands.HandLandmark.PINKY_MCP,
                        mp_hands.HandLandmark.THUMB_TIP,
                        mp_hands.HandLandmark.THUMB_IP)

                    num_of_fingers = num_of_fingers + count_finger(
                        hand_landmarks, mp_hands.HandLandmark.WRIST,
                        mp_hands.HandLandmark.INDEX_FINGER_TIP,
                        mp_hands.HandLandmark.INDEX_FINGER_PIP)

                    num_of_fingers = num_of_fingers + count_finger(
                        hand_landmarks, mp_hands.HandLandmark.WRIST,
                        mp_hands.HandLandmark.MIDDLE_FINGER_TIP,
                        mp_hands.HandLandmark.MIDDLE_FINGER_PIP)

                    num_of_fingers = num_of_fingers + count_finger(
                        hand_landmarks, mp_hands.HandLandmark.WRIST,
                        mp_hands.HandLandmark.RING_FINGER_TIP,
                        mp_hands.HandLandmark.RING_FINGER_PIP)

                    num_of_fingers = num_of_fingers + count_finger(
                        hand_landmarks, mp_hands.HandLandmark.WRIST,
                        mp_hands.HandLandmark.PINKY_TIP,
                        mp_hands.HandLandmark.PINKY_PIP)

            print("Number of fingers: " + str(num_of_fingers))

            # Show image
            cv2.imshow('Webcam', frame)

            # Exit when user press ESC key
            k = cv2.waitKey(3) & 0xFF
            if k == 27:  # ESC Key
                break

            tello.send_rc_control(0, 0, 0, 0)

            time.sleep(1)

            if num_of_fingers > 0 and num_of_fingers < 10:
                while num_of_fingers > 0:
                    # Get frame
                    ocr_frame = frame_read.frame
                    ocr_frame.flags.writeable = False

                    # Send frame to Microsoft Azure Cognitive Services to detect text in the image
                    _, buf = cv2.imencode(".jpg", ocr_frame)
                    stream = io.BytesIO(buf)
                    recognize_handw_results = computervision_client.read_in_stream(
                        stream, raw=True)

                    # OCR is async. Wait until is completed.
                    operation_location_remote = recognize_handw_results.headers[
                        "Operation-Location"]
                    operation_id = operation_location_remote.split("/")[-1]
                    while True:
                        get_handw_text_results = computervision_client.get_read_result(
                            operation_id)
                        if get_handw_text_results.status not in [
                                'notStarted', 'running'
                        ]:
                            break
                        tello.send_rc_control(0, 0, 0, 0)
                        time.sleep(1)

                    # Mark the detected text, line by line
                    xg = yg = wg = hg = None
                    if get_handw_text_results.status == OperationStatusCodes.succeeded:
                        for text_result in get_handw_text_results.analyze_result.read_results:
                            for line in text_result.lines:
                                for word in line.words:
                                    boundingbox = word.bounding_box
                                    if str(num_of_fingers) in word.text:
                                        xg, yg, wg, hg = (int(boundingbox[0]),
                                                          int(boundingbox[1]),
                                                          int(boundingbox[2] -
                                                              boundingbox[0]),
                                                          int(boundingbox[7] -
                                                              boundingbox[1]))
                                        cv2.rectangle(ocr_frame, (xg, yg),
                                                      (xg + wg, yg + hg),
                                                      (0, 255, 0), 2)
                                    else:
                                        nxg, nyg, nwg, nhg = (
                                            int(boundingbox[0]),
                                            int(boundingbox[1]),
                                            int(boundingbox[2] -
                                                boundingbox[0]),
                                            int(boundingbox[7] -
                                                boundingbox[1]))
                                        cv2.rectangle(ocr_frame, (nxg, nyg),
                                                      (nxg + nwg, nyg + nhg),
                                                      (0, 0, 255), 2)

                    cv2.imshow('Webcam', ocr_frame)

                    # Exit when user press ESC key
                    k = cv2.waitKey(3) & 0xFF
                    if k == 27:  # ESC Key
                        break

                    velocity_fb = velocity_lr = velocity_ud = velocity_yaw = 0
                    if not xg is None:
                        # Move the drone
                        object_center_x = int(xg + (wg / 2))
                        object_center_y = int(yg + (hg / 2))
                        object_size = ((wg**2) + (hg**2))**0.5  # Fast sqrt

                        object_distance = DESIRED_OBJECT_SIZE - object_size
                        if not object_distance == 0:
                            velocity_fb = int(
                                MAX_SPEED_FORWARDBACK *
                                (object_distance / DESIRED_OBJECT_SIZE))

                        frame_shape = ocr_frame.shape
                        # I wrote 'object_center_y + 200' because the camera of Tello drone is slightly inclined to down and that causes the drone to go too high
                        velocity_ud = calculate_velocity(
                            frame_shape[1], object_center_y + 200,
                            MAX_SPEED_UPDOWN * -1)
                        velocity_lr = calculate_velocity(
                            frame_shape[0], object_center_x, MAX_SPEED_LR)

                        if abs(velocity_fb) < 5 and abs(
                                velocity_ud) < 5 and abs(velocity_yaw) < 5:
                            time.sleep(5)
                            break

                        if not velocity_lr == velocity_fb == velocity_ud == velocity_yaw == 0:
                            tello.send_rc_control(velocity_lr, velocity_fb,
                                                  velocity_ud, velocity_yaw)

                time.sleep(MOV_TIME)
                tello.send_rc_control(0, 0, 0, 0)
    finally:
        tello.land()
        tello.streamoff()
        tello.end()

        # When everything done, release the capture
        cv2.destroyAllWindows()

예제 #7

0

파일 보기

파일: ComputerVisionQuickstart.py 프로젝트: zarish7860/cognitive-services-quickstart-code

END - Generate Thumbnail
'''

'''
Read File, recognize handwritten text - local
This example extracts text from a handwritten local image, then prints results.
This API call can also recognize remote image text (shown in next example, Read File - remote).
'''
print("===== Read File - local =====")
# Get image of handwriting
local_image_handwritten_path = os.path.join (images_folder, "handwritten_text.jpg")
# Open the image
local_image_handwritten = open(local_image_handwritten_path, "rb")

# Call API with image and raw response (allows you to get the operation location)
recognize_handwriting_results = computervision_client.read_in_stream(local_image_handwritten, raw=True)
# Get the operation location (URL with ID as last appendage)
operation_location_local = recognize_handwriting_results.headers["Operation-Location"]
# Take the ID off and use to get results
operation_id_local = operation_location_local.split("/")[-1]

# Call the "GET" API and wait for the retrieval of the results
while True:
    recognize_handwriting_result = computervision_client.get_read_result(operation_id_local)
    if recognize_handwriting_result.status.lower () not in ['notstarted', 'running']:
        break
    print ('Waiting for result...')
    time.sleep(10)

# Print results, line by line
if recognize_handwriting_result.status == OperationStatusCodes.succeeded:

예제 #8

0

파일 보기

# Asynchronous call.

if is_url(url):
    request = requests.get(url)
    if request.status_code != 200:
        sys.exit("The URL does not appear to exist. Please check.\n" f"{url}")
    try:
        rawHttpResponse = client.read(url, raw=raw)
    except Exception as e:
        catch_exception(e, url)

else:
    path = os.path.join(get_cmd_cwd(), url)
    with open(path, 'rb') as fstream:
        try:
            rawHttpResponse = client.read_in_stream(fstream, raw=raw)
        except Exception as e:
            catch_exception(e, path)

# Get ID from returned headers.

operationLocation = rawHttpResponse.headers["Operation-Location"]
idLocation = len(operationLocation) - numberOfCharsInOperationId
operationId = operationLocation[idLocation:]

# Get the result.

while True:
    result = client.get_read_result(operationId)
    if result.status not in [
            OperationStatusCodes.not_started, OperationStatusCodes.running

예제 #9

0

파일 보기

파일: recipe_start.py 프로젝트: puthurr/ocrlayout-recipes

def azure_batch_read_in_stream(filename=None, callOCR=True, verbose=False):
    """RecognizeTextUsingBatchReadAPI.
    This will recognize text of the given image using the Batch Read API.
    """
    import time
    #
    # Azure Specific
    #
    SUBSCRIPTION_KEY_ENV_NAME = os.environ.get(
        "COMPUTERVISION_SUBSCRIPTION_KEY", None)
    COMPUTERVISION_LOCATION = os.environ.get("COMPUTERVISION_LOCATION",
                                             "westeurope")

    azure_client = ComputerVisionClient(
        endpoint="https://" + COMPUTERVISION_LOCATION +
        ".api.cognitive.microsoft.com/",
        credentials=CognitiveServicesCredentials(SUBSCRIPTION_KEY_ENV_NAME))
    print("AZURE Image Name {}".format(filename))
    p = Path(filename)
    (imgname, imgext) = os.path.splitext(p.name)

    # Check if we have a cached ocr response already for this provider
    invokeOCR = callOCR
    if not callOCR:
        if not os.path.exists(
                os.path.join(RESULTS_FOLDER, imgname + ".azure.read.json")):
            invokeOCR = True

    if invokeOCR:
        # Azure Computer Vision Call
        # with open(os.path.join(IMAGES_FOLDER, filename), "rb") as image_stream:
        with open(filename, "rb") as image_stream:
            job = azure_client.read_in_stream(image=image_stream, raw=True)
        operation_id = job.headers['Operation-Location'].split('/')[-1]

        image_analysis = azure_client.get_read_result(operation_id, raw=True)
        while image_analysis.output.status in ['notstarted', 'running']:
            time.sleep(1)
            image_analysis = azure_client.get_read_result(
                operation_id=operation_id, raw=True)
        print("\tJob completion is: {}".format(image_analysis.output.status))
        print("\tRecognized {} page(s)".format(
            len(image_analysis.output.analyze_result.read_results)))

        with open(os.path.join(RESULTS_FOLDER, imgname + ".azure.read.json"),
                  'w') as outfile:
            outfile.write(image_analysis.response.content.decode("utf-8"))
        ocrresponse = image_analysis.response.content.decode("utf-8")
    else:
        # Use local OCR cached response when available
        with open(os.path.join(RESULTS_FOLDER, imgname + ".azure.read.json"),
                  'r') as cachefile:
            ocrresponse = cachefile.read().replace('\n', '')

    # Convert the original ocrresponse into proper object
    ocrresponse = BBOXOCRResponse.from_azure(json.loads(ocrresponse))

    # load the original response to get the text as-is
    original_text = ""
    for page in ocrresponse.pages:
        for line in page.lines:
            original_text += (line.text)
            original_text += ('\n')

    with open(os.path.join(RESULTS_FOLDER, imgname + ".before.azure.read.txt"),
              'w') as outfile:
        outfile.write(original_text)

    # Create BBOX OCR Response from Azure CV string response
    bboxresponse = BBoxHelper(
        verbose=verbose).processAzureOCRResponse(ocrresponse)
    with open(os.path.join(RESULTS_FOLDER, imgname + ".azure.bbox.json"),
              'w') as outfile:
        outfile.write(
            json.dumps(bboxresponse.__dict__,
                       default=lambda o: o.__dict__,
                       indent=4))

    with open(os.path.join(RESULTS_FOLDER, imgname + ".after.azure.read.txt"),
              'w') as outfile:
        outfile.write(bboxresponse.text)

    return (original_text, bboxresponse.text)