def azure_get_data():
    """Run Azure Computer Vision Read OCR on a local image and print each
    detected text line.

    The Read API is asynchronous: the submit call returns an operation
    URL, and the result must be polled until the job leaves the
    'notStarted'/'running' states.
    """
    # NOTE(review): placeholder credentials hard-coded in source — load
    # them from the environment or a secrets store in real code.
    subscription_key = "ACCESS KEY"
    endpoint = "ENDPOINT URL"
    computervision_client = ComputerVisionClient(
        endpoint, CognitiveServicesCredentials(subscription_key))

    # Provide the image path
    local_image_handwritten_path = "sample4.jpg"

    # Bug fix: the original never closed the file handle; a context
    # manager releases it even if the API call raises.
    with open(local_image_handwritten_path, "rb") as local_image_handwritten:
        recognize_handwriting_results = computervision_client.read_in_stream(
            local_image_handwritten, raw=True)

    # The operation ID is the last path segment of the returned
    # Operation-Location header.
    operation_location_remote = recognize_handwriting_results.headers[
        "Operation-Location"]
    operation_id = operation_location_remote.split("/")[-1]

    # Repeat while the operation status is 'notStarted' or 'running'.
    while True:
        get_handw_text_results = computervision_client.get_read_result(
            operation_id)
        if get_handw_text_results.status not in ['notStarted', 'running']:
            break
        time.sleep(1)

    # Print the detected text, line by line
    if get_handw_text_results.status == OperationStatusCodes.succeeded:
        print(type(get_handw_text_results.analyze_result.read_results))
        for text_result in get_handw_text_results.analyze_result.read_results:
            for line in text_result.lines:
                print(line.text)
def image_to_text(self, imagestream):
    """Extract text from an image stream with the Azure Read OCR API.

    Returns a list containing one string per recognized text line; an
    empty list when the operation does not succeed.
    """
    client = ComputerVisionClient(
        self.endpoint, CognitiveServicesCredentials(self.subscription_key))

    # Submit the image. The async operation's ID is the last path
    # segment of the Operation-Location response header.
    submission = client.read_in_stream(imagestream, raw=True)
    op_id = submission.headers["Operation-Location"].split("/")[-1]

    # Poll until the read operation reaches a terminal state.
    while True:
        outcome = client.get_read_result(op_id)
        if outcome.status not in ['notStarted', 'running']:
            break
        time.sleep(1)

    if outcome.status != OperationStatusCodes.succeeded:
        return []

    # Flatten pages -> lines into a single list of line texts.
    return [
        line.text
        for page in outcome.analyze_result.read_results
        for line in page.lines
    ]
def extractFromHandwritten(file_name):
    """Read handwritten/printed text from an uploaded image via the Azure
    Read OCR API.

    Args:
        file_name: name of a file inside static/uploads.

    Returns:
        The recognized text with one detected line per newline-terminated
        line; empty string when recognition does not succeed.
    """
    image_path = os.path.join('static', 'uploads', file_name)
    resultString = ""

    # Get a client for the computer vision service
    computervision_client = ComputerVisionClient(
        cog_endpoint, CognitiveServicesCredentials(cog_key))

    # Submit a request to read text in the image and get the operation ID.
    # Bug fix: the original never closed the image file handle; a context
    # manager releases it even if the API call raises.
    with open(image_path, "rb") as image_stream:
        read_operation = computervision_client.read_in_stream(image_stream,
                                                              raw=True)
    operation_location = read_operation.headers["Operation-Location"]
    operation_id = operation_location.split("/")[-1]

    # Wait for the asynchronous operation to complete.
    # Bug fix: the original only looped while status was 'running', so a
    # job still in the 'notStarted' state escaped the loop prematurely.
    while True:
        read_results = computervision_client.get_read_result(operation_id)
        if read_results.status not in [OperationStatusCodes.not_started,
                                       OperationStatusCodes.running]:
            break
        time.sleep(1)

    # If the operation was successful, process the text line by line
    if read_results.status == OperationStatusCodes.succeeded:
        for result in read_results.analyze_result.read_results:
            for line in result.lines:
                resultString += line.text + "\n"
                print(line.text)
    return resultString
# Standalone script: run Azure Read OCR on a local image and compute the
# bounding box of each recognized text line.
from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from azure.cognitiveservices.vision.computervision.models import OperationStatusCodes
from msrest.authentication import CognitiveServicesCredentials
from time import sleep
import cv2

# NOTE(review): a live-looking subscription key is committed here —
# rotate it and load credentials from the environment instead.
subscription_key = "7a2607ca622b4e449763a5417111f329"
endpoint = "https://bayern.cognitiveservices.azure.com/"
img_path = 'C:\\Users\\cjjun\\Desktop\\img2.jpg'

client = ComputerVisionClient(endpoint, CognitiveServicesCredentials(subscription_key))

# Submit the image to the async Read API.
img = open(img_path, 'rb')
recognize_results = client.read_in_stream(img, raw=True)
img.close()

# The operation ID is the last path segment of the Operation-Location header.
location_remote = recognize_results.headers["Operation-Location"]
operation_id = location_remote.split("/")[-1]

# Poll until the operation leaves the 'notStarted'/'running' states.
while True:
    result = client.get_read_result(operation_id)
    if result.status not in ['notStarted', 'running']:
        break
    sleep(0.5)

# Re-read the image with OpenCV, presumably for drawing the boxes —
# the drawing code is not visible in this chunk; confirm downstream use.
img_text = cv2.imread(img_path)
if result.status == OperationStatusCodes.succeeded:
    for text_result in result.analyze_result.read_results:
        for line in text_result.lines:
            # bounding_box is a flat list of corner coordinates;
            # truncate each to int for pixel indexing.
            box = [int(i) for i in line.bounding_box]
END - Read File - remote ''' ''' OCR: Read File using the Read API, extract text - local This example extracts text from a local image, then prints results. This API call can also recognize remote image text (shown in next example, Read File - remote). ''' print("===== Read File - local =====") # Get image path read_image_path = os.path.join (images_folder, "printed_text.jpg") # Open the image read_image = open(read_image_path, "rb") # Call API with image and raw response (allows you to get the operation location) read_response = computervision_client.read_in_stream(read_image, raw=True) # Get the operation location (URL with ID as last appendage) read_operation_location = read_response.headers["Operation-Location"] # Take the ID off and use to get results operation_id = read_operation_location.split("/")[-1] # Call the "GET" API and wait for the retrieval of the results while True: read_result = computervision_client.get_read_result(operation_id) if read_result.status.lower () not in ['notstarted', 'running']: break print ('Waiting for result...') time.sleep(10) # Print results, line by line if read_result.status == OperationStatusCodes.succeeded:
def main():
    """Drone demo: count fingers shown to the Tello camera (MediaPipe
    hands), then locate that digit as written text in view (Azure Read
    OCR) and steer the drone toward it.

    NOTE(review): the source arrived with indentation collapsed; the
    nesting below is reconstructed from statement order — verify against
    the original file.
    """
    tello = Tello()
    tello.connect()
    tello.streamon()
    frame_read = tello.get_frame_read()

    # Take off, climb briefly, then hover.
    tello.takeoff()
    time.sleep(4)
    tello.send_rc_control(0, 0, 70, 0)
    time.sleep(1.5)
    tello.send_rc_control(0, 0, 0, 0)

    try:
        # Configure Azure Computer vision
        computervision_client = ComputerVisionClient(
            COGNITIVESVC_ENDPOINT,
            CognitiveServicesCredentials(SUBSCRIPTION_KEY))

        # Configure MediaPipe hands recognizer
        mp_drawing = mp.solutions.drawing_utils
        mp_hands = mp.solutions.hands
        hands = mp_hands.Hands(max_num_hands=2,
                               min_detection_confidence=0.8,
                               min_tracking_confidence=0.5)

        while True:
            # Get frame (mirrored, converted to RGB for MediaPipe).
            original_frame = frame_read.frame
            frame = cv2.flip(original_frame, 1)
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame.flags.writeable = False  # Enabled pass by reference and improves performance

            # Starts at -1, presumably to offset an over-count in the
            # per-finger checks below — TODO confirm.
            num_of_fingers = -1
            results = hands.process(frame)
            if results.multi_hand_landmarks:
                for hand_landmarks in results.multi_hand_landmarks:
                    mp_drawing.draw_landmarks(frame, hand_landmarks,
                                              mp_hands.HAND_CONNECTIONS)
                    # One count_finger call per digit (thumb, index,
                    # middle, ring, pinky), each judged from a tip/joint
                    # landmark pair relative to a reference landmark.
                    num_of_fingers = num_of_fingers + count_finger(
                        hand_landmarks, mp_hands.HandLandmark.PINKY_MCP,
                        mp_hands.HandLandmark.THUMB_TIP,
                        mp_hands.HandLandmark.THUMB_IP)
                    num_of_fingers = num_of_fingers + count_finger(
                        hand_landmarks, mp_hands.HandLandmark.WRIST,
                        mp_hands.HandLandmark.INDEX_FINGER_TIP,
                        mp_hands.HandLandmark.INDEX_FINGER_PIP)
                    num_of_fingers = num_of_fingers + count_finger(
                        hand_landmarks, mp_hands.HandLandmark.WRIST,
                        mp_hands.HandLandmark.MIDDLE_FINGER_TIP,
                        mp_hands.HandLandmark.MIDDLE_FINGER_PIP)
                    num_of_fingers = num_of_fingers + count_finger(
                        hand_landmarks, mp_hands.HandLandmark.WRIST,
                        mp_hands.HandLandmark.RING_FINGER_TIP,
                        mp_hands.HandLandmark.RING_FINGER_PIP)
                    num_of_fingers = num_of_fingers + count_finger(
                        hand_landmarks, mp_hands.HandLandmark.WRIST,
                        mp_hands.HandLandmark.PINKY_TIP,
                        mp_hands.HandLandmark.PINKY_PIP)
                print("Number of fingers: " + str(num_of_fingers))

            # Show image
            cv2.imshow('Webcam', frame)
            # Exit when user press ESC key
            k = cv2.waitKey(3) & 0xFF
            if k == 27:  # ESC Key
                break

            # Hover while deciding what to do next.
            tello.send_rc_control(0, 0, 0, 0)
            time.sleep(1)

            if num_of_fingers > 0 and num_of_fingers < 10:
                # Track the written digit until the drone gets close
                # enough (or the user presses ESC); num_of_fingers is
                # never decremented here, so only break exits this loop.
                while num_of_fingers > 0:
                    # Get frame
                    ocr_frame = frame_read.frame
                    ocr_frame.flags.writeable = False

                    # Send frame to Microsoft Azure Cognitive Services to detect text in the image
                    _, buf = cv2.imencode(".jpg", ocr_frame)
                    stream = io.BytesIO(buf)
                    recognize_handw_results = computervision_client.read_in_stream(
                        stream, raw=True)

                    # OCR is async. Wait until is completed.
                    operation_location_remote = recognize_handw_results.headers[
                        "Operation-Location"]
                    operation_id = operation_location_remote.split("/")[-1]
                    while True:
                        get_handw_text_results = computervision_client.get_read_result(
                            operation_id)
                        if get_handw_text_results.status not in [
                                'notStarted', 'running'
                        ]:
                            break
                        # Keep hovering while the OCR job runs.
                        tello.send_rc_control(0, 0, 0, 0)
                        time.sleep(1)

                    # Mark the detected text, line by line
                    xg = yg = wg = hg = None
                    if get_handw_text_results.status == OperationStatusCodes.succeeded:
                        for text_result in get_handw_text_results.analyze_result.read_results:
                            for line in text_result.lines:
                                for word in line.words:
                                    boundingbox = word.bounding_box
                                    if str(num_of_fingers) in word.text:
                                        # Matching word: green box; its
                                        # geometry becomes the target.
                                        xg, yg, wg, hg = (int(boundingbox[0]),
                                                          int(boundingbox[1]),
                                                          int(boundingbox[2] -
                                                              boundingbox[0]),
                                                          int(boundingbox[7] -
                                                              boundingbox[1]))
                                        cv2.rectangle(ocr_frame, (xg, yg),
                                                      (xg + wg, yg + hg),
                                                      (0, 255, 0), 2)
                                    else:
                                        # Non-matching word: red box only.
                                        nxg, nyg, nwg, nhg = (
                                            int(boundingbox[0]),
                                            int(boundingbox[1]),
                                            int(boundingbox[2] - boundingbox[0]),
                                            int(boundingbox[7] - boundingbox[1]))
                                        cv2.rectangle(ocr_frame, (nxg, nyg),
                                                      (nxg + nwg, nyg + nhg),
                                                      (0, 0, 255), 2)

                    cv2.imshow('Webcam', ocr_frame)
                    # Exit when user press ESC key
                    k = cv2.waitKey(3) & 0xFF
                    if k == 27:  # ESC Key
                        break

                    velocity_fb = velocity_lr = velocity_ud = velocity_yaw = 0
                    if not xg is None:
                        # Move the drone
                        object_center_x = int(xg + (wg / 2))
                        object_center_y = int(yg + (hg / 2))
                        object_size = ((wg**2) + (hg**2))**0.5  # Fast sqrt
                        object_distance = DESIRED_OBJECT_SIZE - object_size
                        if not object_distance == 0:
                            velocity_fb = int(
                                MAX_SPEED_FORWARDBACK *
                                (object_distance / DESIRED_OBJECT_SIZE))
                        frame_shape = ocr_frame.shape
                        # I wrote 'object_center_y + 200' because the camera of Tello drone is slightly inclined to down and that causes the drone to go too high
                        velocity_ud = calculate_velocity(
                            frame_shape[1], object_center_y + 200,
                            MAX_SPEED_UPDOWN * -1)
                        velocity_lr = calculate_velocity(
                            frame_shape[0], object_center_x, MAX_SPEED_LR)
                        # Close enough on every axis: hover briefly, then
                        # stop tracking this digit.
                        if abs(velocity_fb) < 5 and abs(
                                velocity_ud) < 5 and abs(velocity_yaw) < 5:
                            time.sleep(5)
                            break
                    if not velocity_lr == velocity_fb == velocity_ud == velocity_yaw == 0:
                        tello.send_rc_control(velocity_lr, velocity_fb,
                                              velocity_ud, velocity_yaw)
                        time.sleep(MOV_TIME)
                        tello.send_rc_control(0, 0, 0, 0)
    finally:
        # Always land and release resources, even on error or ESC.
        tello.land()
        tello.streamoff()
        tello.end()
        # When everything done, release the capture
        cv2.destroyAllWindows()
END - Generate Thumbnail ''' ''' Read File, recognize handwritten text - local This example extracts text from a handwritten local image, then prints results. This API call can also recognize remote image text (shown in next example, Read File - remote). ''' print("===== Read File - local =====") # Get image of handwriting local_image_handwritten_path = os.path.join (images_folder, "handwritten_text.jpg") # Open the image local_image_handwritten = open(local_image_handwritten_path, "rb") # Call API with image and raw response (allows you to get the operation location) recognize_handwriting_results = computervision_client.read_in_stream(local_image_handwritten, raw=True) # Get the operation location (URL with ID as last appendage) operation_location_local = recognize_handwriting_results.headers["Operation-Location"] # Take the ID off and use to get results operation_id_local = operation_location_local.split("/")[-1] # Call the "GET" API and wait for the retrieval of the results while True: recognize_handwriting_result = computervision_client.get_read_result(operation_id_local) if recognize_handwriting_result.status.lower () not in ['notstarted', 'running']: break print ('Waiting for result...') time.sleep(10) # Print results, line by line if recognize_handwriting_result.status == OperationStatusCodes.succeeded:
# Asynchronous call. if is_url(url): request = requests.get(url) if request.status_code != 200: sys.exit("The URL does not appear to exist. Please check.\n" f"{url}") try: rawHttpResponse = client.read(url, raw=raw) except Exception as e: catch_exception(e, url) else: path = os.path.join(get_cmd_cwd(), url) with open(path, 'rb') as fstream: try: rawHttpResponse = client.read_in_stream(fstream, raw=raw) except Exception as e: catch_exception(e, path) # Get ID from returned headers. operationLocation = rawHttpResponse.headers["Operation-Location"] idLocation = len(operationLocation) - numberOfCharsInOperationId operationId = operationLocation[idLocation:] # Get the result. while True: result = client.get_read_result(operationId) if result.status not in [ OperationStatusCodes.not_started, OperationStatusCodes.running
def azure_batch_read_in_stream(filename=None, callOCR=True, verbose=False):
    """RecognizeTextUsingBatchReadAPI.

    Runs Azure Computer Vision Read OCR on `filename`, or reuses a cached
    JSON response from RESULTS_FOLDER when callOCR is False and the cache
    exists. Writes the raw response, before/after text files, and a bbox
    JSON artifact into RESULTS_FOLDER.

    Returns:
        (original_text, processed_text) — the text as returned by Azure
        and the text after BBoxHelper post-processing.
    """
    import time

    # Azure-specific configuration; the key comes from the environment.
    subscription_key = os.environ.get("COMPUTERVISION_SUBSCRIPTION_KEY", None)
    region = os.environ.get("COMPUTERVISION_LOCATION", "westeurope")

    azure_client = ComputerVisionClient(
        endpoint="https://" + region + ".api.cognitive.microsoft.com/",
        credentials=CognitiveServicesCredentials(subscription_key))

    print("AZURE Image Name {}".format(filename))
    p = Path(filename)
    (imgname, imgext) = os.path.splitext(p.name)

    cache_path = os.path.join(RESULTS_FOLDER, imgname + ".azure.read.json")

    # Invoke OCR when asked to, or when no cached response exists yet.
    invokeOCR = callOCR or not os.path.exists(cache_path)

    if invokeOCR:
        # Azure Computer Vision call (asynchronous Read API).
        with open(filename, "rb") as image_stream:
            job = azure_client.read_in_stream(image=image_stream, raw=True)
        operation_id = job.headers['Operation-Location'].split('/')[-1]

        image_analysis = azure_client.get_read_result(operation_id, raw=True)
        # Bug fix: the API reports camelCase statuses ('notStarted',
        # 'running'); the original compared against lowercase literals, so
        # the loop could exit before the job finished and read incomplete
        # results. Normalize case before comparing.
        while str(image_analysis.output.status).lower() in ['notstarted',
                                                            'running']:
            time.sleep(1)
            image_analysis = azure_client.get_read_result(
                operation_id=operation_id, raw=True)

        print("\tJob completion is: {}".format(image_analysis.output.status))
        print("\tRecognized {} page(s)".format(
            len(image_analysis.output.analyze_result.read_results)))

        # Cache the raw JSON response for later runs with callOCR=False.
        ocrresponse = image_analysis.response.content.decode("utf-8")
        with open(cache_path, 'w') as outfile:
            outfile.write(ocrresponse)
    else:
        # Use local OCR cached response when available.
        with open(cache_path, 'r') as cachefile:
            ocrresponse = cachefile.read().replace('\n', '')

    # Convert the original ocrresponse into proper object.
    ocrresponse = BBOXOCRResponse.from_azure(json.loads(ocrresponse))

    # Load the original response to get the text as-is.
    original_text = ""
    for page in ocrresponse.pages:
        for line in page.lines:
            original_text += (line.text)
            original_text += ('\n')
    with open(os.path.join(RESULTS_FOLDER,
                           imgname + ".before.azure.read.txt"),
              'w') as outfile:
        outfile.write(original_text)

    # Create BBOX OCR Response from Azure CV string response.
    bboxresponse = BBoxHelper(
        verbose=verbose).processAzureOCRResponse(ocrresponse)
    with open(os.path.join(RESULTS_FOLDER, imgname + ".azure.bbox.json"),
              'w') as outfile:
        outfile.write(
            json.dumps(bboxresponse.__dict__,
                       default=lambda o: o.__dict__,
                       indent=4))

    with open(os.path.join(RESULTS_FOLDER,
                           imgname + ".after.azure.read.txt"),
              'w') as outfile:
        outfile.write(bboxresponse.text)

    return (original_text, bboxresponse.text)