def image_analysis_in_stream(subscription_key):
    """ImageAnalysisInStream.

    Analyze a local image (house.jpg) as a byte stream and print the
    caption, tags, and dominant colors reported by the Computer Vision API.
    """
    client = ComputerVisionAPI(
        COMPUTERVISION_LOCATION,
        CognitiveServicesCredentials(subscription_key))

    # Plain strings such as "ImageType" or "Tags" would also be accepted.
    requested_features = [
        VisualFeatureTypes.image_type,
        VisualFeatureTypes.faces,
        VisualFeatureTypes.categories,
        VisualFeatureTypes.color,
        VisualFeatureTypes.tags,
        VisualFeatureTypes.description,
    ]

    image_path = os.path.join(IMAGES_FOLDER, "house.jpg")
    with open(image_path, "rb") as image_stream:
        image_analysis = client.analyze_image_in_stream(
            image_stream, visual_features=requested_features)

    best_caption = image_analysis.description.captions[0].text
    print("This image can be described as: {}\n".format(best_caption))

    print("Tags associated with this image:\nTag\t\tConfidence")
    for tag in image_analysis.tags:
        print("{}\t\t{}".format(tag.name, tag.confidence))

    print("\nThe primary colors of this image are: {}".format(
        image_analysis.color.dominant_colors))
def recognize_text(subscription_key):
    """RecognizeTextUsingRecognizeAPI.

    Recognize text in a local image with the asynchronous recognizeText
    API, polling once per second until the operation completes, then print
    the first word of the first three recognized lines.
    """
    import time

    client = ComputerVisionAPI(
        COMPUTERVISION_LOCATION,
        CognitiveServicesCredentials(subscription_key))

    image_path = os.path.join(IMAGES_FOLDER, "make_things_happen.jpg")
    with open(image_path, "rb") as image_stream:
        # raw=True keeps the HTTP response available so the
        # Operation-Location header (which carries the operation id)
        # can be read back.
        job = client.recognize_text_in_stream(
            image_stream, mode="Printed", raw=True)
    operation_id = job.headers['Operation-Location'].split('/')[-1]

    # Poll until the service reports a terminal status.
    image_analysis = client.get_text_operation_result(operation_id)
    while image_analysis.status in ['NotStarted', 'Running']:
        time.sleep(1)
        image_analysis = client.get_text_operation_result(operation_id)

    print("Job completion is: {}\n".format(image_analysis.status))
    print("Recognized:\n")
    lines = image_analysis.recognition_result.lines
    for idx in range(3):  # expected words: "make", "things", "happen"
        print(lines[idx].words[0].text)
def whatsthat():
    """Capture a webcam frame, upload it via tinify, and speak a description.

    Grabs one frame from the default camera, saves it under tempimages/ with
    a timestamp-based name, compresses it with tinify to obtain a public
    URL, asks the Computer Vision describe_image API for captions, and
    speaks the best caption (or an error phrase when no caption comes back).
    """
    try:
        now = datetime.datetime.now()
        timer = (str(now.date()) + str(now.hour) + str(now.minute)
                 + str(now.second))
        name_docu = os.path.join(os.getcwd(), 'tempimages', timer + '.jpeg')

        # Grab a single frame from the default camera, then release it.
        cap = cv2.VideoCapture(0)
        r, image = cap.read()
        cv2.imwrite(name_docu, image)
        cap.release()

        # tinify uploads/compresses the image and exposes it at a URL the
        # Computer Vision service can fetch.
        source = tinify.from_file(name_docu)
        url = source.url

        # SECURITY: subscription key is hard-coded; move it to an
        # environment variable or a config file.
        region = 'westcentralus'
        key = '3202352b4d3e49678aa066c513ae0ef2'
        credentials = CognitiveServicesCredentials(key)
        client = ComputerVisionAPI(region, credentials=credentials)

        language = "en"
        max_descriptions = 3  # max_candidates is an integer count, not a str
        analysis = client.describe_image(url, max_descriptions, language)

        if len(analysis.captions) > 0:
            captionn = analysis.captions[0].text
            print(captionn)
            modular_speech(captionn)
        else:
            save_speech('unknownError')
    except Exception as e:
        # Top-level boundary for the assistive flow: report, don't crash.
        print(e)
def main():
    """Analyze image.jpg and print its best caption and all tags."""
    client = ComputerVisionAPI(
        COMPUTER_VISION_API_LOCATION,
        CognitiveServicesCredentials(COMPUTER_VISION_API_KEY))

    wanted_features = [
        VisualFeatureTypes.tags,
        VisualFeatureTypes.description,
    ]
    with open('image.jpg', 'rb') as image_stream:
        analysis = client.analyze_image_in_stream(
            image_stream, visual_features=wanted_features)

    print("Image description: {}".format(
        analysis.description.captions[0].text))
    print("Tags:\nTag\t\tConfidence")
    for tag in analysis.tags:
        print("{}\t\t{}".format(tag.name, tag.confidence))
def recognize_printed_text_in_stream(subscription_key):
    """RecognizedPrintedTextUsingOCR_API.

    Run the synchronous OCR endpoint on a local image and print every line
    of the first detected text region, words joined with spaces.

    :param subscription_key: Computer Vision API subscription key.
    """
    # NOTE: the original had a dead `import time` here; OCR is synchronous
    # and never needed it, so it has been removed.
    client = ComputerVisionAPI(
        COMPUTERVISION_LOCATION,
        CognitiveServicesCredentials(subscription_key))

    ocr_image = os.path.join(IMAGES_FOLDER, "computer_vision_ocr.png")
    with open(ocr_image, "rb") as image_stream:
        image_analysis = client.recognize_printed_text_in_stream(
            image_stream, language="en")

    print("Recognized:\n")
    # Only the first region is printed, matching the sample image layout.
    for line in image_analysis.regions[0].lines:
        print(" ".join(word.text for word in line.words))
def retrieve_text_from_url(imgurl, timeout=30, poll_interval=1):
    """Recognize printed text in an image URL and return the raw JSON result.

    Submits the image to the asynchronous recognize_text endpoint, then
    polls the Operation-Location URL until the job leaves the
    NotStarted/Running states (or *timeout* seconds elapse), instead of the
    previous fixed 20-second sleep.

    :param imgurl: publicly reachable image URL.
    :param timeout: maximum seconds to wait for the operation (default 30).
    :param poll_interval: seconds between status polls (default 1).
    :return: deserialized JSON dict from the text-operation result endpoint.
    """
    client = ComputerVisionAPI(
        COMPUTERVISION_LOCATION,
        CognitiveServicesCredentials(COMP_VIS_SUBSCRIPTION_KEY))
    # raw=True exposes the HTTP response whose Operation-Location header
    # points at the asynchronous result resource.
    submission = client.recognize_text(imgurl, raw=True, mode='Printed')
    headers = {'Ocp-Apim-Subscription-Key': COMP_VIS_SUBSCRIPTION_KEY}
    url = submission.response.headers['Operation-Location']

    # Poll instead of sleeping a fixed interval; return as soon as the job
    # settles, or return the last response once the deadline passes.
    deadline = time.time() + timeout
    while True:
        result = json.loads(requests.get(url, headers=headers).text)
        if result.get('status') not in ('NotStarted', 'Running'):
            return result
        if time.time() >= deadline:
            return result
        time.sleep(poll_interval)
def whatsthat():
    """Capture a Pi camera photo, upload it via tinify, and speak a caption.

    Takes a photo with the Raspberry Pi camera, compresses it with tinify
    to obtain a public URL, asks the Computer Vision describe_image API for
    captions, and speaks the best one. Guards against an empty caption list
    (which previously raised IndexError) by speaking an error phrase.
    """
    now = datetime.datetime.now()
    timer = (str(now.date()) + str(now.hour) + str(now.minute)
             + str(now.second))

    with picamera.PiCamera() as camera:
        camera.resolution = cameraResolution
        camera.capture(timer + '.jpeg', format='jpeg')
        camera.close()

    # tinify uploads/compresses the image and exposes it at a URL the
    # Computer Vision service can fetch.
    source = tinify.from_file(os.path.join(os.getcwd(), timer + '.jpeg'))
    url = source.url

    # SECURITY: subscription key is hard-coded; move it to an environment
    # variable or a config file.
    region = 'westcentralus'
    key = '43977e2279b849c1bb5c463387b37307'
    credentials = CognitiveServicesCredentials(key)
    client = ComputerVisionAPI(region, credentials=credentials)

    language = "en"
    max_descriptions = 3  # max_candidates is an integer count, not a str
    analysis = client.describe_image(url, max_descriptions, language)

    # Consistent with the cv2 variant of this function: handle the
    # no-caption case instead of raising IndexError.
    if len(analysis.captions) > 0:
        modular_speech(analysis.captions[0].text)
    else:
        save_speech('unknownError')
def retrieve_text_from_img(img, timeout=30, poll_interval=1):
    """Recognize text in a local image file and return the raw JSON result.

    Streams the file to the asynchronous recognize_text_in_stream endpoint,
    then polls the Operation-Location URL until the job leaves the
    NotStarted/Running states (or *timeout* seconds elapse), instead of the
    previous fixed 20-second sleep.

    :param img: file name inside IMAGES_FOLDER.
    :param timeout: maximum seconds to wait for the operation (default 30).
    :param poll_interval: seconds between status polls (default 1).
    :return: deserialized JSON dict from the text-operation result endpoint.
    """
    client = ComputerVisionAPI(
        COMPUTERVISION_LOCATION,
        CognitiveServicesCredentials(COMP_VIS_SUBSCRIPTION_KEY))
    # raw=True returns the direct response alongside the deserialized one,
    # giving access to the Operation-Location header.
    with open(os.path.join(IMAGES_FOLDER, img), "rb") as image_stream:
        submission = client.recognize_text_in_stream(image_stream, raw=True)
    headers = {'Ocp-Apim-Subscription-Key': COMP_VIS_SUBSCRIPTION_KEY}
    url = submission.response.headers['Operation-Location']

    # Poll instead of sleeping a fixed interval; return as soon as the job
    # settles, or return the last response once the deadline passes.
    deadline = time.time() + timeout
    while True:
        result = json.loads(requests.get(url, headers=headers).text)
        if result.get('status') not in ('NotStarted', 'Running'):
            return result
        if time.time() >= deadline:
            return result
        time.sleep(poll_interval)
def readit():
    """Capture a webcam frame, OCR its handwritten text, and speak it.

    Grabs one frame from the default camera, uploads it via tinify, submits
    the resulting URL to the asynchronous handwritten text recognition API,
    polls until the operation completes, strips punctuation that trips up
    the speech engine, and speaks the recognized text.
    """
    try:
        # SECURITY: subscription key is hard-coded; move it to an
        # environment variable or a config file.
        region = 'westcentralus'
        key = '3202352b4d3e49678aa066c513ae0ef2'
        credentials = CognitiveServicesCredentials(key)
        client = ComputerVisionAPI(region, credentials=credentials)

        # Capture one frame to a temp file, then release the camera.
        name_docu = 'tempread.jpeg'
        cap = cv2.VideoCapture(0)
        r, image = cap.read()
        cv2.imwrite(name_docu, image)
        cap.release()

        # tinify uploads the image and exposes it at a fetchable URL.
        source = tinify.from_file(os.path.join(os.getcwd(), name_docu))
        url = source.url

        mode = TextRecognitionMode.handwritten
        raw = True
        custom_headers = None
        numberOfCharsInOperationId = 36

        save_speech('waitForText')

        # Async SDK call; the operation id is the fixed-length tail of the
        # Operation-Location response header.
        rawHttpResponse = client.recognize_text(url, mode, custom_headers,
                                                raw)
        operationLocation = rawHttpResponse.headers["Operation-Location"]
        idLocation = len(operationLocation) - numberOfCharsInOperationId
        operationId = operationLocation[idLocation:]

        # Poll until the recognition job reaches a terminal status.
        result = client.get_text_operation_result(operationId)
        while result.status in ['NotStarted', 'Running']:
            time.sleep(1)
            result = client.get_text_operation_result(operationId)

        if result.status == TextOperationStatusCodes.succeeded:
            main_string = ''
            for line in result.recognition_result.lines:
                main_string = main_string + ' ' + line.text
            # Strip characters that disturb the speech output.
            main_string = re.sub("!|/|;|:|-", "", main_string)
            main_string = main_string.replace('|', '')
            main_string = main_string.replace('*', '')
            modular_speech(main_string)
        else:
            save_speech('unknownError')
    except Exception as e:
        # Previously `pass`, which silently hid every failure; report it
        # instead (consistent with whatsthat()).
        print(e)
def readit():
    """Capture a Pi camera photo, OCR its handwritten text, and speak it.

    Takes a photo with the Raspberry Pi camera, uploads it via tinify,
    submits the resulting URL to the asynchronous handwritten text
    recognition API, polls until the operation completes, strips
    punctuation that trips up the speech engine, and speaks the text.
    """
    # SECURITY: subscription key is hard-coded; move it to an environment
    # variable or a config file.
    region = 'westcentralus'
    key = '43977e2279b849c1bb5c463387b37307'
    credentials = CognitiveServicesCredentials(key)
    client = ComputerVisionAPI(region, credentials=credentials)

    name_docu = 'tempread.jpeg'
    with picamera.PiCamera() as camera:
        camera.resolution = cameraResolution
        camera.capture(name_docu, format='jpeg')
        camera.close()

    # BUG FIX: the original appended '.jpeg' to a name that already carried
    # the extension, so tinify tried to read the nonexistent
    # 'tempread.jpeg.jpeg' while the camera had written 'tempread.jpeg'.
    source = tinify.from_file(os.path.join(os.getcwd(), name_docu))
    url = source.url

    mode = TextRecognitionMode.handwritten
    raw = True
    custom_headers = None
    numberOfCharsInOperationId = 36

    save_speech('waitForText')

    # Async SDK call; the operation id is the fixed-length tail of the
    # Operation-Location response header.
    rawHttpResponse = client.recognize_text(url, mode, custom_headers, raw)
    operationLocation = rawHttpResponse.headers["Operation-Location"]
    idLocation = len(operationLocation) - numberOfCharsInOperationId
    operationId = operationLocation[idLocation:]

    # Poll until the recognition job reaches a terminal status.
    result = client.get_text_operation_result(operationId)
    while result.status in ['NotStarted', 'Running']:
        time.sleep(1)
        result = client.get_text_operation_result(operationId)

    if result.status == TextOperationStatusCodes.succeeded:
        main_string = ''
        for line in result.recognition_result.lines:
            main_string = main_string + ' ' + line.text
        # Strip characters that disturb the speech output.
        main_string = re.sub("!|/|;|:|-", "", main_string)
        main_string = main_string.replace('|', '')
        main_string = main_string.replace('*', '')
        modular_speech(main_string)
    else:
        save_speech('unknownError')