def detect_text_gcp(image_bytes: bytes) -> List[Text]:
    """
    Given an image in bytes, recognize text in the image and return it as a
    list of text chunks (one chunk per paragraph).

    :param image_bytes: image in bytes to perform OCR on
    """
    client = vision.ImageAnnotatorClient()
    image = vision.types.Image(content=image_bytes)
    image_context = vision.types.ImageContext(language_hints=['en'])
    response = client.document_text_detection(image=image,
                                              image_context=image_context)
    text_chunks = []
    for page in response.full_text_annotation.pages:
        for block in page.blocks:
            for paragraph in block.paragraphs:
                chunk = ''
                for word in paragraph.words:
                    for symbol in word.symbols:
                        chunk += symbol.text
                text_chunks.append(chunk)
    return text_chunks

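# Usage sketch for detect_text_gcp above (an assumption, not from the source):
# it relies on the pre-2.0 google-cloud-vision client that exposes
# `vision.types`, GOOGLE_APPLICATION_CREDENTIALS pointing at a valid
# service-account key, and a hypothetical local file 'sample.jpg'.
import io
from typing import List, Text
from google.cloud import vision

with io.open('sample.jpg', 'rb') as f:
    image_bytes = f.read()

for chunk in detect_text_gcp(image_bytes):
    print(chunk)
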
def detect_faces(path):
    """Detects faces in an image."""
    client = vision.ImageAnnotatorClient()
    with io.open(path, 'rb') as image_file:
        content = image_file.read()
    image = vision.types.Image(content=content)
    response = client.face_detection(image=image)
    faces = response.face_annotations
    result = []
    for i, face in enumerate(faces):
        print("Face {} has pan angle of {}".format(i, face.pan_angle))
        result.append({
            "pan": face.pan_angle,
            "tilt": face.tilt_angle,
            "joy": face.joy_likelihood,
            "headwear": face.headwear_likelihood,
            "bounding_poly": [(v.x, v.y) for v in face.bounding_poly.vertices]
        })
    return result

def detect_handwritten_ocr(path):
    """Detects handwritten characters in a local image.

    Args:
        path: The path to the local file.
    """
    client = vision.ImageAnnotatorClient()
    with io.open(path, 'rb') as image_file:
        content = image_file.read()
    image = vision.types.Image(content=content)
    # Language hint codes for handwritten OCR:
    # en-t-i0-handwrit, mul-Latn-t-i0-handwrit
    # Note: Use only one language hint code per request for handwritten OCR.
    image_context = vision.types.ImageContext(
        language_hints=['en-t-i0-handwrit'])
    response = client.document_text_detection(image=image,
                                              image_context=image_context)
    return response.full_text_annotation.text

def img_hashtags(url):
    client = vision.ImageAnnotatorClient()
    img = requests.get(url).content
    image = vision.types.Image(content=img)
    response = client.label_detection(image=image)
    labels = response.label_annotations
    web_entity_response = client.web_detection(image=image)
    web_entity = web_entity_response.web_detection
    hashtag_string = ''
    if web_entity.web_entities:
        for entity in web_entity.web_entities:
            # Strip spaces so multi-word entities form a single hashtag
            hashtag_string += '#' + entity.description.replace(' ', '') + ' '
    for label in labels:
        hashtag_string += '#' + label.description.replace(' ', '') + ' '
    return hashtag_string

def recognise_license_plate(img_path):
    start_time = datetime.now()
    img = cv2.imread(img_path)
    height, width = img.shape[:2]
    img = cv2.resize(img, (800, int((height * 800) / width)))
    cv2.imshow('Original image', img)
    cv2.imwrite(SOURCE_PATH + "output.jpg", img)
    img_path = SOURCE_PATH + "output.jpg"
    client = vision.ImageAnnotatorClient()
    with io.open(img_path, 'rb') as image_file:
        content = image_file.read()
    image = vision.types.Image(content=content)
    response = client.text_detection(image=image)
    texts = response.text_annotations
    for text in texts:
        if len(text.description) == 10:
            license_plate = text.description
            print('License plate: {}'.format(license_plate))
            vertices = [(vertex.x, vertex.y)
                        for vertex in text.bounding_poly.vertices]
            cv2.putText(img, license_plate, (200, 200),
                        cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 255), 3)
            print(vertices)
            cv2.rectangle(img, vertices[0], vertices[2], (0, 255, 0), 3)
    print('Total time: {}'.format(datetime.now() - start_time))
    cv2.imshow('done ', img)
    cv2.waitKey(0)
    cv2.imwrite(SOURCE_PATH + 'done1.jpg', img)

def blur_number_plate(img_path):
    # Read image
    img = cv2.imread(img_path)

    # Google Vision client
    client = vision.ImageAnnotatorClient()
    with io.open(img_path, 'rb') as image_file:
        content = image_file.read()
    image = vision.types.Image(content=content)

    # Response from client
    response = client.text_detection(image=image)
    text = response.text_annotations[0]

    # Coordinates for bounding box
    vertices = [(vertex.x, vertex.y) for vertex in text.bounding_poly.vertices]
    x, x_max = vertices[0][0], vertices[2][0]
    y, y_max = vertices[0][1], vertices[2][1]

    # Apply Gaussian blur to area of image within bounding box
    img[y:y_max, x:x_max] = cv2.GaussianBlur(img[y:y_max, x:x_max],
                                             ksize=(0, 0), sigmaX=10)

    # Save output
    cv2.imwrite(opt.out_path, img)

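# `opt` above is a free variable: blur_number_plate assumes the surrounding
# script parsed CLI options into it. A minimal sketch of that wiring with
# argparse; the flag names and defaults are assumptions, not from the source.
import argparse

parser = argparse.ArgumentParser(description='Blur a number plate in an image')
parser.add_argument('--img_path', required=True, help='input image path')
parser.add_argument('--out_path', default='blurred.jpg', help='output image path')
opt = parser.parse_args()

blur_number_plate(opt.img_path)
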
def detect_handwritten_ocr(path):
    """Detects handwritten characters in a local image.

    Args:
        path: The path to the local file.
    """
    from google.cloud import vision_v1p3beta1 as vision
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "test_client.json"
    client = vision.ImageAnnotatorClient()
    with io.open(path, 'rb') as image_file:
        content = image_file.read()
    image = vision.types.Image(content=content)
    # Language hint codes for handwritten OCR:
    # en-t-i0-handwrit, mul-Latn-t-i0-handwrit
    # Note: Use only one language hint code per request for handwritten OCR.
    image_context = vision.types.ImageContext(
        language_hints=['en-t-i0-handwrit'])
    response = client.document_text_detection(image=image,
                                              image_context=image_context)
    print('Full Text: {}'.format(response.full_text_annotation.text))

def test_async_batch_annotate_files(self):
    # Setup Expected Response
    expected_response = {}
    expected_response = image_annotator_pb2.AsyncBatchAnnotateFilesResponse(
        **expected_response)
    operation = operations_pb2.Operation(
        name='operations/test_async_batch_annotate_files', done=True)
    operation.response.Pack(expected_response)

    # Mock the API response
    channel = ChannelStub(responses=[operation])
    patch = mock.patch('google.api_core.grpc_helpers.create_channel')
    with patch as create_channel:
        create_channel.return_value = channel
        client = vision_v1p3beta1.ImageAnnotatorClient()

    # Setup Request
    requests = []

    response = client.async_batch_annotate_files(requests)
    result = response.result()
    assert expected_response == result

    assert len(channel.requests) == 1
    expected_request = image_annotator_pb2.AsyncBatchAnnotateFilesRequest(
        requests=requests)
    actual_request = channel.requests[0][1]
    assert expected_request == actual_request

def test_async_batch_annotate_files(self):
    # Setup Expected Response
    expected_response = {}
    expected_response = image_annotator_pb2.AsyncBatchAnnotateFilesResponse(
        **expected_response)
    operation = operations_pb2.Operation(
        name='operations/test_async_batch_annotate_files', done=True)
    operation.response.Pack(expected_response)

    # Mock the API response
    channel = ChannelStub(responses=[operation])
    client = vision_v1p3beta1.ImageAnnotatorClient(channel=channel)

    # Setup Request
    requests = []

    response = client.async_batch_annotate_files(requests)
    result = response.result()
    assert expected_response == result

    assert len(channel.requests) == 1
    expected_request = image_annotator_pb2.AsyncBatchAnnotateFilesRequest(
        requests=requests)
    actual_request = channel.requests[0][1]
    assert expected_request == actual_request

def detect_handwritten_ocr(path):
    """Detects handwritten characters in a local image.

    Args:
        path: The path to the local file.
    """
    from google.cloud import vision_v1p3beta1 as vision
    client = vision.ImageAnnotatorClient()

    with io.open(path, 'rb') as image_file:
        content = image_file.read()
    image = vision.types.Image(content=content)

    # Language hint codes for handwritten OCR:
    # en-t-i0-handwrit, mul-Latn-t-i0-handwrit
    # Note: Use only one language hint code per request for handwritten OCR.
    image_context = vision.types.ImageContext(
        language_hints=['en-t-i0-handwrit'])

    response = client.document_text_detection(image=image,
                                              image_context=image_context)

    print('Full Text: {}'.format(response.full_text_annotation.text))
    for page in response.full_text_annotation.pages:
        for block in page.blocks:
            print('\nBlock confidence: {}\n'.format(block.confidence))

            for paragraph in block.paragraphs:
                print('Paragraph confidence: {}'.format(paragraph.confidence))

                for word in paragraph.words:
                    word_text = ''.join(
                        [symbol.text for symbol in word.symbols])
                    print('Word text: {} (confidence: {})'.format(
                        word_text, word.confidence))

                    for symbol in word.symbols:
                        print('\tSymbol: {} (confidence: {})'.format(
                            symbol.text, symbol.confidence))

def recognize_license_plate(img_path):
    img = cv2.imread(img_path)
    height, width = img.shape[:2]
    img = cv2.resize(img, (800, int((height * 800) / width)))
    cv2.imwrite(SOURCE_PATH + "output.jpg", img)
    img_path = SOURCE_PATH + "output.jpg"
    client = vision.ImageAnnotatorClient()
    with io.open(img_path, 'rb') as image_file:
        content = image_file.read()
    image = vision.types.Image(content=content)
    response = client.text_detection(image=image)
    texts = response.text_annotations
    for text in texts:
        if len(text.description) == 10:
            license_plate = text.description
            print(license_plate)

def query_for_digits(img):
    """
    Take an image's content, apply preprocessing so it parses better,
    then query the Google Vision API for document text OCR.

    Args:
        img: the image content (bytes) to send
    Returns:
        The recognized text, with newlines replaced by spaces
    """
    # This is what works best for digits, apparently: blur, then threshold
    np_img = np.frombuffer(img, np.uint8)
    img = cv2.imdecode(np_img, 0)
    blur = cv2.GaussianBlur(img, (3, 3), 0)
    _, image_threshed = cv2.threshold(blur, 200, 255, cv2.THRESH_BINARY)
    # _, image_threshed = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    _img = cv2.imencode('.jpg', image_threshed)[1].tostring()

    client = vision.ImageAnnotatorClient()
    image = vision.types.Image(content=_img)
    # Adding the German language hint fixes the digit recognition
    image_context = vision.types.ImageContext(language_hints=['de'])
    response = client.document_text_detection(image=image,
                                              image_context=image_context)
    return response.full_text_annotation.text.replace('\n', ' ')

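# Usage sketch for query_for_digits (the file name is hypothetical): the
# function expects the *encoded* image bytes, not a decoded numpy array,
# since it calls cv2.imdecode itself.
with open('meter.jpg', 'rb') as f:
    raw_bytes = f.read()

print(query_for_digits(raw_bytes))
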
def localize_objects_uri(uri):
    client = vision.ImageAnnotatorClient()
    image = vision.types.Image()
    image.source.image_uri = uri
    objects = client.object_localization(
        image=image).localized_object_annotations
    return objects

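# Usage sketch for localize_objects_uri: unlike the printing variant further
# below, it returns the annotations, so the caller formats them. The GCS URI
# here is a made-up example.
objects = localize_objects_uri('gs://my-bucket/shelf.jpg')
for obj in objects:
    print('{} (confidence: {:.2f})'.format(obj.name, obj.score))
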
def get_similar_products_file(project_id, location, product_set_id,
                              product_category, file_path, filter):
    """Search similar products to image.

    Args:
        project_id: Id of the project.
        location: A compute region name.
        product_set_id: Id of the product set.
        product_category: Category of the product.
        file_path: Local file path of the image to be searched.
        filter: Condition to be applied on the labels.
            Example for filter: (color = red OR color = blue) AND style = kids
            It will search on all products with the following labels:
            color:red AND style:kids
            color:blue AND style:kids
    """
    # product_search_client is needed only for its helper methods.
    product_search_client = vision.ProductSearchClient()
    image_annotator_client = vision.ImageAnnotatorClient()

    # Read the image as a stream of bytes.
    with open(file_path, 'rb') as image_file:
        content = image_file.read()

    # Create annotate image request along with product search feature.
    image = vision.types.Image(content=content)

    # product search specific parameters
    product_set_path = product_search_client.product_set_path(
        project=project_id, location=location, product_set=product_set_id)
    product_search_params = vision.types.ProductSearchParams(
        product_set=product_set_path,
        product_categories=[product_category],
        filter=filter)
    image_context = vision.types.ImageContext(
        product_search_params=product_search_params)

    # Search products similar to the image.
    response = image_annotator_client.product_search(
        image, image_context=image_context)

    index_time = response.product_search_results.index_time
    print('Product set index time:')
    print('  seconds: {}'.format(index_time.seconds))
    print('  nanos: {}\n'.format(index_time.nanos))

    results = response.product_search_results.results
    print('Search results:')
    for result in results:
        product = result.product
        print('Score(Confidence): {}'.format(result.score))
        print('Image name: {}'.format(result.image))
        print('Product name: {}'.format(product.name))
        print('Product display name: {}'.format(product.display_name))
        print('Product description: {}\n'.format(product.description))
        print('Product labels: {}\n'.format(product.product_labels))

def check_door(door_name):
    global _DOOR_CHECKER
    if _DOOR_CHECKER:
        try:
            # Set image attributes
            _DOOR_CHECKER.setResolution(2)  # Image of 160*120px
            _DOOR_CHECKER.setPictureFormat("jpg")  # Save top picture
            _DOOR_CHECKER.setCameraID(0)  # Top camera
            _DOOR_CHECKER.takePicture2(_PEPPER_PATH, _FILENAME, True)
        except Exception as e:
            Logger.err("doorChecker.py", "check_door",
                       "Photo capture exception " + str(e))

        # Download file from Pepper to local system
        Filetransfer.transfer_file_from_pepper_to_local(
            _PEPPER_PATH + "/" + _FILENAME, "/tmp/" + _FILENAME)
        os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = _GOOGLE_API_CONFIG

        # Google Vision API
        client = vision.ImageAnnotatorClient()
        with io.open(_LOCAL_PATH + "/" + _FILENAME, 'rb') as image_file:
            content = image_file.read()
        image = vision.types.Image(content=content)
        # Language hint codes for handwritten OCR:
        # en-t-i0-handwrit, mul-Latn-t-i0-handwrit
        # Note: Use only one language hint code per request for handwritten OCR.
        image_context = vision.types.ImageContext(
            language_hints=['en-t-i0-handwrit'])
        response = client.document_text_detection(
            image=image, image_context=image_context)
        google_api_text_response = response.full_text_annotation.text.strip()
        response_to_check = "".join(google_api_text_response.split())

        if door_name in response_to_check:
            # Try to split the room name from the response
            speech_room_name = google_api_text_response.replace(
                door_name, "").replace("\n", "").strip()
            if speech_room_name is not None and speech_room_name != "":
                Logger.info("doorChecker.py", "check_door",
                            "Here is your requested room " + door_name +
                            ". The name of the room is " + speech_room_name)
            else:
                Logger.info("doorChecker.py", "check_door",
                            "Here is your requested room " + door_name + ".")

            # Housekeeping: remove the temporary file
            os.remove(_LOCAL_PATH + "/" + _FILENAME)
            return True

def recognize_license_plate(img_path, vaga, entrada_saida):
    # Read image with opencv
    img = cv2.imread(img_path)
    # Show the original image
    cv2.imshow('Original', img)
    # Save the image to a temp file
    cv2.imwrite(SOURCE_PATH + 'output.jpg', img)
    # Create new img path for google vision
    img_path = SOURCE_PATH + 'output.jpg'
    # Create google vision client
    client = vision.ImageAnnotatorClient()
    # Read image file
    with io.open(img_path, 'rb') as image_file:
        content = image_file.read()
    image = vision.types.Image(content=content)
    # Recognize text
    response = client.text_detection(image=image)
    texts = response.text_annotations

    placas_encontradas = []
    for text in texts:
        placa = placa_regex(text.description)
        if placa and placa not in placas_encontradas:
            placas_encontradas.append(placa)
            hora, entrada = pegar_data_hora_atual()[1], pegar_data_hora_atual()[2]
            colocar_informacoes_imagem(img, vaga, hora, placa, entrada_saida)
            entrada_saida_estacionamento(placa, vaga=vaga, entrada=entrada,
                                         tipo=entrada_saida)
            vertices = [(vertex.x, vertex.y)
                        for vertex in text.bounding_poly.vertices]
            # Draw rectangle around license plate
            cv2.rectangle(img, (vertices[0][0] - 10, vertices[0][1] - 10),
                          (vertices[2][0] + 10, vertices[2][1] + 10),
                          (0, 255, 0), 3, cv2.LINE_AA)
            cv2.imshow('Reconhecimento', img)

    key = cv2.waitKey(0) & 0xFF
    if key == 27:
        cv2.destroyAllWindows()

def detect_text(path):
    client = vision.ImageAnnotatorClient()
    with io.open(path, 'rb') as image_file:
        content = image_file.read()
    image = vision.types.Image(content=content)
    response = client.text_detection(image=image)
    texts = response.text_annotations
    if len(texts) != 0:
        # The first annotation holds the full detected text;
        # strip everything that is not alphanumeric
        return re.sub('[^A-Za-z0-9]+', '', texts[0].description)
    return "None"

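# Note that detect_text returns the literal string "None" (not the None
# object) when nothing is detected, so callers must compare against the
# string. Hypothetical usage:
result = detect_text('captcha.png')
if result != "None":
    print('Detected: {}'.format(result))
else:
    print('No text found')
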
def test_batch_annotate_images_exception(self):
    # Mock the API response
    channel = ChannelStub(responses=[CustomException()])
    client = vision_v1p3beta1.ImageAnnotatorClient(channel=channel)

    # Setup request
    requests = []

    with pytest.raises(CustomException):
        client.batch_annotate_images(requests)

def test_batch_annotate_images_exception(self):
    # Mock the API response
    channel = ChannelStub(responses=[CustomException()])
    patch = mock.patch("google.api_core.grpc_helpers.create_channel")
    with patch as create_channel:
        create_channel.return_value = channel
        client = vision_v1p3beta1.ImageAnnotatorClient()

    # Setup request (requests is a required argument in this API version)
    requests = []

    with pytest.raises(CustomException):
        client.batch_annotate_images(requests)

def recognize_food(img_path, list_foods):
    start_time = datetime.now()
    # Read image with opencv
    img = cv2.imread(img_path)
    # Get image size
    height, width = img.shape[:2]
    # Scale image
    img = cv2.resize(img, (800, int((height * 800) / width)))
    # Save image to temp file
    cv2.imwrite(SOURCE_PATH + 'output.jpg', img)
    img_path = SOURCE_PATH + 'output.jpg'
    # Create google vision client
    client = vision.ImageAnnotatorClient()
    # Read image file
    with io.open(img_path, 'rb') as image_file:
        content = image_file.read()
    image = vision.types.Image(content=content)
    # Recognize content
    response = client.label_detection(image=image)
    labels = response.label_annotations
    for label in labels:
        desc = label.description.lower()
        score = round(label.score, 2)
        print("label: ", desc, " score: ", score)
        if desc in list_foods:
            # Put the recognized food label on the image
            cv2.putText(img, desc.upper() + " ???", (300, 150),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (50, 50, 200), 2)
            cv2.imshow('Recognize & Draw', img)
            cv2.waitKey(0)
            # Get first fruit only
            break
    print('Total time: {}'.format(datetime.now() - start_time))

def recognize_license_plate(img_path):
    start_time = datetime.now()
    # Read image with opencv
    img = cv2.imread(img_path)
    # Get image size
    height, width = img.shape[:2]
    # Scale image
    img = cv2.resize(img, (800, int((height * 800) / width)))
    # Show the origin image
    cv2.imshow('Origin image', img)
    # Save the image to temp file
    cv2.imwrite(SOURCE_PATH + "output.jpg", img)
    # Create new img path for google vision
    img_path = SOURCE_PATH + "output.jpg"
    # Create google vision client
    client = vision.ImageAnnotatorClient()
    # Read image file
    with io.open(img_path, 'rb') as image_file:
        content = image_file.read()
    image = vision.types.Image(content=content)
    # Recognize text
    response = client.text_detection(image=image)
    texts = response.text_annotations
    for text in texts:
        if len(text.description) == 10:
            license_plate = text.description
            print(license_plate)
            vertices = [(vertex.x, vertex.y)
                        for vertex in text.bounding_poly.vertices]
            # Put the license plate number on the image
            cv2.putText(img, license_plate, (200, 200),
                        cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 255), 3)
            print(vertices)
            # Draw rectangle around license plate
            cv2.rectangle(img, (vertices[0][0] - 10, vertices[0][1] - 10),
                          (vertices[2][0] + 10, vertices[2][1] + 10),
                          (0, 255, 0), 3)
    print('Total time: {}'.format(datetime.now() - start_time))
    cv2.imshow('Recognize & Draw', img)
    cv2.waitKey(0)

def sample_batch_annotate_images():
    # Create a client
    client = vision_v1p3beta1.ImageAnnotatorClient()

    # Initialize request argument(s)
    request = vision_v1p3beta1.BatchAnnotateImagesRequest()

    # Make the request
    response = client.batch_annotate_images(request=request)

    # Handle the response
    print(response)

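# The generated sample above sends an empty BatchAnnotateImagesRequest. A
# hedged sketch of populating it with one image and a label-detection
# feature, assuming the proto-plus API of google-cloud-vision >= 2.0 (where
# the reserved word `type` is spelled `type_`); 'sample.jpg' is hypothetical.
with open('sample.jpg', 'rb') as f:
    content = f.read()

request = vision_v1p3beta1.BatchAnnotateImagesRequest(requests=[
    vision_v1p3beta1.AnnotateImageRequest(
        image=vision_v1p3beta1.Image(content=content),
        features=[vision_v1p3beta1.Feature(
            type_=vision_v1p3beta1.Feature.Type.LABEL_DETECTION)],
    )
])
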
def recognize_image(img_path, list_foods):
    # Read image with opencv
    img = cv2.imread(img_path)
    # Get image size
    height, width = img.shape[:2]
    # Scale image
    img = cv2.resize(img, (800, int((height * 800) / width)))
    # Save image to temp file
    cv2.imwrite(SOURCE_PATH + 'output.jpg', img)
    img_path = SOURCE_PATH + 'output.jpg'
    # Create google vision client
    client = vision.ImageAnnotatorClient()
    # Read image file
    with io.open(img_path, 'rb') as image_file:
        content = image_file.read()
    image = vision.types.Image(content=content)
    # Recognize content
    response = client.label_detection(image=image)
    labels = response.label_annotations
    for label in labels:
        desc = label.description.lower()
        score = round(label.score, 2)
        print("label: ", desc, " score: ", score)
        if desc in list_foods:
            # If the image is recognized, print the identified label on it and save
            cv2.putText(img, desc.upper(), (300, 150),
                        cv2.FONT_HERSHEY_PLAIN, 1, (50, 50, 200), 2)
            cv2.imshow('Identified Image', img)
            cv2.waitKey(0)
            # Stop after recognizing the first fruit
            break

def __detect_handwritten_ocr(self, data):
    from google.cloud import vision_v1p3beta1 as vision
    client = vision.ImageAnnotatorClient()
    image = vision.types.Image(content=data)
    image_context = vision.types.ImageContext(
        language_hints=['de-t-i0-handwrit'])
    response = client.document_text_detection(image=image,
                                              image_context=image_context)
    return response.full_text_annotation.text

def recognize_license_plate():
    # `path` is a module-level variable holding the image URL
    img_resp = requests.get(path)
    img_arr = np.array(bytearray(img_resp.content), dtype=np.uint8)
    img = cv2.imdecode(img_arr, cv2.IMREAD_COLOR)
    # Find the image size
    height, width = img.shape[:2]
    # Scale the image
    img = cv2.resize(img, (800, int((height * 800) / width)))
    # Store the image in a temp file
    cv2.imwrite("temp.jpg", img)
    # Create new img path for google vision
    img_path = "temp.jpg"
    # Create google vision client
    client = vision.ImageAnnotatorClient()
    # Read image file
    with io.open(img_path, 'rb') as image_file:
        content = image_file.read()
    image = vision.types.Image(content=content)
    # Recognize text
    response = client.text_detection(image=image)
    texts = response.text_annotations
    for text in texts:
        if len(text.description) == 6 or len(text.description) == 7:
            license_plate = text.description
            url = ("https://hackthon19.herokuapp.com/search?licenseNumber="
                   + license_plate)
            result = requests.get(url)
            data = result.json()
            if len(data) > 0:
                print("FOUND")
            else:
                print("NOT FOUND")
    # Reload the temp image and display it
    img = cv2.imread(img_path)
    print(img)
    cv2.imshow('Recognize & Draw', img)
    cv2.waitKey(0)
    playsound('1.wav')

def detect_text_uri(uri):
    client = vision.ImageAnnotatorClient()
    image = vision.types.Image()
    image.source.image_uri = uri
    image_context = vision.types.ImageContext(
        language_hints=['en-t-i0-handwrit'])
    response = client.document_text_detection(image=image,
                                              image_context=image_context)
    texts = response.text_annotations
    print()
    for text in texts:
        print('\n"{}"'.format(text.description))
    print()

def localize_objects(path):
    client = vision.ImageAnnotatorClient()
    with open(path, 'rb') as image_file:
        content = image_file.read()
    image = vision.types.Image(content=content)
    objects = client.object_localization(
        image=image).localized_object_annotations
    print('Number of objects found: {}'.format(len(objects)))
    for object_ in objects:
        print('\n{} (confidence: {})'.format(object_.name, object_.score))
        print('Normalized bounding polygon vertices: ')
        for vertex in object_.bounding_poly.normalized_vertices:
            print(' - ({}, {})'.format(vertex.x, vertex.y))

def recognise_license_plate(img_path):
    client = vision.ImageAnnotatorClient()
    start_time = datetime.now()
    with io.open(img_path, 'rb') as image_file:
        content = image_file.read()
    image = vision.types.Image(content=content)
    response = client.text_detection(image=image)
    texts = response.text_annotations
    for text in texts:
        if len(text.description) == 7:
            license_plate = text.description
            print('License Plate {}'.format(license_plate))
    print('Total time: {}'.format(datetime.now() - start_time))

def detect_text(path):
    client = vision.ImageAnnotatorClient()
    with io.open(path, 'rb') as image_file:
        content = image_file.read()
    image = vision.types.Image(content=content)
    image_context = vision.types.ImageContext(
        language_hints=['en-t-i0-handwrit'])
    response = client.document_text_detection(image=image,
                                              image_context=image_context)
    texts = response.text_annotations
    # Return the first (full-text) annotation, if anything was detected
    for text in texts:
        return text.description.encode('utf-8')
    return None

def sample_async_batch_annotate_files():
    # Create a client
    client = vision_v1p3beta1.ImageAnnotatorClient()

    # Initialize request argument(s)
    request = vision_v1p3beta1.AsyncBatchAnnotateFilesRequest()

    # Make the request
    operation = client.async_batch_annotate_files(request=request)

    print("Waiting for operation to complete...")

    response = operation.result()

    # Handle the response
    print(response)