def _annotateBatch(self, batch):
    """Annotate every image path in ``batch`` with one batched Vision call.

    Each path is read from disk and wrapped in an ``AnnotateImageRequest``
    asking for face, label, image-properties, crop-hint and web detection.
    All requests are sent through ``self.client.batch_annotate_images`` and
    one ``Serializer.responseToJSON`` result is returned per path in ``batch``.
    """

    def build_request(path):
        # Loads the image into memory.
        with io.open(path, 'rb') as handle:
            payload = handle.read()

        # https://googlecloudplatform.github.io/google-cloud-python/latest/vision/gapic/v1/types.html#google.cloud.vision_v1.types.AnnotateImageRequest
        # Detections not requested here: LANDMARK_DETECTION, LOGO_DETECTION,
        # TEXT_DETECTION, DOCUMENT_TEXT_DETECTION, SAFE_SEARCH_DETECTION.
        return types.AnnotateImageRequest(
            image=types.Image(content=payload),
            features=[
                # Run face detection.
                types.Feature(type=FeatureTypes.FACE_DETECTION, max_results=3),
                # Run label detection.
                types.Feature(type=FeatureTypes.LABEL_DETECTION, max_results=10),
                # Compute image properties such as the dominant colors.
                types.Feature(type=FeatureTypes.IMAGE_PROPERTIES),
                # Run crop hints.
                types.Feature(type=FeatureTypes.CROP_HINTS, max_results=5),
                # Run web detection.
                types.Feature(type=FeatureTypes.WEB_DETECTION, max_results=10),
            ],
        )

    requests = [build_request(path) for path in batch]
    response = self.client.batch_annotate_images(requests=requests)

    # NOTE(review): `resultCache` is not defined inside this function; it has
    # to come from an enclosing/module scope. Presumably it maps an image path
    # to the position of its response -- confirm against the rest of the file.
    serialized = []
    for imagePath in batch:
        serialized.append(
            Serializer.responseToJSON(
                response.responses[resultCache[imagePath]['index']]
            )
        )
    return serialized
def get_texts_batch(rects, img, Y):
    """Run text detection on each rectangle of ``img`` in one batched call.

    Args:
        rects: iterable of ``(x, y, w, h)`` boxes; each box is cropped from
            ``img`` as ``img[y:y+h, x:x+w]`` after truncating to ``int``.
        img: image array that supports 2-D slicing and can be PNG-encoded by
            OpenCV.
        Y: unused; kept so existing callers keep working.

    Returns:
        A list with one string per rectangle, in request order: the stripped
        ``full_text_annotation.text`` of the response, or ``"none"`` when the
        API returned no text. An empty ``rects`` yields ``[]`` without
        contacting the API.

    Raises:
        ValueError: if OpenCV reports that a crop could not be PNG-encoded.
    """
    rects = list(rects)
    if not rects:
        # Nothing to annotate: skip the API round-trip entirely.
        return []

    features = [types.Feature(type=enums.Feature.Type.TEXT_DETECTION)]
    # The language hint is identical for every crop, so build it once.
    image_context = types.ImageContext(language_hints=["en"])

    requests = []
    for x, y, w, h in rects:
        crop = img[int(y):int(y + h), int(x):int(x + w)]
        # Encode in memory instead of going through a shared 'temp.png' in the
        # working directory, which was race-prone and left behind on errors.
        encoded_ok, png_buffer = cv2.imencode('.png', crop)
        if not encoded_ok:
            raise ValueError(
                'could not PNG-encode crop {!r}'.format((x, y, w, h)))
        image = types.Image(content=png_buffer.tobytes())
        requests.append(types.AnnotateImageRequest(
            image=image, features=features, image_context=image_context))

    client = vision.ImageAnnotatorClient()
    batch_response = client.batch_annotate_images(requests)

    texts = []
    for image_response in batch_response.responses:
        detected = image_response.full_text_annotation.text
        texts.append(detected.strip() if detected else "none")
    return texts
def google_request(self):
    """Send ``self.opened_file`` to the Vision API for label detection.

    The image content is wrapped in a single ``AnnotateImageRequest`` and
    submitted as a one-element batch through ``self.client``.

    Returns:
        Whatever ``self.client.batch_annotate_images`` returns for that batch.
    """
    # Other feature types that could be requested instead of / in addition to
    # labels: FACE_DETECTION, LOGO_DETECTION, TEXT_DETECTION,
    # DOCUMENT_TEXT_DETECTION, SAFE_SEARCH_DETECTION, WEB_DETECTION,
    # LANDMARK_DETECTION, IMAGE_PROPERTIES.
    label_feature = types.Feature(type=enums.Feature.Type.LABEL_DETECTION)

    picture = types.Image(content=self.opened_file)
    single_request = types.AnnotateImageRequest(
        image=picture,
        features=[label_feature],
    )
    return self.client.batch_annotate_images([single_request])
def call_Vision_API(image_binary, requested_features):
    '''
    Wrapper around the Google Vision API. Enables calling it in one line with
    image binary and requested features.

    Arguments:
    - image_binary: image data, as ``bytes``.
    - requested_features: list of feature names. Each name is looked up in
      ``labels``; names that are not present there are reported on stdout and
      skipped instead of aborting the call with a ``KeyError``.

    Returns: AnnotateImageResponse. Check the README for more info on this
    type.

    Raises: AssertionError when an argument has the wrong type.
    '''
    assert isinstance(image_binary, bytes), \
        'image_binary should be of class bytes.'
    assert isinstance(requested_features, list), \
        'requested_features should be a list.'

    # load the image into compatible type
    image = types.Image(content=image_binary)

    # Instantiates a client
    client = vision.ImageAnnotatorClient()

    # Attach all the valid features the caller requested
    all_features = []
    for feature_name in requested_features:
        if feature_name not in labels:
            print(feature_name, "is not a valid feature.")
            # Skip it: indexing `labels` with an unknown name would raise.
            continue
        all_features.append(
            types.Feature(type=labels[feature_name], max_results=10))

    # create the request
    request = types.AnnotateImageRequest(image=image, features=all_features)

    # call the image annotation
    return client.annotate_image(request)
def detect_text(images_array):
    """Detect text in several image files with one batched Vision API call.

    Args:
        images_array: iterable of paths to image files; each file is read in
            binary mode and sent with a TEXT_DETECTION feature.

    Returns:
        The response object returned by ``batch_annotate_images``; its
        ``responses`` are iterated below to check each one for errors.

    Raises:
        Exception: if any per-image response carries an error message.
    """
    from google.cloud import vision

    client = vision.ImageAnnotatorClient()
    features = [types.Feature(type=enums.Feature.Type.TEXT_DETECTION)]

    requests = []
    for filename in images_array:
        with open(filename, 'rb') as image_file:
            image = types.Image(content=image_file.read())
        requests.append(
            types.AnnotateImageRequest(image=image, features=features))

    response_vision = client.batch_annotate_images(requests)

    # Errors are reported per image, not on the batch response. The previous
    # check ran after an unconditional return and read a field the batch
    # response does not have, so it could never fire.
    for image_response in response_vision.responses:
        if image_response.error.message:
            raise Exception(
                '{}\nFor more info on error messages, check: '
                'https://cloud.google.com/apis/design/errors'.format(
                    image_response.error.message))

    return response_vision
def batch_detect_faces_uri(image_uris=None):
    """Batch detection of faces via image URI.

    Args:
        image_uris: iterable of image URIs. ``None`` or an empty iterable
            returns ``[]`` without creating a client.

    Returns:
        A list with one dict per response, in request order, with keys
        ``'face_request'`` (position in the batch), ``'from_url'`` (the URI
        that was submitted) and ``'faces_emotions'`` (the result of
        ``parse_face_emotions`` on the response's ``face_annotations``).
    """
    # Materialise once: the URIs are walked twice (requests, then results),
    # and this also avoids a shared mutable default argument.
    image_uris = list(image_uris or ())
    if not image_uris:
        return []

    # Instantiates a client
    client = vision.ImageAnnotatorClient()

    features = [types.Feature(type=enums.Feature.Type.FACE_DETECTION)]

    requests = []
    for image_uri in image_uris:
        image = types.Image()
        image.source.image_uri = image_uri
        requests.append(
            types.AnnotateImageRequest(image=image, features=features))

    response = client.batch_annotate_images(requests)

    faces_batched = []
    paired = zip(image_uris, response.responses)
    for face_index, (image_uri, image_response) in enumerate(paired):
        faces_batched.append({
            'face_request': face_index,
            'from_url': image_uri,
            'faces_emotions': parse_face_emotions(
                image_response.face_annotations),
        })
    return faces_batched
import io
import os

from google.cloud import vision
from google.cloud.vision import enums
from google.cloud.vision import types

# Vision API client used for the batch call at the bottom of the script.
client = vision.ImageAnnotatorClient()

# Every image in the batch is annotated with labels and faces.
features = [
    types.Feature(type=enums.Feature.Type.LABEL_DETECTION),
    types.Feature(type=enums.Feature.Type.FACE_DETECTION),
]

# The image to annotate sits next to this script.
file_name = os.path.join(os.path.dirname(__file__), '[IMAGE].jpg')

# Read the raw bytes of the image into memory.
with io.open(file_name, 'rb') as image_file:
    content = image_file.read()

# Wrap the bytes in a request and submit it as a one-element batch.
image = types.Image(content=content)
request = types.AnnotateImageRequest(image=image, features=features)
requests = [request]

response = client.batch_annotate_images(requests)
ROIs[1][0]:ROIs[1][0] + ROIs[1][2]] ROI_3 = img[ROIs[2][1]:ROIs[2][1] + ROIs[2][3], ROIs[2][0]:ROIs[2][0] + ROIs[2][2]] cv2.imwrite('Name.png', ROI_1) cv2.imwrite('Roll.png', ROI_2) cv2.imwrite('Marks.png', ROI_3) cv2.waitKey(0) cv2.destroyAllWindows() os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = r'key.json' client = vision.ImageAnnotatorClient() workbook = xlsxwriter.Workbook('Marks.xlsx') worksheet = workbook.add_worksheet() parent_folder_path = r'C:\Users\revukrishna2000\Documents\Python Scripts\VisionAPIDemo' features = [types.Feature(type=enums.Feature.Type.DOCUMENT_TEXT_DETECTION)] requests = [] row = 0 col = 0 for filename in ['Name.png', 'Roll.png', 'Marks.png']: filepath = os.path.join(parent_folder_path, filename) with open(filepath, 'rb') as image_file: image = vision.types.Image(content=image_file.read()) request = types.AnnotateImageRequest(image=image, features=features) requests.append(request) response = client.document_text_detection(image=image) text = response.full_text_annotation.text li = text.splitlines() for elem in li: worksheet.write(row, col, elem) row += 1
# -*- coding: UTF-8 -*- import os, io from google.cloud import vision from google.cloud.vision import enums from google.cloud.vision import types import pandas as pd #you should change 1) credentials and 2) system environment variable in order to change into other account os.environ[ 'GOOGLE_APPLICATION_CREDENTIALS'] = r'credentials.json' #specifying the location of credentials. client = vision.ImageAnnotatorClient() #fetching google vision api client # You can add other Google Vision's features, like face detection, over here. features = [ types.Feature(type=enums.Feature.Type.TEXT_DETECTION ) # Here, we are only going to use text detection feature. ] requests = [] # inputting files here, as in forms of filename for filename in ['1.jpg', '2.png', '3.jpg']: with io.open(filename, 'rb') as image_file: content = image_file.read() # reading the image content image = vision.types.Image(content=content) request = types.AnnotateImageRequest( image=image, features=features) #request is a single dictionary requests.append(request) #making list of requests for batch request print("numbers of files submitted to API: " + str(len(requests))) response = client.batch_annotate_images(
def async_detect_document_text(bucket_name, image_file, textfile, jsonfile):
    """OCR one image through the asynchronous Vision file-annotation API.

    The image is re-saved as a TIFF in the temp directory, uploaded to
    ``bucket_name``, annotated with DOCUMENT_TEXT_DETECTION, and the first
    JSON output written under the destination prefix is downloaded and parsed.

    Args:
        bucket_name: GCS bucket used for both the uploaded TIFF and the
            JSON output.
        image_file: path of the local image to process.
        textfile: path to write the recognised text to (UTF-8), or ``0`` to
            skip writing it.
        jsonfile: path to write the annotation as JSON to, or ``0`` to skip
            writing it.

    Returns:
        The ``full_text_annotation`` of the first response, or ``None`` when
        the API reported an error or no output file was found.
    """
    # Supported mime_types are: 'application/pdf' and 'image/tiff'
    mime_type = 'image/tiff'

    # Public API; replaces the private tempfile._get_default_tempdir().
    tmp_dir = tempfile.gettempdir()
    png_fn = os.path.basename(image_file)
    fn = os.path.splitext(png_fn)[0]
    tif_fn = fn + '.tif'
    prefix_fn = fn + '-'
    tif_path = os.path.join(tmp_dir, tif_fn)

    gcs_src_uri = 'gs://{}/{}'.format(bucket_name, tif_fn)
    gcs_dst_uri = 'gs://{}/{}'.format(bucket_name, prefix_fn)

    try:
        logging.info('Converting... {!s}'.format(png_fn))
        with Image.open(image_file) as im:
            # NOTE(review): 317 and 278 are raw TIFF tag numbers (presumably
            # Predictor and RowsPerStrip) -- confirm against the TIFF tag table.
            im.save(tif_path, compression='tiff_lzw',
                    tiffinfo={317: 2, 278: 1})

        logging.info('Uploading... {!s}'.format(tif_fn))
        upload_blob(bucket_name, tif_path, tif_fn)
    finally:
        # Remove the local TIFF even when conversion or upload fails.
        if os.path.exists(tif_path):
            os.unlink(tif_path)

    # How many pages should be grouped into each json output file.
    # With a file of 1 pages
    batch_size = 1

    client = vision.ImageAnnotatorClient()

    feature = types.Feature(
        type=vision.enums.Feature.Type.DOCUMENT_TEXT_DETECTION)

    gcs_src = types.GcsSource(uri=gcs_src_uri)
    input_config = types.InputConfig(gcs_source=gcs_src, mime_type=mime_type)

    gcs_dst = types.GcsDestination(uri=gcs_dst_uri)
    output_config = types.OutputConfig(gcs_destination=gcs_dst,
                                       batch_size=batch_size)

    image_context = types.ImageContext(language_hints=['en'])

    async_request = types.AsyncAnnotateFileRequest(
        features=[feature],
        input_config=input_config,
        output_config=output_config,
        image_context=image_context)

    operation = client.async_batch_annotate_files(requests=[async_request])

    logging.info('Waiting... {!s}'.format(gcs_src_uri))
    result = operation.result(timeout=GOOGLE_OPERATION_TIMEOUT)
    logging.debug('{!s}'.format(result))

    delete_blob(bucket_name, tif_fn)

    # Once the request has completed and the output has been
    # written to GCS, we can list all the output files.
    storage_client = storage.Client()

    match = re.match(r'gs://([^/]+)/(.+)', gcs_dst_uri)
    dst_bucket_name = match.group(1)
    prefix = match.group(2)

    bucket = storage_client.get_bucket(bucket_name=dst_bucket_name)

    # List objects with the given prefix.
    blob_list = list(bucket.list_blobs(prefix=prefix))
    if not blob_list:
        # Report a missing output the same way as an API error below.
        logging.error('No output found under {!s}'.format(gcs_dst_uri))
        return None

    # Process the first output file from GCS.
    # Since we specified batch_size=1, the first response contains
    # the first page of the input file.
    output = blob_list[0]

    logging.info('Downloading... {!s}'.format(output.name))
    json_string = output.download_as_string()
    logging.debug('JSON len={:d}'.format(len(json_string)))

    response = json_format.Parse(json_string, types.AnnotateFileResponse())

    error = response.responses[0].error
    if error.code != 0:
        logging.error("Vision API code: {!s}, msg: {!s}".format(
            error.code, error.message))
        return None

    # The actual response for the first page of the input file.
    document = response.responses[0].full_text_annotation

    # Compare by value: `is not 0` tested object identity against a literal.
    if textfile != 0:
        logging.info('Saving... {!s}'.format(textfile))
        with io.open(textfile, 'wb') as f:
            f.write(document.text.encode('utf-8'))

    if jsonfile != 0:
        logging.info('Saving... {!s}'.format(jsonfile))
        with io.open(jsonfile, 'wb') as f:
            # The file is opened in binary mode, so the JSON text is encoded.
            f.write(json_format.MessageToJson(document).encode('utf-8'))

    output.delete()

    return document