def test_custom_form_empty_model_id(self, formrecognizer_test_endpoint, formrecognizer_test_api_key):
    """Assert that an empty ``model_id`` makes the client raise ``ValueError``."""
    credential = AzureKeyCredential(formrecognizer_test_api_key)
    client = FormRecognizerClient(formrecognizer_test_endpoint, credential)

    with self.assertRaises(ValueError):
        client.begin_recognize_custom_forms(model_id="", form=b"xx")
def test_custom_form_empty_model_id(self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
    """Assert that an empty ``model_id`` makes the client raise ``ValueError``."""
    credential = AzureKeyCredential(form_recognizer_account_key)
    client = FormRecognizerClient(form_recognizer_account, credential)

    with self.assertRaises(ValueError):
        client.begin_recognize_custom_forms(model_id="", form=b"xx")
def recognize_custom_forms(self):
    """Recognize one form with a labeled and an unlabeled model and print both results.

    Endpoint, key and the two model IDs are read from the environment
    variables ``AZURE_FORM_RECOGNIZER_ENDPOINT``, ``AZURE_FORM_RECOGNIZER_KEY``,
    ``ID_OF_MODEL_TRAINED_WITH_LABELS`` and ``ID_OF_MODEL_TRAINED_WITHOUT_LABELS``.

    Raises:
        KeyError: if any of those environment variables is not set.
    """
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer import FormRecognizerClient

    env = os.environ
    endpoint = env["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    key = env["AZURE_FORM_RECOGNIZER_KEY"]
    model_trained_with_labels_id = env["ID_OF_MODEL_TRAINED_WITH_LABELS"]
    model_trained_without_labels_id = env["ID_OF_MODEL_TRAINED_WITHOUT_LABELS"]

    credential = AzureKeyCredential(key)
    client = FormRecognizerClient(endpoint=endpoint, credential=credential)

    # Make sure your form's type is included in the list of form types the custom model can recognize
    with open("sample_forms/forms/Form_1.jpg", "rb") as form_file:
        form_bytes = form_file.read()

    # Start both operations before waiting on either one, so the service
    # can work on them in parallel.
    labeled_poller = client.begin_recognize_custom_forms(
        model_id=model_trained_with_labels_id, form=form_bytes)
    unlabeled_poller = client.begin_recognize_custom_forms(
        model_id=model_trained_without_labels_id, form=form_bytes)
    labeled_results = labeled_poller.result()
    unlabeled_results = unlabeled_poller.result()

    # With a form recognized by a model trained with labels, the 'name' key is the
    # training-time label; otherwise it is denoted by numeric indices.
    # Label data is not returned for a model trained with labels.
    value_template = "...Field '{}' has value '{}' based on '{}' within bounding box '{}', with a confidence score of {}"
    label_template = "...Field '{}' has label '{}' within bounding box '{}', with a confidence score of {}"

    print("---------Recognizing forms with models trained with labeled data---------")
    for labeled_form in labeled_results:
        for name, field in labeled_form.fields.items():
            value_data = field.value_data
            print(value_template.format(
                name,
                field.value,
                value_data.text,
                format_bounding_box(value_data.bounding_box),
                field.confidence,
            ))

    print("-----------------------------------------------------------------------")
    print("-------Recognizing forms with models trained with unlabeled data-------")
    for unlabeled_form in unlabeled_results:
        for name, field in unlabeled_form.fields.items():
            label_data = field.label_data
            print(label_template.format(
                name,
                label_data.text,
                format_bounding_box(label_data.bounding_box),
                field.confidence,
            ))
            value_data = field.value_data
            print(value_template.format(
                name,
                field.value,
                value_data.text,
                format_bounding_box(value_data.bounding_box),
                field.confidence,
            ))
def test_authentication_bad_key(self, formrecognizer_test_endpoint, formrecognizer_test_api_key):
    """Assert that a bogus API key surfaces as ``ClientAuthenticationError``."""
    bad_credential = AzureKeyCredential("xxxx")
    client = FormRecognizerClient(formrecognizer_test_endpoint, bad_credential)

    with self.assertRaises(ClientAuthenticationError):
        # The call is expected to raise, so its return value is not bound.
        client.begin_recognize_custom_forms(
            model_id="xx",
            form=b"xx",
            content_type="image/jpeg",
        )
def recognize_custom_forms(self):
    """Recognize ``Form_1.jpg`` with the model ``self.model_id`` and print each field.

    Uses ``self.endpoint`` and ``self.key`` to build the client.
    """
    # [START recognize_custom_forms]
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer import FormRecognizerClient

    credential = AzureKeyCredential(self.key)
    form_recognizer_client = FormRecognizerClient(endpoint=self.endpoint, credential=credential)

    # Make sure your form's type is included in the list of form types the custom model can recognize
    with open("sample_forms/forms/Form_1.jpg", "rb") as form_file:
        poller = form_recognizer_client.begin_recognize_custom_forms(
            model_id=self.model_id,
            stream=form_file,
        )
    recognized_forms = poller.result()

    value_template = "...Field '{}' has value '{}' with a confidence score of {}"
    label_template = "...Field '{}' has label '{}' with a confidence score of {}"

    for form_index, recognized in enumerate(recognized_forms):
        print("--------Recognizing Form #{}--------".format(form_index))
        print("Form {} has type {}".format(form_index, recognized.form_type))
        for name, field in recognized.fields.items():
            # Each field is a FormField. Its value may itself be a FormField or a
            # list of FormFields; in this sample it is just a FormField.
            print(value_template.format(name, field.value, field.confidence))

            # label_data is populated for models trained with unlabeled data, since
            # the service has to predict labels that were not given to it.
            label_data = field.label_data
            if label_data:
                print(label_template.format(name, label_data.text, field.confidence))
        print("-----------------------------------")
def test_authentication_bad_key(self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
    """Assert that a bogus API key surfaces as ``ClientAuthenticationError``."""
    bad_credential = AzureKeyCredential("xxxx")
    client = FormRecognizerClient(form_recognizer_account, bad_credential)

    with self.assertRaises(ClientAuthenticationError):
        # The call is expected to raise, so its return value is not bound.
        client.begin_recognize_custom_forms(
            model_id="xx",
            stream=b"xx",
            content_type="image/jpeg",
        )
def test_auto_detect_unsupported_stream_content(self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
    """Assert that sending ``self.unsupported_content_py`` without a content type raises ``ValueError``."""
    credential = AzureKeyCredential(form_recognizer_account_key)
    client = FormRecognizerClient(form_recognizer_account, credential)

    with open(self.unsupported_content_py, "rb") as source_file:
        unsupported_bytes = source_file.read()

    with self.assertRaises(ValueError):
        # No content_type is passed here.
        client.begin_recognize_custom_forms(model_id="xxx", form=unsupported_bytes)
def test_custom_form_bad_endpoint(self, formrecognizer_test_endpoint, formrecognizer_test_api_key):
    """Assert that calling a non-existent endpoint raises ``ServiceRequestError``."""
    with open(self.form_jpg, "rb") as form_file:
        form_bytes = form_file.read()

    with self.assertRaises(ServiceRequestError):
        # Client construction stays inside the assertion context, as the
        # test has always had it.
        unreachable_client = FormRecognizerClient(
            "http://notreal.azure.com",
            AzureKeyCredential(formrecognizer_test_api_key),
        )
        unreachable_client.begin_recognize_custom_forms(model_id="xx", form=form_bytes)
def test_passing_unsupported_url_content_type(self, formrecognizer_test_endpoint, formrecognizer_test_api_key):
    """Assert that a URL string sent with ``application/json`` content type raises ``TypeError``."""
    credential = AzureKeyCredential(formrecognizer_test_api_key)
    client = FormRecognizerClient(formrecognizer_test_endpoint, credential)

    with self.assertRaises(TypeError):
        client.begin_recognize_custom_forms(
            model_id="xx",
            form="https://badurl.jpg",
            content_type="application/json",
        )
def test_passing_unsupported_url_content_type(self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
    """Assert that a URL string sent with ``application/json`` content type raises ``TypeError``."""
    credential = AzureKeyCredential(form_recognizer_account_key)
    client = FormRecognizerClient(form_recognizer_account, credential)

    with self.assertRaises(TypeError):
        client.begin_recognize_custom_forms(
            model_id="xx",
            stream="https://badurl.jpg",
            content_type="application/json",
        )
def recognize_custom_forms(self):
    """Recognize the bundled ``Form_1.jpg`` with a custom model and print each field.

    Reads ``AZURE_FORM_RECOGNIZER_ENDPOINT``, ``AZURE_FORM_RECOGNIZER_KEY`` and
    ``CUSTOM_TRAINED_MODEL_ID`` from the environment.

    Raises:
        KeyError: if any of those environment variables is not set.
    """
    this_file = os.path.abspath(__file__)
    path_to_sample_forms = os.path.abspath(
        os.path.join(this_file, "..", "./sample_forms/forms/Form_1.jpg"))
    # [START recognize_custom_forms]
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer import FormRecognizerClient

    endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
    model_id = os.environ["CUSTOM_TRAINED_MODEL_ID"]

    form_recognizer_client = FormRecognizerClient(
        endpoint=endpoint,
        credential=AzureKeyCredential(key),
    )

    # Make sure your form's type is included in the list of form types the custom model can recognize
    with open(path_to_sample_forms, "rb") as form_file:
        poller = form_recognizer_client.begin_recognize_custom_forms(
            model_id=model_id,
            form=form_file,
        )
    recognized_forms = poller.result()

    for position, recognized in enumerate(recognized_forms, start=1):
        print("--------Recognizing Form #{}--------".format(position))
        print("Form has type {}".format(recognized.form_type))
        print("Form has form type confidence {}".format(recognized.form_type_confidence))
        print("Form was analyzed with model with ID {}".format(recognized.model_id))
        for name, field in recognized.fields.items():
            # Each field is a FormField. label_data is populated for models trained
            # without labels, since the service has to predict the labels itself.
            if field.label_data:
                print("...Field '{}' has label '{}' with a confidence score of {}".format(
                    name, field.label_data.text, field.confidence))
                shown_label = field.label_data.text
            else:
                shown_label = name

            # The value can also be a Dict[str, FormField] or a List[FormField];
            # in this sample it is not.
            print("...Label '{}' has value '{}' with a confidence score of {}".format(
                shown_label, field.value, field.confidence))
        print("-----------------------------------")
def test_recognize_tables_dynamic_rows(self, custom_model_id):
    """Recognize a form containing a labeled dynamic-row table and print its rows.

    The model ID comes from ``MODEL_ID_DYNAMIC_ROW_TABLES`` when that variable
    is set, otherwise from ``custom_model_id``.

    Raises:
        KeyError: if ``AZURE_FORM_RECOGNIZER_ENDPOINT`` or
            ``AZURE_FORM_RECOGNIZER_KEY`` is not set.
    """
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer import FormRecognizerClient

    endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
    model_id_dynamic_rows_table = os.getenv("MODEL_ID_DYNAMIC_ROW_TABLES", custom_model_id)

    form_recognizer_client = FormRecognizerClient(
        endpoint=endpoint,
        credential=AzureKeyCredential(key),
    )

    this_file = os.path.abspath(__file__)
    path_to_sample_forms = os.path.abspath(
        os.path.join(this_file, "..", "..", "./sample_forms/forms/label_table_dynamic_rows1.pdf"))
    with open(path_to_sample_forms, "rb") as pdf_file:
        pdf_bytes = pdf_file.read()

    poller = form_recognizer_client.begin_recognize_custom_forms(
        model_id=model_id_dynamic_rows_table,
        form=pdf_bytes,
    )
    recognized_forms = poller.result()

    print("\n\n--------Recognizing labeled table with dynamic rows--------\n")
    for recognized in recognized_forms:
        for name, field in recognized.fields.items():
            # Substitute "table" with the label given to the table tag during
            # training (if different from the sample training docs).
            if name != "table":
                # non-table tagged FormField
                print("...Field '{}' has value '{}' with a confidence score of {}".format(
                    name, field.value, field.confidence))
                continue

            for row_number, row in enumerate(field.value, start=1):
                print("Row {}".format(row_number))
                for column_name, row_value in row.value.items():
                    print("...Column '{}' with value '{}' and a confidence score of {}".format(
                        column_name, row_value.value, row_value.confidence))
def main():
    """Recognize ``test1.jpg`` with a custom-trained model and print its fields.

    Configuration is read from the environment (after ``load_dotenv()``):
    ``FORM_ENDPOINT``, ``FORM_KEY`` and ``MODEL_ID``. Any failure, including
    missing configuration, is reported on stdout instead of propagating.
    """
    try:
        # Get configuration settings
        load_dotenv()
        settings = {
            'FORM_ENDPOINT': os.getenv('FORM_ENDPOINT'),
            'FORM_KEY': os.getenv('FORM_KEY'),
            'MODEL_ID': os.getenv('MODEL_ID'),
        }
        missing = [name for name, value in settings.items() if not value]
        if missing:
            # Fail with a clear message instead of an opaque SDK error.
            raise ValueError(
                "Missing required configuration: {}".format(", ".join(missing)))

        form_endpoint = settings['FORM_ENDPOINT']
        form_key = settings['FORM_KEY']
        # Model ID from when you trained your model.
        model_id = settings['MODEL_ID']

        # The endpoint is safe to echo. The key is a secret and must never be
        # written to stdout or logs.
        print(form_endpoint)

        # Create client using endpoint and key
        form_recognizer_client = FormRecognizerClient(
            form_endpoint, AzureKeyCredential(form_key))

        # Test trained model with a new form
        with open('test1.jpg', "rb") as f:
            poller = form_recognizer_client.begin_recognize_custom_forms(
                model_id=model_id, form=f)

        result = poller.result()

        for recognized_form in result:
            print("Form type: {}".format(recognized_form.form_type))
            for name, field in recognized_form.fields.items():
                # label_data may be absent; fall back to the field name.
                label = field.label_data.text if field.label_data else name
                print(
                    "Field '{}' has label '{}' with value '{}' and a confidence score of {}"
                    .format(name, label, field.value, field.confidence))

    except Exception as ex:
        # Top-level script boundary: report the error instead of a traceback.
        print(ex)
def main():
    """Recognize ``test1.jpg`` with a custom-trained model and print its fields.

    The API key is read from the ``FORM_KEY`` environment variable; it must
    not be committed to source control. ``FORM_ENDPOINT`` and ``MODEL_ID``
    may override the built-in endpoint and model ID. Any failure, including
    a missing key, is reported on stdout instead of propagating.
    """
    import os  # local import so this function does not depend on file-level imports

    try:
        # Get configuration settings
        form_endpoint = os.getenv(
            "FORM_ENDPOINT", "https://doors1.cognitiveservices.azure.com/")
        form_key = os.getenv("FORM_KEY")
        if not form_key:
            raise ValueError(
                "Set the FORM_KEY environment variable to your Form Recognizer API key.")

        # Create client using endpoint and key
        form_recognizer_client = FormRecognizerClient(
            form_endpoint, AzureKeyCredential(form_key))

        # Model ID from when you trained your model.
        model_id = os.getenv("MODEL_ID", "8bf5a901-3ef6-4d1b-8a5b-f82ca3c1b05c")

        # Test trained model with a new form
        with open('test1.jpg', "rb") as f:
            poller = form_recognizer_client.begin_recognize_custom_forms(
                model_id=model_id, form=f)

        result = poller.result()

        for recognized_form in result:
            print("Form type: {}".format(recognized_form.form_type))
            for name, field in recognized_form.fields.items():
                # label_data may be absent; fall back to the field name.
                label = field.label_data.text if field.label_data else name
                print(
                    "Field '{}' has label '{}' with value '{}' and a confidence score of {}"
                    .format(name, label, field.value, field.confidence))

    except Exception as ex:
        # Top-level script boundary: report the error instead of a traceback.
        print(ex)
def get_bounding_boxes(self):
    """Recognize ``Form_1.jpg`` with text content included and print bounding boxes.

    Uses ``self.endpoint``, ``self.key`` and ``self.model_id``. Prints every
    field, then for each page its tables, cells, and the words/lines behind
    each cell.
    """
    from azure.ai.formrecognizer import FormWord, FormLine
    # [START create_form_recognizer_client]
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer import FormRecognizerClient
    form_recognizer_client = FormRecognizerClient(
        endpoint=self.endpoint, credential=AzureKeyCredential(self.key))
    # [END create_form_recognizer_client]

    # Make sure your form's type is included in the list of form types the custom model can recognize
    with open("sample_forms/forms/Form_1.jpg", "rb") as form_file:
        poller = form_recognizer_client.begin_recognize_custom_forms(
            model_id=self.model_id,
            stream=form_file.read(),
            include_text_content=True,
        )
    recognized_forms = poller.result()

    for form_index, recognized in enumerate(recognized_forms):
        print("--------RECOGNIZING FORM #{}--------".format(form_index))
        print("Form has type {}".format(recognized.form_type))
        for name, field in recognized.fields.items():
            # Each field is a FormField. Its value may itself be a FormField or a
            # list of FormFields; in this sample it is not.
            value_data = field.value_data
            print("...Field '{}' has value '{}' based on '{}' within bounding box '{}', with a confidence score of {}".format(
                name,
                field.value,
                value_data.text,
                format_bounding_box(value_data.bounding_box),
                field.confidence,
            ))

        for page in recognized.pages:
            print("-------Recognizing Page #{} of Form #{}-------".format(page.page_number, form_index))
            print("Has width '{}' and height '{}' measure with unit: {}, and has text angle '{}'".format(
                page.width, page.height, page.unit, page.text_angle))
            for table in page.tables:
                for cell in table.cells:
                    print("...Cell[{}][{}] has text '{}' with confidence {} based on the following words: ".format(
                        cell.row_index, cell.column_index, cell.text, cell.confidence))
                    # text_content is only populated when include_text_content=True is
                    # passed to the recognize call. It is a heterogeneous list of
                    # FormWord and FormLine.
                    for element in cell.text_content:
                        if isinstance(element, FormWord):
                            print("......Word '{}' within bounding box '{}' has a confidence of {}".format(
                                element.text,
                                format_bounding_box(element.bounding_box),
                                element.confidence,
                            ))
                            continue
                        if isinstance(element, FormLine):
                            print("......Line '{}' within bounding box '{}' has the following words: ".format(
                                element.text,
                                format_bounding_box(element.bounding_box),
                            ))
                            for word in element.words:
                                print(".........Word '{}' within bounding box '{}' has a confidence of {}".format(
                                    word.text,
                                    format_bounding_box(word.bounding_box),
                                    word.confidence,
                                ))
                print("---------------------------------------------------")
        print("-----------------------------------")
def main():
    """ Text and Image Recognizer

    Once an image is uploaded in the Streamlit page it is displayed and saved.
    A test image is then run through a custom Form Recognizer model, and the
    last object Computer Vision detects in that image is cropped and shown.
    Recognized values are collected in a local dict.

    API keys are read from ``AZURE_FORM_RECOGNIZER_KEY`` and
    ``COMPUTER_VISION_SUBSCRIPTION_KEY``. The endpoints and the model ID can be
    overridden via ``AZURE_FORM_RECOGNIZER_ENDPOINT``, ``CUSTOM_TRAINED_MODEL_ID``
    and ``COMPUTER_VISION_ENDPOINT``.

    Raises:
        KeyError: if one of the two key variables is not set when an image
            is processed.
    """
    st.markdown("bla bla")
    uploaded_image = st.file_uploader("Selectione une image")
    if uploaded_image is not None:
        image = Image.open(uploaded_image)
        st.image(image, use_column_width=True)
        image.save('test_images/test11111.png', 'PNG')

        # Collected results: field name -> recognized value, plus "image"
        dic = {}

        ## Form Recognizer Configuration ##
        # The key is a secret, so it comes from the environment and is never
        # stored in source.
        endpoint_f = os.getenv(
            "AZURE_FORM_RECOGNIZER_ENDPOINT",
            "https://recettesfromrecognizer.cognitiveservices.azure.com/")
        key_f = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
        # Authenticate the client object
        form_recognizer_client = FormRecognizerClient(
            endpoint_f, AzureKeyCredential(key_f))
        # Model ID (the model trained with the Azure labeling tool)
        model_id = os.getenv(
            "CUSTOM_TRAINED_MODEL_ID", "491ce87f-878c-4eaf-8bc4-a336a4a209a5")

        # NOTE(review): the upload is saved as test_images/test11111.png but the
        # file analysed below is test_images/test1.png. Confirm which is intended.
        image_path = os.path.join("test_images", "test1.png")

        # Open and test the image
        with open(image_path, "rb") as f:
            poller = form_recognizer_client.begin_recognize_custom_forms(
                model_id=model_id, form=f)
        # Result of the test for new image
        forms = poller.result()

        # Collect the keys and values
        for recognized_form in forms:
            for name, field in recognized_form.fields.items():
                print(" '{}' : ({}) Accuracy \n '{}' \n".format(
                    name,
                    field.confidence,
                    field.value,
                ))
                dic[name] = field.value

        ## Computer Vision Configuration ##
        endpoint_c = os.getenv(
            "COMPUTER_VISION_ENDPOINT",
            "https://testdeletes123.cognitiveservices.azure.com/")
        key_c = os.environ["COMPUTER_VISION_SUBSCRIPTION_KEY"]
        # Authenticate the client object
        computervision_client = ComputerVisionClient(
            endpoint_c, CognitiveServicesCredentials(key_c))

        # Open the test image
        im = Image.open(image_path)

        # Detect the photo on the image; the context manager closes the stream
        # once the service call has returned.
        with open(image_path, "rb") as img:
            detected_object = computervision_client.detect_objects_in_stream(img)

        detections = detected_object.objects
        if len(detections) == 0:
            # Nothing to crop: without a detected rectangle there is no valid box.
            print("No objects detected.")
        else:
            # When several objects are detected the last one wins, exactly as
            # the former overwrite-in-a-loop did.
            rectangle = detections[-1].rectangle
            box = (
                rectangle.x,
                rectangle.y,
                rectangle.x + rectangle.w,
                rectangle.y + rectangle.h,
            )
            # Crop Image
            area = im.crop(box)
            st.image(area, use_column_width=True)
            # Convert the image to an array
            dic["image"] = asarray(area)
def recognize_custom_forms(self, custom_model_id):
    """Recognize the bundled ``Form_1.jpg`` and print fields, tables, lines and selection marks.

    The model ID comes from ``CUSTOM_TRAINED_MODEL_ID`` when that variable is
    set, otherwise from ``custom_model_id``.

    Raises:
        KeyError: if ``AZURE_FORM_RECOGNIZER_ENDPOINT`` or
            ``AZURE_FORM_RECOGNIZER_KEY`` is not set.
    """
    this_file = os.path.abspath(__file__)
    path_to_sample_forms = os.path.abspath(
        os.path.join(this_file, "..", "..", "./sample_forms/forms/Form_1.jpg"))
    # [START recognize_custom_forms]
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer import FormRecognizerClient

    endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
    model_id = os.getenv("CUSTOM_TRAINED_MODEL_ID", custom_model_id)

    form_recognizer_client = FormRecognizerClient(
        endpoint=endpoint,
        credential=AzureKeyCredential(key),
    )

    # Make sure your form's type is included in the list of form types the custom model can recognize
    with open(path_to_sample_forms, "rb") as form_file:
        poller = form_recognizer_client.begin_recognize_custom_forms(
            model_id=model_id,
            form=form_file,
            include_field_elements=True,
        )
    recognized_forms = poller.result()

    for position, recognized in enumerate(recognized_forms, start=1):
        print("--------Recognizing Form #{}--------".format(position))
        print("Form has type {}".format(recognized.form_type))
        print("Form has form type confidence {}".format(recognized.form_type_confidence))
        print("Form was analyzed with model with ID {}".format(recognized.model_id))
        for name, field in recognized.fields.items():
            # Each field is a FormField. label_data is populated for models trained
            # without labels, since the service has to predict the labels itself.
            if field.label_data:
                print("...Field '{}' has label '{}' with a confidence score of {}".format(
                    name, field.label_data.text, field.confidence))
                shown_label = field.label_data.text
            else:
                shown_label = name

            print("...Label '{}' has value '{}' with a confidence score of {}".format(
                shown_label, field.value, field.confidence))

        # iterate over tables, lines, and selection marks on each page
        for page in recognized.pages:
            for table_number, table in enumerate(page.tables, start=1):
                print("\nTable {} on page {}".format(table_number, table.page_number))
                for cell in table.cells:
                    print("...Cell[{}][{}] has text '{}' with confidence {}".format(
                        cell.row_index, cell.column_index, cell.text, cell.confidence))

            print("\nLines found on page {}".format(page.page_number))
            for line in page.lines:
                print("...Line '{}' is made up of the following words: ".format(line.text))
                for word in line.words:
                    print("......Word '{}' has a confidence of {}".format(word.text, word.confidence))

            marks = page.selection_marks
            if marks:
                print("\nSelection marks found on page {}".format(page.page_number))
                for mark in marks:
                    print("......Selection mark is '{}' and has a confidence of {}".format(
                        mark.state, mark.confidence))

        print("-----------------------------------")
## Form Recognizer Configuration ##
# The API key is a secret: it is read from the environment and never stored
# in source. Endpoint and model ID keep their previous values as defaults.
endpoint_f = os.getenv(
    "AZURE_FORM_RECOGNIZER_ENDPOINT",
    "https://recettesfromrecognizer.cognitiveservices.azure.com/")
key_f = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

# Authenticate the client object
form_recognizer_client = FormRecognizerClient(endpoint_f, AzureKeyCredential(key_f))

# Model ID (the model trained with the Azure labeling tool)
model_id = os.getenv(
    "CUSTOM_TRAINED_MODEL_ID", "491ce87f-878c-4eaf-8bc4-a336a4a209a5")

# image path
image_path = os.path.join("test_images", "test3.jpg")

# Open and test the image
with open(image_path, "rb") as f:
    poller = form_recognizer_client.begin_recognize_custom_forms(
        model_id=model_id, form=f)

# Result of the test for new image
forms = poller.result()

# Collect the keys and values; `dic` is expected to be defined by the
# surrounding code.
for recognized_form in forms:
    for name, field in recognized_form.fields.items():
        print(" '{}' : ({}) Accuracy \n '{}' \n".format(
            name,
            field.confidence,
            field.value,
        ))
        dic[name] = field.value

## Custom Vision Configuration ##
def recognize_custom_forms(self, labeled_model_id, unlabeled_model_id):
    """Recognize one form with a labeled and an unlabeled model and contrast the output.

    Model IDs come from ``ID_OF_MODEL_TRAINED_WITH_LABELS`` and
    ``ID_OF_MODEL_TRAINED_WITHOUT_LABELS`` when set, otherwise from the
    ``labeled_model_id`` / ``unlabeled_model_id`` arguments.

    Raises:
        KeyError: if ``AZURE_FORM_RECOGNIZER_ENDPOINT`` or
            ``AZURE_FORM_RECOGNIZER_KEY`` is not set.
    """
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer import FormRecognizerClient

    endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
    model_trained_with_labels_id = os.getenv("ID_OF_MODEL_TRAINED_WITH_LABELS", labeled_model_id)
    model_trained_without_labels_id = os.getenv("ID_OF_MODEL_TRAINED_WITHOUT_LABELS", unlabeled_model_id)

    client = FormRecognizerClient(endpoint=endpoint, credential=AzureKeyCredential(key))

    this_file = os.path.abspath(__file__)
    path_to_sample_forms = os.path.abspath(
        os.path.join(this_file, "..", "..", "./sample_forms/forms/Form_1.jpg"))
    with open(path_to_sample_forms, "rb") as form_file:
        form_bytes = form_file.read()

    # Start both operations before calling result() on either, which allows
    # for server-side parallelization.
    labeled_poller = client.begin_recognize_custom_forms(
        model_id=model_trained_with_labels_id, form=form_bytes)
    unlabeled_poller = client.begin_recognize_custom_forms(
        model_id=model_trained_without_labels_id, form=form_bytes)
    labeled_results = labeled_poller.result()
    unlabeled_results = unlabeled_poller.result()

    value_template = "...Field '{}' has value '{}' within bounding box '{}', with a confidence score of {}"
    label_template = "...Field '{}' has label '{}' within bounding box '{}', with a confidence score of {}"

    # With a form recognized by a model trained with labels, the `name` key is the label given
    # during training. `value` holds the typed field value and `value_data` holds information
    # about it. `label_data` is not populated, as the given label was used to extract the key.
    print("---------Recognizing forms using models trained with labeled data---------")
    for labeled_form in labeled_results:
        for name, field in labeled_form.fields.items():
            print(value_template.format(
                name,
                field.value,
                format_bounding_box(field.value_data.bounding_box),
                field.confidence,
            ))

    # Find a specific labeled field. Substitute "Merchant" with your specific training-time label
    try:
        print("\nValue for a specific labeled field using the training-time label:")
        training_time_label = "Merchant"
        for labeled_form in labeled_results:
            merchant_field = labeled_form.fields[training_time_label]
            print("The Merchant is {}\n".format(merchant_field.value))
    except KeyError:
        print("'Merchant' training-time label does not exist. Substitute with your own training-time label.\n")

    # With a form recognized by a model trained without labels, the `name` key is a numeric index.
    # Non-unique form field label names are found in `label_data.text`. Information about the
    # label and the value is found in `label_data` and `value_data`.
    print("-----------------------------------------------------------------------")
    print("-------Recognizing forms using models trained with unlabeled data-------")
    for unlabeled_form in unlabeled_results:
        for name, field in unlabeled_form.fields.items():
            print(label_template.format(
                name,
                field.label_data.text,
                format_bounding_box(field.label_data.bounding_box),
                field.confidence,
            ))
            print(value_template.format(
                name,
                field.value,
                format_bounding_box(field.value_data.bounding_box),
                field.confidence,
            ))

    # Find the value of a specific unlabeled field. Will only be found if sample training forms used
    print("\nValue for a specific unlabeled field:")
    field_label = "Vendor Name:"
    for unlabeled_form in unlabeled_results:
        for field in unlabeled_form.fields.values():
            if field.label_data.text != field_label:
                continue
            print("The Vendor Name is {}\n".format(field.value))
class RecognizeCustomForms(PerfStressTest):
    """Perf/stress test that runs ``begin_recognize_custom_forms`` synchronously and asynchronously."""

    def __init__(self, arguments):
        """Load the sample form into memory and build the sync, async and training clients.

        Raises:
            KeyError: if ``FORMRECOGNIZER_TRAINING_DATA_CONTAINER_SAS_URL``,
                ``FORMRECOGNIZER_TEST_ENDPOINT`` or ``FORMRECOGNIZER_TEST_API_KEY``
                is not set.
        """
        super().__init__(arguments)

        this_file = os.path.abspath(__file__)
        sample_path = os.path.abspath(
            os.path.join(this_file, "..", "./../sample_forms/forms/Form_1.jpg"))
        with open(sample_path, "rb") as sample_file:
            self.custom_form_jpg = sample_file.read()

        # read test related env vars
        self.formrecognizer_storage_container_sas_url = os.environ["FORMRECOGNIZER_TRAINING_DATA_CONTAINER_SAS_URL"]
        endpoint = os.environ["FORMRECOGNIZER_TEST_ENDPOINT"]
        api_key = os.environ["FORMRECOGNIZER_TEST_API_KEY"]

        # assign the clients that will be used in the perf tests
        self.service_client = FormRecognizerClient(endpoint, AzureKeyCredential(api_key))
        self.async_service_client = AsyncFormRecognizerClient(endpoint, AzureKeyCredential(api_key))

        # training client will be used for model training in set up
        self.async_training_client = AsyncFormTrainingClient(endpoint, AzureKeyCredential(api_key))

    async def global_setup(self):
        """The global setup is run only once; trains a labeled model and stores its ID."""
        training_poller = await self.async_training_client.begin_training(
            self.formrecognizer_storage_container_sas_url,
            use_training_labels=True,
            model_name="labeled",
        )
        trained_model = await training_poller.result()
        self.model_id = trained_model.model_id

    async def global_cleanup(self):
        """The global cleanup is run only once; deletes the model trained in setup."""
        await self.async_training_client.delete_model(self.model_id)

    async def close(self):
        """This is run after cleanup; closes all three clients, then the base class."""
        await self.async_service_client.close()
        self.service_client.close()
        await self.async_training_client.close()
        await super().close()

    def run_sync(self):
        """The synchronous perf test."""
        recognize_poller = self.service_client.begin_recognize_custom_forms(
            self.model_id,
            self.custom_form_jpg,
            content_type=FormContentType.IMAGE_JPEG,
        )
        recognized = recognize_poller.result()
        assert recognized

    async def run_async(self):
        """The asynchronous perf test."""
        recognize_poller = await self.async_service_client.begin_recognize_custom_forms(
            self.model_id,
            self.custom_form_jpg,
            content_type=FormContentType.IMAGE_JPEG,
        )
        recognized = await recognize_poller.result()
        assert recognized
def get_bounding_boxes(self):
    """Recognize the bundled ``Form_1.jpg`` with field elements included and print bounding boxes.

    Reads ``AZURE_FORM_RECOGNIZER_ENDPOINT``, ``AZURE_FORM_RECOGNIZER_KEY`` and
    ``CUSTOM_TRAINED_MODEL_ID`` from the environment.

    Raises:
        KeyError: if any of those environment variables is not set.
    """
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer import FormRecognizerClient
    from azure.ai.formrecognizer import FormWord, FormLine

    endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
    model_id = os.environ["CUSTOM_TRAINED_MODEL_ID"]

    form_recognizer_client = FormRecognizerClient(
        endpoint=endpoint,
        credential=AzureKeyCredential(key),
    )

    this_file = os.path.abspath(__file__)
    path_to_sample_forms = os.path.abspath(
        os.path.join(this_file, "..", "./sample_forms/forms/Form_1.jpg"))

    # Make sure your form's type is included in the list of form types the custom model can recognize
    with open(path_to_sample_forms, "rb") as form_file:
        poller = form_recognizer_client.begin_recognize_custom_forms(
            model_id=model_id,
            form=form_file,
            include_field_elements=True,
        )
    recognized_forms = poller.result()

    for form_index, recognized in enumerate(recognized_forms):
        print("--------RECOGNIZING FORM #{}--------".format(form_index))
        print("Form has type: {}".format(recognized.form_type))
        for name, field in recognized.fields.items():
            # Each field is a FormField. The value can also be a Dict[str, FormField]
            # or a List[FormField]; in this sample it is not.
            if field.label_data:
                shown_label = field.label_data.text
            else:
                shown_label = name
            print("...Field '{}' has label '{}' with value '{}' within bounding box '{}', with a confidence score of {}".format(
                name,
                shown_label,
                field.value,
                format_bounding_box(field.value_data.bounding_box),
                field.confidence,
            ))

        for page in recognized.pages:
            print("-------Recognizing Page #{} of Form #{}-------".format(page.page_number, form_index))
            print("Has width '{}' and height '{}' measure with unit: {}, and has text angle '{}'".format(
                page.width, page.height, page.unit, page.text_angle))
            for table in page.tables:
                for cell in table.cells:
                    print("...Cell[{}][{}] has text '{}' with confidence {} based on the following words: ".format(
                        cell.row_index, cell.column_index, cell.text, cell.confidence))
                    # field_elements is only populated when include_field_elements=True is
                    # passed to begin_recognize_custom_forms. It is a heterogeneous list of
                    # FormWord and FormLine.
                    for element in cell.field_elements:
                        if isinstance(element, FormWord):
                            print("......Word '{}' within bounding box '{}' has a confidence of {}".format(
                                element.text,
                                format_bounding_box(element.bounding_box),
                                element.confidence,
                            ))
                            continue
                        if isinstance(element, FormLine):
                            print("......Line '{}' within bounding box '{}' has the following words: ".format(
                                element.text,
                                format_bounding_box(element.bounding_box),
                            ))
                            for word in element.words:
                                print(".........Word '{}' within bounding box '{}' has a confidence of {}".format(
                                    word.text,
                                    format_bounding_box(word.bounding_box),
                                    word.confidence,
                                ))
                print("---------------------------------------------------")
        print("-----------------------------------")
def recognize_custom_forms(self):
    """Recognize ``receipt.pdf`` with a custom model and print the receipt fields found.

    The API key is read from ``AZURE_FORM_RECOGNIZER_KEY``; it must not be
    committed to source control. ``AZURE_FORM_RECOGNIZER_ENDPOINT`` and
    ``CUSTOM_TRAINED_MODEL_ID`` may override the built-in endpoint and model ID.

    Raises:
        KeyError: if ``AZURE_FORM_RECOGNIZER_KEY`` is not set.
    """
    path_to_sample_forms = 'receipt.pdf'
    # [START recognize_custom_forms]
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer import FormRecognizerClient

    endpoint = os.getenv(
        "AZURE_FORM_RECOGNIZER_ENDPOINT",
        'https://gsdp-formrecog.cognitiveservices.azure.com/')
    key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
    model_id = os.getenv(
        "CUSTOM_TRAINED_MODEL_ID", 'b131c408-ad86-424b-aa01-388b98a48d9d')

    form_recognizer_client = FormRecognizerClient(
        endpoint=endpoint, credential=AzureKeyCredential(key)
    )

    # Make sure your form's type is included in the list of form types the custom model can recognize
    with open(path_to_sample_forms, "rb") as f:
        poller = form_recognizer_client.begin_recognize_custom_forms(
            model_id=model_id, form=f
        )
    forms = poller.result()

    # (field name in the trained model, output template), in print order.
    # NOTE(review): "microchip" used to be fetched but was never printed; add a
    # row here if it should be reported.
    reported_fields = (
        ("clinic address", "Clinic address: {}"),
        ("clinic name", "Clinic name: {}"),
        ("item-names", "\t - Item Name: {}"),
        ("prices", "\t - Price: {}"),
        ("invoice date", "\t - Invoice Date: {}"),
        ("patientName", "\t - Patient Name: {}"),
        ("customer name", "\t - Customer Name: {}"),
        ("transaction no", "Transaction number: {}"),
        ("quantity", "quantity: {}"),
    )

    # TODO: use confidence scores to decide whether to accept or reject customer
    # data. Proposed: 0.9 counts as 'good' (subject to change); below roughly
    # 0.7 the customer is asked to try again.
    # TODO: "item-names" and "prices" arrive as single strings. Splitting them
    # into individual items / decimal values is pending.
    for idx, form in enumerate(forms):
        print("--------Recognizing Form #{}--------".format(idx+1))
        print("Form has type {}".format(form.form_type))
        for field_name, template in reported_fields:
            field = form.fields.get(field_name)
            # Fields the model did not return are skipped silently.
            if field:
                print(template.format(field.value))