def main():
    """Train an unlabeled custom Form Recognizer model and print its details.

    Configuration is read from environment variables so that no service key
    or storage access token is embedded in the source:

    * ``FORM_ENDPOINT`` - endpoint of the Form Recognizer resource.
    * ``FORM_KEY`` - API key for that resource.
    * ``STORAGE_URL`` - shared-access URL of the blob container holding the
      training documents.

    If any of these is unset or empty, a message naming the missing variables
    is printed and no request is made. Errors raised while creating the
    client or training are printed rather than propagated.
    """
    import os

    # NOTE(security): earlier revisions committed a real-looking key and
    # storage access token in this function. Treat them as compromised and
    # rotate them; never put credentials back into the source.
    settings = {
        name: os.environ.get(name)
        for name in ("FORM_ENDPOINT", "FORM_KEY", "STORAGE_URL")
    }
    missing = [name for name, value in settings.items() if not value]
    if missing:
        print("Missing required environment variables: {}".format(
            ", ".join(missing)))
        return

    try:
        # Authenticate Form Training Client
        form_training_client = FormTrainingClient(
            settings["FORM_ENDPOINT"], AzureKeyCredential(settings["FORM_KEY"]))

        # Train model (no label files are used)
        poller = form_training_client.begin_training(
            settings["STORAGE_URL"], use_training_labels=False)
        model = poller.result()

        print("Model ID: {}".format(model.model_id))
        print("Status: {}".format(model.status))
        print("Training started on: {}".format(model.training_started_on))
        print("Training completed on: {}".format(model.training_completed_on))
    except Exception as ex:
        # Script entry point: report the failure instead of a traceback.
        print(ex)
def test_training_auth_bad_key(self, resource_group, location,
                               form_recognizer_account,
                               form_recognizer_account_key):
    """Starting training with the bogus key "xxxx" must raise ClientAuthenticationError."""
    bad_key_client = FormTrainingClient(
        form_recognizer_account,
        AzureKeyCredential("xxxx"),
    )
    with self.assertRaises(ClientAuthenticationError):
        bad_key_client.begin_training("xx", use_training_labels=False)
def test_api_version_form_training_client(self, resource_group, location,
                                          form_recognizer_account,
                                          form_recognizer_account_key):
    """Check that "v9.1" is rejected with ValueError while "v2.0" and the V2_0 enum construct."""
    with self.assertRaises(ValueError):
        FormTrainingClient(
            endpoint=form_recognizer_account,
            credential=AzureKeyCredential(form_recognizer_account_key),
            api_version="v9.1",
        )

    # these do not raise
    for accepted_version in ("v2.0", FormRecognizerApiVersion.V2_0):
        FormTrainingClient(
            endpoint=form_recognizer_account,
            credential=AzureKeyCredential(form_recognizer_account_key),
            api_version=accepted_version,
        )
def train_model_with_labels(self):
    """Train a custom model with ``use_training_labels=True`` and print the outcome.

    Reads ``self.endpoint``, ``self.key`` and ``self.container_sas_url``,
    waits for training to finish, then prints the model summary, each
    submodel with its fields and accuracies, and the per-document training
    results.
    """
    from azure.ai.formrecognizer import FormTrainingClient
    from azure.core.credentials import AzureKeyCredential

    trainer = FormTrainingClient(self.endpoint, AzureKeyCredential(self.key))
    training_poller = trainer.begin_train_model(
        self.container_sas_url,
        use_training_labels=True,
    )
    trained = training_poller.result()

    # Custom model information
    print("Model ID: {}".format(trained.model_id))
    print("Status: {}".format(trained.status))
    print("Requested on: {}".format(trained.requested_on))
    print("Completed on: {}".format(trained.completed_on))

    print("Recognized fields:")
    # Walk the submodels; each one carries the fields it was trained on.
    # The labels are the ones supplied with the training documents.
    for sub in trained.submodels:
        print("...The submodel with form type {} has accuracy '{}'".format(
            sub.form_type, sub.accuracy))
        for field_key, field_info in sub.fields.items():
            print(
                "...The model found field '{}' to have name '{}' with an accuracy of {}"
                .format(field_key, field_info.name, field_info.accuracy))

    # Training result information
    for training_doc in trained.training_documents:
        print("Document name: {}".format(training_doc.document_name))
        print("Document status: {}".format(training_doc.status))
        print("Document page count: {}".format(training_doc.page_count))
        print("Document errors: {}".format(training_doc.errors))
def train_model_without_labels(self):
    """Train a custom model with ``use_training_labels=False`` and print the outcome.

    Reads ``self.endpoint``, ``self.key`` and ``self.container_sas_url``,
    waits for training to finish, then prints the model summary, each
    submodel's form type and field labels, and the per-document training
    results.
    """
    # [START training]
    from azure.ai.formrecognizer import FormTrainingClient
    from azure.core.credentials import AzureKeyCredential

    trainer = FormTrainingClient(self.endpoint, AzureKeyCredential(self.key))

    # Default for begin_train_model is `use_training_labels=False`
    training_poller = trainer.begin_train_model(
        self.container_sas_url,
        use_training_labels=False,
    )
    trained = training_poller.result()

    # Custom model information
    print("Model ID: {}".format(trained.model_id))
    print("Status: {}".format(trained.status))
    print("Requested on: {}".format(trained.requested_on))
    print("Completed on: {}".format(trained.completed_on))

    print("Recognized fields:")
    # Walk the submodels; each one carries the fields it was trained on
    for sub in trained.submodels:
        print("...The submodel has form type '{}'".format(sub.form_type))
        for field_key, field_info in sub.fields.items():
            print("...The model found field '{}' to have label '{}'".format(
                field_key, field_info.label))
    # [END training]

    # Training result information
    for training_doc in trained.training_documents:
        print("Document name: {}".format(training_doc.document_name))
        print("Document status: {}".format(training_doc.status))
        print("Document page count: {}".format(training_doc.page_count))
        print("Document errors: {}".format(training_doc.errors))
def test_account_properties_auth_bad_key(self, resource_group, location,
                                         form_recognizer_account,
                                         form_recognizer_account_key):
    """Fetching account properties with the bogus key "xxxx" must raise ClientAuthenticationError."""
    bad_key_client = FormTrainingClient(
        form_recognizer_account,
        AzureKeyCredential("xxxx"),
    )
    with self.assertRaises(ClientAuthenticationError):
        bad_key_client.get_account_properties()
def test_delete_model_auth_bad_key(self, resource_group, location,
                                   form_recognizer_account,
                                   form_recognizer_account_key):
    """Deleting a model with the bogus key "xxxx" must raise ClientAuthenticationError."""
    bad_key_client = FormTrainingClient(
        form_recognizer_account,
        AzureKeyCredential("xxxx"),
    )
    with self.assertRaises(ClientAuthenticationError):
        bad_key_client.delete_model("xx")
async def test_get_form_recognizer_client(self, formrecognizer_test_endpoint,
                                          formrecognizer_test_api_key,
                                          **kwargs):
    """Check the transport session stays set while a derived recognizer client is opened and closed.

    A training client (api_version "2.1") is created on an explicit
    ``AioHttpTransport``. ``transport.session`` is asserted to be non-None
    before, inside, and after the ``get_form_recognizer_client()`` context.
    """
    # this can be reverted to set_bodiless_matcher() after tests are re-recorded and don't contain these headers
    set_custom_default_matcher(
        compare_bodies=False,
        excluded_headers="Authorization,Content-Length,x-ms-client-request-id,x-ms-request-id",
    )

    shared_transport = AioHttpTransport()
    training_client = FormTrainingClient(
        endpoint=formrecognizer_test_endpoint,
        credential=AzureKeyCredential(formrecognizer_test_api_key),
        transport=shared_transport,
        api_version="2.1",
    )

    async with training_client:
        await training_client.get_account_properties()
        assert shared_transport.session is not None

        async with training_client.get_form_recognizer_client() as recognizer:
            assert shared_transport.session is not None
            receipt_poller = await recognizer.begin_recognize_receipts_from_url(
                self.receipt_url_jpg)
            await receipt_poller.wait()

        # The training client is used again after the recognizer context exits.
        await training_client.get_account_properties()
        assert shared_transport.session is not None
def main():
    """Train an unlabeled custom model and print its ID, status and timestamps.

    Settings come from ``load_dotenv()`` plus the ``FORM_ENDPOINT``,
    ``FORM_KEY`` and ``STORAGE_URL`` environment variables. Any exception is
    printed rather than propagated.
    """
    try:
        # Get configuration settings
        load_dotenv()
        endpoint = os.getenv('FORM_ENDPOINT')
        api_key = os.getenv('FORM_KEY')
        training_data_url = os.getenv('STORAGE_URL')

        # Authenticate Form Training Client
        form_recognizer_client = FormRecognizerClient(
            endpoint, AzureKeyCredential(api_key))
        trainer = FormTrainingClient(endpoint, AzureKeyCredential(api_key))

        # Train model
        training_poller = trainer.begin_training(
            training_data_url,
            use_training_labels=False,
        )
        trained = training_poller.result()

        print("Model ID: {}".format(trained.model_id))
        print("Status: {}".format(trained.status))
        print("Training started on: {}".format(trained.training_started_on))
        print("Training completed on: {}".format(trained.training_completed_on))
    except Exception as ex:
        print(ex)
def test_api_version_form_training_client(self, formrecognizer_test_endpoint,
                                          formrecognizer_test_api_key):
    """Check that "9.1" is rejected with ValueError while "2.0" and the V2_0 enum construct."""
    with self.assertRaises(ValueError):
        FormTrainingClient(
            endpoint=formrecognizer_test_endpoint,
            credential=AzureKeyCredential(formrecognizer_test_api_key),
            api_version="9.1",
        )

    # these do not raise
    for accepted_version in ("2.0", FormRecognizerApiVersion.V2_0):
        FormTrainingClient(
            endpoint=formrecognizer_test_endpoint,
            credential=AzureKeyCredential(formrecognizer_test_api_key),
            api_version=accepted_version,
        )
def train_model_without_labels(self):
    """Train a custom model with ``use_training_labels=False`` and print the outcome.

    Endpoint, key and container URL come from the
    ``AZURE_FORM_RECOGNIZER_ENDPOINT``, ``AZURE_FORM_RECOGNIZER_KEY`` and
    ``CONTAINER_SAS_URL`` environment variables (``KeyError`` if unset).
    """
    # [START training]
    from azure.ai.formrecognizer import FormTrainingClient
    from azure.core.credentials import AzureKeyCredential

    service_endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    api_key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
    training_container_url = os.environ["CONTAINER_SAS_URL"]

    trainer = FormTrainingClient(service_endpoint, AzureKeyCredential(api_key))
    training_poller = trainer.begin_training(
        training_container_url,
        use_training_labels=False,
    )
    trained = training_poller.result()

    # Custom model information
    print("Model ID: {}".format(trained.model_id))
    print("Status: {}".format(trained.status))
    print("Model name: {}".format(trained.model_name))
    print("Training started on: {}".format(trained.training_started_on))
    print("Training completed on: {}".format(trained.training_completed_on))

    print("Recognized fields:")
    # Walk the submodels; each one carries the fields it was trained on
    for sub in trained.submodels:
        print("...The submodel has form type '{}'".format(sub.form_type))
        for field_key, field_info in sub.fields.items():
            print("...The model found field '{}' to have label '{}'".format(
                field_key, field_info.label))
    # [END training]

    # Training result information
    for training_doc in trained.training_documents:
        print("Document name: {}".format(training_doc.name))
        print("Document status: {}".format(training_doc.status))
        print("Document page count: {}".format(training_doc.page_count))
        print("Document errors: {}".format(training_doc.errors))
def manage_custom_models(self):
    """Show account limits, list models, then train, fetch and delete one model.

    Configuration comes from ``AZURE_FORM_RECOGNIZER_ENDPOINT``,
    ``AZURE_FORM_RECOGNIZER_KEY`` and ``CONTAINER_SAS_URL_V2``. The model
    trained here is deleted again; a follow-up lookup that raises
    ``ResourceNotFoundError`` is reported as a successful deletion.
    """
    from azure.core.credentials import AzureKeyCredential
    from azure.core.exceptions import ResourceNotFoundError
    from azure.ai.formrecognizer import FormTrainingClient

    service_endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    api_key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
    training_container_url = os.environ["CONTAINER_SAS_URL_V2"]

    # [START get_account_properties]
    form_training_client = FormTrainingClient(
        endpoint=service_endpoint,
        credential=AzureKeyCredential(api_key),
    )
    # First, we see how many custom models we have, and what our limit is
    account_properties = form_training_client.get_account_properties()
    print(
        "Our account has {} custom models, and we can have at most {} custom models\n"
        .format(account_properties.custom_model_count,
                account_properties.custom_model_limit))
    # [END get_account_properties]

    # Next, we get a paged list of all of our custom models
    # [START list_custom_models]
    custom_models = form_training_client.list_custom_models()

    print("We have models with the following IDs:")
    for listed_model in custom_models:
        print(listed_model.model_id)
    # [END list_custom_models]

    # let's train a model to use for this sample
    training_poller = form_training_client.begin_training(
        training_container_url,
        use_training_labels=False,
    )
    trained = training_poller.result()

    # Now we'll get information for the model we just trained
    # [START get_custom_model]
    custom_model = form_training_client.get_custom_model(
        model_id=trained.model_id)
    print("\nModel ID: {}".format(custom_model.model_id))
    print("Status: {}".format(custom_model.status))
    print("Model name: {}".format(custom_model.model_name))
    print("Is this a composed model?: {}".format(
        custom_model.properties.is_composed_model))
    print("Training started on: {}".format(custom_model.training_started_on))
    print("Training completed on: {}".format(custom_model.training_completed_on))
    # [END get_custom_model]

    # Finally, we will delete this model by ID
    # [START delete_model]
    deleted_id = custom_model.model_id
    form_training_client.delete_model(model_id=deleted_id)

    try:
        form_training_client.get_custom_model(model_id=custom_model.model_id)
    except ResourceNotFoundError:
        print("Successfully deleted model with id {}".format(
            custom_model.model_id))
def test_delete_model_empty_model_id(self, resource_group, location,
                                     form_recognizer_account,
                                     form_recognizer_account_key):
    """delete_model("") on a validly-keyed client must raise ValueError."""
    valid_credential = AzureKeyCredential(form_recognizer_account_key)
    training_client = FormTrainingClient(form_recognizer_account,
                                         valid_credential)
    with self.assertRaises(ValueError):
        training_client.delete_model("")
def test_get_model_none_model_id(self, resource_group, location,
                                 form_recognizer_account,
                                 form_recognizer_account_key):
    """get_custom_model(None) on a validly-keyed client must raise ValueError."""
    valid_credential = AzureKeyCredential(form_recognizer_account_key)
    training_client = FormTrainingClient(form_recognizer_account,
                                         valid_credential)
    with self.assertRaises(ValueError):
        training_client.get_custom_model(None)
def test_sample_recognize_custom_forms(self, resource_group, location,
                                       form_recognizer_account,
                                       form_recognizer_account_key):
    """Train an unlabeled model, export its ID, then run sample_recognize_custom_forms.py.

    The trained model's ID is published through the
    ``CUSTOM_TRAINED_MODEL_ID`` environment variable before the sample file
    is executed via ``_test_file``.
    """
    _setenv('CONTAINER_SAS_URL',
            'AZURE_FORM_RECOGNIZER_STORAGE_CONTAINER_SAS_URL')

    training_client = FormTrainingClient(
        form_recognizer_account,
        AzureKeyCredential(form_recognizer_account_key),
    )
    sas_url = os.environ['CONTAINER_SAS_URL']
    trained = training_client.begin_training(
        sas_url, use_training_labels=False).result()

    os.environ['CUSTOM_TRAINED_MODEL_ID'] = trained.model_id
    _test_file('sample_recognize_custom_forms.py',
               form_recognizer_account,
               form_recognizer_account_key)
def test_list_model_auth_bad_key(self, resource_group, location,
                                 form_recognizer_account,
                                 form_recognizer_account_key):
    """Listing and iterating models with the bogus key "xxxx" must raise ClientAuthenticationError."""
    bad_key_client = FormTrainingClient(
        form_recognizer_account,
        AzureKeyCredential("xxxx"),
    )
    with self.assertRaises(ClientAuthenticationError):
        listed = bad_key_client.list_custom_models()
        # Iteration is kept inside the assertion block along with the call.
        for entry in listed:
            test = entry
def test_account_properties(self, resource_group, location,
                            form_recognizer_account,
                            form_recognizer_account_key):
    """Account properties expose non-None custom_model_limit and custom_model_count."""
    valid_credential = AzureKeyCredential(form_recognizer_account_key)
    training_client = FormTrainingClient(form_recognizer_account,
                                         valid_credential)

    account_info = training_client.get_account_properties()

    self.assertIsNotNone(account_info.custom_model_limit)
    self.assertIsNotNone(account_info.custom_model_count)
def authentication_with_api_key_credential_form_training_client(self):
    """Create a FormTrainingClient from an API key and fetch the account properties.

    The endpoint and key come from ``AZURE_FORM_RECOGNIZER_ENDPOINT`` and
    ``AZURE_FORM_RECOGNIZER_KEY``.
    """
    # [START create_ft_client_with_key]
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer import FormTrainingClient

    endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

    key_credential = AzureKeyCredential(key)
    form_training_client = FormTrainingClient(endpoint, key_credential)
    # [END create_ft_client_with_key]

    # One real call to exercise the credential.
    properties = form_training_client.get_account_properties()
def authentication_with_azure_active_directory_form_training_client(self):
    """Create a FormTrainingClient with DefaultAzureCredential and fetch the account properties.

    DefaultAzureCredential will use the values from these environment
    variables: AZURE_CLIENT_ID, AZURE_TENANT_ID, AZURE_CLIENT_SECRET.
    The endpoint comes from ``AZURE_FORM_RECOGNIZER_ENDPOINT``.
    """
    # [START create_ft_client_with_aad]
    from azure.ai.formrecognizer import FormTrainingClient
    from azure.identity import DefaultAzureCredential

    endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    aad_credential = DefaultAzureCredential()

    form_training_client = FormTrainingClient(endpoint, aad_credential)
    # [END create_ft_client_with_aad]

    # One real call to exercise the credential.
    properties = form_training_client.get_account_properties()
def manage_custom_models(self):
    """Show account limits, list custom models, then inspect and delete the first one.

    Configuration comes from ``AZURE_FORM_RECOGNIZER_ENDPOINT`` and
    ``AZURE_FORM_RECOGNIZER_KEY`` (``KeyError`` if unset).

    WARNING: this deletes the first model returned by ``list_custom_models()``.
    If the account has no custom models, a message is printed and the method
    returns without fetching or deleting anything (previously the bare
    ``next()`` raised ``StopIteration``).
    """
    # [START get_account_properties]
    from azure.core.credentials import AzureKeyCredential
    from azure.core.exceptions import ResourceNotFoundError
    from azure.ai.formrecognizer import FormTrainingClient

    endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

    form_training_client = FormTrainingClient(
        endpoint=endpoint, credential=AzureKeyCredential(key))
    # First, we see how many custom models we have, and what our limit is
    account_properties = form_training_client.get_account_properties()
    print(
        "Our account has {} custom models, and we can have at most {} custom models\n"
        .format(account_properties.custom_model_count,
                account_properties.custom_model_limit))
    # [END get_account_properties]

    # Next, we get a paged list of all of our custom models
    # [START list_custom_models]
    custom_models = form_training_client.list_custom_models()

    print("We have models with the following IDs:")

    # Let's pull out the first model; the default avoids StopIteration on an
    # empty account.
    first_model = next(custom_models, None)
    if first_model is None:
        print("No custom models found; nothing to inspect or delete.")
        return
    print(first_model.model_id)
    for model in custom_models:
        print(model.model_id)
    # [END list_custom_models]

    # Now we'll get information for the first custom model in the paged list
    # [START get_custom_model]
    custom_model = form_training_client.get_custom_model(
        model_id=first_model.model_id)
    print("\nModel ID: {}".format(custom_model.model_id))
    print("Status: {}".format(custom_model.status))
    print("Training started on: {}".format(custom_model.training_started_on))
    print("Training completed on: {}".format(custom_model.training_completed_on))
    # [END get_custom_model]

    # Finally, we will delete this model by ID
    # [START delete_model]
    form_training_client.delete_model(model_id=custom_model.model_id)

    try:
        # A lookup that raises ResourceNotFoundError confirms the deletion.
        form_training_client.get_custom_model(model_id=custom_model.model_id)
    except ResourceNotFoundError:
        print("Successfully deleted model with id {}".format(
            custom_model.model_id))
def main():
    """Train an unlabeled custom model and print its summary, fields and training documents.

    Settings come from ``load_dotenv()`` plus the ``FORM_ENDPOINT``,
    ``FORM_KEY`` and ``STORAGE_URL`` environment variables. Any exception is
    printed rather than propagated.
    """
    try:
        # Get configuration settings
        load_dotenv()
        endpoint = os.getenv('FORM_ENDPOINT')
        api_key = os.getenv('FORM_KEY')
        # To train a model you need your Blob URI to access your training files
        training_data_url = os.getenv('STORAGE_URL')

        # Create client using endpoint and key
        form_recognizer_client = FormRecognizerClient(
            endpoint, AzureKeyCredential(api_key))
        trainer = FormTrainingClient(endpoint, AzureKeyCredential(api_key))

        # Use Training Labels = False
        training_poller = trainer.begin_training(
            training_data_url,
            use_training_labels=False,
        )
        trained = training_poller.result()

        print("Model ID: {}".format(trained.model_id))
        print("Status: {}".format(trained.status))
        print("Training started on: {}".format(trained.training_started_on))
        print("Training completed on: {}".format(trained.training_completed_on))

        print("\nRecognized fields:")
        for sub in trained.submodels:
            # Prefer a field's label; fall back to its key when the label is falsy.
            field_names = ", ".join(
                info.label or key for key, info in sub.fields.items())
            print(
                "The submodel with form type '{}' has recognized the following fields: {}"
                .format(sub.form_type, field_names))

        # Training result information
        for training_doc in trained.training_documents:
            print("Document name: {}".format(training_doc.name))
            print("Document status: {}".format(training_doc.status))
            print("Document page count: {}".format(training_doc.page_count))
            print("Document errors: {}".format(training_doc.errors))
    except Exception as ex:
        print(ex)
def test_logging_info_ft_client(self, resource_group, location,
                                form_recognizer_account,
                                form_recognizer_account_key):
    """At INFO level, only log records mentioning the API-key header may contain "REDACTED".

    A ``MockHandler`` is attached to the "azure" logger for the duration of
    one ``get_account_properties()`` call. The handler is removed and the
    logger's previous level restored afterwards, so this test does not leak
    logging state into later tests.
    """
    client = FormTrainingClient(
        form_recognizer_account,
        AzureKeyCredential(form_recognizer_account_key))
    mock_handler = MockHandler()

    logger = logging.getLogger("azure")
    previous_level = logger.level
    logger.addHandler(mock_handler)
    logger.setLevel(logging.INFO)
    try:
        client.get_account_properties()
    finally:
        # The "azure" logger is process-wide; undo our changes even on failure.
        logger.removeHandler(mock_handler)
        logger.setLevel(previous_level)

    for message in mock_handler.messages:
        if message.levelname != "INFO":
            continue
        # not able to use json.loads here. At INFO level only API key should be REDACTED
        if "Ocp-Apim-Subscription-Key" in message.message:
            assert "REDACTED" in message.message
        else:
            assert "REDACTED" not in message.message
def test_document_api_version_form_training_client(self):
    """Constructing with the 2021-09-30-preview version must raise ValueError with the exact message.

    The expected text lists every ``FormRecognizerApiVersion`` value and
    points at the DocumentAnalysis clients.
    """
    with pytest.raises(ValueError) as excinfo:
        FormTrainingClient(
            "url",
            "key",
            api_version=DocumentAnalysisApiVersion.V2021_09_30_PREVIEW,
        )

    supported_versions = ", ".join(v.value for v in FormRecognizerApiVersion)
    expected_message = (
        "Unsupported API version '2021-09-30-preview'. Please select from: {}\nAPI version '2021-09-30-preview' is "
        "only available for DocumentAnalysisClient and DocumentModelAdministrationClient."
    ).format(supported_versions)
    assert expected_message == str(excinfo.value)
def train_model_with_labels(self):
    """Train a labeled custom model named "mymodel" and print the outcome.

    Endpoint, key and container URL come from the
    ``AZURE_FORM_RECOGNIZER_ENDPOINT``, ``AZURE_FORM_RECOGNIZER_KEY`` and
    ``CONTAINER_SAS_URL`` environment variables (``KeyError`` if unset).
    Prints the model summary, each submodel with its field accuracies, and
    the per-document training results.
    """
    from azure.ai.formrecognizer import FormTrainingClient
    from azure.core.credentials import AzureKeyCredential

    service_endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    api_key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
    training_container_url = os.environ["CONTAINER_SAS_URL"]

    trainer = FormTrainingClient(service_endpoint, AzureKeyCredential(api_key))
    training_poller = trainer.begin_training(
        training_container_url,
        use_training_labels=True,
        model_name="mymodel",
    )
    trained = training_poller.result()

    # Custom model information
    print("Model ID: {}".format(trained.model_id))
    print("Status: {}".format(trained.status))
    print("Model name: {}".format(trained.model_name))
    print("Is this a composed model?: {}".format(
        trained.properties.is_composed_model))
    print("Training started on: {}".format(trained.training_started_on))
    print("Training completed on: {}".format(trained.training_completed_on))

    print("Recognized fields:")
    # Walk the submodels; each one carries the fields it was trained on.
    # The labels are the ones supplied with the training documents.
    for sub in trained.submodels:
        print("...The submodel has model ID: {}".format(sub.model_id))
        print(
            "...The submodel with form type {} has an average accuracy '{}'"
            .format(sub.form_type, sub.accuracy))
        for field_key, field_info in sub.fields.items():
            print("...The model found the field '{}' with an accuracy of {}".format(
                field_key, field_info.accuracy))

    # Training result information
    for training_doc in trained.training_documents:
        print("Document name: {}".format(training_doc.name))
        print("Document status: {}".format(training_doc.status))
        print("Document page count: {}".format(training_doc.page_count))
        print("Document errors: {}".format(training_doc.errors))
async def test_get_form_recognizer_client(self, formrecognizer_test_endpoint,
                                          formrecognizer_test_api_key):
    """Check the transport session stays set while a derived recognizer client is opened and closed.

    A training client (api_version "2.1") is created on an explicit
    ``AioHttpTransport``. ``transport.session`` is asserted to be non-None
    before, inside, and after the ``get_form_recognizer_client()`` context.
    """
    shared_transport = AioHttpTransport()
    training_client = FormTrainingClient(
        endpoint=formrecognizer_test_endpoint,
        credential=AzureKeyCredential(formrecognizer_test_api_key),
        transport=shared_transport,
        api_version="2.1",
    )

    async with training_client:
        await training_client.get_account_properties()
        assert shared_transport.session is not None

        async with training_client.get_form_recognizer_client() as recognizer:
            assert shared_transport.session is not None
            receipt_poller = await recognizer.begin_recognize_receipts_from_url(
                self.receipt_url_jpg)
            await receipt_poller.wait()

        # The training client is used again after the recognizer context exits.
        await training_client.get_account_properties()
        assert shared_transport.session is not None
def test_sample_differentiate_output_models_trained_with_and_without_labels(
        self, resource_group, location, form_recognizer_account,
        form_recognizer_account_key):
    """Train one unlabeled and one labeled model, export both IDs, then run the sample file.

    The IDs are published through ``ID_OF_MODEL_TRAINED_WITH_LABELS`` and
    ``ID_OF_MODEL_TRAINED_WITHOUT_LABELS`` before the sample is executed via
    ``_test_file``.
    """
    os.environ['CONTAINER_SAS_URL'] = self.get_settings_value(
        "FORM_RECOGNIZER_STORAGE_CONTAINER_SAS_URL")

    training_client = FormTrainingClient(
        form_recognizer_account,
        AzureKeyCredential(form_recognizer_account_key),
    )
    sas_url = os.environ['CONTAINER_SAS_URL']

    # Unlabeled model first, then the labeled one (same order as the service calls).
    without_labels = training_client.begin_training(
        sas_url, use_training_labels=False).result()
    with_labels = training_client.begin_training(
        sas_url, use_training_labels=True).result()

    os.environ["ID_OF_MODEL_TRAINED_WITH_LABELS"] = with_labels.model_id
    os.environ["ID_OF_MODEL_TRAINED_WITHOUT_LABELS"] = without_labels.model_id

    _test_file(
        'sample_differentiate_output_models_trained_with_and_without_labels.py',
        form_recognizer_account,
        form_recognizer_account_key)
def copy_model(self):
    """Copy a custom model from a source resource to a target resource and print the result.

    Source settings come from ``AZURE_FORM_RECOGNIZER_ENDPOINT``,
    ``AZURE_FORM_RECOGNIZER_KEY`` and ``AZURE_SOURCE_MODEL_ID``; target
    settings from ``AZURE_FORM_RECOGNIZER_TARGET_ENDPOINT``,
    ``AZURE_FORM_RECOGNIZER_TARGET_KEY``,
    ``AZURE_FORM_RECOGNIZER_TARGET_REGION`` and
    ``AZURE_FORM_RECOGNIZER_TARGET_RESOURCE_ID``.
    """
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer import FormTrainingClient

    env = os.environ
    source_endpoint = env["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    source_key = env["AZURE_FORM_RECOGNIZER_KEY"]
    target_endpoint = env["AZURE_FORM_RECOGNIZER_TARGET_ENDPOINT"]
    target_key = env["AZURE_FORM_RECOGNIZER_TARGET_KEY"]
    source_model_id = env["AZURE_SOURCE_MODEL_ID"]
    target_region = env["AZURE_FORM_RECOGNIZER_TARGET_REGION"]
    target_resource_id = env["AZURE_FORM_RECOGNIZER_TARGET_RESOURCE_ID"]

    # [START get_copy_authorization]
    target_client = FormTrainingClient(
        endpoint=target_endpoint,
        credential=AzureKeyCredential(target_key),
    )

    # The target resource issues the authorization that the source-side copy consumes.
    target = target_client.get_copy_authorization(
        resource_region=target_region,
        resource_id=target_resource_id,
    )
    # [END get_copy_authorization]

    # [START begin_copy_model]
    source_client = FormTrainingClient(
        endpoint=source_endpoint,
        credential=AzureKeyCredential(source_key),
    )

    copy_poller = source_client.begin_copy_model(
        model_id=source_model_id,
        target=target,
    )
    copied_over_model = copy_poller.result()

    print("Model ID: {}".format(copied_over_model.model_id))
    print("Status: {}".format(copied_over_model.status))
def test_get_form_recognizer_client(self, resource_group, location,
                                    form_recognizer_account,
                                    form_recognizer_account_key):
    """Check the transport session stays set while a derived recognizer client is opened and closed.

    A training client is created on an explicit ``RequestsTransport``.
    ``transport.session`` is asserted to be non-None before, inside, and
    after the ``get_form_recognizer_client()`` context.
    """
    shared_transport = RequestsTransport()
    training_client = FormTrainingClient(
        endpoint=form_recognizer_account,
        credential=AzureKeyCredential(form_recognizer_account_key),
        transport=shared_transport,
    )

    with training_client:
        training_client.get_account_properties()
        assert shared_transport.session is not None

        with training_client.get_form_recognizer_client() as recognizer:
            assert shared_transport.session is not None
            receipt_poller = recognizer.begin_recognize_receipts_from_url(
                self.receipt_url_jpg)
            receipt_poller.wait()

        # The training client is used again after the recognizer context exits.
        training_client.get_account_properties()
        assert shared_transport.session is not None
def test_get_form_recognizer_client_v2(self, formrecognizer_test_endpoint,
                                       formrecognizer_test_api_key):
    """Check the derived recognizer client shares the transport and reports API version V2_1.

    A training client (api_version "2.1") is created on an explicit
    ``RequestsTransport``. ``transport.session`` is asserted to be non-None
    before, inside, and after the ``get_form_recognizer_client()`` context,
    and the recognizer's ``_api_version`` is compared against
    ``FormRecognizerApiVersion.V2_1``.
    """
    shared_transport = RequestsTransport()
    training_client = FormTrainingClient(
        endpoint=formrecognizer_test_endpoint,
        credential=AzureKeyCredential(formrecognizer_test_api_key),
        transport=shared_transport,
        api_version="2.1",
    )

    with training_client:
        training_client.get_account_properties()
        assert shared_transport.session is not None

        with training_client.get_form_recognizer_client() as recognizer:
            assert shared_transport.session is not None
            receipt_poller = recognizer.begin_recognize_receipts_from_url(
                self.receipt_url_jpg)
            receipt_poller.wait()
            assert recognizer._api_version == FormRecognizerApiVersion.V2_1

        # The training client is used again after the recognizer context exits.
        training_client.get_account_properties()
        assert shared_transport.session is not None
def test_sample_copy_model(self, resource_group, location,
                           form_recognizer_account,
                           form_recognizer_account_key):
    """Train an unlabeled model, export the copy settings, then run sample_copy_model.py.

    The source model ID and the target endpoint, key, region and resource ID
    are published through environment variables; the target is the same
    account that trained the model.
    """
    os.environ['CONTAINER_SAS_URL'] = self.get_settings_value(
        "FORM_RECOGNIZER_STORAGE_CONTAINER_SAS_URL")

    training_client = FormTrainingClient(
        form_recognizer_account,
        AzureKeyCredential(form_recognizer_account_key),
    )
    sas_url = os.environ['CONTAINER_SAS_URL']
    trained = training_client.begin_training(
        sas_url, use_training_labels=False).result()

    os.environ['AZURE_SOURCE_MODEL_ID'] = trained.model_id
    os.environ["AZURE_FORM_RECOGNIZER_TARGET_ENDPOINT"] = form_recognizer_account
    os.environ["AZURE_FORM_RECOGNIZER_TARGET_KEY"] = form_recognizer_account_key
    os.environ["AZURE_FORM_RECOGNIZER_TARGET_REGION"] = location

    # Resource ID of the Cognitive Services account used as the copy target.
    target_resource_id = (
        "/subscriptions/" + self.get_settings_value("SUBSCRIPTION_ID")
        + "/resourceGroups/" + resource_group.name
        + "/providers/Microsoft.CognitiveServices/accounts/"
        + FormRecognizerTest._FORM_RECOGNIZER_NAME
    )
    os.environ["AZURE_FORM_RECOGNIZER_TARGET_RESOURCE_ID"] = target_resource_id

    _test_file('sample_copy_model.py',
               form_recognizer_account,
               form_recognizer_account_key)