    def get_predict(self, doctype, sas_url, predict_type):

        # Getting model ID from doc type
        partition_key = self.app_settings.environment + '_' + predict_type
        model_id = storage_helpers.query_entity_model(self.table_service, self.app_settings.models_table, partition_key, doctype)

        # Getting prediction result
        prediction = fr_helpers.get_prediction(self.app_settings.fr_region, self.app_settings.fr_key, sas_url, model_id, predict_type)

        return prediction
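For reference, `fr_helpers.get_prediction` is not shown on this page. Below is a minimal sketch of what such a helper could look like against the Form Recognizer v2.1 REST analyze endpoint; the function name, the polling loop, and the empty-dict fallback on failure are assumptions made for illustration, not the project's actual implementation.

import time
import requests

def get_prediction_sketch(region, key, sas_url, model_id, predict_type):
    # Hypothetical helper: analyze one document with a custom Form Recognizer model.
    # The endpoint shape follows the public v2.1 REST API; returning {} on failure
    # mirrors what the tests below expect, but is an assumption about fr_helpers.
    base = f"https://{region}.api.cognitive.microsoft.com"
    analyze_url = f"{base}/formrecognizer/v2.1/custom/models/{model_id}/analyze"
    headers = {"Ocp-Apim-Subscription-Key": key, "Content-Type": "application/json"}
    try:
        resp = requests.post(analyze_url, headers=headers, json={"source": sas_url})
        resp.raise_for_status()
        result_url = resp.headers["Operation-Location"]
        result = {}
        for _ in range(30):  # poll for up to ~30 seconds
            result = requests.get(result_url, headers=headers).json()
            if result.get("status") in ("succeeded", "failed"):
                break
            time.sleep(1)
        if result.get("status") != "succeeded":
            return {}
        # For a supervised model, labeled fields live under documentResults.
        fields = {}
        for doc in result["analyzeResult"].get("documentResults", []):
            fields.update(doc.get("fields", {}))
        return fields
    except (requests.RequestException, KeyError):
        return {}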
Example #2
    def test_query_entity_model_when_row_key_invalid(self):
        
        # Expecting failure when row key is invalid
        result = storage_helpers.query_entity_model(
                self.table_service, 
                self.table_model,
                self.partition_key,
                "abcd")

        assert result is None
Example #3
    def test_query_entity_model_when_table_service_invalid(self):
        
        # Expecting failure when table service is invalid
        result = storage_helpers.query_entity_model(
                None, 
                self.table_model,
                self.partition_key,
                self.model)

        assert result is None
Example #4
    def test_query_entity_model_when_valid(self):
        
        # Expecting success when all parameters are valid
        result = storage_helpers.query_entity_model(
                self.table_service, 
                self.table_model,
                self.partition_key,
                self.model)

        assert result is not None
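These tests pin down the contract of `storage_helpers.query_entity_model`: it returns the model id on success and None on any failure (missing table service, unknown row key). A minimal sketch consistent with that contract is shown below; it assumes the helper wraps the Azure Table storage SDK's get_entity call and that the id sits in a 'ModelId' property, which is a guess about the table schema rather than the project's actual code.

def query_entity_model_sketch(table_service, table_name, partition_key, row_key):
    # Hypothetical helper: look up the model entity and return its id, or None on any error.
    # The 'ModelId' property name is an assumption about the table schema.
    if table_service is None:
        return None
    try:
        entity = table_service.get_entity(table_name, partition_key, row_key)
        return entity.get('ModelId')
    except Exception:
        # Covers unknown row keys, missing tables, and connectivity errors.
        return None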
import os
import unittest

# Project-local helper modules exercised by these tests
import fr_helpers
import storage_helpers


class PredictionsTest(unittest.TestCase):

    region = os.getenv('FR_REGION')
    key = os.getenv('FR_KEY')
    sas = os.getenv('SAS_TOKEN')
    models_table = os.getenv('MODELS_TABLE')
    storage_account = os.getenv('STORAGE_ACCOUNT_NAME')
    storage_key = os.getenv('STORAGE_KEY')
    storage_url = f"https://{storage_account}.blob.core.windows.net"

    label = "<ENTER LABEL>"
    container = "tests"
    partition_key = label[0].upper()

    table_service = storage_helpers.create_table_service(storage_account, storage_key)
    container_client = storage_helpers.create_container_client(storage_url, container, sas)
    model_id = storage_helpers.query_entity_model(table_service, models_table, partition_key, label)

    filename = "<ENTER FILE NAME>"

    test_url_valid = f"https://{storage_account}.blob.core.windows.net/{container}/{label}/test/{filename}{sas}"
    # Deliberately references a blob named literally "filename", which should not exist
    test_url_invalid = f"https://{storage_account}.blob.core.windows.net/{container}/{label}/test/filename{sas}"

    testing_path = label + '/test'
    blobs = storage_helpers.list_blobs(container_client, testing_path)

    def test_predict_supervised_when_valid(self):
    
        # Expecting fields back when all parameters are valid
        result = fr_helpers.get_prediction(
                self.region,
                self.key,
                self.test_url_valid,
                self.model_id,
                "supervised")

        assert len(result) > 0  

    def test_predict_supervised_when_url_invalid(self):
    
        # Expecting empty result when url is invalid
        result = fr_helpers.get_prediction(
                self.region,
                self.key,
                self.test_url_invalid,
                self.model_id,
                "supervised")

        assert len(result) == 0  

    def test_predict_supervised_when_region_invalid(self):
    
        # Expecting empty result when region is invalid
        result = fr_helpers.get_prediction(
                "easteurope",
                self.key,
                self.test_url_valid,
                self.model_id,
                "supervised")

        assert len(result) == 0  

    def test_predict_supervised_when_key_invalid(self):
    
        # Expecting empty result when key is invalid
        result = fr_helpers.get_prediction(
                self.region,
                "abcd",
                self.test_url_valid,
                self.model_id,
                "supervised")

        assert len(result) == 0  

    def test_predict_supervised_when_model_id_invalid(self):
    
        # Expecting empty result when the model id is invalid
        result = fr_helpers.get_prediction(
                self.region,
                self.key,
                self.test_url_valid,
                "abcd",
                "supervised")

        assert len(result) == 0  

    def test_batch_predict_supervised_when_valid(self):
    
        # Expecting predictions back when all parameters are valid
        result,_,_ = fr_helpers.batch_predictions(
                self.blobs,
                self.model_id,
                self.storage_url,
                self.container,
                self.sas,
                self.region,
                self.key)

        assert len(result) > 0  

    def test_batch_predict_supervised_when_url_invalid(self):
    
        # Expecting no predictions back when storage url is invalid
        result,_,_ = fr_helpers.batch_predictions(
                self.blobs,
                self.model_id,
                "invalidurl.net",
                self.container,
                self.sas,
                self.region,
                self.key)

        assert len(result) == 0 
    
    def test_batch_predict_supervised_when_blobs_empty(self):
    
        # Expecting no predictions back when the list of blobs is empty
        result,_,_ = fr_helpers.batch_predictions(
                [],
                self.model_id,
                self.storage_url,
                self.container,
                self.sas,
                self.region,
                self.key)

        assert len(result) == 0 
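`fr_helpers.batch_predictions` is also not shown here. From the call sites above, it takes the list of test blobs, builds a SAS URL per blob, runs one prediction each, and returns a (predictions, count_analyzed, count_total) tuple. A hedged sketch under those assumptions follows; it reuses the hypothetical get_prediction_sketch defined earlier.

def batch_predictions_sketch(blobs, model_id, storage_url, container, sas, region, key):
    # Hypothetical helper: run a supervised prediction for every blob in the test set.
    # Only the (predictions, count_analyzed, count_total) return shape comes from the
    # tests above; everything else is an assumption about fr_helpers.
    predictions = []
    count_analyzed = 0
    count_total = len(blobs)
    for blob_name in blobs:
        blob_url = f"{storage_url}/{container}/{blob_name}{sas}"
        fields = get_prediction_sketch(region, key, blob_url, model_id, "supervised")
        if len(fields) > 0:
            predictions.append({"file": blob_name, "fields": fields})
            count_analyzed += 1
    return predictions, count_analyzed, count_total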
    def run(self, doctype, reuse=False):
        folders = storage_helpers.list_doctype_folders(self.container_client)

        if doctype in folders:
            logging.info(f"Found {doctype} folder in storage.")
            testing_path = doctype + '/test'
            blobs = storage_helpers.list_blobs(self.container_client,
                                               testing_path)
            if len(blobs) > 0:

                # Getting model ID from doctype name
                partition_key = self.app_settings.environment + '_supervised'

                model_id = storage_helpers.query_entity_model(
                    self.table_service, self.app_settings.models_table,
                    partition_key, doctype)

                if model_id is not None:
                    logging.info(
                        f"Found model id {model_id} for doc type {doctype}")

                    evaluation_output_path = doctype + '/evaluation_file.json'
                    evaluation = None

                    # reuse may be passed in as a boolean or as the string 'False'
                    if not reuse or reuse == 'False':
                        logging.warning("Reuse is disabled, regenerating the evaluation file.")
                        # Batch predictions on all test blobs
                        logging.info("Predicting for test set...")

                        predictions, count_analyzed, count_total = fr_helpers.batch_predictions(
                            blobs, model_id,
                            self.app_settings.storage_account_url,
                            self.app_settings.container, self.app_settings.sas,
                            self.app_settings.fr_region,
                            self.app_settings.fr_key)
                        evaluation = model_evaluation.evaluate(
                            predictions, self.app_settings.gt_path,
                            self.app_settings.lookup_path, count_analyzed,
                            count_total)
                        evaluation_file = json.dumps(evaluation)
                        storage_helpers.upload_blob(self.container_client,
                                                    evaluation_output_path,
                                                    evaluation_file)

                    else:
                        logging.info(
                            f"Evaluation file for doc type {doctype} already created, getting it from storage."
                        )
                        evaluation_file = storage_helpers.download_blob(
                            self.container_client, evaluation_output_path,
                            'text')
                        if evaluation_file is not None:
                            evaluation = json.loads(evaluation_file)

                    if evaluation is not None:

                        model_eval_json, mismatches = model_evaluation.create_eval_file(
                            evaluation, model_id,
                            self.app_settings.lookup_path)
                        response = {}
                        response[
                            'text'] = f"Evaluation for doc type {doctype} done."
                        response['eval'] = model_eval_json.copy()

                        model_eval_json['mismatches'] = mismatches
                        model_eval_file = json.dumps(model_eval_json)
                        model_eval_output_path = doctype + '/model_eval.json'
                        storage_helpers.upload_blob(self.container_client,
                                                    model_eval_output_path,
                                                    model_eval_file)

                        # Bell sound when the process finishes
                        print("\a")

                        return response

                else:
                    logging.error(
                        "Could not continue as model id could not be retrieved."
                    )
                    raise EnvironmentError("Could not retrieve model id.")

            else:
                logging.warning(
                    f"Didn't find any testing files in storage for {doctype}")
                raise Warning(f"No test files.")

        else:
            logging.warning(f"Didn't find {doctype} folder in storage.")
            raise Warning(f"{doctype} not in storage.")
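The run method signals problems by raising: Warning when the doctype folder or its test files are missing, and EnvironmentError when no model id can be found. A minimal, hypothetical driver showing how a caller might handle that contract is sketched below; the evaluator argument stands for whatever class defines run() and is an assumption.

import logging

def evaluate_doctype(evaluator, doctype, reuse=False):
    # Hypothetical caller; only the exception contract comes from run() above.
    try:
        response = evaluator.run(doctype, reuse=reuse)
        if response is not None:
            logging.info(response['text'])
        return response
    except Warning as warning:
        # Raised when the doctype folder or its test files are missing.
        logging.warning(f"Skipping {doctype}: {warning}")
    except EnvironmentError as error:
        # Raised when no model id could be retrieved for the doctype.
        logging.error(f"Cannot evaluate {doctype}: {error}")
    return None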