def test_process_prediction(self):
    model_path = 'tests/fixtures/test_predict_by_model/iris'
    options = fsclient.read_json_file(
        os.path.join(model_path, "options.json"))
    target_categories = ["setosa", "versicolor", "virginica"]

    ds = DataFrame.create_dataframe(
        os.path.join(model_path, "iris_test.csv"))
    ds.drop([options['targetFeature']])
    results = [
        "setosa", "versicolor", "virginica",
        "setosa", "versicolor", "virginica"
    ]
    results_proba = None
    proba_classes = None

    ModelHelper.process_prediction(
        ds, results, results_proba, proba_classes,
        None, options.get('minority_target_class'),
        options['targetFeature'], target_categories)

    ds_test = DataFrame.create_dataframe(
        os.path.join(model_path, "iris_test.csv"))

    self.assertEqual(ds.dtypes, ds_test.dtypes)
    self.assertEqual(ds.df.values.tolist(), ds_test.df.values.tolist())
def test_process_prediction_proba(self):
    model_path = 'tests/fixtures/test_predict_by_model/iris'
    options = fsclient.read_json_file(
        os.path.join(model_path, "options.json"))
    target_categories = ["setosa", "versicolor", "virginica"]

    ds = DataFrame.create_dataframe(
        os.path.join(model_path, "iris_test.csv"))
    ds.drop([options['targetFeature']])
    results = None  # [0, 1, 2, 0, 1, 2]
    results_proba = [
        [0.8, 0.1, 0.1], [0.4, 0.6, 0.1], [0.1, 0.2, 0.7],
        [0.7, 0.2, 0.1], [0.3, 0.7, 0.1], [0.1, 0.3, 0.6]
    ]
    results_proba = np.array(results_proba)
    proba_classes = [0, 1, 2]

    ModelHelper.process_prediction(
        ds, results, results_proba, proba_classes,
        0.5, None, options['targetFeature'], target_categories)

    ds_test = DataFrame.create_dataframe(
        os.path.join(model_path, "iris_test.csv"))

    self.assertEqual(
        ds.columns,
        ds_test.columns + ["proba_setosa", "proba_versicolor", "proba_virginica"])
    self.assertEqual(
        ds.df[options['targetFeature']].values.tolist(),
        ds_test.df[options['targetFeature']].values.tolist())
def _predict_locally(self, filename_arg, model_id, threshold, data, columns, output):
    model_deploy = ModelDeploy(self.ctx, None)
    is_model_loaded, model_path, model_name = \
        model_deploy.verify_local_model(model_id)

    if not is_model_loaded:
        raise AugerException(
            'Model isn\'t loaded locally. '
            'Please use a2ml deploy command to download model.')

    model_path, model_existed = self._extract_model(model_name)
    model_options = fsclient.read_json_file(
        os.path.join(model_path, "model", "options.json"))

    filename = filename_arg
    if not filename:
        # data was passed in memory: persist it to a temporary CSV
        # so the dockerized predictor can read it
        ds = DataFrame.create_dataframe(filename, data, columns)
        filename = os.path.join(
            self.ctx.config.get_path(), '.augerml', 'predict_data.csv')
        ds.saveToCsvFile(filename, compression=None)

    try:
        predicted = \
            self._docker_run_predict(filename, threshold, model_path)
    finally:
        # clean up unzipped model
        # if it wasn't unzipped before
        if not model_existed:
            shutil.rmtree(model_path, ignore_errors=True)
            model_path = None

    if not filename_arg:
        # data was passed in memory: return records instead of a file path
        ds_result = DataFrame.create_dataframe(predicted)
        ds_result.options['data_path'] = None
        ds_result.loaded_columns = columns

        return ModelHelper.save_prediction_result(
            ds_result,
            prediction_id=None,
            support_review_model=model_options.get("support_review_model") if model_path else False,
            json_result=False,
            count_in_result=False,
            prediction_date=None,
            model_path=model_path,
            model_id=model_id,
            output=output)
    elif output:
        fsclient.move_file(predicted, output)
        predicted = output

    return predicted
def build_review_data(self, data_path=None, output=None):
    if not data_path:
        data_path = self.options['data_path']

    ds_train = DataFrame.create_dataframe(data_path)

    all_files = fsclient.list_folder(
        os.path.join(self.model_path, "predictions/*_actuals.feather.zstd"),
        wild=True, remove_folder_name=False, meta_info=True)
    all_files.sort(key=lambda f: f['last_modified'], reverse=True)

    for (file, ds_actuals) in DataFrame.load_from_files(all_files):
        if not ds_actuals.df.empty:
            ds_actuals.drop(['prediction_id', 'prediction_group_id'])

            ds_train.df = pd.concat(
                [ds_train.df, ds_actuals.df[ds_train.columns]],
                ignore_index=True)

    ds_train.drop_duplicates()

    if not output:
        output = os.path.splitext(data_path)[0] + "_review_%s.feather.zstd" % (get_uid())

    ds_train.saveToFile(output)
    return output
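# Hypothetical usage sketch (names are illustrative, not from the project's docs):
# `review` stands for an instance of the class that defines build_review_data()
# above, and the data path is a placeholder.
#
#   output_path = review.build_review_data(data_path='data/iris_train.csv')
#   # output_path points at "<data_path>_review_<uid>.feather.zstd", containing
#   # the training data with stored actuals appended and duplicates dropped.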
def preprocess_target(model_path, data_path=None, records=None, features=None):
    ds = DataFrame.create_dataframe(data_path, records, features)

    return ModelHelper.preprocess_target_ds(model_path, ds)
def predict(self, filename, model_id, threshold=None, locally=False, data=None, columns=None,
        output=None, json_result=False, count_in_result=False,
        prediction_date=None, prediction_id=None):
    ds = DataFrame.create_dataframe(filename, data, columns)
    model_path = self.ctx.config.get_model_path(model_id)
    options = fsclient.read_json_file(os.path.join(model_path, "options.json"))

    results, results_proba, proba_classes, target_categories = \
        self._predict_locally(ds.df, model_id, threshold) if locally else \
        self._predict_remotely(ds.df, model_id, threshold)

    if target_categories and len(target_categories) == 2:
        # binary targets serialized as strings: convert "True"/"False" back to booleans
        for idx, item in enumerate(target_categories):
            if item == "False":
                target_categories[idx] = False
            if item == "True":
                target_categories[idx] = True

    ModelHelper.process_prediction(
        ds, results, results_proba, proba_classes, threshold,
        options.get('minority_target_class', self.ctx.config.get('minority_target_class')),
        options.get('targetFeature', self.ctx.config.get('target', None)),
        target_categories)

    predicted = ModelHelper.save_prediction(
        ds, prediction_id,
        options.get('support_review_model', True), json_result, count_in_result,
        prediction_date, model_path, model_id, output)

    if filename:
        self.ctx.log('Predictions stored in %s' % predicted)

    return {'predicted': predicted}
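# Hypothetical usage sketch: `model` stands for an instance of the class that
# defines predict() above; the file name, model id, and threshold are placeholders.
#
#   result = model.predict('data/iris_test.csv', model_id='A1B2C3',
#       threshold=0.5, locally=True)
#   # predict() returns {'predicted': ...}; when a filename is passed, the value
#   # is the path to the stored predictions and that path is also logged.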
def _predict_on_cloud(self, filename, model_id, threshold, data, columns, output):
    ds = DataFrame.create_dataframe(filename, data, columns)
    pipeline_api = AugerPipelineApi(self.ctx, None, model_id)
    predictions = pipeline_api.predict(ds.get_records(), ds.columns, threshold)

    ds_result = DataFrame.create_dataframe(
        None, records=predictions['data'], features=predictions['columns'])
    ds_result.options['data_path'] = filename

    return ModelHelper.save_prediction_result(
        ds_result,
        prediction_id=None,
        support_review_model=False,
        json_result=False,
        count_in_result=False,
        prediction_date=None,
        model_path=None,
        model_id=model_id,
        output=output)
def add_actuals(self, actuals_path=None, actual_records=None, prediction_group_id=None,
        primary_prediction_group_id=None, primary_model_path=None,
        actual_date=None, actuals_id=None, calc_score=True):
    features = None
    if actuals_path or (actual_records and type(actual_records[0]) == list):
        features = ['prediction_id', 'actual']

    ds_actuals = DataFrame.create_dataframe(
        actuals_path, actual_records, features=features)

    result = self._process_actuals(
        ds_actuals, prediction_group_id, primary_prediction_group_id,
        primary_model_path, actual_date, actuals_id, calc_score,
        raise_not_found=True)

    ds_actuals.drop(self.target_feature)
    ds_actuals.df = ds_actuals.df.rename(
        columns={'a2ml_actual': self.target_feature})

    if not actuals_id:
        actuals_id = get_uid()

    file_name = str(
        actual_date or datetime.date.today()) + '_' + actuals_id + "_actuals.feather.zstd"
    ds_actuals.saveToFeatherFile(
        os.path.join(self.model_path, "predictions", file_name))

    return result
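# Hypothetical usage sketch: `review` stands for an instance of the class that
# defines add_actuals() above; the prediction ids and labels are placeholders.
# With a plain list of records the features default to ['prediction_id', 'actual'],
# as handled at the top of the method.
#
#   review.add_actuals(actual_records=[
#       ['prediction-uuid-1', 'setosa'],
#       ['prediction-uuid-2', 'virginica'],
#   ])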
def test_save_prediction(self):
    model_path = 'tests/fixtures/test_predict_by_model/iris'
    options = fsclient.read_json_file(
        os.path.join(model_path, "options.json"))

    prediction_id = "123"
    prediction_date = "today"
    results_file_path = os.path.join(
        model_path, "predictions",
        prediction_date + '_' + prediction_id + "_results.feather.zstd")
    predicted_file_path = os.path.join(
        model_path, "predictions",
        "iris_test_" + prediction_id + "_" + options.get('uid') + "_predicted.csv")

    # 1. data_path is set: predictions are saved to a *_predicted.csv file
    ds = DataFrame.create_dataframe(
        os.path.join(model_path, "iris_test.csv"))

    fsclient.remove_file(results_file_path)
    self.assertFalse(fsclient.is_file_exists(results_file_path))
    fsclient.remove_file(predicted_file_path)
    self.assertFalse(fsclient.is_file_exists(predicted_file_path))

    res = ModelHelper.save_prediction(
        ds, prediction_id,
        support_review_model=True, json_result=False, count_in_result=False,
        prediction_date=prediction_date, model_path=model_path,
        model_id=options.get('uid'))

    self.assertEqual(res, predicted_file_path)
    self.assertTrue(fsclient.is_file_exists(predicted_file_path))
    self.assertTrue(fsclient.is_file_exists(results_file_path))

    # 2. json_result=True: predictions are returned as a JSON string
    ds = DataFrame.create_dataframe(
        os.path.join(model_path, "iris_test.csv"))

    fsclient.remove_file(results_file_path)
    self.assertFalse(fsclient.is_file_exists(results_file_path))
    fsclient.remove_file(predicted_file_path)
    self.assertFalse(fsclient.is_file_exists(predicted_file_path))

    res = ModelHelper.save_prediction(
        ds, prediction_id,
        support_review_model=True, json_result=True, count_in_result=False,
        prediction_date=prediction_date, model_path=model_path,
        model_id=options.get('uid'))

    res = json.loads(res)
    self.assertEqual(res['columns'], ds.columns)
    self.assertEqual(len(res['data']), 6)

    # 3. no data_path and no loaded_columns: predictions are returned as a list of dicts
    ds = DataFrame.create_dataframe(
        os.path.join(model_path, "iris_test.csv"))

    fsclient.remove_file(results_file_path)
    self.assertFalse(fsclient.is_file_exists(results_file_path))
    fsclient.remove_file(predicted_file_path)
    self.assertFalse(fsclient.is_file_exists(predicted_file_path))

    ds.options['data_path'] = None
    res = ModelHelper.save_prediction(
        ds, prediction_id,
        support_review_model=False, json_result=False, count_in_result=False,
        prediction_date=prediction_date, model_path=model_path,
        model_id=options.get('uid'))

    self.assertEqual(type(res[0]), dict)
    self.assertEqual(res[0][options['targetFeature']], 'setosa')

    # 4. no data_path but loaded_columns set: predictions are returned as columns/data lists
    ds = DataFrame.create_dataframe(
        os.path.join(model_path, "iris_test.csv"))

    fsclient.remove_file(results_file_path)
    self.assertFalse(fsclient.is_file_exists(results_file_path))
    fsclient.remove_file(predicted_file_path)
    self.assertFalse(fsclient.is_file_exists(predicted_file_path))

    ds.options['data_path'] = None
    ds.loaded_columns = ds.columns
    res = ModelHelper.save_prediction(
        ds, prediction_id,
        support_review_model=False, json_result=False, count_in_result=False,
        prediction_date=prediction_date, model_path=model_path,
        model_id=options.get('uid'))

    self.assertEqual(res['columns'], ds.columns)
    self.assertEqual(len(res['data']), 6)
    self.assertEqual(type(res['data'][0]), list)