def test_03_predict(self):
    """
    Writing one prediction entry must create the test predict log.
    """
    log_path = os.path.join("logs", "predict-test.log")

    ## remove any earlier log so the final check proves the file was written now
    if os.path.exists(log_path):
        os.remove(log_path)

    ## positional log fields:
    ## country, y_pred, y_proba, query, target_date, runtime, model_version
    entry = (
        'all',
        [0],
        [0.6, 0.4],
        {'feature': [0]},
        '2021-04-19',
        "00:00:02",
        0.1,
    )
    update_predict_log(*entry, test=True)

    self.assertTrue(os.path.exists(log_path))
def test_04_predict(self):
    """
    The prediction written to the test log must be readable back from it.
    """
    log_path = os.path.join("logs", "predict-test.log")
    expected_pred = [0]

    ## positional log fields:
    ## country, y_pred, y_proba, query, target_date, runtime, model_version
    entry = (
        'all',
        expected_pred,
        [0.6, 0.4],
        {'feature': [0]},
        '2021-04-19',
        "00:00:02",
        0.1,
    )
    update_predict_log(*entry, test=True)

    ## parse every logged prediction back into a Python object, keep the newest
    logged = pd.read_csv(log_path)
    parsed_preds = list(map(literal_eval, logged['y_pred']))
    self.assertEqual(expected_pred, parsed_preds[-1])
def test_01_create_pred_log(self):
    """
    Writing one prediction entry must create the "pred" test log.
    """
    log_path = the_testlogname("pred")
    already_there = os.path.exists(log_path)

    ## drop a leftover log so the existence check below is meaningful
    if already_there:
        os.remove(log_path)

    query = np.array([6.1, 2.8])
    update_predict_log([0], [0, 0, 0], query, MODEL_VERSION, 0.05, True)

    self.assertTrue(os.path.exists(log_path))
def model_predict(query, model=None, test=False):
    """
    Predict from the model for every row of `query` and log each prediction.

    query : dict or pandas.DataFrame. A dict is converted with
            pd.DataFrame(query). The column names must be exactly
            petal_length, petal_width, sepal_length and sepal_width.
    model : object with a predict() method. Only when it is None is the
            model obtained from model_load(); a supplied model is always used
            as given, even if it evaluates as falsy.
    test  : forwarded unchanged to update_predict_log().

    Returns a dict {'y_pred': <model.predict output>, 'y_proba': 'None'}.

    Raises Exception when `query` is neither a dict nor a DataFrame, or when
    its columns are not exactly the four expected features.
    """

    ## start timer for runtime
    time_start = time.time()

    ## input checks
    if isinstance(query, dict):
        query = pd.DataFrame(query)
    elif not isinstance(query, pd.DataFrame):
        raise Exception("ERROR (model_predict) - invalid input. {} was given".format(type(query)))

    ## features check (only the set of names is compared; the columns are
    ## handed to the model in the caller's order)
    features = sorted(query.columns.tolist())
    if features != ['petal_length', 'petal_width', 'sepal_length', 'sepal_width']:
        print("query features: {}".format(",".join(features)))
        raise Exception("ERROR (model_predict) - invalid features present")

    ## load model if needed; compare against None so that a supplied model is
    ## never replaced just because it defines a falsy __len__/__bool__
    if model is None:
        model = model_load()

    ## query is always a 2-D DataFrame at this point, so no reshaping is needed

    ## make prediction and gather data for log entry
    y_pred = model.predict(query)
    y_proba = 'None'

    m, s = divmod(time.time() - time_start, 60)
    h, m = divmod(m, 60)
    runtime = "%03d:%02d:%02d" % (h, m, s)

    ## update the log file, one entry per query row
    for row_idx in range(query.shape[0]):
        update_predict_log(y_pred[row_idx], y_proba,
                           query.iloc[row_idx].values.tolist(),
                           runtime, MODEL_VERSION, test=test)

    return {'y_pred': y_pred, 'y_proba': y_proba}
def test_04_check_n_preds(self):
    """
    Logging n predictions must add exactly n rows to the "pred" test log.
    """
    n_entries = 5
    log_path = the_testlogname("pred")

    def count_rows():
        ## number of data rows currently in the log file
        return pd.read_csv(log_path, delimiter=',', quotechar='|').shape[0]

    ## a missing log counts as zero existing rows
    rows_before = count_rows() if os.path.exists(log_path) else 0

    for _ in range(n_entries):
        update_predict_log([0], [0, 0, 0], np.array([6.1, 2.8]), MODEL_VERSION, 0.05, True)

    rows_after = count_rows()
    self.assertEqual(n_entries, rows_after - rows_before)
def test_04_predict(self):
    """
    The prediction written to the test log must be readable back from it.
    """
    log_path = os.path.join("logs", "model_predict", "predict-test.log")
    expected_pred = [0]

    ## positional log fields: y_pred, y_proba, query, runtime, model_version
    entry = (
        expected_pred,
        [0.6, 0.4],
        ['united_states', 24, 'aavail_basic', 8],
        "00:00:02",
        0.1,
    )
    update_predict_log(*entry, test=True)

    ## parse every logged prediction back into a Python object, keep the newest
    logged = pd.read_csv(log_path)
    parsed_preds = list(map(literal_eval, logged['y_pred']))
    self.assertEqual(expected_pred, parsed_preds[-1])
def test_03_predict(self):
    """
    Writing one prediction entry must create the test predict log.
    """
    log_path = os.path.join("logs", "model_predict", "predict-test.log")

    ## remove any earlier log so the final check proves the file was written now
    if os.path.exists(log_path):
        os.remove(log_path)

    ## positional log fields: y_pred, y_proba, query, runtime, model_version
    entry = (
        [0],
        [0.6, 0.4],
        ['united_states', 24, 'aavail_basic', 8],
        "00:00:02",
        0.1,
    )
    update_predict_log(*entry, test=True)

    self.assertTrue(os.path.exists(log_path))
def model_predict(country, year, month, day, all_models=None, test=False):
    """
    Predict for one country on one date and record the result in the
    predict log.

    country          : key selecting the model and data to use.
    year, month, day : date parts made of digits only; strings or ints are
                       accepted. month and day are zero-padded to two
                       characters when the target date is built.
    all_models       : optional mapping of country -> model. When empty or
                       None the models come from model_load(training=False).
    test             : forwarded unchanged to update_predict_log().

    Returns a dict {'y_pred': ..., 'y_proba': ...}; 'y_proba' stays None
    unless the model has predict_proba and a truthy `probability` attribute.

    Raises Exception when a date part is not purely digits, when no model
    exists for `country`, when the data's dates and X differ in length, or
    when the target date is not among the data's dates.
    """

    ## start timer for runtime
    time_start = time.time()

    ## validate the date parts first: it needs nothing loaded, and going
    ## through str() lets ints be passed instead of raising TypeError
    for part in (year, month, day):
        if re.fullmatch(r"\d+", str(part)) is None:
            raise Exception(
                "ERROR (model_predict) - invalid year, month or day")

    ## the data is needed in every case; the models are only taken from
    ## model_load when the caller did not supply any (previously all_data was
    ## left unbound whenever all_models was passed in)
    if not all_models:
        all_data, all_models = model_load(training=False)
    else:
        all_data, _ = model_load(training=False)

    ## input checks
    if country not in all_models:
        raise Exception(
            "ERROR (model_predict) - model for country '{}' could not be found"
            .format(country))

    ## load data
    model = all_models[country]
    data = all_data[country]

    ## sanity check, done before the dates are used to index into X
    if data['dates'].shape[0] != data['X'].shape[0]:
        raise Exception("ERROR (model_predict) - dimensions mismatch")

    ## check date
    target_date = "{}-{}-{}".format(year,
                                    str(month).zfill(2),
                                    str(day).zfill(2))
    print(target_date)

    if target_date not in data['dates']:
        raise Exception(
            "ERROR (model_predict) - date {} not in range {}-{}".format(
                target_date, data['dates'][0], data['dates'][-1]))

    ## first position whose date equals the target date
    date_indx = np.where(data['dates'] == target_date)[0][0]
    query = data['X'].iloc[[date_indx]]

    ## make prediction and gather data for log entry
    y_pred = model.predict(query)
    y_proba = None
    if hasattr(model, 'predict_proba') and getattr(model, 'probability', False):
        y_proba = model.predict_proba(query)

    m, s = divmod(time.time() - time_start, 60)
    h, m = divmod(m, 60)
    runtime = "%03d:%02d:%02d" % (h, m, s)

    ## update predict log
    update_predict_log(country, y_pred, y_proba, query.to_dict(), target_date,
                       runtime, MODEL_VERSION, test=test)

    return {'y_pred': y_pred, 'y_proba': y_proba}