def predict_score_model(model, validation_set):
    '''
    Score an already-fitted model on a hold-out set.

    Input: Instantiated and fitted model, Pandas DataFrame
    Output: Floating point number

    Obtain predicted probabilities for the test data set, and then an
    roc_auc score for how good those probabilities were.
    '''
    # Split the hold-out frame into its target column and feature columns.
    target, features = get_target_features(validation_set)
    # Hard class predictions first, then class-probability predictions —
    # same evaluation order as a single tuple assignment would give.
    class_preds = model.predict(features)
    prob_preds = model.predict_proba(features)
    all_scores = return_scores(target, class_preds, prob_preds)
    return all_scores['roc_auc_score']
''' st = datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S') filename = './logs/' + model_name + '.txt' with open(filename, 'a+') as f: f.write(st + '\n') f.write('-' * 100 + '\n') f.write('Model Run: ' + model_name + '\n' * 2) f.write('Params: ' + str(fitted_model.get_params()) + '\n' * 2) f.write('Features: ' + ', '.join(train.columns) + '\n' * 2) f.write('Scores: ' + str(scores) + '\n' * 2) if __name__ == '__main__': # sys.argv[1] will hold the name of the model we want to run (logit, random forest, etc.), # and sys.argv[2] will hold our input dataframe (data will all the features and target). model_name = sys.argv[1] with open(sys.argv[2]) as f: input_df = pickle.load(f) train, test = tt_split_all_less60(input_df) model = get_model(model_name) fitted_model = fit_model(train, model) preds, preds_probs = predict_with_model(test, fitted_model) scores = return_scores(test.fire_bool, preds, preds_probs) log_results(model_name, train, fitted_model, scores)
# NOTE(review): this chunk is whitespace-mangled — the original line breaks and
# indentation were lost, so the code above/below is kept byte-identical rather
# than reformatted. The leading ''' on the line above appears to CLOSE a
# docstring opened on a `def log_results(...)` line that is not visible in this
# chunk (triple-quote parity across the chunk is odd) — confirm against the
# full file before reformatting.
# NOTE(review): the __main__ section reads a model name from sys.argv[1] and a
# pickled DataFrame path from sys.argv[2], splits it with tt_split_all_less60,
# fits/scores the model, and appends results to ./logs/<model_name>.txt.
# WARNING: pickle.load on a user-supplied file can execute arbitrary code —
# only run this script on trusted input files.
if model_name == 'neural_net': train = normalize_df(train.drop('date_fire', axis=1)) test = normalize_df(test.drop('date_fire', axis=1)) ''' keep_list = ['conf'] train = train[keep_list] test = test[keep_list] train = train.drop(keep_list, axis=1) test = test.drop(keep_list, axis=1) ''' fitted_model, best_roc_auc = own_grid_search(model_name, train, test, train2) ''' roc_save_filename = 'roc_auc_' + model_name with open(roc_save_filename, 'w+') as f: pickle.dump(roc_auc_scores, f) ''' preds, preds_probs = predict_with_model(test, fitted_model) scores = return_scores(test.fire_bool, preds, preds_probs) log_results(model_name, train.drop('date_fire', axis=1), fitted_model, scores, best_roc_auc) filename = './model_output/' + model_name + '_preds_probs_daysprioryear_lessm_15.csv' output_model_preds(filename, model_name, preds_probs, test)
# NOTE(review): the two '''-delimited spans in the line above (the keep_list
# feature-selection block and the roc_auc pickle-dump block) read as
# commented-out dead code, not docstrings — consider deleting them once
# confirmed unused.
# NOTE(review): `train2`, passed to own_grid_search, is not defined anywhere
# visible in this chunk — presumably defined elsewhere in the file or a bug;
# verify before running the grid-search path.
# NOTE(review): log_results is called here with 5 args but with 4 args earlier
# in __main__, so its (unseen) signature presumably defaults best_roc_auc —
# confirm against the definition.