예제 #1
0
def predict_score_model(model, validation_set): 
	'''
	Input: Instantiated and fitted model, Pandas DataFrame
	Output: Floating point number

	Score the fitted model on the validation set: generate class predictions
	and predicted probabilities, compute the full score dictionary via
	return_scores, and hand back only its roc_auc component.
	'''

	y, X = get_target_features(validation_set)
	class_preds = model.predict(X)
	prob_preds = model.predict_proba(X)
	score_dict = return_scores(y, class_preds, prob_preds)

	return score_dict['roc_auc_score']
예제 #2
0
	'''

	# Human-readable timestamp ('YYYY-MM-DD HH:MM:SS') marking when this run was logged.
	st = datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')
	# One append-mode log file per model name under ./logs/; created if absent.
	# NOTE(review): assumes the ./logs/ directory already exists — open() will
	# raise IOError/FileNotFoundError otherwise; confirm with the caller.
	filename = './logs/' + model_name + '.txt'
	with open(filename, 'a+') as f:
		f.write(st + '\n')
		# Visual separator between successive runs in the same log file.
		f.write('-' * 100 + '\n')
		# '\n' * 2 leaves a blank line after each entry for readability.
		f.write('Model Run: ' + model_name + '\n' * 2)
		f.write('Params: ' + str(fitted_model.get_params()) + '\n' * 2)
		f.write('Features: ' + ', '.join(train.columns) + '\n' * 2)
		f.write('Scores: ' + str(scores) + '\n' * 2)

if __name__ == '__main__': 
	# sys.argv[1] holds the name of the model to run (logit, random forest, etc.),
	# and sys.argv[2] holds the path to a pickled DataFrame (all features + target).
	model_name = sys.argv[1]

	# Pickle is a binary format: the file must be opened in 'rb' mode. The
	# original text-mode open fails under Python 3 and can corrupt the stream
	# on Windows under Python 2; 'rb' is correct on both.
	# NOTE(security): pickle.load executes arbitrary code from the file —
	# only ever load pickles from a trusted source.
	with open(sys.argv[2], 'rb') as f:
		input_df = pickle.load(f)

	# Split, fit, score on the held-out set, and append a log entry with the
	# model's params, feature list, and scores.
	train, test = tt_split_all_less60(input_df)
	model = get_model(model_name)
	fitted_model = fit_model(train, model)
	preds, preds_probs = predict_with_model(test, fitted_model)
	scores = return_scores(test.fire_bool, preds, preds_probs)
	log_results(model_name, train, fitted_model, scores)




예제 #3
0
	# Neural nets need scaled inputs: drop the raw date column and normalize
	# the remaining features for both splits. (Presumably normalize_df scales
	# each column — confirm against its definition, which is outside this view.)
	if model_name == 'neural_net': 
		train = normalize_df(train.drop('date_fire', axis=1))
		test = normalize_df(test.drop('date_fire', axis=1))


	# NOTE(review): the bare triple-quoted string below is dead code — a
	# commented-out feature-selection experiment with no runtime effect.
	'''
	keep_list = ['conf']
	train = train[keep_list]
	test = test[keep_list]
	train = train.drop(keep_list, axis=1)
	test = test.drop(keep_list, axis=1)
	'''
	
	# Hyperparameter search; by the unpacking here it appears to return the
	# best fitted model plus its best roc_auc score. NOTE(review): train2 is
	# defined outside this excerpt — verify where it comes from.
	fitted_model, best_roc_auc = own_grid_search(model_name, train, test, train2)

	# NOTE(review): another dead string literal — a commented-out pickle dump
	# of per-candidate roc_auc scores; no runtime effect.
	'''
	roc_save_filename = 'roc_auc_' + model_name
	with open(roc_save_filename, 'w+') as f: 
		pickle.dump(roc_auc_scores, f)
	'''
	# Evaluate the chosen model on the held-out split and append a log entry
	# (date_fire is dropped so the logged feature list matches what was fit).
	preds, preds_probs = predict_with_model(test, fitted_model)
	scores = return_scores(test.fire_bool, preds, preds_probs)
	log_results(model_name, train.drop('date_fire', axis=1), fitted_model, scores, best_roc_auc)


	# Persist predicted probabilities to CSV for downstream analysis.
	filename = './model_output/' + model_name + '_preds_probs_daysprioryear_lessm_15.csv'
	output_model_preds(filename, model_name, preds_probs, test)