Exemple #1
0
def own_grid_search(model_name, train_data, test_data, train_data2): 
	'''
	Input: String, Pandas DataFrame, Pandas DataFrame
	Output: Best fit model from grid search of parameters. 
	'''
	model = get_model(model_name, train_data)
	if isinstance(model, keras.models.Sequential): 
	    model =  fit_neural_net(model, train_data, test_data)
	    return model
	roc_auc_scores_list = []  
	grid_parameters = get_grid_params(model_name)
	param_names, param_combs = prepare_grid_params(grid_parameters)  
	for idx, param_comb in enumerate(param_combs): 
		output_dict = defaultdict(list)
		param_dict = {}
		output_dict['model'] = idx
		for idx, param in enumerate(param_names): 
			output_dict[param] = param_comb[idx]
			param_dict[param] = param_comb[idx]
		for months_forward in xrange(0, 78, 2): 
			date_split = train.date_fire.max() - datetime.timedelta(weeks=months_forward)
			training_set, validation_set = tt_split_same_months(train, 2013, [1], days_back=14, exact_split_date = date_split, direct_prior_days=False, add_test=True)
			# for months_forward in xrange(0, 132, 2): 
			# for months_forward in xrange(0, 33, 1): 
			# date_split = datetime.date(2013, 1, 1)
			# training_set, validation_set = tt_split_early_late(train, date_split, months_forward, months_backward=0.5, days_forward=2, weeks_forward=months_forward)
			# training_set, validation_set = tt_split_same_months(train, 2013, [months_forward], days_back=None)	
			# training_set, validation_set = tt_split_early_late(train, input_date, months_forward, months_backward=0.5, days_forward=30)
			# If there are no actual fires here, then training/testing on it is pointless and the ROC 
			# area under the curve can't be calculated. 
			print training_set.shape, validation_set.shape
			if validation_set.fire_bool.sum() > 0 and training_set.fire_bool.sum() > 0: 
				model = fit_model(model, param_dict, training_set.drop('date_fire', axis=1))
				roc_auc_score = predict_score_model(model, validation_set.drop('date_fire', axis=1))
				output_dict['roc_auc'].append(roc_auc_score)
		roc_auc_scores_list.append(output_dict)

	del train['year']
	del train['month']

	roc_save_filename = './model_output/roc_auc_daysprioryear_lessm_15_' + model_name
	with open(roc_save_filename, 'w+') as f: 
		pickle.dump(roc_auc_scores_list, f)
	best_params, best_roc_auc = return_best_params(roc_auc_scores_list) 
	model = fit_model(model, best_params, train_data2.drop('date_fire', axis=1))

	return model, best_roc_auc
Exemple #2
0
if __name__ == '__main__':
    with open(sys.argv[1]) as f:
        input_df = pickle.load(f)

    days_back = 60
    train, test = tt_split_all_less_n_days(input_df, days_back=days_back)
    train.loc[:, 'date_fire'] = pd.to_datetime(train['date_fire'].copy())

    for months_forward in xrange(0, 78, 2):
        date_split = train.date_fire.max() - datetime.timedelta(
            weeks=months_forward)
        training_set, validation_set = tt_split_same_months(
            train,
            2013, [1],
            days_back=14,
            exact_split_date=date_split,
            direct_prior_days=False,
            add_test=True)

        # training_set, validation_set = tt_split_early_late(train, date_split, months_forward, months_backward=None, days_forward=2, weeks_forward=months_forward)
        print months_forward
        for year in training_set.year.unique():
            print training_set.query('year == @year').date_fire.min(
            ), training_set.query('year == @year').date_fire.max()
        print 'on to validation'
        for year in validation_set.year.unique():
            print validation_set.query('year == @year').date_fire.min(
            ), validation_set.query('year == @year').date_fire.max()
        print '\n' * 2
        '''
Exemple #3
0
	return input_df

if __name__ == '__main__': 
	# Command line arguments: sys.argv[1] is the name of the model to run 
	# (logit, random forest, etc.) and sys.argv[2] is the path to the pickled 
	# input dataframe (the data with all the features and the target). 
	model_name = sys.argv[1]

	with open(sys.argv[2]) as f: 
		input_df = pickle.load(f)

	days_back = 14
	train, test = tt_split_all_less_n_days(input_df, days_back=days_back)
	# Alternative split, kept for reference: 
	# train2, test2 = tt_split_early_late(train, train.date_fire.max(), months_forward = 0, months_backward=0.5)
	train2, test2 = tt_split_same_months(
		train, 2012, [1], 
		days_back=14, 
		exact_split_date=test.date_fire.max(), 
		direct_prior_days=False, 
		add_test=True)

	# Capture the column indexes before any column is removed. 
	train_cols, test_cols = train.columns, test.columns
	train2_cols, test2_cols = train2.columns, test2.columns

	# Delete every column whose name contains 'month', first from train and 
	# then from test. 
	for frame, frame_cols in ((train, train_cols), (test, test_cols)): 
		for col in frame_cols: 
			if 'month' in col: 
				del frame[col]
	for col in train2_cols: 
import datetime
import pandas as pd
from data_manip.tt_splits import tt_split_all_less_n_days, tt_split_early_late, tt_split_same_months


if __name__ == '__main__': 
	with open(sys.argv[1]) as f: 
		input_df = pickle.load(f)

	days_back = 60
	train, test = tt_split_all_less_n_days(input_df, days_back=days_back)
        train.loc[:, 'date_fire'] = pd.to_datetime(train['date_fire'].copy())

	for months_forward in xrange(0, 78, 2): 
                date_split = train.date_fire.max() - datetime.timedelta(weeks=months_forward)
                training_set, validation_set = tt_split_same_months(train, 2013, [1], days_back=14, exact_split_date = date_split, direct_prior_days=False, add_test=True)

                # training_set, validation_set = tt_split_early_late(train, date_split, months_forward, months_backward=None, days_forward=2, weeks_forward=months_forward)
                print months_forward
                for year in training_set.year.unique(): 
                        print training_set.query('year == @year').date_fire.min(), training_set.query('year == @year').date_fire.max()
                print 'on to validation'
                for year in validation_set.year.unique(): 
                        print validation_set.query('year == @year').date_fire.min(), validation_set.query('year == @year').date_fire.max()
                print '\n' * 2

                '''
                print training_set.date_fire.min(), training_set.date_fire.max()
                print validation_set.date_fire.min(), validation_set.date_fire.max()
                print training_set.fire_bool.sum(), validation_set.fire_bool.sum()
                print '\n' * 2