def kfold_train(gamma=5):
	"""Fit a GraphClassifier on the pickled training frame and save it.

	Reads '../data/train_df.pkl', fits on its 'X'/'Y' columns against the
	'Category' column, and pickles the fitted classifier to
	'../data/graph_class_gamma-<gamma>.pkl'.

	NOTE(review): despite the name, no fold splitting happens here;
	presumably the frame was already split upstream -- confirm.

	Args:
		gamma: forwarded unchanged to GraphClassifier(gamma=...) and used
			in the output file name.
	"""
	# The graph is rebuilt on every call instead of being loaded from the
	# '../data/node_dict.pkl' cache.
	graph_nodes = build_graph()

	lat_lng_df = pd.read_pickle('../data/all_intersections_df.pkl')

	df_train = pd.read_pickle('../data/train_df.pkl')
	X = df_train.loc[:, ['X', 'Y']].values
	y = df_train.loc[:, 'Category'].values

	gc = GraphClassifier(graph_nodes, lat_lng_df, gamma=gamma)
	gc.fit(X, y)

	# Protocol -1 (highest) is a binary format, so the file must be opened
	# in binary mode; text mode can corrupt it on newline-translating
	# platforms.
	out_path = '../data/graph_class_gamma-{0}.pkl'.format(gamma)
	with open(out_path, 'wb') as f:
		cPickle.dump(gc, f, -1)
def yst_train(gamma=20):
	"""Fit a GraphClassifier with extra date columns and save it.

	Reads '../data/train_df_yst.pkl', fits on its 'X'/'Y' columns against
	the 'Category' column, and passes a dict of the 'year', 'month' and
	'DayOfWeek' column arrays as the third argument to fit(). The fitted
	classifier is pickled to '../data/graph_class_yst_gamma-<gamma>.pkl'.

	Args:
		gamma: forwarded unchanged to GraphClassifier(gamma=...) and used
			in the output file name.
	"""
	# The graph is rebuilt on every call instead of being loaded from the
	# '../data/node_dict.pkl' cache.
	graph_nodes = build_graph()

	lat_lng_df = pd.read_pickle('../data/all_intersections_df.pkl')

	df_train = pd.read_pickle('../data/train_df_yst.pkl')
	X = df_train.loc[:, ['X', 'Y']].values
	y = df_train.loc[:, 'Category'].values

	# Alternative column set tried previously:
	# [ 'year_group', 'season', 'time_of_day', 'weekend' ]
	yst_columns = ['year', 'month', 'DayOfWeek']
	# Map each column name to its raw value array.
	yst_dict = {col: df_train[col].values for col in yst_columns}

	gc = GraphClassifier(graph_nodes, lat_lng_df, gamma=gamma)
	gc.fit(X, y, yst_dict)

	# Protocol -1 (highest) is a binary format, so the file must be opened
	# in binary mode; text mode can corrupt it on newline-translating
	# platforms.
	out_path = '../data/graph_class_yst_gamma-{0}.pkl'.format(gamma)
	with open(out_path, 'wb') as f:
		cPickle.dump(gc, f, -1)
def train_model(predict=True, use_resampling=False, num_resamples=10):
	"""Build the graph, fit a GraphClassifier on train.csv, and save both.

	The freshly built graph is pickled to '../data/node_dict.pkl'. The
	fitted classifier is pickled to
	'../data/graph_class_resample_<num_resamples>.pkl' when resampling is
	enabled, otherwise to '../data/graph_class_normal.pkl'.

	Args:
		predict: when true, call predict() on the training coordinates and
			write the result as gzipped CSV to
			'../data/submissions/submission_training_data.csv.gz'.
		use_resampling: when true, pass num_resamples to the classifier;
			otherwise pass None.
		num_resamples: value forwarded as GraphClassifier(num_resamples=...)
			when use_resampling is true; also used in the output file name.
	"""
	graph_nodes = build_graph()

	# Pickle protocol -1 (highest) is binary, so every pickle file below is
	# opened in binary mode; text mode can corrupt the output on
	# newline-translating platforms.
	with open('../data/node_dict.pkl', 'wb') as f:
		cPickle.dump(graph_nodes, f, -1)

	lat_lng_df = pd.read_pickle('../data/all_intersections_df.pkl')

	df_train = pd.read_csv('../data/train.csv', parse_dates=['Dates'])
	X = df_train.loc[:, ['X', 'Y']].values
	y = df_train.loc[:, 'Category'].values

	resamples = num_resamples if use_resampling else None

	gc = GraphClassifier(graph_nodes, lat_lng_df, num_resamples=resamples)
	gc.fit(X, y)

	# save fit model
	if use_resampling:
		model_path = '../data/graph_class_resample_{0}.pkl'.format(num_resamples)
	else:
		model_path = '../data/graph_class_normal.pkl'
	with open(model_path, 'wb') as f:
		cPickle.dump(gc, f, -1)

	if predict:
		# Predictions are made on the same coordinates the model was fit on.
		predict_df = gc.predict(X)

		# Single-argument parenthesised print emits the same text under the
		# Python 2 print statement and the print function.
		print("\nsaving submission...")
		with gzip.open('../data/submissions/submission_training_data.csv.gz', 'w') as f:
			predict_df.to_csv(f, index=False)
		print("Done!")