def predict(self, X, yst_dict=None):
		"""Build a per-category probability table for every row of X.

		The original code defined ``predict`` twice in a row (once taking
		``yst_dict`` and once without it); the second definition replaced
		the first, leaving the yst-aware variant unreachable. Both variants
		are merged here behind an optional argument so either call style
		works.

		Parameters:
			X: 2-D array with one location per row. Its columns are reversed
				with ``np.fliplr`` before use (the original comment says this
				puts them in lat, lng order).
			yst_dict: optional mapping whose values are sequences indexable by
				row number. When omitted (``None``), each row receives
				``get_probabilities()`` of its closest intersection. When
				given, the ``get_yst_prob_freq(value)`` results for every
				mapping value at that row are summed and normalised to sum
				to 1.
				NOTE(review): the meaning of the "yst" values is not visible
				here -- confirm against the caller.

		Returns:
			A pandas DataFrame with an 'Id' column (the 0-based row number)
			followed by one column per entry of ``get_all_categories()``.

		Note:
			In the ``yst_dict`` mode a row whose summed frequencies total zero
			(for example when ``yst_dict`` is empty) comes out as NaN, exactly
			as in the original implementation.
		"""
		start = time()

		# change column order to lat, lng
		X = np.fliplr(X)

		categories = get_all_categories()
		ncols = len(categories)
		pred_X = np.zeros((X.shape[0], ncols))

		use_yst = yst_dict is not None
		# Materialise the columns once; .values() works on Python 2 and 3.
		yst_columns = list(yst_dict.values()) if use_yst else []
		# Each original variant reported progress at its own interval.
		report_every = 100000 if use_yst else 200000

		for i, row in enumerate(X):
			if i % report_every == 0:
				# Single pre-formatted argument prints the same text under
				# both the Python 2 print statement and Python 3 print().
				print("%s %s minutes" % (i, (time() - start) / 60))

			# The literal 1 is passed through unchanged; presumably it is the
			# number of neighbours requested -- TODO confirm.
			closest_cnn, _distances = self._find_closest_intersection(row, 1)
			node = self.graph_dict[closest_cnn[0]]

			if not use_yst:
				pred_X[i] = node.get_probabilities()
				continue

			# Sized from the category list instead of a hard-coded 39 so the
			# accumulator always matches the width of pred_X.
			frequencies = np.zeros(ncols)
			for yst_col_vals in yst_columns:
				frequencies += np.asarray(node.get_yst_prob_freq(yst_col_vals[i]))

			# A zero total produces NaN here (see docstring note).
			pred_X[i] = frequencies / np.sum(frequencies)

		predict_df = pd.DataFrame(pred_X, columns=categories)
		predict_df.index.name = 'Id'
		predict_df = predict_df.reset_index()

		return predict_df