Example #1
0
def recursiveFeatureElimination():
	"""Plot cross-validated recursive-feature-elimination scores per POI.

	For every point of interest returned by ``getPointsOfInterest()`` the
	data is loaded through ``loadData`` with ``generateAllFeatures``, an
	``RFECV`` wrapped around a linear-kernel ``SVR`` is fitted, the selected
	number of features is printed, and the score obtained for each candidate
	feature count is drawn in that POI's subplot.  The figure is shown once
	the database context has been exited.
	"""
	with DB() as db:
		POIs = getPointsOfInterest()

		# One subplot per POI.  The column count keeps the floor(sqrt(n)) + 1
		# rule; the row count is the ceiling of n / columns so the grid always
		# has at least n cells.  Using floor(sqrt(n)) rows left the grid too
		# small for e.g. 3, 7 or 8 POIs, which makes plt.subplot fail on the
		# out-of-range index.
		numCols = int(math.sqrt(len(POIs))) + 1
		numRows = (len(POIs) + numCols - 1) // numCols

		plt.figure()
		plt.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=0.5, hspace=0.5)
		for fignum, POI in enumerate(POIs, 1):
			x, y = loadData(db, POI['LAT'], POI['LONG'], generateAllFeatures)
			x, y = np.array(x), np.array(y)

			# Create the RFE object and compute a cross-validated score.
			# NOTE(review): 'accuracy' and StratifiedKFold are classification
			# tools while SVR is a regressor -- confirm this pairing is
			# intended for the targets returned by loadData.
			svr = SVR(kernel="linear")
			rfecv = RFECV(estimator=svr, step=1, cv=StratifiedKFold(y, 2), scoring='accuracy')
			rfecv.fit(x, y)

			print("Optimal number of features : %d" % rfecv.n_features_)

			# Plot number of features VS. cross-validation scores
			plt.subplot(numRows, numCols, fignum)
			plt.title(POI['NAME'])
			plt.xlabel("Number of features selected")
			# The plotted value is the 'accuracy' score requested above, not a
			# misclassification count.
			plt.ylabel("Cross validation score (accuracy)")
			plt.plot(range(1, len(rfecv.grid_scores_) + 1), rfecv.grid_scores_)
	plt.show()
Example #2
0
File: SVM.py Project: ncvc/BigData
def fitPipeline(db, latitude, longitude, generatePipeline):
	"""Load the data for one location, build a pipeline and fit it.

	Args:
		db: database handle forwarded to ``loadData``.
		latitude: latitude forwarded to ``loadData``.
		longitude: longitude forwarded to ``loadData``.
		generatePipeline: callable that receives the loaded ``x`` and returns
			an object exposing ``fit(x, y)``.

	Returns:
		The pipeline produced by ``generatePipeline`` after ``fit`` has been
		called on it.

	Progress messages and the elapsed fitting time are printed to stdout.
	"""
	# Single-argument print(...) calls behave the same whether ``print`` is a
	# statement or a function, and produce the same text as before.
	print('Loading Data')
	x, y = loadData(db, latitude, longitude, generateAllFeatures)

	print('Generating pipeline')
	pipeline = generatePipeline(x)

	# time.clock() no longer exists on recent Python 3 releases; prefer
	# perf_counter when the interpreter offers it and fall back to clock()
	# otherwise, so older interpreters keep their previous behaviour.
	timer = getattr(time, 'perf_counter', None) or time.clock

	print('Training SVR')
	start = timer()
	pipeline.fit(x, y)
	print('Total Training time: %s' % (timer() - start))
	return pipeline
Example #3
0
def plot(generateX, xLabel='x', yLabel='Taxi Pickups', includeFunc=None):
	"""Draw one scatter subplot per point of interest.

	Args:
		generateX: feature generator forwarded to ``loadData``.
		xLabel: x-axis label applied to every subplot.
		yLabel: y-axis label applied to every subplot.
		includeFunc: forwarded to ``loadData`` as its ``includeFunc`` keyword.

	Each POI returned by ``getPointsOfInterest()`` is printed, its data is
	loaded through ``loadData`` and scattered in its own subplot titled with
	the POI's ``'NAME'``.  The figure is shown once the database context has
	been exited.
	"""
	with DB() as db:
		POIs = getPointsOfInterest()

		# One subplot per POI.  The column count keeps the floor(sqrt(n)) + 1
		# rule; the row count is the ceiling of n / columns so the grid always
		# has at least n cells.  Using floor(sqrt(n)) rows left the grid too
		# small for e.g. 3, 7 or 8 POIs, which makes plt.subplot fail on the
		# out-of-range index.
		numCols = int(math.sqrt(len(POIs))) + 1
		numRows = (len(POIs) + numCols - 1) // numCols

		plt.figure()
		plt.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=0.5, hspace=0.5)
		for fignum, POI in enumerate(POIs, 1):
			# Same text as the former ``print 'POI', POI`` statement, written
			# so it is valid whether print is a statement or a function.
			print('POI %s' % (POI,))
			x, y = loadData(db, POI['LAT'], POI['LONG'], generateX, includeFunc=includeFunc)

			plt.subplot(numRows, numCols, fignum)
			plt.scatter(x, y)
			plt.title(POI['NAME'])
			plt.xlabel(xLabel)
			plt.ylabel(yLabel)
	plt.show()
Example #4
0
def featureSelection():
	"""Plot univariate F-test feature scores for every point of interest.

	For each POI returned by ``getPointsOfInterest()`` the data is loaded
	through ``loadData`` with ``generateAllFeaturesExceptWeather``, a
	``SelectPercentile(f_regression, percentile=10)`` selector is fitted, and
	``-log10`` of its p-values is drawn as a bar chart (one bar per feature
	column) in that POI's subplot.  The figure is shown once the database
	context has been exited.
	"""
	with DB() as db:
		POIs = getPointsOfInterest()

		# One subplot per POI.  The column count keeps the floor(sqrt(n)) + 1
		# rule; the row count is the ceiling of n / columns so the grid always
		# has at least n cells.  Using floor(sqrt(n)) rows left the grid too
		# small for e.g. 3, 7 or 8 POIs, which makes plt.subplot fail on the
		# out-of-range index.
		numCols = int(math.sqrt(len(POIs))) + 1
		numRows = (len(POIs) + numCols - 1) // numCols

		# Bar width, also used to centre each bar on its feature index.
		width = 0.6

		plt.figure()
		plt.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=0.5, hspace=0.5)
		for fignum, POI in enumerate(POIs, 1):
			print(POI)
			x, y = loadData(db, POI['LAT'], POI['LONG'], generateAllFeaturesExceptWeather)
			x, y = np.array(x), np.array(y)

			# One tick / bar per feature column.
			x_indices = np.arange(x.shape[-1])

			# Univariate feature selection with F-test for feature scoring.
			# Only the fitted p-values are used below; the 10% percentile
			# setting does not affect them.
			selector = SelectPercentile(f_regression, percentile=10)
			selector.fit(x, y)
			scores = -np.log10(selector.pvalues_)

			plt.subplot(numRows, numCols, fignum)

			# The bars carry a label so the legend requested below has an
			# entry to show; without one the legend call draws nothing.
			plt.bar(x_indices-(width/2), scores, width=width, color='g',
					label='Univariate score ($-Log(p_{value})$)')
			plt.title(POI['NAME'])
			plt.xlabel('Feature number')
			plt.ylabel('Univariate score ($-Log(p_{value})$)')
			plt.xticks(x_indices)
			plt.axis('tight')
			plt.legend(loc='upper right')
	plt.show()