Beispiel #1
0
def svm_3_pca(train_x,train_y,kaggle_test):
	pcas = [100,500,1000]
	gammas = [0.001,0.01,0.1]
	cs = [1.0,10.0,100.0]
	accuracy_list = []
	m,n = train_x.shape
	kf = KFold(m, n_folds=2)
	for k in pcas:
		print k,'pca'
		accuracy_list_p = []
		train_x_transform,pca_f = pca(train_x,k)
		kaggle_test_transform = pca_f.transform(kaggle_test)
		# X_train, X_valid, Y_train, Y_valid = train_test_split(train_x_transform, train_y,test_size=0.2,random_state=0)
		for g in gammas:
			for c in cs:
				accs = []
				counter = 0 
				for train_index, test_index in kf:
					X_train, X_test,y_train, y_test = train_x[train_index],train_x[test_index],train_y[train_index],train_y[test_index]
					clf = svm.SVC(C= c,gamma=g)
					clf.fit(X_train,y_train)
					pred = clf.predict(X_test)
					# print type(pred)
					corr, acc = accuracy(y_test,pred)
					accs.append(acc)
					print "results of svm using pca "+str(counter)+'counter '+str(k)+'pca components '+str(c)+'c'+str(g)+'gamma ' + ":\n%s\n" %(metrics.classification_report(y_test,pred))
					fname2 = 'results/svm_pca/'+str(counter)+'counter'+str(k)+'pca components'+str(c)+'c'+str(g)+'gamma'+'cfm_svm_pca.png'
					cfm_svm_pca(pred,y_test,c,g,k,fname2)
					counter +=1

				acc_mean = np.mean(accs)
				accuracy_list.append((k,c,g,acc_mean))
				accuracy_list_p.append((c,g,acc_mean))

		fname = 'results/svm_pca/svm_pca_accuracy_'+str(k)+'pca.png'
		plot_svm_accuracy(accuracy_list_p,fname)

	sortedaccuracy_list = sorted(accuracy_list, key = lambda x:x[3], reverse = True)
	print sortedaccuracy_list
	pca_optimal = sortedaccuracy_list[0][0]
	c_optimal = sortedaccuracy_list[0][1]
	g_optimal = sortedaccuracy_list[0][2]

	train_x_transform,pca_f = pca(train_x,pca_optimal)
	kaggle_test_transform = pca_f.transform(kaggle_test)

	print "classifcation kaggle"
	clf = svm.SVC(C= c_optimal,gamma=g_optimal)
	clf.fit(train_x_transform,train_y)
	pred_kaggle_svm = clf.predict(kaggle_test_transform)
	fname3 = 'results/svm_pca/svm_pca_kaggle.csv'
	write_tokaggle(fname3,pred_kaggle_svm)
Beispiel #2
0
def svm1(X, X_valid, y, Y_valid,train_x,train_y,kaggle_test):
	#X,y-training set
	#X_valid -validationset
	#train_x,whole training set
	#X_test kaggle
	m, n = X.shape
	kf = KFold(m, n_folds=2)
	gammas = [0.001,0.01,0.1]
	cs = [1.0,10.0,100.0]
	accuracy_list = []
	for g in gammas:
		for c in cs:
			accs = []
			counter = 0 
			for train_index, test_index in kf:
				X_train, X_test = X[train_index], X[test_index]
				y_train, y_test = y[train_index], y[test_index]
				clf = svm.SVC(C= c,gamma=g)
				clf.fit(X_train,y_train)
				pred = clf.predict(X_test)
				# print type(pred)
				corr, acc = accuracy(y_test,pred)
				accs.append(acc)
				counter +=1
			acc_mean = np.mean(accs)
			accuracy_list.append((c,g,acc_mean))

	sortedaccuracy_list = sorted(accuracy_list, key = lambda x:x[2], reverse = True)
	print sortedaccuracy_list
	c_optimal = sortedaccuracy_list[0][0]
	g_optimal = sortedaccuracy_list[0][1]
	fname = 'results/svm/svm_accuracy.png'
	plot_svm_accuracy(accuracy_list,fname)

	# check validationset
	print "testing on validationset"
	clf = svm.SVC(C= c_optimal,gamma=g_optimal)
	clf.fit(X,y)
	pred__valid_svm = clf.predict(X_valid)
	print "results of svm using "+str(c_optimal)+'c'+str(g_optimal)+'gamma' + ":\n%s\n" %(metrics.classification_report(Y_valid,pred__valid_svm))
	fname2 = 'results/svm/'+str(c_optimal)+'c'+str(g_optimal)+'gamma'+'cfm_svm.png'
	cfm_svm(pred__valid_svm,Y_valid,c_optimal,g_optimal,fname2)
	# classifcation of kaggle testset
	print "classifcation kaggle"
	clf = svm.SVC(C= c_optimal,gamma=g_optimal)
	clf.fit(train_x,train_y)
	pred_kaggle_svm = clf.predict(kaggle_test)
	fname3 = 'results/svm/svm_kaggle.csv'
	write_tokaggle(fname3,pred_kaggle_svm)
Beispiel #3
0
def logistic_regression2_pca(train_x,train_y,kaggle_test):
	pcas = [100,500,1000]
	m,n = train_x.shape
	kf = KFold(m, n_folds=2)
	cs = [1.0,10.0,100.0]
	accuracy_list=[]

	for k in pcas:
		for c in cs:
			accs = []
			counter = 0 
			for train_index, test_index in kf:
				print "counter" + str(counter)
				X_train, X_test,y_train, y_test = train_x[train_index],train_x[test_index],train_y[train_index],train_y[test_index]
				clf = linear_model.LogisticRegression(C = c)
				clf.fit(X_train,y_train)
				pred = clf.predict(X_test)
				corr, acc = accuracy(y_test,pred)
				accs.append(acc)
				print "results of logistic_regression using pca "+str(counter)+'counter '+str(k)+'pca components '+str(c)+'lambda'+":\n%s\n" %(metrics.classification_report(y_test,pred))
				fname2 = 'results/logistic_regression_pca/'+str(counter)+'counter'+str(k)+'pca components'+str(c)+'lambda'+'cfm_logistic_regression_pca.png'
				cfm_logistic_regression_pca(pred,y_test,c,k,fname2)
				counter += 1

			acc_mean = np.mean(accs)
			accuracy_list.append((k,c,acc_mean))
	sortedaccuracy_list = sorted(accuracy_list, key = lambda x:x[2], reverse = True)
	print sortedaccuracy_list
	c_optimal = sortedaccuracy_list[0][1]
	pca_optimal = sortedaccuracy_list[0][0]
	fname = 'results/logistic_regression_pca/logistic_regression_accuracy_pca.png'
	plot_logistic_regression_accuracy_pca(accuracy_list,fname)

	train_x_transform,pca_f = pca(train_x,pca_optimal)
	kaggle_test_transform = pca_f.transform(kaggle_test)

	print "classifcation kaggle"
	clf = linear_model.LogisticRegression(C= c_optimal)
	clf.fit(train_x_transform,train_y)
	pred_kaggle_svm = clf.predict(kaggle_test_transform)
	fname3 = 'results/logistic_regression_pca/logistic_regression_pca_kaggle.csv'
	write_tokaggle(fname3,pred_kaggle_svm)
Beispiel #4
0
def logistic_regression(X, X_valid, y, Y_valid,train_x,train_y,kaggle_test):
	m, n = X.shape
	kf = KFold(m, n_folds=2)
	cs = [1.0,10.0,100.0]
	accuracy_list=[]
	for c in cs:
		accs = []
		counter = 0 
		for train_index, test_index in kf:
			print "counter" + str(counter)
			X_train, X_test = X[train_index], X[test_index]
			y_train, y_test = y[train_index], y[test_index]
			clf = linear_model.LogisticRegression(C = c)
			clf.fit(X_train,y_train)
			pred = clf.predict(X_test)
			corr, acc = accuracy(y_test,pred)
			accs.append(acc)
			counter +=1
		acc_mean = np.mean(accs)
		accuracy_list.append((c,acc_mean))
	sortedaccuracy_list = sorted(accuracy_list, key = lambda x:x[1], reverse = True)
	print sortedaccuracy_list
	c_optimal = sortedaccuracy_list[0][0]
	fname = 'results/logistic_regression/logistic_regression_accuracy.png'
	plot_logistic_regression_accuracy(accuracy_list,fname)
	# check validationset
	print "testing on validationset"
	clf = linear_model.LogisticRegression(C = c_optimal)
	clf.fit(X,y)
	pred_valid_logistic_regression = clf.predict(X_valid)
	print "results of LogisticRegression using "+str(c_optimal)+'Lambda regularization' + ":\n%s\n" %(metrics.classification_report(Y_valid,pred_valid_logistic_regression))
	fname2 = 'results/logistic_regression/'+str(c_optimal)+'lambda'+'cfm_logistic_regression.png'
	cfm_logistic_regression(pred_valid_logistic_regression,Y_valid,c_optimal,fname2)
	# classifcation of kaggle testset
	print "classifcation kaggle"
	clf = linear_model.LogisticRegression(C = c_optimal)
	clf.fit(train_x,train_y)
	pred_kaggle_logistic = clf.predict(kaggle_test)
	fname3 = 'results/logistic_regression/logistic_regression_kaggle.csv'
	write_tokaggle(fname3,pred_kaggle_logistic)
Beispiel #5
0
def svm2_pca(X, X_valid, y, Y_valid,train_x,train_y,kaggle_test):
	pcas = [100,500,1000]
	gammas = [0.001,0.01,0.1]
	cs = [1.0,10.0,100.0]
	accuracy_list = []
	for pca in pcas:
		print 'pca',pca
		accuracy_list_p = []
		pca = PCA(n_components = pca,whiten = True)
		train_x_p = pca.fit_transform(train_x)

		X_p = pca.transform(X)
		X_valid_p = pca.transform(X_valid)
		kaggle_test_p = pca.transform(kaggle_test)
		m, n = X_p.shape
		kf = KFold(m, n_folds=2)
		for g in gammas:
			for c in cs:
				accs = []
				counter = 0 
				for train_index, test_index in kf:
					X_train, X_test = X_p[train_index], X_p[test_index]
					y_train, y_test = y[train_index], y[test_index]
					clf = svm.SVC(C= c,gamma=g)
					clf.fit(X_train,y_train)
					pred = clf.predict(X_test)
					# print type(pred)
					corr, acc = accuracy(y_test,pred)
					accs.append(acc)
					counter +=1
				acc_mean = np.mean(accs)
				accuracy_list.append((pca,c,g,acc_mean))
				accuracy_list_p.append((c,g,acc_mean))
		fname = 'results/svm_pca/svm_pca_accuracy_'+str(pca)+'pca.png'
		plot_svm_accuracy(accuracy_list_p,fname)

	sortedaccuracy_list = sorted(accuracy_list, key = lambda x:x[3], reverse = True)
	print sortedaccuracy_list
	pca_optimal = sortedaccuracy_list[0][0]
	c_optimal = sortedaccuracy_list[0][1]
	g_optimal = sortedaccuracy_list[0][2]

	pca = PCA(n_components = pca_optimal,whiten = True)
	train_x_p = pca.fit_transform(train_x)
	X_p = pca.transform(X)
	X_valid_p = pca.transform(X_valid)
	kaggle_test_p = pca.transform(kaggle_test)
	# check validationset
	print "testing on validationset"

	clf = svm.SVC(C= c_optimal,gamma=g_optimal)
	clf.fit(X_p,y)
	pred__valid_svm = clf.predict(X_valid_p)
	print "results of svm using pca "+str(pca_optimal)+'pca components'+str(c_optimal)+'c'+str(g_optimal)+'gamma' + ":\n%s\n" %(metrics.classification_report(Y_valid,pred__valid_svm))
	fname2 = 'results/svm_pca/'+str(pca_optimal)+'pca components'+str(c_optimal)+'c'+str(g_optimal)+'gamma'+'cfm_svm_pca.png'
	cfm_svm(pred__valid_svm,Y_valid,c_optimal,g_optimal,fname2)
	# classifcation of kaggle testset
	print "classifcation kaggle"
	clf = svm.SVC(C= c_optimal,gamma=g_optimal)
	clf.fit(train_x_p,train_y)
	pred_kaggle_svm = clf.predict(kaggle_test_p)
	fname3 = 'results/svm_pca/svm_pca_kaggle.csv'
	write_tokaggle(fname3,pred_kaggle_svm)