# Code example #1
def processDataset(datafile_train, datafile_test, resultfile_train, resultfile_test, p, k):
		"""Cross-validate a KDE multi-label model on the training data.

		Reads the training split, permutes the label columns, scales the
		features, then runs 5-fold stratified cross-validation.  Inside each
		fold the kernel width C is tuned with an inner 2-repeat stratified
		shuffle split, the model is refit on the fold's training part with the
		best C, and six multi-label metrics are computed on the held-out part.
		The per-fold means and standard deviations are printed to stdout.

		Parameters:
			datafile_train -- path to the training data file read by readData.
			datafile_test, resultfile_train, resultfile_test -- unused here;
				kept so the signature matches the test-set variant.
			p, k -- forwarded to readData (presumably feature/label counts --
				TODO confirm against readData's definition).
		"""
		[x_full, y_full] = readData(datafile_train, p, k)
		# Fixed label permutation; the origin of this ordering is not visible
		# in this file -- NOTE(review): confirm it matches the dataset's labels.
		y = reorder(array(y_full), [0, 6, 4, 9, 1, 7, 2, 11, 8, 5, 3, 10])
		x = array(normalize_scale(x_full))

		ystrat = stratifier(y, 5)
		skfold = StratifiedKFold(ystrat, 5)
		lossHl = []; lossSl = []; lossRl = []
		lossNrl = []; lossOe = []; lossAvprec = []
		for train, test in skfold:
			xx = x[train]; yy = y[train]
			ystrat2 = stratifier(yy, 2)
			bestC = 2 ** -14
			bestMSE = float("inf")  # was 10000000000; inf is safe at any MSE scale
			# Coarse grid search over C (powers of two, step 4 in the exponent)
			# scored by mean MSE over an inner 2-repeat shuffle split.
			for C in [2 ** i for i in range(-14, 14, 4)]:
				sss = StratifiedShuffleSplit(ystrat2, 2, test_size=0.2, random_state=16)
				squaredErrors = []
				for train_index, test_index in sss:  # 2 repeats
					xtr = xx[train_index]
					ytr = yy[train_index]
					W, P, _meanY = kde(xtr, ytr, C)
					[yp_p, yp] = predictkde(xx[test_index], W, P, ytr)
					squaredErrors.append(mse(yp_p, yy[test_index]))
				meanSquaredError = mean(squaredErrors)
				if meanSquaredError < bestMSE:
					bestMSE = meanSquaredError
					bestC = C
			# Refit on the whole fold-training part with the tuned C.
			W, P, _meanY = kde(xx, yy, bestC)

			# Score the fold's held-out part.
			print("predicting...")
			[yp_p, yp] = predictkde(x[test], W, P, yy)
			[hl, sl, rl, nrl, oe, avprec] = computeMetrics(yp, yp_p, y[test])
			lossHl.append(hl); lossSl.append(sl); lossRl.append(rl)
			lossNrl.append(nrl); lossOe.append(oe); lossAvprec.append(avprec)

		print("After training, average performance over 5 folds:")
		for label, losses in [("HL", lossHl), ("SL", lossSl), ("RL", lossRl),
				("NRL", lossNrl), ("OE", lossOe), ("AVPREC", lossAvprec)]:
			arr = array(losses)
			print("\t%s:  %s  +-  %s" % (label, arr.mean(), arr.std()))
		
# --- end of example #1 ---
# Code example #2 -- file: run_kde.py, project: abhishek-kumar/PCC
def processDataset(datafile_train, datafile_test, resultfile_train, resultfile_test, p, k):
		"""Train a KDE multi-label model on the train file and score the test file.

		The kernel width C is tuned by mean MSE over a 5-repeat stratified
		shuffle split of the training data, the model is refit on the full
		training set with the best C, and six multi-label metrics on the test
		set are logged.  Hamming, 0-1 and rank loss are also appended as one
		tab-separated line to resultfile_test.

		Parameters:
			datafile_train  -- path to the training data file read by readData.
			datafile_test   -- path to the test data file read by readData.
			resultfile_train -- unused here; kept for signature compatibility.
			resultfile_test -- path of the output file for the summary line.
			p, k -- forwarded to readData (presumably feature/label counts --
				TODO confirm against readData's definition).
		"""
		logf = logging.getLogger(__name__)
		[x_full, y_full] = readData(datafile_train, p, k)
		y = array(y_full)
		x = array(normalize_scale(x_full))

		[xtest_f, ytest_f] = readData(datafile_test, p, k)
		ytest = array(ytest_f)
		# Scale the test features using the *training* data's statistics.
		xtest = array(normalize_scale(xtest_f, x_full))

		logf.info("Training KDE...")
		ystrat = stratifier(y, 5)
		bestC = 2 ** -14
		bestMSE = float("inf")  # was 10000000000; inf is safe at any MSE scale
		# Fine grid search over C (every power of two in [-14, 13]),
		# scored by mean MSE over a 5-repeat stratified shuffle split.
		for C in [2 ** i for i in range(-14, 14, 1)]:
			sss = StratifiedShuffleSplit(ystrat, 5, test_size=0.2, random_state=16)
			squaredErrors = []
			for train_index, test_index in sss:  # 5 repeats
				xtr = x[train_index]
				ytr = y[train_index]
				W, P, _meanY = kde(xtr, ytr, C)
				[yp_p, yp] = predictkde(x[test_index], W, P, ytr)
				squaredErrors.append(mse(yp_p, y[test_index]))
			meanSquaredError = mean(squaredErrors)
			if meanSquaredError < bestMSE:
				bestMSE = meanSquaredError
				bestC = C
		# Refit on the full training set with the tuned width.
		W, P, _meanY = kde(x, y, bestC)
		# Lazy %-args: formatting happens only if the record is emitted.
		logf.info("Training complete. Best C: %s\tAverage MSE using CV: %s", bestC, bestMSE)

		# Predict on the final test set and log every metric.
		[yp_p, yp] = predictkde(xtest, W, P, y)
		[hl, sl, rl, nrl, oe, avprec] = computeMetrics(yp, yp_p, ytest)
		logf.info("KDE: Test Set Hamming Loss: %s", hl)
		logf.info("KDE Test Set 0-1 Loss: %s", sl)
		logf.info("KDE Test Set Rank Loss: %s", rl)
		logf.info("KDE Test Set Normalized Rank Loss: %s", nrl)
		logf.info("KDE Test Set One-Error: %s", oe)
		logf.info("KDE Test Set Avg Prec: %s", avprec)

		# Open only once there is something to write; `with` guarantees the
		# handle is closed (and nothing is truncated) if training fails.
		with open(resultfile_test, "w") as outf_test:
			outf_test.write("KDE\t" + str(hl) + "\t" + str(sl) + "\t" + str(rl) + "\n")