import numpy as np
from modshogun import BinaryLabels, ROCEvaluation

def outputResultsClassificationWithMajorityClass(out1, out2, out1DecisionValues, out2DecisionValues, train_lt, test_lt, test_majorityClass):
	# Assumes writeFloatList and the prediction-filename constants are defined elsewhere in this module
	# Output the results to the appropriate output files
	writeFloatList(out1, TRAINPREDICTIONSEPSILONFILENAME)
	writeFloatList(out2, VALIDATIONPREDICTIONSEPSILONFILENAME)
	
	numTrainCorrect = 0
	for i in range(len(train_lt)):
		# Iterate through training labels and count the number that are the same as the predicted labels
		if out1[i] == train_lt[i]:
			# The current prediction is correct
			numTrainCorrect = numTrainCorrect + 1
	fracTrainCorrect = float(numTrainCorrect)/float(len(train_lt))
	print "Training accuracy:"
	print fracTrainCorrect
	
	trainLabels = BinaryLabels(train_lt)
	evaluatorTrain = ROCEvaluation()
	evaluatorTrain.evaluate(out1DecisionValues, trainLabels)
	print "Training AUC:"
	print evaluatorTrain.get_auROC()
	
	numValidCorrect = 0
	numPosCorrect = 0
	numNegCorrect = 0
	numMajorityClassCorrect = 0
	numMinorityClassCorrect = 0
	for i in range(len(test_lt)):
		# Iterate through validation labels and count the number that are the same as the predicted labels
		if out2[i] == test_lt[i]:
			# The current prediction is correct
			numValidCorrect = numValidCorrect + 1
			if out2[i] == 1:
				# Correctly predicted positive example
				numPosCorrect = numPosCorrect + 1
			else:
				# Correctly predicted negative example
				numNegCorrect = numNegCorrect + 1
			if test_majorityClass[i] == 1:
				numMajorityClassCorrect = numMajorityClassCorrect + 1
			else:
				numMinorityClassCorrect = numMinorityClassCorrect + 1
	fracValidCorrect = float(numValidCorrect)/float(len(test_lt))
	print "Validation accuracy:"
	print fracValidCorrect
	print "Fraction of correct positive examples:"
	print float(numPosCorrect)/float(len(np.where(test_lt > 0)[0]))
	print "Fraction of correct negative examples:"
	print float(numNegCorrect)/float(len(np.where(test_lt <= 0)[0]))
	print "Fraction of correct majority class examples:"
	print float(numMajorityClassCorrect)/float(len(np.where(test_majorityClass > 0)[0]))
	print "Fraction of correct minority class examples:"
	print float(numMinorityClassCorrect)/float(len(np.where(test_majorityClass <= 0)[0]))
	
	validLabels = BinaryLabels(test_lt)
	evaluatorValid = ROCEvaluation()
	evaluatorValid.evaluate(out2DecisionValues, validLabels)
	print "Validation AUC:"
	print evaluatorValid.get_auROC()
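
# A minimal usage sketch for the function above, on toy arrays. writeFloatList
# and the two filename constants are hypothetical stand-ins here; in the
# original module they are defined elsewhere.
TRAINPREDICTIONSEPSILONFILENAME = 'train_predictions.txt'       # assumed value
VALIDATIONPREDICTIONSEPSILONFILENAME = 'valid_predictions.txt'  # assumed value

def writeFloatList(values, fileName):
	# Stand-in writer: one float per line
	outFile = open(fileName, 'w')
	for v in values:
		outFile.write(str(v) + '\n')
	outFile.close()

train_lt = np.array([1.0, -1.0, 1.0, -1.0])             # training labels (+/-1)
test_lt = np.array([1.0, 1.0, -1.0, -1.0])              # validation labels (+/-1)
test_majorityClass = np.array([-1.0, -1.0, 1.0, 1.0])   # 1 = majority-class example
out1 = np.array([1.0, -1.0, -1.0, -1.0])                # predicted training labels
out2 = np.array([1.0, -1.0, -1.0, -1.0])                # predicted validation labels
outputResultsClassificationWithMajorityClass(
	out1, out2,
	BinaryLabels(np.array([0.9, -0.8, -0.2, -0.7])),    # training decision values
	BinaryLabels(np.array([0.8, -0.3, -0.6, -0.9])),    # validation decision values
	train_lt, test_lt, test_majorityClass)
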
def evaluation_rocevaluation_modular (ground_truth, predicted):
	from modshogun import BinaryLabels
	from modshogun import ROCEvaluation

	ground_truth_labels = BinaryLabels(ground_truth)
	predicted_labels = BinaryLabels(predicted)

	evaluator = ROCEvaluation()
	evaluator.evaluate(predicted_labels, ground_truth_labels)

	return evaluator.get_ROC(), evaluator.get_auROC()
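
# A small usage sketch, in the spirit of Shogun's shipped demo: random +/-1
# ground-truth labels and random real-valued scores as the predictions.
import numpy as np
ground_truth = np.where(np.random.randn(50) > 0, 1.0, -1.0)
predicted = np.random.randn(50)
roc_curve, auc = evaluation_rocevaluation_modular(ground_truth, predicted)
print(auc)
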
Example #4
import numpy as np
from pylab import subplots, figure, show
from mpl_toolkits.mplot3d import Axes3D
from modshogun import RealFeatures, BinaryLabels, LibLinear, ROCEvaluation, AccuracyMeasure, SerializableHdf5File

def main():
	# Load the .npy-format training data (saved with np.save, despite the .txt extension);
	# np.load needs the files opened in binary mode
	fright=open(r'circle counterclockwise/train_cc_10_dim.txt','rb')
	fleft=open(r'letters/Z/train_Z_10_dim.txt','rb')
	data_right=np.load(fright)
	data_left=np.load(fleft)
	lenr=len(data_right[0])
	lenl=len(data_left[0])
	dim=10
	# Use the first 90% of the samples of each class for training, the rest for testing
	endr=int(0.9*lenr)
	endl=int(0.9*lenl)
	data_right_train=data_right[:,0:endr]
	data_left_train=data_left[:,0:endl]
	data_right_test=data_right[:,endr:]
	data_left_test=data_left[:,endl:]
	
	len_right_train=len(data_right_train[0])
	len_left_train=len(data_left_train[0])
	len_right_test=len(data_right_test[0])
	len_left_test=len(data_left_test[0])
	
	label_right_train=np.ones(len_right_train)
	label_right_test=np.ones(len_right_test)
	
	label_left_train=-1*np.ones(len_left_train)
	label_left_test=-1*np.ones(len_left_test)
	
	C=10
	
	train_dataset=np.hstack((data_right_train,data_left_train))
	normalization_mean=[]
	normalization_std=[]
	
	# Standardize each feature dimension (z-score) using training-set statistics
	for i in range(0,dim):
		mean1=np.mean(train_dataset[i])
		std1=np.std(train_dataset[i])
		train_dataset[i]=(train_dataset[i]-mean1)/std1
		normalization_mean.append(mean1)
		normalization_std.append(std1)
	feats_train=RealFeatures(train_dataset)
	f,axes=subplots(2,2)
	fig = figure()
	ax = fig.add_subplot(111, projection='3d')
	ax.scatter(data_right_train[0],data_right_train[1],data_right_train[2],c='r')
	ax.scatter(data_left_train[0],data_left_train[1],data_left_train[2],c='b')
	ax.set_xlabel('Std X')
	ax.set_ylabel('Std Y')
	ax.set_zlabel('Std Z')
	ax.set_title('3D plot of std features')
	axes[0,0].plot(data_right_train[0],data_right_train[1],'*')
	axes[0,0].plot(data_left_train[0],data_left_train[1],'x')
	axes[0,0].set_xlabel('Std X')
	axes[0,0].set_ylabel('Std Y')
	axes[0,0].set_title('Std X vs Std Y')
	axes[0,1].plot(data_right_train[1],data_right_train[2],'*')
	axes[0,1].plot(data_left_train[1],data_left_train[2],'x')
	axes[0,1].set_xlabel('Std Y')
	axes[0,1].set_ylabel('Std Z')
	axes[0,1].set_title('Std Y vs Std Z')
	axes[1,0].plot(data_right_train[0],data_right_train[2],'*')
	axes[1,0].plot(data_left_train[0],data_left_train[2],'x')
	axes[1,0].set_xlabel('Std X')
	axes[1,0].set_ylabel('Std Z')
	axes[1,0].set_title('Std X vs Std Z')
	show()
	train_labels=np.hstack((label_right_train,label_left_train))
	test_dataset=np.hstack((data_right_test,data_left_test))
	normalization_file=open('normalization_parameters_cc_vs_letters.txt','wb')
	norm_params=[normalization_mean,normalization_std]
	norm_params=np.asarray(norm_params)
	np.save(normalization_file,norm_params)
	
	for i in range(0,dim):
		test_dataset[i]=(test_dataset[i]-normalization_mean[i])/normalization_std[i]
	
	labels_train=BinaryLabels(train_labels)
	
	
	print 'the length of test_dataset is ',len(test_dataset[0])
	feats_test=RealFeatures(test_dataset)
	
	test_labels=np.hstack((label_right_test,label_left_test))
	print 'the length of test_labels is ',len(test_labels)
	labels_test=BinaryLabels(test_labels)
	
	svm=LibLinear(C,feats_train,labels_train)
	epsilon=1e-3
	svm.set_epsilon(epsilon)	# solver convergence tolerance
	svm.train()
	predictions=svm.apply(feats_test)
	predictions_on_train=svm.apply(feats_train)
	evaluator1=ROCEvaluation()
	evaluator2=AccuracyMeasure()
	
	evaluator1.evaluate(predictions_on_train,labels_train)
	evaluator2.evaluate(predictions,labels_test)
	file_name="z_vs_cc/accuracy="+ str(evaluator2.get_accuracy()) + " liblinear_cc_vs_Z_svm_classifier_with_C_10_and_normalized.h5"
	
	f3=SerializableHdf5File(file_name, "w")
	svm.save_serializable(f3)
	p_test=predictions.get_labels()
	for i in range(0,len(test_labels)):
		print 'predicted: ',p_test[i],' and actual: ',test_labels[i]
	print 'the training set AUC is ',evaluator1.get_auROC()
	print 'the test set accuracy is ',evaluator2.get_accuracy()*100
	evaluator1.evaluate(predictions,labels_test)
	print 'the test set AUC is ',evaluator1.get_auROC()
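
# A sketch of how the saved classifier and normalization parameters could be
# reloaded for later inference; load_serializable is the counterpart of the
# save_serializable call above, and the file names match those written by main().
def load_classifier(model_file_name):
	svm = LibLinear()
	model_file = SerializableHdf5File(model_file_name, "r")
	svm.load_serializable(model_file)
	norm_params = np.load(open('normalization_parameters_cc_vs_letters.txt', 'rb'))
	return svm, norm_params[0], norm_params[1]   # classifier, per-dim mean, per-dim std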
Example #5
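# This snippet assumes the variables pos, neg, labels, svm and lda were created
# earlier (it is a fragment of Shogun's graphical ROC demo). A minimal setup
# sketch, assuming two Gaussian blobs, a Gaussian-kernel LibSVM and an LDA
# classifier from the modular API:
from numpy import concatenate, ones, mean
from numpy.random import randn, seed
from pylab import subplot, plot, grid, title, text, xlabel, ylabel, fill_between
from modshogun import RealFeatures, BinaryLabels, GaussianKernel, LibSVM, LDA, ROCEvaluation

seed(42)
pos = randn(2, 100) + 1                 # positive class: blob centred at (+1, +1)
neg = randn(2, 100) - 1                 # negative class: blob centred at (-1, -1)
features = RealFeatures(concatenate((pos, neg), axis=1))
labels = BinaryLabels(concatenate((ones(100), -ones(100))))

svm = LibSVM(1.0, GaussianKernel(features, features, 1.0), labels)
svm.train()
lda = LDA(1e-4, features, labels)       # 1e-4: ridge regularization (gamma)
lda.train()
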
## plot points
subplot(211)
plot(pos[0,:], pos[1,:], "r.")
plot(neg[0,:], neg[1,:], "b.")
grid(True)
title('Data',size=10)

# plot ROC for SVM
subplot(223)
ROC_evaluation=ROCEvaluation()
ROC_evaluation.evaluate(svm.apply(),labels)
roc = ROC_evaluation.get_ROC()
print roc
plot(roc[0], roc[1])
fill_between(roc[0],roc[1],0,alpha=0.1)
text(mean(roc[0])/2,mean(roc[1])/2,'auROC = %.5f' % ROC_evaluation.get_auROC())
grid(True)
xlabel('FPR')
ylabel('TPR')
title('LibSVM (Gaussian kernel, C=%.3f) ROC curve' % svm.get_C1(),size=10)

# plot ROC for LDA
subplot(224)
ROC_evaluation.evaluate(lda.apply(),labels)
roc = ROC_evaluation.get_ROC()
plot(roc[0], roc[1])
fill_between(roc[0],roc[1],0,alpha=0.1)
text(mean(roc[0])/2,mean(roc[1])/2,'auROC = %.5f' % ROC_evaluation.get_auROC())
grid(True)
xlabel('FPR')
ylabel('TPR')