def evaluation_thresholds_modular(index):
    from modshogun import Labels, ROCEvaluation
    import numpy
    numpy.random.seed(17)

    output = numpy.arange(-1, 1, 0.001)
    output = (0.3*output + 0.7*(numpy.random.rand(len(output)) - 0.5))
    label = [-1.0]*(len(output)/2)
    label.extend([1.0]*(len(output)/2))
    label = numpy.array(label)

    pred = Labels(output)
    truth = Labels(label)

    evaluator = ROCEvaluation()
    evaluator.evaluate(pred, truth)

    [fp, tp] = evaluator.get_ROC()
    thresh = evaluator.get_thresholds()
    b = thresh[index]

    #print "tpr", numpy.mean(output[label>0]>b), tp[index]
    #print "fpr", numpy.mean(output[label<0]>b), fp[index]

    return tp[index], fp[index], numpy.mean(output[label > 0] > b), numpy.mean(output[label < 0] > b)
def outputResultsClassificationWithMajorityClass(out1, out2, out1DecisionValues, out2DecisionValues, train_lt, test_lt, test_majorityClass):
    # Output the results to the appropriate output files
    writeFloatList(out1, TRAINPREDICTIONSEPSILONFILENAME)
    writeFloatList(out2, VALIDATIONPREDICTIONSEPSILONFILENAME)

    numTrainCorrect = 0
    for i in range(len(train_lt)):
        # Iterate through training labels and count the number that are the same as the predicted labels
        if out1[i] == train_lt[i]:
            # The current prediction is correct
            numTrainCorrect = numTrainCorrect + 1
    fracTrainCorrect = float(numTrainCorrect)/float(len(train_lt))
    print "Training accuracy:"
    print fracTrainCorrect

    trainLabels = BinaryLabels(train_lt)
    evaluatorTrain = ROCEvaluation()
    evaluatorTrain.evaluate(out1DecisionValues, trainLabels)
    print "Training AUC:"
    print evaluatorTrain.get_auROC()

    numValidCorrect = 0
    numPosCorrect = 0
    numNegCorrect = 0
    numMajorityClassCorrect = 0
    numMinorityClassCorrect = 0
    for i in range(len(test_lt)):
        # Iterate through validation labels and count the number that are the same as the predicted labels
        if out2[i] == test_lt[i]:
            # The current prediction is correct
            numValidCorrect = numValidCorrect + 1
            if (out2[i] == 1) and (test_lt[i] == 1):
                # The prediction is a positive example
                numPosCorrect = numPosCorrect + 1
            else:
                numNegCorrect = numNegCorrect + 1
            if test_majorityClass[i] == 1:
                numMajorityClassCorrect = numMajorityClassCorrect + 1
            else:
                numMinorityClassCorrect = numMinorityClassCorrect + 1
    fracValidCorrect = float(numValidCorrect)/float(len(test_lt))
    print "Validation accuracy:"
    print fracValidCorrect
    print "Fraction of correct positive examples:"
    print float(numPosCorrect)/float(len(np.where(test_lt > 0)[0]))
    print "Fraction of correct negative examples:"
    print float(numNegCorrect)/float(len(np.where(test_lt <= 0)[0]))
    print "Fraction of correct majority class examples:"
    print float(numMajorityClassCorrect)/float(len(np.where(test_majorityClass > 0)[0]))
    print "Fraction of correct minority class examples:"
    print float(numMinorityClassCorrect)/float(len(np.where(test_majorityClass <= 0)[0]))

    validLabels = BinaryLabels(test_lt)
    evaluatorValid = ROCEvaluation()
    evaluatorValid.evaluate(out2DecisionValues, validLabels)
    print "Validation AUC:"
    print evaluatorValid.get_auROC()
def evaluation_rocevaluation_modular(ground_truth, predicted):
    from modshogun import BinaryLabels
    from modshogun import ROCEvaluation

    ground_truth_labels = BinaryLabels(ground_truth)
    predicted_labels = BinaryLabels(predicted)

    evaluator = ROCEvaluation()
    evaluator.evaluate(predicted_labels, ground_truth_labels)

    return evaluator.get_ROC(), evaluator.get_auROC()
def evaluate(predicted_labels, labels, prefix="Results"):
    from modshogun import PRCEvaluation, ROCEvaluation, AccuracyMeasure

    prc_evaluator = PRCEvaluation()
    roc_evaluator = ROCEvaluation()
    acc_evaluator = AccuracyMeasure()

    auPRC = prc_evaluator.evaluate(predicted_labels, labels)
    auROC = roc_evaluator.evaluate(predicted_labels, labels)
    acc = acc_evaluator.evaluate(predicted_labels, labels)

    print(('{0}: auPRC = {1:.5f}, auROC = {2:.5f}, acc = {3:.5f} '
           '({4}% incorrectly classified)').format(prefix, auPRC, auROC, acc, (1 - acc)*100))
def evaluate(predicted_labels, labels, prefix="Results"):
    from modshogun import PRCEvaluation, ROCEvaluation, AccuracyMeasure

    prc_evaluator = PRCEvaluation()
    roc_evaluator = ROCEvaluation()
    acc_evaluator = AccuracyMeasure()

    auPRC = prc_evaluator.evaluate(predicted_labels, labels)
    auROC = roc_evaluator.evaluate(predicted_labels, labels)
    acc = acc_evaluator.evaluate(predicted_labels, labels)

    # .format() must be applied to the string itself, not to the return value of print()
    print(('{0}: auPRC = {1:.5f}, auROC = {2:.5f}, acc = {3:.5f} '
           '({4}% incorrectly classified)').format(prefix, auPRC, auROC, acc, (1 - acc)*100))
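# Hypothetical usage sketch, not part of the original source: evaluate() expects
# modshogun label objects, so numpy arrays are wrapped in BinaryLabels first
# (assumes modshogun and numpy are installed; the toy data below is illustrative only).
import numpy
from modshogun import BinaryLabels

truth = BinaryLabels(numpy.array([-1.0, -1.0, -1.0, 1.0, 1.0, 1.0]))
scores = BinaryLabels(numpy.array([-0.8, -0.1, 0.3, -0.2, 0.6, 0.9]))
evaluate(scores, truth, prefix="Toy data")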
def evaluation_thresholds_modular(index):
    from modshogun import Labels, ROCEvaluation
    import numpy
    numpy.random.seed(17)

    output = numpy.arange(-1, 1, 0.001)
    output = (0.3 * output + 0.7 * (numpy.random.rand(len(output)) - 0.5))
    label = [-1.0] * (len(output) // 2)
    label.extend([1.0] * (len(output) // 2))
    label = numpy.array(label)

    pred = Labels(output)
    truth = Labels(label)

    evaluator = ROCEvaluation()
    evaluator.evaluate(pred, truth)

    [fp, tp] = evaluator.get_ROC()
    thresh = evaluator.get_thresholds()
    b = thresh[index]

    #print("tpr", numpy.mean(output[label>0]>b), tp[index])
    #print("fpr", numpy.mean(output[label<0]>b), fp[index])

    return tp[index], fp[index], numpy.mean(output[label > 0] > b), numpy.mean(output[label < 0] > b)
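# Hypothetical sanity check, not part of the original source (assumes modshogun is
# installed): the TPR/FPR read off the ROC curve at a given threshold index should
# agree with the empirical rates computed directly from the scores.
tpr, fpr, empirical_tpr, empirical_fpr = evaluation_thresholds_modular(500)
print('TPR: %.4f (ROC curve) vs %.4f (empirical)' % (tpr, empirical_tpr))
print('FPR: %.4f (ROC curve) vs %.4f (empirical)' % (fpr, empirical_fpr))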
def evaluation_rocevaluation_modular(ground_truth, predicted):
    from modshogun import BinaryLabels
    from modshogun import ROCEvaluation

    ground_truth_labels = BinaryLabels(ground_truth)
    predicted_labels = BinaryLabels(predicted)

    evaluator = ROCEvaluation()
    evaluator.evaluate(predicted_labels, ground_truth_labels)

    return evaluator.get_ROC(), evaluator.get_auROC()
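# Hypothetical usage sketch, not part of the original source (assumes modshogun and
# numpy are installed): the random +/-1 ground truth and noisy scores are illustrative only.
import numpy
numpy.random.seed(17)
ground_truth = numpy.where(numpy.random.rand(200) > 0.5, 1.0, -1.0)
predicted = ground_truth + numpy.random.randn(200)

roc_curve, auROC = evaluation_rocevaluation_modular(ground_truth, predicted)
print('auROC = %.5f' % auROC)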
def outputResultsClassification(out1, out2, out1DecisionValues, out2DecisionValues, train_lt, test_lt):
    # Output the results to the appropriate output files
    writeFloatList(out1, TRAINPREDICTIONSEPSILONFILENAME)
    writeFloatList(out2, VALIDATIONPREDICTIONSEPSILONFILENAME)

    numTrainCorrect = 0
    for i in range(len(train_lt)):
        # Iterate through training labels and count the number that are the same as the predicted labels
        if out1[i] == train_lt[i]:
            # The current prediction is correct
            numTrainCorrect = numTrainCorrect + 1
    fracTrainCorrect = float(numTrainCorrect) / float(len(train_lt))
    print "Training accuracy:"
    print fracTrainCorrect

    numValidCorrect = 0
    numPosCorrect = 0
    numNegCorrect = 0
    for i in range(len(test_lt)):
        # Iterate through validation labels and count the number that are the same as the predicted labels
        if out2[i] == test_lt[i]:
            # The current prediction is correct
            numValidCorrect = numValidCorrect + 1
            if (out2[i] == 1) and (test_lt[i] == 1):
                # The prediction is a positive example
                numPosCorrect = numPosCorrect + 1
            else:
                numNegCorrect = numNegCorrect + 1
    fracValidCorrect = float(numValidCorrect) / float(len(test_lt))
    print "Validation accuracy:"
    print fracValidCorrect
    print "Number of correct positive examples:"
    print numPosCorrect
    print "Number of correct negative examples:"
    print numNegCorrect

    validLabels = BinaryLabels(test_lt)
    evaluatorValid = ROCEvaluation()
    evaluatorValid.evaluate(out2DecisionValues, validLabels)
    print "Validation AUC:"
    print evaluatorValid.get_auROC()
def main():
    # Load the two gesture classes (counterclockwise circles vs. the letter Z)
    fright = open(r'circle counterclockwise/train_cc_10_dim.txt', 'r')
    fleft = open(r'letters/Z/train_Z_10_dim.txt', 'r')
    data_right = np.load(fright)
    data_left = np.load(fleft)
    lenr = len(data_right[0])
    lenl = len(data_left[0])
    dim = 10

    # Use the first 90% of each class for training, the rest for testing
    endr = int(0.9*lenr)
    endl = int(0.9*lenl)
    data_right_train = data_right[:, 0:endr]
    data_left_train = data_left[:, 0:endl]
    data_right_test = data_right[:, endr:]
    data_left_test = data_left[:, endl:]
    len_right_train = len(data_right_train[0])
    len_left_train = len(data_left_train[0])
    len_right_test = len(data_right_test[0])
    len_left_test = len(data_left_test[0])
    label_right_train = np.ones(len_right_train)
    label_right_test = np.ones(len_right_test)
    label_left_train = -1*np.ones(len_left_train)
    label_left_test = -1*np.ones(len_left_test)
    C = 10

    # Normalize each feature dimension of the training set to zero mean and unit variance
    train_dataset = np.hstack((data_right_train, data_left_train))
    normalization_mean = []
    normalization_std = []
    for i in range(0, dim):
        mean1 = np.mean(train_dataset[i])
        std1 = np.std(train_dataset[i])
        train_dataset[i] = (train_dataset[i] - mean1)/std1
        normalization_mean.append(mean1)
        normalization_std.append(std1)
    feats_train = RealFeatures(train_dataset)

    # Visualize the first three feature dimensions of the training data
    f, axes = subplots(2, 2)
    fig = figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(data_right_train[0], data_right_train[1], data_right_train[2], c='r')
    ax.scatter(data_left_train[0], data_left_train[1], data_left_train[2], c='b')
    ax.set_xlabel('STD X')
    ax.set_ylabel('STD Y')
    ax.set_zlabel('STD Z')
    ax.set_title('3D PLOT OF STD')
    axes[0, 0].plot(data_right_train[0], data_right_train[1], '*')
    axes[0, 0].plot(data_left_train[0], data_left_train[1], 'x')
    axes[0, 0].set_xlabel('STDX')
    axes[0, 0].set_ylabel('std y')
    axes[0, 0].set_title('STDX VS stdy')
    axes[0, 1].plot(data_right_train[1], data_right_train[2], '*')
    axes[0, 1].plot(data_left_train[1], data_left_train[2], 'x')
    axes[0, 1].set_xlabel('std Y')
    axes[0, 1].set_ylabel('std Z')
    axes[0, 1].set_title('stdY VS stdZ')
    axes[1, 0].plot(data_right_train[0], data_right_train[2], '*')
    axes[1, 0].plot(data_left_train[0], data_left_train[2], 'x')
    axes[1, 0].set_xlabel('std X')
    axes[1, 0].set_ylabel('std z')
    axes[1, 0].set_title('std X VS stdz')
    show()

    train_labels = np.hstack((label_right_train, label_left_train))
    test_dataset = np.hstack((data_right_test, data_left_test))

    # Save the normalization parameters so the same scaling can be applied at prediction time
    normalization_file = open('normalization_parameters_cc_vs_letters.txt', 'w')
    norm_params = [normalization_mean, normalization_std]
    norm_params = np.asarray(norm_params)
    np.save(normalization_file, norm_params)

    # Apply the training-set normalization to the test set
    for i in range(0, dim):
        test_dataset[i] = (test_dataset[i] - normalization_mean[i])/normalization_std[i]

    labels_train = BinaryLabels(train_labels)
    print 'the length of test_dataset is ', len(test_dataset[0])
    feats_test = RealFeatures(test_dataset)
    test_labels = np.hstack((label_right_test, label_left_test))
    print 'the length of test_labels is ', len(test_labels)
    labels_test = BinaryLabels(test_labels)

    # Train a linear SVM with LibLinear
    svm = LibLinear(C, feats_train, labels_train)
    epsilon = 1e-3
    svm.set_epsilon(epsilon)
    svm.train()

    # Evaluate on the training set (ROC) and the test set (accuracy)
    predictions = svm.apply(feats_test)
    predictions_on_train = svm.apply(feats_train)
    evaluator1 = ROCEvaluation()
    evaluator2 = AccuracyMeasure()
    evaluator1.evaluate(predictions_on_train, labels_train)
    evaluator2.evaluate(predictions, labels_test)

    # Serialize the trained classifier
    file_name = "z_vs_cc/accuracy=" + str(evaluator2.get_accuracy()) + " liblinear_cc_vs_Z_svm_classifier_with_C_10_and_normalized.h5"
    f3 = SerializableHdf5File(file_name, "w")
    svm.save_serializable(f3)

    p_test = predictions.get_labels()
    for i in range(0, len(test_labels)):
        print 'predicted : ', p_test[i], ' and actual ', test_labels[i]
    print 'the Area under the curve is ', evaluator1.get_auROC()
    print 'the accuracy is ', evaluator2.get_accuracy()*100
evaluator1.evaluate(predictions, labels_test)
print 'the auc for test set is ', evaluator1.get_auROC()
gk = GaussianKernel(features, features, 1.0)
svm = LibSVM(1000.0, gk, labels)
svm.train()

lda = LDA(1, features, labels)
lda.train()

## plot points
subplot(211)
plot(pos[0, :], pos[1, :], "r.")
plot(neg[0, :], neg[1, :], "b.")
grid(True)
title('Data', size=10)

# plot ROC for SVM
subplot(223)
ROC_evaluation = ROCEvaluation()
ROC_evaluation.evaluate(svm.apply(), labels)
roc = ROC_evaluation.get_ROC()
print roc
plot(roc[0], roc[1])
fill_between(roc[0], roc[1], 0, alpha=0.1)
text(mean(roc[0])/2, mean(roc[1])/2, 'auROC = %.5f' % ROC_evaluation.get_auROC())
grid(True)
xlabel('FPR')
ylabel('TPR')
title('LibSVM (Gaussian kernel, C=%.3f) ROC curve' % svm.get_C1(), size=10)

# plot ROC for LDA
subplot(224)
ROC_evaluation.evaluate(lda.apply(), labels)
roc = ROC_evaluation.get_ROC()
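# The snippet above ends partway through the LDA panel; a plausible completion that
# mirrors the SVM panel (an assumption, not taken from the original source):
plot(roc[0], roc[1])
fill_between(roc[0], roc[1], 0, alpha=0.1)
text(mean(roc[0])/2, mean(roc[1])/2, 'auROC = %.5f' % ROC_evaluation.get_auROC())
grid(True)
xlabel('FPR')
ylabel('TPR')
title('LDA ROC curve', size=10)
show()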