def metric_lmnn_modular(train_fname=traindat, test_fname=testdat, label_train_fname=label_traindat, k=3):
    """Learn an LMNN metric on CSV training data and classify test data with KNN.

    Returns a (trained LMNN, predicted label array) pair, or None when the
    modshogun bindings are unavailable.
    """
    try:
        from modshogun import RealFeatures, MulticlassLabels, LMNN, KNN, CSVFile
    except ImportError:
        return

    # Load features and labels from CSV files into Shogun containers.
    train_feats = RealFeatures(CSVFile(train_fname))
    test_feats = RealFeatures(CSVFile(test_fname))
    train_labels = MulticlassLabels(CSVFile(label_train_fname))

    # Learn the metric with Large Margin Nearest Neighbour.
    metric_learner = LMNN(train_feats, train_labels, k)
    metric_learner.train()
    learned_distance = metric_learner.get_distance()

    # k-nearest-neighbour classification under the learned distance.
    classifier = KNN(k, learned_distance, train_labels)
    classifier.train()
    predictions = classifier.apply(test_feats).get_labels()

    return metric_learner, predictions
def run_knn(Xtrain,Ytrain,Xtest,Ytest): prod_features = RealFeatures(Xtrain) prod_labels = MulticlassLabels(Ytrain) test_features = RealFeatures(Xtest) test_labels = MulticlassLabels(Ytest) if os.path.exists(".lmnn_model30000_5_reg05_cor20"): print "Using LMNN distance" lmnn = LMNN() sf = SerializableAsciiFile(".lmnn_model30000_5_reg05_cor20", 'r') lmnn.load_serializable(sf) diagonal = np.diag(lmnn.get_linear_transform()) #print('%d out of %d elements are non-zero.' % (np.sum(diagonal != 0), diagonal.size)) #diagonal = lmnn.get_linear_transform() np.set_printoptions(precision=1,threshold=1e10,linewidth=500) #lmnn.set_diagonal(True) dist = lmnn.get_distance() else: dist = EuclideanDistance() # classifier knn = KNN(K, dist, prod_labels) #knn.set_use_covertree(True) parallel = knn.get_global_parallel() parallel.set_num_threads(4) knn.set_global_parallel(parallel) knn.train(prod_features) print "Classifying test set..." pred = knn.apply_multiclass(test_features) print "Accuracy = %2.2f%%" % (100*np.mean(pred == Ytest)) cm = build_confusion_matrix(Ytest, pred, NCLASSES) #save_confusion_matrix(cm) #cm = load_confusion_matrix() print "Confusion matrix: " print cm #plot_confusion_matrix(cm) #results = predict_class_prob(pred, cm) #nn = build_neighbours_matrix(knn, prod_labels) #results = predict_class_from_neighbours(nn) #print "Log loss: " + str(calculate_log_loss(results, Ytest)) #print_prediction_output(results) return cm
def lmnn_diagonal(train_features, train_labels, test_features, test_labels, k=1): from modshogun import LMNN, KNN, MSG_DEBUG, MulticlassAccuracy import numpy lmnn = LMNN(train_features, train_labels, k) lmnn.set_diagonal(True) lmnn.train() distance = lmnn.get_distance() knn = KNN(k, distance, train_labels) knn.train() train_output = knn.apply() test_output = knn.apply(test_features) evaluator = MulticlassAccuracy() print 'LMNN-diagonal training error is %.4f' % ((1-evaluator.evaluate(train_output, train_labels))*100) print 'LMNN-diagonal test error is %.4f' % ((1-evaluator.evaluate(test_output, test_labels))*100)
def lmnn(train_features, train_labels, test_features, test_labels, k=1): from modshogun import LMNN, KNN, MSG_DEBUG, MulticlassAccuracy import numpy # dummy = LMNN() # dummy.io.set_loglevel(MSG_DEBUG) lmnn = LMNN(train_features, train_labels, k) lmnn.train() distance = lmnn.get_distance() knn = KNN(k, distance, train_labels) knn.train() train_output = knn.apply() test_output = knn.apply(test_features) evaluator = MulticlassAccuracy() print 'LMNN training error is %.4f' % ((1-evaluator.evaluate(train_output, train_labels))*100) print 'LMNN test error is %.4f' % ((1-evaluator.evaluate(test_output, test_labels))*100)
def lmnn(train_features, train_labels, test_features, test_labels, k=1): from modshogun import LMNN, KNN, MSG_DEBUG, MulticlassAccuracy import numpy # dummy = LMNN() # dummy.io.set_loglevel(MSG_DEBUG) lmnn = LMNN(train_features, train_labels, k) lmnn.train() distance = lmnn.get_distance() knn = KNN(k, distance, train_labels) knn.train() train_output = knn.apply() test_output = knn.apply(test_features) evaluator = MulticlassAccuracy() print 'LMNN training error is %.4f' % ( (1 - evaluator.evaluate(train_output, train_labels)) * 100) print 'LMNN test error is %.4f' % ( (1 - evaluator.evaluate(test_output, test_labels)) * 100)
def metric_lmnn_modular(train_fname=traindat, test_fname=testdat, label_train_fname=label_traindat, k=3):
    """LMNN metric learning followed by KNN classification.

    Loads train/test features and training labels from CSV files, trains
    LMNN, and classifies the test features with KNN under the learned
    distance.  Returns (lmnn, output) or None when modshogun is missing.
    """
    try:
        from modshogun import RealFeatures, MulticlassLabels, LMNN, KNN, CSVFile
    except ImportError:
        # Shogun bindings not installed; nothing to do.
        return

    feats_train = RealFeatures(CSVFile(train_fname))
    feats_test = RealFeatures(CSVFile(test_fname))
    labels = MulticlassLabels(CSVFile(label_train_fname))

    # Train the LMNN metric learner.
    lmnn = LMNN(feats_train, labels, k)
    lmnn.train()

    # Classify with KNN using the distance induced by the learned metric.
    knn = KNN(k, lmnn.get_distance(), labels)
    knn.train()
    output = knn.apply(feats_test).get_labels()

    return lmnn, output
def lmnn_classify(traindat, testdat, k=3):
    """Classify `testdat` with KNN under an LMNN-learned metric.

    Trains LMNN (debug log level, at most 1200 iterations) on `traindat`,
    then returns the multiclass error rate (1 - accuracy) on `testdat`.
    """
    from modshogun import LMNN, KNN, MulticlassAccuracy, MSG_DEBUG

    features, labels = traindat.features, traindat.labels

    # Learn the metric.
    metric = LMNN(features, labels, k)
    metric.set_maxiter(1200)
    metric.io.set_loglevel(MSG_DEBUG)
    metric.train()

    # k-NN under the learned distance.
    classifier = KNN(k, metric.get_distance(), labels)
    classifier.train()

    test_features, test_labels = testdat.features, testdat.labels
    predictions = classifier.apply(test_features)

    accuracy = MulticlassAccuracy().evaluate(predictions, test_labels)
    return 1 - accuracy
def lmnn_classify(traindat, testdat, k=3):
    """Return the multiclass error rate of LMNN+KNN on `testdat`.

    LMNN is trained on `traindat` with debug logging and a cap of 1200
    iterations; `testdat` is then classified by k-NN under the learned
    distance.
    """
    from modshogun import LMNN, KNN, MulticlassAccuracy, MSG_DEBUG

    train_features = traindat.features
    train_labels = traindat.labels

    lmnn = LMNN(train_features, train_labels, k)
    lmnn.set_maxiter(1200)
    lmnn.io.set_loglevel(MSG_DEBUG)
    lmnn.train()

    distance = lmnn.get_distance()
    knn = KNN(k, distance, train_labels)
    knn.train()

    test_features = testdat.features
    test_labels = testdat.labels
    predicted_labels = knn.apply(test_features)

    evaluator = MulticlassAccuracy()
    return 1 - evaluator.evaluate(predicted_labels, test_labels)
def lmnn_diagonal(train_features, train_labels, test_features, test_labels, k=1): from modshogun import LMNN, KNN, MSG_DEBUG, MulticlassAccuracy import numpy lmnn = LMNN(train_features, train_labels, k) lmnn.set_diagonal(True) lmnn.train() distance = lmnn.get_distance() knn = KNN(k, distance, train_labels) knn.train() train_output = knn.apply() test_output = knn.apply(test_features) evaluator = MulticlassAccuracy() print 'LMNN-diagonal training error is %.4f' % ( (1 - evaluator.evaluate(train_output, train_labels)) * 100) print 'LMNN-diagonal test error is %.4f' % ( (1 - evaluator.evaluate(test_output, test_labels)) * 100)
# Build a KNN on Euclidean distance and draw the neighbour graph in three
# stages: before metric learning, under the LMNN distance, and on the
# explicitly transformed data.
distance = EuclideanDistance(features, features)
k = 2
knn = KNN(k, distance, labels)

# Panel 0: original data with the Euclidean nearest-neighbour graph.
plot_data(x, y, axarr[0])
plot_neighborhood_graph(x, knn.nearest_neighbors(), axarr[0])
axarr[0].set_aspect('equal')
axarr[0].set_xlim(-6, 4)
axarr[0].set_ylim(-3, 2)

# Learn the LMNN transform (generous iteration cap of 10000).
lmnn = LMNN(features, labels, k)
lmnn.set_maxiter(10000)
lmnn.train()
L = lmnn.get_linear_transform()
knn.set_distance(lmnn.get_distance())

# Panel 1: same points, neighbour graph under the learned LMNN distance.
plot_data(x, y, axarr[1])
plot_neighborhood_graph(x, knn.nearest_neighbors(), axarr[1])
axarr[1].set_aspect('equal')
axarr[1].set_xlim(-6, 4)
axarr[1].set_ylim(-3, 2)

xL = numpy.dot(x, L.T) ## to see the data after the linear transformation

# Panel 2: data explicitly mapped through L; Euclidean distance on the
# mapped points is presumably equivalent to the LMNN distance on the
# originals — NOTE(review): confirm against the panel-1 graph.
features = RealFeatures(xL.T)
distance = EuclideanDistance(features, features)
knn.set_distance(distance)
plot_data(xL, y, axarr[2])
plot_neighborhood_graph(xL, knn.nearest_neighbors(), axarr[2])
axarr[2].set_aspect('equal')
# Wrap the (transposed) train/test arrays into Shogun feature/label
# containers.  NOTE(review): the .T suggests samples are stored as columns
# on the Shogun side — confirm against how Xtrain/Xtest are built.
prod_features = RealFeatures(Xtrain.T)
prod_labels = MulticlassLabels(Ytrain.T)
test_features = RealFeatures(Xtest.T)
k = 5

# load LMNN
if os.path.exists(".lmnn_model30000_5_reg05_cor20"):
    # Deserialize the pre-trained LMNN model and use its learned distance.
    sf = SerializableAsciiFile(".lmnn_model30000_5_reg05_cor20", 'r')
    lmnn = LMNN()
    lmnn.load_serializable(sf)
    # Report how sparse the diagonal of the learned transform is.
    diagonal = np.diag(lmnn.get_linear_transform())
    print('%d out of %d elements are non-zero.' % (np.sum(diagonal != 0), diagonal.size))
    #print diagonal
    dist = lmnn.get_distance()
else:
    # No saved model: fall back to Euclidean distance and show a
    # previously saved confusion matrix.
    dist = EuclideanDistance()
    cm = load_confusion_matrix()
    print cm

# classifier
knn = KNN(k, dist, prod_labels)
# Run KNN with 4 threads.
parallel = knn.get_global_parallel()
parallel.set_num_threads(4)
knn.set_global_parallel(parallel)
knn.train(prod_features)

print "Classifying test set..."
pred = knn.apply_multiclass(test_features)