def metric_lmnn_modular(train_fname=traindat, test_fname=testdat, label_train_fname=label_traindat, k=3):
    try:
        from modshogun import RealFeatures, MulticlassLabels, LMNN, KNN, CSVFile
    except ImportError:
        return

    # wrap features and labels into Shogun objects
    feats_train = RealFeatures(CSVFile(train_fname))
    feats_test = RealFeatures(CSVFile(test_fname))
    labels = MulticlassLabels(CSVFile(label_train_fname))

    # LMNN
    lmnn = LMNN(feats_train, labels, k)
    lmnn.train()
    lmnn_distance = lmnn.get_distance()

    # perform classification with KNN
    knn = KNN(k, lmnn_distance, labels)
    knn.train()
    output = knn.apply(feats_test).get_labels()

    return lmnn, output
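# Note: the default arguments traindat, testdat and label_traindat are
# module-level names that must exist before the def above is executed; in the
# Shogun examples they point at the bundled toy data (the paths below are an
# assumption, adjust to your data layout):
traindat = '../data/fm_train_real.dat'
testdat = '../data/fm_test_real.dat'
label_traindat = '../data/label_train_multiclass.dat'

if __name__ == '__main__':
    print('LMNN')
    metric_lmnn_modular(traindat, testdat, label_traindat, k=3)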
import numpy as np
import pdb
from modshogun import RealFeatures, MulticlassLabels, LMNN

def test_LMNN():
    # 80 one-hot examples: 20 classes with 4 examples each
    X = np.eye(80)
    Y = np.array([i for j in range(4) for i in range(20)])
    feats = RealFeatures(X.T)
    labs = MulticlassLabels(Y.astype(np.float64))

    lmnn = LMNN(feats, labs, 2)
    lmnn.train()
    L = lmnn.get_linear_transform()
    X_proj = np.dot(L, X.T)

    # project the first 20 examples with the learned transform
    test_x = np.eye(80)[0:20]
    test = RealFeatures(test_x.T)
    test_proj = np.dot(L, test_x.T)

    # drop into the debugger to inspect the projections
    pdb.set_trace()
def run_knn(Xtrain, Ytrain, Xtest, Ytest):
    prod_features = RealFeatures(Xtrain)
    prod_labels = MulticlassLabels(Ytrain)
    test_features = RealFeatures(Xtest)
    test_labels = MulticlassLabels(Ytest)

    if os.path.exists(".lmnn_model30000_5_reg05_cor20"):
        print("Using LMNN distance")
        lmnn = LMNN()
        sf = SerializableAsciiFile(".lmnn_model30000_5_reg05_cor20", 'r')
        lmnn.load_serializable(sf)
        diagonal = np.diag(lmnn.get_linear_transform())
        #print('%d out of %d elements are non-zero.' % (np.sum(diagonal != 0), diagonal.size))
        #diagonal = lmnn.get_linear_transform()
        np.set_printoptions(precision=1, threshold=1e10, linewidth=500)
        #lmnn.set_diagonal(True)
        dist = lmnn.get_distance()
    else:
        dist = EuclideanDistance()

    # classifier
    knn = KNN(K, dist, prod_labels)
    #knn.set_use_covertree(True)
    parallel = knn.get_global_parallel()
    parallel.set_num_threads(4)
    knn.set_global_parallel(parallel)
    knn.train(prod_features)

    print("Classifying test set...")
    pred = knn.apply_multiclass(test_features)
    # extract the numpy label vector before comparing against Ytest
    print("Accuracy = %2.2f%%" % (100 * np.mean(pred.get_labels() == Ytest)))

    cm = build_confusion_matrix(Ytest, pred, NCLASSES)
    #save_confusion_matrix(cm)
    #cm = load_confusion_matrix()
    print("Confusion matrix: ")
    print(cm)
    #plot_confusion_matrix(cm)
    #results = predict_class_prob(pred, cm)
    #nn = build_neighbours_matrix(knn, prod_labels)
    #results = predict_class_from_neighbours(nn)
    #print("Log loss: " + str(calculate_log_loss(results, Ytest)))
    #print_prediction_output(results)

    return cm
def diagonal_lmnn(features, labels, k=3, max_iter=10000):
    from modshogun import LMNN, MSG_DEBUG
    import numpy

    lmnn = LMNN(features, labels, k)
    # lmnn.io.set_loglevel(MSG_DEBUG)
    lmnn.set_diagonal(True)
    lmnn.set_maxiter(max_iter)
    lmnn.train(numpy.eye(features.get_num_features()))

    return lmnn
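# A minimal sketch of calling diagonal_lmnn; the toy data below is illustrative,
# not from the original experiments. With set_diagonal(True) the learned
# transform is diagonal, so its diagonal acts as a per-feature weight vector.
import numpy
from modshogun import RealFeatures, MulticlassLabels

x = numpy.random.randn(10, 100)          # 10 features, 100 examples (column-wise)
y = numpy.repeat(numpy.arange(4.0), 25)  # 4 balanced classes
lmnn = diagonal_lmnn(RealFeatures(x), MulticlassLabels(y), k=3)
weights = numpy.diag(lmnn.get_linear_transform())
print(weights)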
def lmnn(train_features, train_labels, test_features, test_labels, k=1):
    from modshogun import LMNN, KNN, MSG_DEBUG, MulticlassAccuracy
    import numpy

    # dummy = LMNN()
    # dummy.io.set_loglevel(MSG_DEBUG)

    lmnn = LMNN(train_features, train_labels, k)
    lmnn.train()
    distance = lmnn.get_distance()

    knn = KNN(k, distance, train_labels)
    knn.train()
    train_output = knn.apply()
    test_output = knn.apply(test_features)

    evaluator = MulticlassAccuracy()
    print('LMNN training error is %.4f' % ((1 - evaluator.evaluate(train_output, train_labels)) * 100))
    print('LMNN test error is %.4f' % ((1 - evaluator.evaluate(test_output, test_labels)) * 100))
def main():
    # Get training file name from the command line
    traindatafile = sys.argv[1]

    # The training file is in libSVM format; shuffle its lines so that the
    # first 5000 points form a random subset
    with open(traindatafile, mode="r") as myFile:
        lines = myFile.readlines()
    random.shuffle(lines)
    with open("tempdata.dat", 'w') as tmp:
        tmp.writelines(lines)

    tr_data = load_svmlight_file("tempdata.dat")
    Xtr = tr_data[0].toarray()  # Converts sparse matrices to dense
    Ytr = tr_data[1]            # The training labels
    Xtr = Xtr[:5000]
    Ytr = Ytr[:5000]

    # Cast data to Shogun format to work with LMNN
    features = RealFeatures(Xtr.T)
    labels = MulticlassLabels(Ytr.astype(np.float64))

    ### Do magic stuff here to learn the best metric you can ###

    # Number of target neighbours per example - tune this using validation
    kmax = 25  # inductive bias
    values = list(range(1, kmax + 1))
    k = predict(Xtr, Ytr, values)
    print("K :", k)
    k = 5  # override the validated k with a fixed choice

    # Initialize the LMNN package
    lmnn = LMNN(features, labels, k)
    init_transform = np.eye(Xtr.shape[1])

    # Choose an appropriate timeout
    lmnn.set_maxiter(25000)
    lmnn.train(init_transform)

    # Let LMNN do its magic and return a linear transformation
    # corresponding to the Mahalanobis metric it has learnt
    L = lmnn.get_linear_transform()
    M = np.matrix(np.dot(L.T, L))
    print("LMNN done")

    # Save the model for use in testing phase
    # Warning: do not change this file name
    np.save("model.npy", M)
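# At test time the saved matrix M defines the learned Mahalanobis metric
# d(x, y) = sqrt((x - y)^T M (x - y)). A minimal sketch of using it; only the
# file name "model.npy" comes from main() above, the helper and the test
# points are illustrative.
import numpy as np

def mahalanobis(x, y, M):
    # squared distance under M, then the square root
    d = x - y
    return float(np.sqrt(np.dot(d, np.dot(M, d))))

M = np.load("model.npy")
print(mahalanobis(np.ones(M.shape[0]), np.zeros(M.shape[0]), M))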
def main():
    # Get training file name from the command line
    traindatafile = sys.argv[1]

    # The training file is in libSVM format
    tr_data = load_svmlight_file(traindatafile)
    print("loaded data")

    init_transform = np.eye(tr_data[0].toarray().shape[1])
    print(init_transform)

    Xtr = tr_data[0][:6000].toarray()  # Converts sparse matrices to dense
    Ytr = tr_data[1][:6000]            # The training labels

    # Cast data to Shogun format to work with LMNN
    features = RealFeatures(Xtr.T)
    labels = MulticlassLabels(Ytr.astype(np.float64))

    ### Do magic stuff here to learn the best metric you can ###

    # Number of target neighbours per example - tune this using validation
    k = 21

    # Initialize the LMNN package
    print("starting lmnn train....")
    lmnn = LMNN(features, labels, k)

    # Choose an appropriate timeout
    lmnn.set_maxiter(3000)
    lmnn.train(init_transform)

    # Let LMNN do its magic and return a linear transformation
    # corresponding to the Mahalanobis metric it has learnt
    L = lmnn.get_linear_transform()
    M = np.matrix(np.dot(L.T, L))
    print(M)

    # plot the objective obtained during training
    statistics = lmnn.get_statistics()
    pyplot.plot(statistics.obj.get())
    pyplot.grid(True)
    pyplot.xlabel('Number of iterations')
    pyplot.ylabel('LMNN objective')
    pyplot.show()

    # Save the model for use in testing phase
    # Warning: do not change this file name
    np.save("model.npy", M)
def main():
    # Get training file name from the command line
    traindatafile = sys.argv[1]

    # The training file is in libSVM format
    tr_data = load_svmlight_file(traindatafile)
    Xtr = tr_data[0].toarray()  # Converts sparse matrices to dense
    Ytr = tr_data[1]            # The training labels

    # Shuffle the examples and keep the first 6000
    indices = np.arange(Ytr.shape[0])
    np.random.shuffle(indices)
    Xtr = Xtr[indices][:6000]
    Ytr = Ytr[indices][:6000]

    # Cast data to Shogun format to work with LMNN
    features = RealFeatures(Xtr.T)
    labels = MulticlassLabels(Ytr.astype(np.float64))

    ### Do magic stuff here to learn the best metric you can ###

    # Number of target neighbours per example - tune this using validation
    k = 10

    # Initialize the LMNN package
    lmnn = LMNN(features, labels, k)
    init_transform = np.eye(Xtr.shape[1])

    # Choose an appropriate timeout
    lmnn.set_maxiter(200000)
    lmnn.train(init_transform)

    # Let LMNN do its magic and return a linear transformation
    # corresponding to the Mahalanobis metric it has learnt
    L = lmnn.get_linear_transform()
    M = np.matrix(np.dot(L.T, L))

    # Save the model for use in testing phase
    # Warning: do not change this file name
    np.save("model.npy", M)
def lmnn_diagonal(train_features, train_labels, test_features, test_labels, k=1):
    from modshogun import LMNN, KNN, MSG_DEBUG, MulticlassAccuracy
    import numpy

    lmnn = LMNN(train_features, train_labels, k)
    lmnn.set_diagonal(True)
    lmnn.train()
    distance = lmnn.get_distance()

    knn = KNN(k, distance, train_labels)
    knn.train()
    train_output = knn.apply()
    test_output = knn.apply(test_features)

    evaluator = MulticlassAccuracy()
    print('LMNN-diagonal training error is %.4f' % ((1 - evaluator.evaluate(train_output, train_labels)) * 100))
    print('LMNN-diagonal test error is %.4f' % ((1 - evaluator.evaluate(test_output, test_labels)) * 100))
def RunLMNNShogun():
    # note: self and options come from the enclosing scope; this is a nested helper
    totalTimer = Timer()

    # Load input dataset.
    Log.Info("Loading dataset", self.verbose)

    # Use the last row of the training set as the responses.
    X, y = SplitTrainData(self.dataset)

    try:
        feat = RealFeatures(X.T)
        labels = MulticlassLabels(y.astype(np.float64))

        with totalTimer:
            # Get the options for running LMNN.
            if "k" in options:
                self.k = int(options.pop("k"))
            if "maxiter" in options:
                n = int(options.pop("maxiter"))
            else:
                n = 2000
            if len(options) > 0:
                Log.Fatal("Unknown parameters: " + str(options))
                raise Exception("unknown parameters")

            # Perform LMNN.
            prep = ShogunLMNN(feat, labels, self.k)
            prep.set_maxiter(n)
            prep.train()
    except Exception as e:
        return [-1, -1]

    time = totalTimer.ElapsedTime()

    # Get the learned linear transformation (used as the metric by KNNAccuracy).
    distance = prep.get_linear_transform()
    dataList = [X, y]
    accuracy1NN = Metrics.KNNAccuracy(distance, dataList, 1, False)
    accuracy3NN = Metrics.KNNAccuracy(distance, dataList, 3, False)
    accuracy3NNDW = Metrics.KNNAccuracy(distance, dataList, 3, True)
    accuracy5NN = Metrics.KNNAccuracy(distance, dataList, 5, False)
    accuracy5NNDW = Metrics.KNNAccuracy(distance, dataList, 5, True)

    return [time, accuracy1NN, accuracy3NN, accuracy3NNDW, accuracy5NN, accuracy5NNDW]
def lmnn_classify(traindat, testdat, k=3):
    from modshogun import LMNN, KNN, MulticlassAccuracy, MSG_DEBUG

    train_features, train_labels = traindat.features, traindat.labels

    lmnn = LMNN(train_features, train_labels, k)
    lmnn.set_maxiter(1200)
    lmnn.io.set_loglevel(MSG_DEBUG)
    lmnn.train()
    distance = lmnn.get_distance()

    knn = KNN(k, distance, train_labels)
    knn.train()

    test_features, test_labels = testdat.features, testdat.labels
    predicted_labels = knn.apply(test_features)

    evaluator = MulticlassAccuracy()
    acc = evaluator.evaluate(predicted_labels, test_labels)
    err = 1 - acc

    return err
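# lmnn_classify expects objects exposing .features (RealFeatures) and .labels
# (MulticlassLabels); a minimal sketch using a hypothetical namedtuple and toy
# data (both assumptions, not part of the original):
from collections import namedtuple
import numpy as np
from modshogun import RealFeatures, MulticlassLabels

Dataset = namedtuple('Dataset', ['features', 'labels'])

def make_dataset(X, y):
    # X is (num_examples, num_features); Shogun stores features column-wise
    return Dataset(RealFeatures(X.T), MulticlassLabels(y.astype(np.float64)))

X = np.random.randn(60, 5)
y = np.tile(np.arange(3.0), 20)  # interleaved labels 0, 1, 2, 0, 1, 2, ...
err = lmnn_classify(make_dataset(X[:40], y[:40]), make_dataset(X[40:], y[40:]), k=3)
print('test error: %.4f' % err)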
def metric_lmnn_statistics(
        k=3, fname_features='../../data/fm_train_multiclass_digits.dat.gz',
        fname_labels='../../data/label_train_multiclass_digits.dat'):
    try:
        from modshogun import LMNN, CSVFile, RealFeatures, MulticlassLabels, MSG_DEBUG
        import matplotlib.pyplot as pyplot
    except ImportError:
        print('Error importing modshogun or other required modules. Please, verify their installation.')
        return

    features = RealFeatures(load_compressed_features(fname_features).T)
    labels = MulticlassLabels(CSVFile(fname_labels))
    # print('number of examples = %d' % features.get_num_vectors())
    # print('number of features = %d' % features.get_num_features())
    assert features.get_num_vectors() == labels.get_num_labels()

    # train LMNN
    lmnn = LMNN(features, labels, k)
    lmnn.set_correction(100)
    # lmnn.io.set_loglevel(MSG_DEBUG)
    print('Training LMNN, this will take about two minutes...')
    lmnn.train()
    print('Training done!')

    # plot objective obtained during training
    statistics = lmnn.get_statistics()
    pyplot.plot(statistics.obj.get())
    pyplot.grid(True)
    pyplot.xlabel('Iterations')
    pyplot.ylabel('LMNN objective')
    pyplot.title('LMNN objective during training for the multiclass digits data set')
    pyplot.show()
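# load_compressed_features is not defined in this snippet; a plausible sketch
# that reads a gzipped whitespace-separated matrix with numpy (an assumption,
# not necessarily the original helper):
import gzip
import numpy

def load_compressed_features(fname):
    # one example per row; metric_lmnn_statistics transposes to Shogun's layout
    with gzip.open(fname, 'rb') as f:
        return numpy.loadtxt(f)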
def main():
    Xtr, Ytr = gettrainData()
    Xtr = Xtr[:len(Xtr) // 6]
    Ytr = Ytr[:len(Ytr) // 6]

    # Cast data to Shogun format to work with LMNN
    features = RealFeatures(Xtr.T)
    labels = MulticlassLabels(Ytr.astype(np.float64))
    print(2.1)  # progress marker

    ### Do magic stuff here to learn the best metric you can ###

    # Number of target neighbours per example - tune this using validation
    k = 10

    # Initialize the LMNN package
    lmnn = LMNN(features, labels, k)
    print(2.2)
    init_transform = np.eye(Xtr.shape[1])
    print(2.3)

    # Choose an appropriate timeout
    lmnn.set_maxiter(8000)
    print(2.4)
    lmnn.train(init_transform)
    print(2.5)

    # Let LMNN do its magic and return a linear transformation
    # corresponding to the Mahalanobis metric it has learnt
    L = lmnn.get_linear_transform()
    print(2.6)
    M = np.matrix(np.dot(L.T, L))
    print(2.7)

    # Save the model for use in testing phase
    # Warning: do not change this file name
    np.save("model2.npy", M)
print('%d vectors with %d features' % (features.get_num_vectors(), features.get_num_features()))
assert features.get_num_vectors() == labels.get_num_labels()

distance = EuclideanDistance(features, features)
k = 2
knn = KNN(k, distance, labels)

plot_data(x, y, axarr[0])
plot_neighborhood_graph(x, knn.nearest_neighbors(), axarr[0])
axarr[0].set_aspect('equal')
axarr[0].set_xlim(-6, 4)
axarr[0].set_ylim(-3, 2)

lmnn = LMNN(features, labels, k)
lmnn.set_maxiter(10000)
lmnn.train()
L = lmnn.get_linear_transform()
knn.set_distance(lmnn.get_distance())

plot_data(x, y, axarr[1])
plot_neighborhood_graph(x, knn.nearest_neighbors(), axarr[1])
axarr[1].set_aspect('equal')
axarr[1].set_xlim(-6, 4)
axarr[1].set_ylim(-3, 2)

## to see the data after the linear transformation
xL = numpy.dot(x, L.T)
features = RealFeatures(xL.T)
distance = EuclideanDistance(features, features)
knn.set_distance(distance)
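# Sanity check (an illustrative sketch, not part of the original script): the
# Euclidean distance between the transformed points xL matches the learned
# metric on the original points, since d_M(a, b) = ||L a - L b|| with M = L^T L.
a, b = x[0], x[1]
M = numpy.dot(L.T, L)
d_mahalanobis = numpy.sqrt(numpy.dot(a - b, numpy.dot(M, a - b)))
d_euclidean = numpy.linalg.norm(xL[0] - xL[1])
print(numpy.isclose(d_mahalanobis, d_euclidean))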
print "Training data size: " + str(Xtrain.shape) print "Test data size: " + str(Xtest.shape) N = Xtest.shape[0] prod_features = RealFeatures(Xtrain.T) prod_labels = MulticlassLabels(Ytrain.T) test_features = RealFeatures(Xtest.T) k = 5 # load LMNN if os.path.exists(".lmnn_model30000_5_reg05_cor20"): sf = SerializableAsciiFile(".lmnn_model30000_5_reg05_cor20", 'r') lmnn = LMNN() lmnn.load_serializable(sf) diagonal = np.diag(lmnn.get_linear_transform()) print('%d out of %d elements are non-zero.' % (np.sum(diagonal != 0), diagonal.size)) #print diagonal dist = lmnn.get_distance() else: dist = EuclideanDistance() cm = load_confusion_matrix() print cm # classifier knn = KNN(k, dist, prod_labels) parallel = knn.get_global_parallel()
#!/usr/bin/python

from scipy import io
data_dict = io.loadmat('../data/NBData20_train_preprocessed.mat')
xt = data_dict['xt']
yt = data_dict['yt']

import numpy
from modshogun import RealFeatures, MulticlassLabels, LMNN, MSG_DEBUG

features = RealFeatures(xt.T)
labels = MulticlassLabels(numpy.squeeze(yt))

k = 6
lmnn = LMNN(features, labels, k)
lmnn.io.set_loglevel(MSG_DEBUG)
lmnn.set_diagonal(True)
lmnn.set_maxiter(10000)
lmnn.train(numpy.eye(features.get_num_features()))
random.seed(13)
subset = random.permutation(len(Y))
Xtrain = X[subset[:30000], :]
Ytrain = Y[subset[:30000]]
print("Training data used: " + str(Xtrain.shape))

prod_features = RealFeatures(Xtrain.T)
prod_labels = MulticlassLabels(Ytrain.T)
k = 5

# train LMNN
sf = SerializableAsciiFile(".lmnn_model30000_5_reg05_cor20", 'w')
print("Training LMNN...")
#init_t = np.eye(features.shape[1])
lmnn = LMNN(prod_features, prod_labels, k)
lmnn.set_maxiter(800)
#lmnn.set_diagonal(True)
lmnn.set_stepsize_threshold(1e-10)
lmnn.set_regularization(0.5)
lmnn.set_correction(20)
#lmnn.train(init_t)
lmnn.train()
lmnn.save_serializable(sf)

plot_lmnn_statistics(lmnn)
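# plot_lmnn_statistics is not defined in this snippet; a plausible sketch based
# on the training-statistics API used in metric_lmnn_statistics above (an
# assumption, not the original helper):
import matplotlib.pyplot as pyplot

def plot_lmnn_statistics(lmnn):
    statistics = lmnn.get_statistics()
    pyplot.plot(statistics.obj.get())
    pyplot.grid(True)
    pyplot.xlabel('Iterations')
    pyplot.ylabel('LMNN objective')
    pyplot.show()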