def load_data(num_train_samples=7291, m_data_dict=data_dict):
    """Wrap the train/test split stored in ``m_data_dict`` as Shogun objects.

    Args:
        num_train_samples: Number of leading training examples to keep.
            The test split is always used in full.
            NOTE(review): the default 7291 is presumably the size of the
            full training set -- confirm against the data file.
        m_data_dict: Mapping providing the keys ``'xTr'``, ``'yTr'``,
            ``'xTe'`` and ``'yTe'``. Feature matrices are sliced along
            their second axis, so each column is one example; label arrays
            are read from their first row.
            NOTE(review): this layout looks like ``scipy.io.loadmat``
            output -- confirm against the caller.

    Returns:
        The tuple ``(train_features, train_labels, test_features,
        test_labels)``; features are ``RealFeatures`` and labels are
        ``MulticlassLabels``.
    """
    from modshogun import RealFeatures, MulticlassLabels
    import numpy

    # Every array is converted to float64 before being handed to Shogun.
    train_vec = m_data_dict['yTr'][0][:num_train_samples].astype(numpy.float64)
    train_labels = MulticlassLabels(train_vec)
    test_vec = m_data_dict['yTe'][0].astype(numpy.float64)
    test_labels = MulticlassLabels(test_vec)

    # Single-argument print(...) emits the same text on Python 2 and 3,
    # whereas the bare print statement is a SyntaxError on Python 3.
    print("#train_labels = " + str(train_labels.get_num_labels()))
    print("#test_labels = " + str(test_labels.get_num_labels()))

    train_mat = m_data_dict['xTr'][:, :num_train_samples].astype(numpy.float64)
    train_features = RealFeatures(train_mat)
    test_mat = m_data_dict['xTe'].astype(numpy.float64)
    test_features = RealFeatures(test_mat)

    print("#train_vectors = " + str(train_features.get_num_vectors()))
    print("#test_vectors = " + str(test_features.get_num_vectors()))
    print("data dimension = " + str(test_features.get_num_features()))

    return train_features, train_labels, test_features, test_labels
def load_data(num_train_samples=7291, m_data_dict=data_dict):
    """Build Shogun feature and label objects from a train/test data dict.

    Args:
        num_train_samples: How many training examples to keep, counted
            from the start of the training arrays. The test arrays are not
            truncated.
        m_data_dict: Mapping indexed with ``'yTr'``, ``'yTe'``, ``'xTr'``
            and ``'xTe'``. Only row 0 of each label array is used, and the
            training matrix is truncated along its second axis (one
            example per column).
            NOTE(review): presumably a dict returned by a MATLAB-file
            loader -- verify against the code that creates ``data_dict``.

    Returns:
        ``(train_features, train_labels, test_features, test_labels)`` where
        the features are ``RealFeatures`` and the labels are
        ``MulticlassLabels``.
    """
    from modshogun import RealFeatures, MulticlassLabels
    import numpy

    float64 = numpy.float64

    # Labels: first row of each label array, as float64.
    train_labels = MulticlassLabels(
        m_data_dict['yTr'][0][:num_train_samples].astype(float64))
    test_labels = MulticlassLabels(m_data_dict['yTe'][0].astype(float64))

    # print(...) with one argument produces identical output on Python 2
    # and Python 3; the former print statement only parsed on Python 2.
    print("#train_labels = " + str(train_labels.get_num_labels()))
    print("#test_labels = " + str(test_labels.get_num_labels()))

    # Features: keep only the first num_train_samples training columns.
    train_features = RealFeatures(
        m_data_dict['xTr'][:, :num_train_samples].astype(float64))
    test_features = RealFeatures(m_data_dict['xTe'].astype(float64))

    print("#train_vectors = " + str(train_features.get_num_vectors()))
    print("#test_vectors = " + str(test_features.get_num_vectors()))
    print("data dimension = " + str(test_features.get_num_features()))

    return train_features, train_labels, test_features, test_labels
def metric_lmnn_statistics(
    k=3,
    fname_features="../../data/fm_train_multiclass_digits.dat.gz",
    fname_labels="../../data/label_train_multiclass_digits.dat",
):
    """Train LMNN on a labelled data set and plot the training objective.

    Args:
        k: Third argument passed to the ``LMNN`` constructor.
            NOTE(review): presumably the number of target neighbours --
            confirm against the Shogun LMNN documentation.
        fname_features: Path handed to ``load_compressed_features``; the
            loaded matrix is transposed before being wrapped in
            ``RealFeatures``.
        fname_labels: Path of the label file, read through ``CSVFile``.

    Returns:
        None. If Shogun or matplotlib cannot be imported, an error message
        is printed and the function returns without doing anything else.

    Raises:
        AssertionError: If the number of feature vectors differs from the
            number of labels.
    """
    try:
        # MSG_DEBUG is only needed for the optional verbose logging below.
        from modshogun import LMNN, CSVFile, RealFeatures, MulticlassLabels, MSG_DEBUG
        import matplotlib.pyplot as pyplot
    except ImportError:
        # print(...) with a single argument behaves identically on
        # Python 2 and 3; the old print statement was Python 2-only.
        print("Error importing modshogun or other required modules. Please, verify their installation.")
        return

    features = RealFeatures(load_compressed_features(fname_features).T)
    labels = MulticlassLabels(CSVFile(fname_labels))

    assert features.get_num_vectors() == labels.get_num_labels()

    # Train LMNN.
    lmnn = LMNN(features, labels, k)
    # NOTE(review): presumably the number of iterations between impostor
    # recomputations -- confirm against the Shogun LMNN documentation.
    lmnn.set_correction(100)
    # For verbose training output: lmnn.io.set_loglevel(MSG_DEBUG)
    print("Training LMNN, this will take about two minutes...")
    lmnn.train()
    print("Training done!")

    # Plot the objective values recorded during training.
    statistics = lmnn.get_statistics()
    pyplot.plot(statistics.obj.get())
    pyplot.grid(True)
    pyplot.xlabel("Iterations")
    pyplot.ylabel("LMNN objective")
    pyplot.title("LMNN objective during training for the multiclass digits data set")
    pyplot.show()
def metric_lmnn_statistics(
        k=3,
        fname_features='../../data/fm_train_multiclass_digits.dat.gz',
        fname_labels='../../data/label_train_multiclass_digits.dat'):
    """Run LMNN training and show how its objective evolved.

    The features are loaded with ``load_compressed_features`` (transposed
    before use), the labels with ``CSVFile``; an ``LMNN`` instance is
    trained on them and the objective values it recorded are plotted with
    matplotlib.

    Args:
        k: Passed as the third ``LMNN`` constructor argument.
            NOTE(review): looks like the target-neighbour count -- confirm.
        fname_features: Location of the compressed feature file.
        fname_labels: Location of the label file.

    Returns:
        None. When ``modshogun`` or ``matplotlib`` is missing, a message is
        printed and the function returns early.

    Raises:
        AssertionError: If features and labels disagree on the number of
            examples.
    """
    try:
        # MSG_DEBUG stays imported for the optional debug log level below.
        from modshogun import LMNN, CSVFile, RealFeatures, MulticlassLabels, MSG_DEBUG
        import matplotlib.pyplot as pyplot
    except ImportError:
        # Call-style print with one argument is valid, and prints the same
        # text, on both Python 2 and Python 3.
        print('Error importing modshogun or other required modules. Please, verify their installation.')
        return

    features = RealFeatures(load_compressed_features(fname_features).T)
    labels = MulticlassLabels(CSVFile(fname_labels))

    assert (features.get_num_vectors() == labels.get_num_labels())

    # train LMNN
    lmnn = LMNN(features, labels, k)
    # NOTE(review): meaning of the correction value is not visible here;
    # presumably an interval in iterations -- check the Shogun LMNN docs.
    lmnn.set_correction(100)
    # To get debug output while training: lmnn.io.set_loglevel(MSG_DEBUG)
    print('Training LMNN, this will take about two minutes...')
    lmnn.train()
    print('Training done!')

    # plot objective obtained during training
    objective = lmnn.get_statistics().obj.get()
    pyplot.plot(objective)
    pyplot.grid(True)
    pyplot.xlabel('Iterations')
    pyplot.ylabel('LMNN objective')
    pyplot.title(
        'LMNN objective during training for the multiclass digits data set')
    pyplot.show()
# NOTE(review): the statement that opens the next line (axis.scatter(...))
# appears to be the tail of a definition whose header is not visible here
# (presumably the plot_data helper called further down -- confirm).
# It is followed by plot_neighborhood_graph, which draws one line segment
# per point using row 1 of ``nn`` and reads the module-level ``y`` for the
# colour index, and then by a script that builds Shogun features/labels
# from sandwich_data(), plots the data with its k=2 neighbourhood graph on
# axarr[0], trains LMNN with at most 10000 iterations and stores its
# linear transform in ``L``.
# NOTE(review): xrange is Python 2-only, while print(...) is already
# written in call form -- confirm the intended interpreter version.
axis.scatter(xi[:,0], xi[:,1], s=50, facecolors='none', edgecolors=COLS[idx]) def plot_neighborhood_graph(x, nn, axis): for i in xrange(x.shape[0]): xs = [x[i,0], x[nn[1,i], 0]] ys = [x[i,1], x[nn[1,i], 1]] axis.plot(xs, ys, COLS[int(y[i])]) figure, axarr = pyplot.subplots(3, 1) x, y = sandwich_data() features = RealFeatures(x.T) labels = MulticlassLabels(y) print('%d vectors with %d features' % (features.get_num_vectors(), features.get_num_features())) assert(features.get_num_vectors() == labels.get_num_labels()) distance = EuclideanDistance(features, features) k = 2 knn = KNN(k, distance, labels) plot_data(x, y, axarr[0]) plot_neighborhood_graph(x, knn.nearest_neighbors(), axarr[0]) axarr[0].set_aspect('equal') axarr[0].set_xlim(-6, 4) axarr[0].set_ylim(-3, 2) lmnn = LMNN(features, labels, k) lmnn.set_maxiter(10000) lmnn.train() L = lmnn.get_linear_transform()
def plot_neighborhood_graph(x, nn, axis):
    """Draw a line segment from every point to one of its neighbours.

    Args:
        x: 2-D array indexed as ``x[i, 0]`` / ``x[i, 1]``: one point per
            row, with the two plotted coordinates in columns 0 and 1.
        nn: 2-D index array; ``nn[1, i]`` is the row of ``x`` that point
            ``i`` is connected to.
            NOTE(review): row 0 is skipped, presumably because it holds
            the point itself -- confirm against ``KNN.nearest_neighbors``.
        axis: Matplotlib axes to draw on.

    Note:
        The segment colour is ``COLS[int(y[i])]``, where ``y`` is the
        module-level label array, not a parameter of this function.
    """
    # range() works on both Python 2 and 3; xrange() exists only on
    # Python 2, while the print(...) call below is already in call form.
    for i in range(x.shape[0]):
        neighbour = nn[1, i]
        xs = [x[i, 0], x[neighbour, 0]]
        ys = [x[i, 1], x[neighbour, 1]]
        axis.plot(xs, ys, COLS[int(y[i])])


figure, axarr = pyplot.subplots(3, 1)
x, y = sandwich_data()

# x is transposed before being wrapped in RealFeatures.
features = RealFeatures(x.T)
labels = MulticlassLabels(y)

print('%d vectors with %d features' % (features.get_num_vectors(), features.get_num_features()))
assert (features.get_num_vectors() == labels.get_num_labels())

# Top panel: the data with its neighbourhood graph under the plain
# Euclidean distance.
distance = EuclideanDistance(features, features)
k = 2
knn = KNN(k, distance, labels)

plot_data(x, y, axarr[0])
plot_neighborhood_graph(x, knn.nearest_neighbors(), axarr[0])
axarr[0].set_aspect('equal')
axarr[0].set_xlim(-6, 4)
axarr[0].set_ylim(-3, 2)

# Learn the LMNN transform with the same k as the k-NN above.
lmnn = LMNN(features, labels, k)
lmnn.set_maxiter(10000)
lmnn.train()
L = lmnn.get_linear_transform()