예제 #1
0
def load_data(num_train_samples=7291, m_data_dict=data_dict):
    from modshogun import RealFeatures, MulticlassLabels
    import numpy

    train_vec = m_data_dict['yTr'][0][:num_train_samples].astype(numpy.float64)
    train_labels = MulticlassLabels(train_vec)
    test_vec = m_data_dict['yTe'][0].astype(numpy.float64)
    test_labels = MulticlassLabels(test_vec)
    print "#train_labels = " + str(train_labels.get_num_labels())
    print "#test_labels  = " + str(test_labels.get_num_labels())

    train_mat = m_data_dict['xTr'][:, :num_train_samples].astype(numpy.float64)
    train_features = RealFeatures(train_mat)
    test_mat = m_data_dict['xTe'].astype(numpy.float64)
    test_features = RealFeatures(test_mat)
    print "#train_vectors = " + str(train_features.get_num_vectors())
    print "#test_vectors  = " + str(test_features.get_num_vectors())
    print "data dimension = " + str(test_features.get_num_features())

    return train_features, train_labels, test_features, test_labels
예제 #2
0
def load_data(num_train_samples=7291, m_data_dict=data_dict):
	from modshogun import RealFeatures, MulticlassLabels
	import numpy

	train_vec = m_data_dict['yTr'][0][:num_train_samples].astype(numpy.float64)
	train_labels = MulticlassLabels(train_vec)
	test_vec = m_data_dict['yTe'][0].astype(numpy.float64)
 	test_labels = MulticlassLabels(test_vec)
	print "#train_labels = " + str(train_labels.get_num_labels())
	print "#test_labels  = " + str(test_labels.get_num_labels())

	train_mat = m_data_dict['xTr'][:,:num_train_samples].astype(numpy.float64)
	train_features = RealFeatures(train_mat)
	test_mat = m_data_dict['xTe'].astype(numpy.float64)
	test_features = RealFeatures(test_mat)
	print "#train_vectors = " + str(train_features.get_num_vectors())
	print "#test_vectors  = " + str(test_features.get_num_vectors())
	print "data dimension = " + str(test_features.get_num_features())

	return train_features, train_labels, test_features, test_labels
예제 #3
0
def metric_lmnn_statistics(
    k=3,
    fname_features="../../data/fm_train_multiclass_digits.dat.gz",
    fname_labels="../../data/label_train_multiclass_digits.dat",
):
    try:
        from modshogun import LMNN, CSVFile, RealFeatures, MulticlassLabels, MSG_DEBUG
        import matplotlib.pyplot as pyplot
    except ImportError:
        print "Error importing modshogun or other required modules. Please, verify their installation."
        return

    features = RealFeatures(load_compressed_features(fname_features).T)
    labels = MulticlassLabels(CSVFile(fname_labels))

    # 	print 'number of examples = %d' % features.get_num_vectors()
    # 	print 'number of features = %d' % features.get_num_features()

    assert features.get_num_vectors() == labels.get_num_labels()

    # train LMNN
    lmnn = LMNN(features, labels, k)
    lmnn.set_correction(100)
    # 	lmnn.io.set_loglevel(MSG_DEBUG)
    print "Training LMNN, this will take about two minutes..."
    lmnn.train()
    print "Training done!"

    # plot objective obtained during training
    statistics = lmnn.get_statistics()

    pyplot.plot(statistics.obj.get())
    pyplot.grid(True)
    pyplot.xlabel("Iterations")
    pyplot.ylabel("LMNN objective")
    pyplot.title("LMNN objective during training for the multiclass digits data set")

    pyplot.show()
예제 #4
0
def metric_lmnn_statistics(
        k=3,
        fname_features='../../data/fm_train_multiclass_digits.dat.gz',
        fname_labels='../../data/label_train_multiclass_digits.dat'):
    try:
        from modshogun import LMNN, CSVFile, RealFeatures, MulticlassLabels, MSG_DEBUG
        import matplotlib.pyplot as pyplot
    except ImportError:
        print 'Error importing modshogun or other required modules. Please, verify their installation.'
        return

    features = RealFeatures(load_compressed_features(fname_features).T)
    labels = MulticlassLabels(CSVFile(fname_labels))

    #	print 'number of examples = %d' % features.get_num_vectors()
    #	print 'number of features = %d' % features.get_num_features()

    assert (features.get_num_vectors() == labels.get_num_labels())

    # train LMNN
    lmnn = LMNN(features, labels, k)
    lmnn.set_correction(100)
    #	lmnn.io.set_loglevel(MSG_DEBUG)
    print 'Training LMNN, this will take about two minutes...'
    lmnn.train()
    print 'Training done!'

    # plot objective obtained during training
    statistics = lmnn.get_statistics()

    pyplot.plot(statistics.obj.get())
    pyplot.grid(True)
    pyplot.xlabel('Iterations')
    pyplot.ylabel('LMNN objective')
    pyplot.title(
        'LMNN objective during training for the multiclass digits data set')

    pyplot.show()
예제 #5
0
		axis.scatter(xi[:,0], xi[:,1], s=50, facecolors='none', edgecolors=COLS[idx])

def plot_neighborhood_graph(x, nn, axis):
	for i in xrange(x.shape[0]):
		xs = [x[i,0], x[nn[1,i], 0]]
		ys = [x[i,1], x[nn[1,i], 1]]
		axis.plot(xs, ys, COLS[int(y[i])])

figure, axarr = pyplot.subplots(3, 1)
x, y = sandwich_data()

features = RealFeatures(x.T)
labels = MulticlassLabels(y)

print('%d vectors with %d features' % (features.get_num_vectors(), features.get_num_features()))
assert(features.get_num_vectors() == labels.get_num_labels())

distance = EuclideanDistance(features, features)
k = 2
knn = KNN(k, distance, labels)

plot_data(x, y, axarr[0])
plot_neighborhood_graph(x, knn.nearest_neighbors(), axarr[0])
axarr[0].set_aspect('equal')
axarr[0].set_xlim(-6, 4)
axarr[0].set_ylim(-3, 2)

lmnn = LMNN(features, labels, k)
lmnn.set_maxiter(10000)
lmnn.train()
L = lmnn.get_linear_transform()
예제 #6
0
def plot_neighborhood_graph(x, nn, axis):
    for i in xrange(x.shape[0]):
        xs = [x[i, 0], x[nn[1, i], 0]]
        ys = [x[i, 1], x[nn[1, i], 1]]
        axis.plot(xs, ys, COLS[int(y[i])])


figure, axarr = pyplot.subplots(3, 1)
x, y = sandwich_data()

features = RealFeatures(x.T)
labels = MulticlassLabels(y)

print('%d vectors with %d features' %
      (features.get_num_vectors(), features.get_num_features()))
assert (features.get_num_vectors() == labels.get_num_labels())

distance = EuclideanDistance(features, features)
k = 2
knn = KNN(k, distance, labels)

plot_data(x, y, axarr[0])
plot_neighborhood_graph(x, knn.nearest_neighbors(), axarr[0])
axarr[0].set_aspect('equal')
axarr[0].set_xlim(-6, 4)
axarr[0].set_ylim(-3, 2)

lmnn = LMNN(features, labels, k)
lmnn.set_maxiter(10000)
lmnn.train()
L = lmnn.get_linear_transform()