def prepare_data(use_toy=True):
    """Load train/test features and labels for the examples.

    When ``use_toy`` is false and the optdigits ``.mat`` file exists, the
    matrix stored under key ``'int0'`` is used: its last row becomes the
    labels, the remaining rows the features, and the columns are split in
    half (first half train, second half test). Otherwise the toy
    ``fm_*_real.dat`` files are loaded and no test labels are available.

    Returns:
        ``[traindat, label_traindat, testdat, label_testdat]``;
        ``label_testdat`` is ``None`` on the toy-data path.
    """
    from os.path import exists
    from tools.load import LoadMatrix

    optdigits_path = '../data/../mldata/uci-20070111-optdigits.mat'
    lm = LoadMatrix()

    if not use_toy and exists(optdigits_path):
        from scipy.io import loadmat

        mat = loadmat(optdigits_path)['int0'].astype(float)
        X = mat[:-1, :]
        Y = mat[-1, :]
        # Floor division: the split point is used as a slice index and must be
        # an int; plain `/` yields a float under true division.
        isplit = X.shape[1] // 2
        traindat = X[:, :isplit]
        label_traindat = Y[:isplit]
        testdat = X[:, isplit:]
        label_testdat = Y[isplit:]
    else:
        traindat = lm.load_numbers('../data/fm_train_real.dat')
        testdat = lm.load_numbers('../data/fm_test_real.dat')
        label_traindat = lm.load_labels('../data/label_train_multiclass.dat')
        label_testdat = None

    return [traindat, label_traindat, testdat, label_testdat]
# This is an example for the initialization of a linear kernel on word (2byte)
# data.

from tools.load import LoadMatrix
from numpy import ushort

# Shared loader used to read the example data files below.
lm = LoadMatrix()
# Word features: the loaded matrices are converted with numpy's ushort.
traindat = ushort(lm.load_numbers("../data/fm_train_word.dat"))
testdat = ushort(lm.load_numbers("../data/fm_test_word.dat"))

# Two identical argument sets; the three entries line up positionally with
# kernel_linear_word_modular(fm_train_word, fm_test_word, scale).
parameter_list = [[traindat, testdat, 1.2], [traindat, testdat, 1.2]]


def kernel_linear_word_modular(fm_train_word=traindat, fm_test_word=testdat, scale=1.2):
    """Build a normalized linear kernel on word features and return it.

    The kernel uses an ``AvgDiagKernelNormalizer(scale)``. Its matrix is
    requested once for train/train and once for train/test; both matrices
    are discarded and the kernel (left initialized on train/test) is
    returned.
    """
    from shogun.Features import WordFeatures
    from shogun.Kernel import LinearKernel, AvgDiagKernelNormalizer

    train_words = WordFeatures(fm_train_word)
    test_words = WordFeatures(fm_test_word)

    word_kernel = LinearKernel(train_words, train_words)
    normalizer = AvgDiagKernelNormalizer(scale)
    word_kernel.set_normalizer(normalizer)

    # Train/train matrix first, then re-initialize for train/test.
    word_kernel.init(train_words, train_words)
    km_train = word_kernel.get_kernel_matrix()

    word_kernel.init(train_words, test_words)
    km_test = word_kernel.get_kernel_matrix()

    return word_kernel

def distance():
    """Request DISTANCE-kernel matrices through the static ``sg`` interface.

    Reads the module-level globals ``fm_train_real`` and ``fm_test_real``
    (bound by the ``__main__`` guard), sets a Euclidean distance on REAL
    features, configures the DISTANCE kernel with the given cache size and
    width, and requests the TRAIN and TEST kernel matrices. Returns nothing.
    """
    # print() call form works on both Python 2 and Python 3.
    print("Distance")

    width = 1.7
    size_cache = 10

    from sg import sg

    sg("set_features", "TRAIN", fm_train_real)
    sg("set_features", "TEST", fm_test_real)
    # "EUCLIDIAN" is the spelling passed to sg in the original; keep as-is.
    sg("set_distance", "EUCLIDIAN", "REAL")
    sg("set_kernel", "DISTANCE", size_cache, width)
    km = sg("get_kernel_matrix", "TRAIN")
    km = sg("get_kernel_matrix", "TEST")


# Script entry point: distance() reads fm_train_real and fm_test_real as
# module-level globals, so both are bound here before it is called.
if __name__ == "__main__":
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_real = lm.load_numbers("../data/fm_train_real.dat")
    fm_test_real = lm.load_numbers("../data/fm_test_real.dat")
    distance()
# Example #4
# 0
	epsilon=1e-5
	labels=Labels(label_train_twoclass)

	svm=LibSVM(C, kernel, labels)
	svm.set_epsilon(epsilon)
	svm.train()

	#kernel.init(feats_train, feats_test)
	output = svm.classify(feats_test)#.get_labels()
        #output_vector = output.get_labels()
        out=svm.classify().get_labels()
        testerr=mean(sign(out)!=testlab)
        print testerr


	#sv_idx=svm.get_support_vectors()
	#alphas=svm.get_alphas()
        #pm = PerformanceMeasures(output_vector, output)
        #acc = pm.get_accuracy()
        #roc = pm.get_auROC()
        #fms = pm.get_fmeasure()


# Script entry point: binds the module-level data names, then calls libsvm().
# NOTE(review): the data paths below are absolute and machine-specific
# (/home/mati/...), whereas other examples in this file use '../data/...' --
# confirm before running on another machine.
if __name__=='__main__':
	from tools.load import LoadMatrix
	lm=LoadMatrix()
	fm_train_real=lm.load_numbers('/home/mati/lib/shogun-0.9.3/examples/documented/data/fm_train_real.dat')
	fm_test_real=lm.load_numbers('/home/mati/lib/shogun-0.9.3/examples/documented/data/fm_test_real.dat')
	label_train_twoclass=lm.load_labels('/home/mati/lib/shogun-0.9.3/examples/documented/data/label_train_twoclass.dat')
	libsvm()
def bray_curtis_distance():
    """Request BRAYCURTIS distance matrices through the static ``sg`` interface.

    Reads the module-level globals ``fm_train_real`` and ``fm_test_real``
    (bound by the ``__main__`` guard). The distance is set first, then the
    TRAIN features are set and their distance matrix requested, followed by
    the same for TEST. Returns nothing.
    """
    # print() call form works on both Python 2 and Python 3.
    print('BrayCurtisDistance')
    from sg import sg
    sg('set_distance', 'BRAYCURTIS', 'REAL')

    sg('set_features', 'TRAIN', fm_train_real)
    dm = sg('get_distance_matrix', 'TRAIN')

    sg('set_features', 'TEST', fm_test_real)
    dm = sg('get_distance_matrix', 'TEST')

# Script entry point: bray_curtis_distance() reads fm_train_real and
# fm_test_real as module-level globals, so both are bound here first.
if __name__=='__main__':
	from tools.load import LoadMatrix
	lm=LoadMatrix()
	fm_train_real=lm.load_numbers('../data/fm_train_real.dat')
	fm_test_real=lm.load_numbers('../data/fm_test_real.dat')
	bray_curtis_distance()
from tools.load import LoadMatrix

# Load the real-valued training matrix used by this example.
lm=LoadMatrix()
data = lm.load_numbers('../data/fm_train_real.dat')

# Argument sets; entries line up positionally with
# preprocessor_kernelpcacut_modular(data, threshold, width).
parameter_list = [[data, 0.01, 1.0], [data, 0.05, 2.0]]

def preprocessor_kernelpcacut_modular(data, threshold, width):
    """Run a ``KernelPCACut`` preprocessor with a Gaussian kernel over ``data``.

    Args:
        data: matrix wrapped in ``RealFeatures``.
        threshold: second argument given to ``KernelPCACut``.
        width: third argument given to ``GaussianKernel``.

    Returns:
        The ``RealFeatures`` object after ``apply_to_feature_matrix`` has
        been called on it.
    """
    from shogun.Features import RealFeatures
    from shogun.Preprocessor import KernelPCACut
    from shogun.Kernel import GaussianKernel

    real_feats = RealFeatures(data)
    gaussian = GaussianKernel(real_feats, real_feats, width)

    pca_cut = KernelPCACut(gaussian, threshold)
    pca_cut.init(real_feats)
    # The return value of the call is ignored; the features object itself
    # is what gets handed back to the caller.
    pca_cut.apply_to_feature_matrix(real_feats)

    return real_feats


# Script entry point: run the example with the first argument set.
if __name__ == '__main__':
    # print() call form works on both Python 2 and Python 3.
    print('KernelPCACut')
    preprocessor_kernelpcacut_modular(*parameter_list[0])

#       (w(t),b(t)) are the current parameters of the linear classifier
#       (w(t+1),b(t+1)) are the new parameters of the linear classifier
#       alpha is the learning rate.
#
# The Perceptron algorithm iterates until all training examples are correctly
# classified or the prescribed maximal number of iterations is reached.
#
# The learning rate and the maximal number of iterations can be set by
#   sg('set_perceptron_parameters', alpha, max_iter);
#

from tools.load import LoadMatrix
from sg import sg
# Shared loader used to read the example data files below.
lm = LoadMatrix()

# Real-valued train/test matrices and the two-class training labels.
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
train_label = lm.load_labels('../data/label_train_twoclass.dat')
# Two identical argument sets; entries line up positionally with
# classifier_perceptron(fm_train_real, fm_test_real, label_train_twoclass).
parameter_list = [[traindat, testdat, train_label],
                  [traindat, testdat, train_label]]


def classifier_perceptron(fm_train_real=traindat,
                          fm_test_real=testdat,
                          label_train_twoclass=train_label):
    """Train a PERCEPTRON classifier through the static ``sg`` interface.

    Sets the TRAIN features and labels, creates the classifier and trains
    it. ``fm_test_real`` is accepted but not used by the code shown here.
    Returns nothing.
    """
    setup_commands = (
        ('set_features', 'TRAIN', fm_train_real),
        ('set_labels', 'TRAIN', label_train_twoclass),
        ('new_classifier', 'PERCEPTRON'),
    )
    for command in setup_commands:
        sg(*command)

    # often does not converge, mind your data!
    sg('train_classifier')
#!/usr/bin/env python
from tools.load import LoadMatrix
# Load the real-valued training matrix and the two-class label file.
lm=LoadMatrix()
data=lm.load_numbers('../data/fm_train_real.dat')
# NOTE(review): the label file is read with load_numbers here, while other
# examples in this file read label files with load_labels -- confirm intended.
label=lm.load_numbers('../data/label_train_twoclass.dat')

# Single argument set: (fm_train_real, label_train_twoclass).
parameter_list=[[data,label]]

def features_io_modular(fm_train_real, label_train_twoclass):
    """Round-trip sparse real features through binary and LibSVM files.

    The sparse features built from ``fm_train_real`` are saved to
    ``tmp/fm_train_sparsereal.bin`` and ``tmp/fm_train_sparsereal.ascii``,
    then both files are loaded into a second, initially empty features
    object. Finally dense ``RealFeatures`` objects are created.
    ``label_train_twoclass`` is not used by the code shown here and nothing
    is returned.
    """
    import numpy
    from modshogun import SparseRealFeatures, RealFeatures, MulticlassLabels
    from modshogun import GaussianKernel
    from modshogun import LibSVMFile, CSVFile, BinaryFile, HDF5File

    sparse_src = SparseRealFeatures(fm_train_real)
    sparse_dst = SparseRealFeatures()

    # A single name is deliberately re-bound for every file object, exactly as
    # in the original: each rebinding drops the reference to the previous one.
    # NOTE(review): presumably a writer is finalized when released -- confirm.
    handle = BinaryFile("tmp/fm_train_sparsereal.bin", "w")
    sparse_src.save(handle)

    handle = LibSVMFile("tmp/fm_train_sparsereal.ascii", "w")
    sparse_src.save(handle)

    handle = BinaryFile("tmp/fm_train_sparsereal.bin")
    sparse_dst.load(handle)

    handle = LibSVMFile("tmp/fm_train_sparsereal.ascii")
    sparse_dst.load(handle)

    # Same rebinding pattern as the original, now with dense features.
    sparse_src = RealFeatures(fm_train_real)
    sparse_dst = RealFeatures()
#!/usr/bin/env python
from tools.load import LoadMatrix
# Real-valued train/test matrices and the multiclass training labels.
lm = LoadMatrix()
fm_train_real = lm.load_numbers('../data/fm_train_real.dat')
fm_test_real = lm.load_numbers('../data/fm_test_real.dat')
label_train_multiclass = lm.load_labels('../data/label_train_multiclass.dat')

# Two argument sets of nine entries each; they line up positionally with
# mkl_multiclass_modular(fm_train_real, fm_test_real, label_train_multiclass,
# width, C, epsilon, num_threads, mkl_epsilon, mkl_norm).
parameter_list = [[
    fm_train_real, fm_test_real, label_train_multiclass, 1.2, 1.2, 1e-5, 1,
    0.001, 1.5
],
                  [
                      fm_train_real, fm_test_real, label_train_multiclass, 5,
                      1.2, 1e-2, 1, 0.001, 2
                  ]]


def mkl_multiclass_modular(fm_train_real, fm_test_real, label_train_multiclass,
                           width, C, epsilon, num_threads, mkl_epsilon,
                           mkl_norm):
    """Create the empty combined kernel/feature containers and real sub-features.

    Only the setup shown here is performed: an empty ``CombinedKernel``, two
    empty ``CombinedFeatures`` objects and ``RealFeatures`` wrappers for the
    train and test matrices. The remaining parameters are not used by this
    code and nothing is returned.
    """
    from shogun.Features import CombinedFeatures, RealFeatures, MulticlassLabels
    from shogun.Kernel import CombinedKernel, GaussianKernel, LinearKernel, PolyKernel
    from shogun.Classifier import MKLMulticlass

    # Containers first (kernel, then train/test features), as in the original.
    combined_kernel = CombinedKernel()
    combined_train = CombinedFeatures()
    combined_test = CombinedFeatures()

    # Dense wrappers around the raw train and test matrices.
    real_train = RealFeatures(fm_train_real)
    real_test = RealFeatures(fm_test_real)
# Example #10
# 0
from tools.load import LoadMatrix
from numpy import ushort

# Word features: the loaded matrices are converted with numpy's ushort.
lm=LoadMatrix()
traindat = ushort(lm.load_numbers('../data/fm_train_word.dat'))
testdat = ushort(lm.load_numbers('../data/fm_test_word.dat'))

# Two identical argument sets: (fm_train_word, fm_test_word, scale).
parameter_list=[[traindat,testdat,1.2],[traindat,testdat,1.2]]

def kernel_linear_word_modular(fm_train_word=traindat, fm_test_word=testdat, scale=1.2):
    """Return a linear kernel on word features, normalized by ``scale``.

    An ``AvgDiagKernelNormalizer(scale)`` is attached, then the kernel
    matrix is requested for train/train and again for train/test. Both
    matrices are discarded; the kernel object is returned, still
    initialized on the train/test pair.
    """
    from shogun.Kernel import LinearKernel, AvgDiagKernelNormalizer
    from shogun.Features import WordFeatures

    words_train = WordFeatures(fm_train_word)
    words_test = WordFeatures(fm_test_word)

    linear = LinearKernel(words_train, words_train)
    linear.set_normalizer(AvgDiagKernelNormalizer(scale))

    # Request the matrix once per (left, right) feature pairing, in order.
    linear.init(words_train, words_train)
    km_train = linear.get_kernel_matrix()
    linear.init(words_train, words_test)
    km_test = linear.get_kernel_matrix()

    return linear

# Script entry point: run the example with the first argument set.
if __name__ == '__main__':
    # print() call form works on both Python 2 and Python 3.
    print('LinearWord')
    kernel_linear_word_modular(*parameter_list[0])
#!/usr/bin/env python
from tools.load import LoadMatrix
# Load the real-valued training matrix and the two-class label file.
lm = LoadMatrix()
data = lm.load_numbers('../data/fm_train_real.dat')
# NOTE(review): the label file is read with load_numbers here, while other
# examples in this file read label files with load_labels -- confirm intended.
label = lm.load_numbers('../data/label_train_twoclass.dat')

# Single argument set: (fm_train_real, label_train_twoclass).
parameter_list = [[data, label]]


def features_io_modular(fm_train_real, label_train_twoclass):
    """Save sparse real features to two files and load them back.

    Features built from ``fm_train_real`` are written to
    ``tmp/fm_train_sparsereal.bin`` (binary) and
    ``tmp/fm_train_sparsereal.ascii`` (LibSVM format); both files are then
    loaded into a second features object. A dense ``RealFeatures`` object is
    created last. ``label_train_twoclass`` is not used by the code shown
    here and nothing is returned.
    """
    import numpy
    from modshogun import SparseRealFeatures, RealFeatures, MulticlassLabels
    from modshogun import GaussianKernel
    from modshogun import LibSVMFile, CSVFile, BinaryFile, HDF5File

    source_feats = SparseRealFeatures(fm_train_real)
    loaded_feats = SparseRealFeatures()

    # One name is re-bound for every file object, as in the original, so each
    # previous file object is released before the next one is opened.
    # NOTE(review): presumably a writer is finalized when released -- confirm.
    io_file = BinaryFile("tmp/fm_train_sparsereal.bin", "w")
    source_feats.save(io_file)

    io_file = LibSVMFile("tmp/fm_train_sparsereal.ascii", "w")
    source_feats.save(io_file)

    io_file = BinaryFile("tmp/fm_train_sparsereal.bin")
    loaded_feats.load(io_file)

    io_file = LibSVMFile("tmp/fm_train_sparsereal.ascii")
    loaded_feats.load(io_file)

    # Same rebinding as the original: the sparse object is replaced by a
    # dense one.
    source_feats = RealFeatures(fm_train_real)
#!/usr/bin/env python
from tools.load import LoadMatrix
from modshogun import *


# Load the real-valued training matrix and the multiclass label file.
lm=LoadMatrix()
data = lm.load_numbers('../data/fm_train_real.dat')
labels = lm.load_numbers('../data/label_train_multiclass.dat')

# One argument set per method constant: (data, labels, method).
parameter_list = [[data, labels, CANVAR_FLDA], [data, labels, CLASSIC_FLDA]]
def preprocessor_fisherlda_modular(data, labels, method):
    """Apply a ``FisherLda`` preprocessor to ``data`` and return the result.

    Args:
        data: matrix wrapped in ``RealFeatures``.
        labels: values wrapped in ``MulticlassLabels``.
        method: constructor argument for ``FisherLda`` (the module's
            ``parameter_list`` passes ``CANVAR_FLDA`` / ``CLASSIC_FLDA``).

    Returns:
        Whatever ``apply_to_feature_matrix`` returns for the features.
    """
    from modshogun import RealFeatures, MulticlassLabels, CANVAR_FLDA
    from modshogun import FisherLda

    lda_features = RealFeatures(data)
    lda_labels = MulticlassLabels(labels)

    lda = FisherLda(method)
    # Third argument is the literal 1 from the original call.
    # NOTE(review): presumably the number of output dimensions -- confirm.
    lda.init(lda_features, lda_labels, 1)

    return lda.apply_to_feature_matrix(lda_features)


# Script entry point: run the example with the first argument set.
if __name__=='__main__':
	print('FisherLda')
	preprocessor_fisherlda_modular(*parameter_list[0])

###########################################################################
# kernel can be used to maximize AUC instead of margin in SVMs
###########################################################################
from tools.load import LoadMatrix
from numpy import double

# Shared loader used to read the example data files below.
lm = LoadMatrix()

traindat = double(lm.load_numbers("../data/fm_train_real.dat"))
# Despite its name, testdat holds the two-class *training labels*; it is the
# default for kernel_auc_modular's label_train_real parameter.
testdat = lm.load_labels("../data/label_train_twoclass.dat")
# Argument sets: (fm_train_real, label_train_real, width).
parameter_list = [[traindat, testdat, 1.7], [traindat, testdat, 1.6]]


def kernel_auc_modular(fm_train_real=traindat, label_train_real=testdat, width=1.7):
    """Build an ``AUCKernel`` on top of a Gaussian sub-kernel and return it.

    The Gaussian kernel of the given ``width`` is computed on the training
    features against themselves. The AUC kernel is set up for AUC
    maximization with the given labels, and its kernel matrix is requested
    once (the matrix itself is discarded).
    """
    from shogun.Features import RealFeatures, Labels
    from shogun.Kernel import GaussianKernel, AUCKernel

    train_features = RealFeatures(fm_train_real)
    gaussian = GaussianKernel(train_features, train_features, width)

    # First constructor argument is the literal 0 from the original call.
    auc_kernel = AUCKernel(0, gaussian)
    train_labels = Labels(label_train_real)
    auc_kernel.setup_auc_maximization(train_labels)

    km_train = auc_kernel.get_kernel_matrix()
    return auc_kernel


# Script entry point: run the example with the first argument set.
if __name__ == "__main__":
    # print() call form works on both Python 2 and Python 3.
    print("AUC")
    kernel_auc_modular(*parameter_list[0])
###########################################################################
# linear kernel on byte features
###########################################################################
from tools.load import LoadMatrix
from numpy import ubyte
# Shared loader used to read the example data files below.
lm=LoadMatrix()

# Byte features: the loaded matrices are converted with numpy's ubyte.
traindat = ubyte(lm.load_numbers('../data/fm_train_byte.dat'))
testdat = ubyte(lm.load_numbers('../data/fm_test_byte.dat'))

# Two identical argument sets: (fm_train_byte, fm_test_byte).
parameter_list=[[traindat,testdat],[traindat,testdat]]

def kernel_linear_byte_modular(fm_train_byte=traindat, fm_test_byte=testdat):
    """Return a linear kernel built on byte features.

    The kernel is constructed on train/train and its matrix requested, then
    re-initialized on train/test and the matrix requested again. Both
    matrices are discarded; the kernel object is returned.
    """
    from shogun.Features import ByteFeatures
    from shogun.Kernel import LinearKernel

    bytes_train = ByteFeatures(fm_train_byte)
    bytes_test = ByteFeatures(fm_test_byte)

    # No explicit init() for train/train: the constructor receives the pair.
    byte_kernel = LinearKernel(bytes_train, bytes_train)
    km_train = byte_kernel.get_kernel_matrix()

    byte_kernel.init(bytes_train, bytes_test)
    km_test = byte_kernel.get_kernel_matrix()

    return byte_kernel

# Script entry point: run the example with the first argument set.
if __name__=='__main__':
	print('LinearByte')
	kernel_linear_byte_modular(*parameter_list[0])
###########################################################################
# anova kernel
###########################################################################
from tools.load import LoadMatrix
from numpy import double

# Shared loader used to read the example data files below.
lm = LoadMatrix()

# Real-valued matrices, converted with numpy's double.
traindat = double(lm.load_numbers('../data/fm_train_real.dat'))
testdat = double(lm.load_numbers('../data/fm_test_real.dat'))
# Argument sets: (fm_train_real, fm_test_real, cardinality, size_cache).
parameter_list = [[traindat, testdat, 2, 10], [traindat, testdat, 5, 10]]


def kernel_anova_modular(fm_train_real=traindat,
                         fm_test_real=testdat,
                         cardinality=2,
                         size_cache=10):
    """Cross-check the two ANOVA kernel recursions on every training pair.

    For each pair ``(i, j)`` of training vectors, ``compute_rec1`` and
    ``compute_rec2`` are evaluated; pairs whose results differ by more than
    ``1e-10`` are printed as ``|k1|k2|``. Nothing is returned by the code
    shown here.
    """
    from shogun.Kernel import ANOVAKernel
    from shogun.Features import RealFeatures

    feats_train = RealFeatures(fm_train_real)
    # Constructed as in the original, although not used further below.
    feats_test = RealFeatures(fm_test_real)

    kernel = ANOVAKernel(feats_train, feats_train, cardinality, size_cache)

    # The vector count is loop-invariant, so query it once.
    num_vectors = feats_train.get_num_vectors()
    for i in range(num_vectors):
        for j in range(num_vectors):
            k1 = kernel.compute_rec1(i, j)
            k2 = kernel.compute_rec2(i, j)
            if abs(k1 - k2) > 1e-10:
                # print() call form works on both Python 2 and Python 3.
                print("|%s|%s|" % (k1, k2))
from tools.load import LoadMatrix
from numpy import ushort
from sg import sg
# Shared loader used to read the example data files below.
lm=LoadMatrix()

# Training features must come from the *train* file; the original loaded
# fm_test_word.dat for both, so train and test were the same data.
trainword=ushort(lm.load_numbers('../data/fm_train_word.dat'))
testword=ushort(lm.load_numbers('../data/fm_test_word.dat'))
# Argument sets: (fm_train_word, fm_test_word, size_cache, scale).
parameter_list=[[trainword,testword,10,1.4],
                [trainword,testword,11,1.5]]

def kernel_linearword(fm_train_word=trainword, fm_test_word=testword,
                      size_cache=10, scale=1.4):
    """Compute a LINEAR kernel on WORD features via the static ``sg`` interface.

    Both the TRAIN and the TEST kernel matrices are requested, in that
    order; only the TEST matrix is returned.
    """
    for target, matrix in (('TRAIN', fm_train_word), ('TEST', fm_test_word)):
        sg('set_features', target, matrix)
    sg('set_kernel', 'LINEAR', 'WORD', size_cache, scale)

    # The TRAIN matrix is still requested, but its value is not kept.
    sg('get_kernel_matrix', 'TRAIN')
    return sg('get_kernel_matrix', 'TEST')

# Script entry point: run the example with the first argument set.
if __name__=='__main__':
	print('LinearWord')
	kernel_linearword(*parameter_list[0])
from tools.load import LoadMatrix

# Load the real-valued training matrix used by this example.
lm=LoadMatrix()
data = lm.load_numbers('../data/fm_train_real.dat')

# Argument sets: (data, k).
parameter_list = [[data,10],[data,20]]

def converter_locallylinearembedding_modular(data, k):
    """Run a ``LocallyLinearEmbedding`` converter (target dimension 1) on ``data``.

    Args:
        data: matrix wrapped in ``RealFeatures``.
        k: value passed to the converter's ``set_k``.

    Returns:
        The ``RealFeatures`` object that was passed to ``apply``; the value
        returned by ``apply`` itself is discarded.
        NOTE(review): confirm that returning the input features rather than
        the result of ``apply`` is intended.
    """
    from shogun.Converter import LocallyLinearEmbedding
    from shogun.Features import RealFeatures

    real_feats = RealFeatures(data)

    lle = LocallyLinearEmbedding()
    lle.set_target_dim(1)
    lle.set_k(k)
    lle.apply(real_feats)

    return real_feats


# Script entry point: run the example with the first argument set.
if __name__ == '__main__':
    # print() call form works on both Python 2 and Python 3.
    print('LocallyLinearEmbedding')
    converter_locallylinearembedding_modular(*parameter_list[0])

###########################################################################
# anova kernel
###########################################################################
from tools.load import LoadMatrix
from numpy import double
# Shared loader used to read the example data files below.
lm=LoadMatrix()

# Real-valued matrices, converted with numpy's double.
traindat = double(lm.load_numbers('../data/fm_train_real.dat'))
testdat = double(lm.load_numbers('../data/fm_test_real.dat'))
# Argument sets: (fm_train_real, fm_test_real, cardinality, size_cache).
parameter_list = [[traindat,testdat,2,10], [traindat,testdat,5,10]]

def kernel_anova_modular(fm_train_real=traindat, fm_test_real=testdat, cardinality=2, size_cache=10):
    """Compute ANOVA kernel matrices for train/train and train/test.

    Before the matrices are requested, ``compute_rec1`` and ``compute_rec2``
    are evaluated for every pair of training vectors; the results are not
    compared or reported.

    Returns:
        ``(km_train, km_test, kernel)``.
    """
    from shogun.Features import RealFeatures
    from shogun.Kernel import ANOVAKernel

    train_features = RealFeatures(fm_train_real)
    test_features = RealFeatures(fm_test_real)

    anova = ANOVAKernel(train_features, train_features, cardinality, size_cache)

    # Exercise both recursions on every (row, col) pair of training vectors.
    for row in range(train_features.get_num_vectors()):
        for col in range(train_features.get_num_vectors()):
            rec1_value = anova.compute_rec1(row, col)
            rec2_value = anova.compute_rec2(row, col)

    km_train = anova.get_kernel_matrix()
    anova.init(train_features, test_features)
    km_test = anova.get_kernel_matrix()

    return km_train, km_test, anova
#!/usr/bin/env python
from tools.load import LoadMatrix
from modshogun import *


# Load the real-valued training matrix and the multiclass label file.
lm=LoadMatrix()
data = lm.load_numbers('../data/fm_train_real.dat')
labels = lm.load_numbers('../data/label_train_multiclass.dat')

# One argument set per method constant: (data, labels, method).
parameter_list = [[data, labels, CANVAR_FLDA], [data, labels, CLASSIC_FLDA]]
def preprocessor_fisherlda_modular(data, labels, method):
    """Fit a ``FisherLda`` preprocessor on ``data`` and return the applied result.

    Args:
        data: matrix wrapped in ``RealFeatures``.
        labels: values wrapped in ``MulticlassLabels``.
        method: constructor argument for ``FisherLda`` (the module's
            ``parameter_list`` passes ``CANVAR_FLDA`` / ``CLASSIC_FLDA``).

    Returns:
        Whatever ``apply_to_feature_matrix`` returns for the features.
    """
    from modshogun import RealFeatures, MulticlassLabels, CANVAR_FLDA
    from modshogun import FisherLda

    lda_features = RealFeatures(data)
    lda_labels = MulticlassLabels(labels)

    lda = FisherLda(method)
    # Third argument is the literal 1 from the original call.
    # NOTE(review): presumably the number of output dimensions -- confirm.
    lda.fit(lda_features, lda_labels, 1)

    return lda.apply_to_feature_matrix(lda_features)


# Script entry point: run the example with the first argument set.
if __name__=='__main__':
	print('FisherLda')
	preprocessor_fisherlda_modular(*parameter_list[0])

from tools.load import LoadMatrix
import numpy

# Load the real-valued training matrix used by this example.
lm = LoadMatrix()
data = lm.load_numbers("../data/fm_train_real.dat")

# Single argument set: (data,).
parameter_list = [[data]]


def converter_multidimensionalscaling_modular(data):
    """Embed ``data`` with multidimensional scaling and measure the distortion.

    A ``MultidimensionalScaling`` converter (target dimension 2, landmark
    mode off) is applied to the features. Euclidean distance matrices are
    then taken of the embedding and of the original features.

    Returns:
        ``norm(D_after - D_before) / norm(D_before)``, computed with
        ``numpy.linalg.norm``.
    """
    from shogun.Distance import EuclidianDistance
    from shogun.Converter import MultidimensionalScaling
    from shogun.Features import RealFeatures

    original = RealFeatures(data)

    # Distance object on the input features, initialized before conversion.
    dist_original = EuclidianDistance()
    dist_original.init(original, original)

    mds = MultidimensionalScaling()
    mds.set_target_dim(2)
    mds.set_landmark(False)
    embedded = mds.apply(original)

    dist_embedded = EuclidianDistance()
    dist_embedded.init(embedded, embedded)

    # Same retrieval order as the original: embedded first, then input.
    matrix_after = dist_embedded.get_distance_matrix()
    matrix_before = dist_original.get_distance_matrix()

    norm = numpy.linalg.norm
    return norm(matrix_after - matrix_before) / norm(matrix_before)
from tools.load import LoadMatrix
from numpy import ubyte  # used below; this example did not import it itself
from sg import sg

# Shared loader used to read the example data files below.
lm = LoadMatrix()

# Byte features: the loaded matrices are converted with numpy's ubyte.
trainbyte = ubyte(lm.load_numbers("../data/fm_train_byte.dat"))
testbyte = ubyte(lm.load_numbers("../data/fm_test_byte.dat"))

# Two identical argument sets: (fm_train_byte, fm_test_byte).
parameter_list = [[trainbyte, testbyte], [trainbyte, testbyte]]


def kernel_linearbyte(fm_train_byte=trainbyte, fm_test_byte=testbyte):
    """Compute a LINEAR kernel on BYTE features via the static ``sg`` interface.

    Both the TRAIN and the TEST kernel matrices are requested, in that
    order; only the TEST matrix is returned.
    """
    sg("set_features", "TRAIN", fm_train_byte)
    # Only the TEST call passes the extra "RAWBYTE" argument, as in the
    # original. NOTE(review): confirm the asymmetry with TRAIN is intended.
    sg("set_features", "TEST", fm_test_byte, "RAWBYTE")

    cache_size = 10
    sg("set_kernel", "LINEAR", "BYTE", cache_size)

    # The TRAIN matrix is still requested, but its value is not kept.
    sg("get_kernel_matrix", "TRAIN")
    return sg("get_kernel_matrix", "TEST")


# Script entry point: run the example with the first argument set.
if __name__ == "__main__":
    # print() call form works on both Python 2 and Python 3.
    print("LinearByte")
    kernel_linearbyte(*parameter_list[0])
from tools.load import LoadMatrix
from sg import sg
# Shared loader used to read the example data files below.
lm=LoadMatrix()

traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
# Argument sets: (fm_train_real, fm_test_real, width, size_cache, divide_by_std).
parameter_list=[[traindat,testdat,1.4,10,True],[traindat,testdat,1.5,11,True]]

def preproc_prunevarsubmean(fm_train_real=traindat, fm_test_real=testdat,
                            width=1.4, size_cache=10, divide_by_std=True):
    """Compute CHI2 kernel matrices with a PRUNEVARSUBMEAN preprocessor attached.

    The preprocessor and kernel are registered first. Then, for TRAIN and
    for TEST in turn, the features are set, the preprocessor is attached
    and the kernel matrix is requested. Only the TEST matrix is returned.
    """
    sg('add_preproc', 'PRUNEVARSUBMEAN', divide_by_std)
    sg('set_kernel', 'CHI2', 'REAL', size_cache, width)

    km = None
    for target, matrix in (('TRAIN', fm_train_real), ('TEST', fm_test_real)):
        sg('set_features', target, matrix)
        sg('attach_preproc', target)
        km = sg('get_kernel_matrix', target)

    return km

# Script entry point: run the example with the first argument set.
if __name__ == '__main__':
    # print() call form works on both Python 2 and Python 3.
    print('PruneVarSubMean')
    preproc_prunevarsubmean(*parameter_list[0])
###########################################################################
# linear kernel on byte features
###########################################################################
from tools.load import LoadMatrix
from numpy import ubyte
# Shared loader used to read the example data files below.
lm = LoadMatrix()

# Byte features: the loaded matrices are converted with numpy's ubyte.
traindat = ubyte(lm.load_numbers('../data/fm_train_byte.dat'))
testdat = ubyte(lm.load_numbers('../data/fm_test_byte.dat'))

# Two identical argument sets: (fm_train_byte, fm_test_byte).
parameter_list = [[traindat, testdat], [traindat, testdat]]


def kernel_linear_byte_modular(fm_train_byte=traindat, fm_test_byte=testdat):
    """Build a linear kernel on byte features and return the kernel object.

    The kernel matrix is requested for train/train (the pairing given to the
    constructor) and, after re-initialization, for train/test. Neither
    matrix is returned.
    """
    from shogun.Features import ByteFeatures
    from shogun.Kernel import LinearKernel

    train_bytes = ByteFeatures(fm_train_byte)
    test_bytes = ByteFeatures(fm_test_byte)

    linear = LinearKernel(train_bytes, train_bytes)
    km_train = linear.get_kernel_matrix()

    # Switch the right-hand side to the test features for the second matrix.
    linear.init(train_bytes, test_bytes)
    km_test = linear.get_kernel_matrix()

    return linear


# Script entry point: run the example with the first argument set.
if __name__ == '__main__':
    # print() call form works on both Python 2 and Python 3.
    print('LinearByte')
    kernel_linear_byte_modular(*parameter_list[0])