def prepare_data(use_toy=True):
    """Load training/test features and labels for the multiclass examples.

    When ``use_toy`` is false and the optdigits ``.mat`` file exists, its
    ``'int0'`` matrix is used: the last row supplies the labels, the remaining
    rows the features, and the columns are split in half into a training and
    a test part.  Otherwise the toy data files are loaded via ``LoadMatrix``
    and no test labels are available.

    Returns:
        list: ``[traindat, label_traindat, testdat, label_testdat]``;
        ``label_testdat`` is ``None`` for the toy data.
    """
    from os.path import exists
    from tools.load import LoadMatrix
    lm = LoadMatrix()

    optdigits_path = '../data/../mldata/uci-20070111-optdigits.mat'

    if not use_toy and exists(optdigits_path):
        from scipy.io import loadmat

        mat = loadmat(optdigits_path)['int0'].astype(float)
        X = mat[:-1, :]
        Y = mat[-1, :]
        # Floor division keeps the split index an int; true division
        # (Python 3) would yield a float, which is not a valid slice index.
        isplit = X.shape[1] // 2
        traindat = X[:, :isplit]
        label_traindat = Y[:isplit]
        testdat = X[:, isplit:]
        label_testdat = Y[isplit:]
    else:
        traindat = lm.load_numbers('../data/fm_train_real.dat')
        testdat = lm.load_numbers('../data/fm_test_real.dat')
        label_traindat = lm.load_labels('../data/label_train_multiclass.dat')
        label_testdat = None

    return [traindat, label_traindat, testdat, label_testdat]
# This is an example for the initialization of a linear kernel on word (2byte)
# data.
from tools.load import LoadMatrix
from numpy import ushort

lm = LoadMatrix()
traindat = ushort(lm.load_numbers("../data/fm_train_word.dat"))
testdat = ushort(lm.load_numbers("../data/fm_test_word.dat"))

parameter_list = [[traindat, testdat, 1.2], [traindat, testdat, 1.2]]


def kernel_linear_word_modular(fm_train_word=traindat, fm_test_word=testdat, scale=1.2):
    """Build a linear kernel on word features with an AvgDiag normalizer.

    The kernel is first initialised on (train, train) and its matrix is
    computed, then re-initialised on (train, test) and the matrix is computed
    again.  Both matrices are discarded; the kernel object, left initialised
    on (train, test), is returned.
    """
    from shogun.Kernel import LinearKernel, AvgDiagKernelNormalizer
    from shogun.Features import WordFeatures

    train_feats = WordFeatures(fm_train_word)
    test_feats = WordFeatures(fm_test_word)

    word_kernel = LinearKernel(train_feats, train_feats)
    word_kernel.set_normalizer(AvgDiagKernelNormalizer(scale))

    # Train-vs-train matrix.
    word_kernel.init(train_feats, train_feats)
    train_matrix = word_kernel.get_kernel_matrix()

    # Train-vs-test matrix.
    word_kernel.init(train_feats, test_feats)
    test_matrix = word_kernel.get_kernel_matrix()

    return word_kernel
def distance():
    """Compute the distance-based kernel matrices via the static interface.

    Reads the module-level globals ``fm_train_real`` and ``fm_test_real``
    (assigned in the ``__main__`` section of this script), registers them as
    TRAIN/TEST features, selects the ``EUCLIDIAN`` distance on ``REAL`` data
    and a ``DISTANCE`` kernel, then requests the TRAIN and TEST kernel
    matrices.  Nothing is returned.
    """
    # print() with a single argument behaves the same on Python 2 and 3;
    # the former print statement was a SyntaxError on Python 3.
    print("Distance")

    width = 1.7
    size_cache = 10

    from sg import sg

    sg("set_features", "TRAIN", fm_train_real)
    sg("set_features", "TEST", fm_test_real)
    sg("set_distance", "EUCLIDIAN", "REAL")
    sg("set_kernel", "DISTANCE", size_cache, width)

    km = sg("get_kernel_matrix", "TRAIN")
    km = sg("get_kernel_matrix", "TEST")


if __name__ == "__main__":
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_real = lm.load_numbers("../data/fm_train_real.dat")
    fm_test_real = lm.load_numbers("../data/fm_test_real.dat")
    distance()
epsilon=1e-5 labels=Labels(label_train_twoclass) svm=LibSVM(C, kernel, labels) svm.set_epsilon(epsilon) svm.train() #kernel.init(feats_train, feats_test) output = svm.classify(feats_test)#.get_labels() #output_vector = output.get_labels() out=svm.classify().get_labels() testerr=mean(sign(out)!=testlab) print testerr #sv_idx=svm.get_support_vectors() #alphas=svm.get_alphas() #pm = PerformanceMeasures(output_vector, output) #acc = pm.get_accuracy() #roc = pm.get_auROC() #fms = pm.get_fmeasure() if __name__=='__main__': from tools.load import LoadMatrix lm=LoadMatrix() fm_train_real=lm.load_numbers('/home/mati/lib/shogun-0.9.3/examples/documented/data/fm_train_real.dat') fm_test_real=lm.load_numbers('/home/mati/lib/shogun-0.9.3/examples/documented/data/fm_test_real.dat') label_train_twoclass=lm.load_labels('/home/mati/lib/shogun-0.9.3/examples/documented/data/label_train_twoclass.dat') libsvm()
def bray_curtis_distance():
    """Compute Bray-Curtis distance matrices via the static interface.

    Reads the module-level globals ``fm_train_real`` and ``fm_test_real``
    (assigned in the ``__main__`` section of this script).  The distance is
    selected first; then the TRAIN features are set and the TRAIN distance
    matrix requested, followed by the same for TEST.  Nothing is returned.
    """
    # print() with a single argument behaves the same on Python 2 and 3;
    # the former print statement was a SyntaxError on Python 3.
    print('BrayCurtisDistance')

    from sg import sg

    sg('set_distance', 'BRAYCURTIS', 'REAL')

    sg('set_features', 'TRAIN', fm_train_real)
    dm = sg('get_distance_matrix', 'TRAIN')

    sg('set_features', 'TEST', fm_test_real)
    dm = sg('get_distance_matrix', 'TEST')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_real = lm.load_numbers('../data/fm_train_real.dat')
    fm_test_real = lm.load_numbers('../data/fm_test_real.dat')
    bray_curtis_distance()
from tools.load import LoadMatrix

lm = LoadMatrix()
data = lm.load_numbers('../data/fm_train_real.dat')

parameter_list = [[data, 0.01, 1.0], [data, 0.05, 2.0]]


def preprocessor_kernelpcacut_modular(data, threshold, width):
    """Apply a KernelPCACut preprocessor to real-valued features.

    Args:
        data: feature matrix passed to ``RealFeatures``.
        threshold: second argument of the ``KernelPCACut`` constructor.
        width: width passed to the ``GaussianKernel`` built on the features.

    Returns:
        The ``RealFeatures`` object after ``apply_to_feature_matrix`` has
        been called on it.
    """
    from shogun.Features import RealFeatures
    from shogun.Preprocessor import KernelPCACut
    from shogun.Kernel import GaussianKernel

    features = RealFeatures(data)
    kernel = GaussianKernel(features, features, width)

    preprocessor = KernelPCACut(kernel, threshold)
    preprocessor.init(features)
    preprocessor.apply_to_feature_matrix(features)

    return features


if __name__ == '__main__':
    # print() with a single argument behaves the same on Python 2 and 3;
    # the former print statement was a SyntaxError on Python 3.
    print('KernelPCACut')
    preprocessor_kernelpcacut_modular(*parameter_list[0])
# (w(t),b(t)) are the current parameters of the linear classifier
# (w(t+1),b(t+1)) are the new parameters of the linear classifier
# alpha is the learning rate.
#
# The Perceptron algorithm iterates until all training examples are correctly
# classified or the prescribed maximal number of iterations is reached.
#
# The learning rate and the maximal number of iterations can be set by
# sg('set_perceptron_parameters', alpha, max_iter);
#
from tools.load import LoadMatrix
from sg import sg

lm = LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
train_label = lm.load_labels('../data/label_train_twoclass.dat')

parameter_list = [
    [traindat, testdat, train_label],
    [traindat, testdat, train_label],
]


def classifier_perceptron(fm_train_real=traindat, fm_test_real=testdat,
                          label_train_twoclass=train_label):
    """Train a perceptron on the training data via the static interface.

    Registers the training features and labels, creates a ``PERCEPTRON``
    classifier and trains it.  ``fm_test_real`` is accepted but not used in
    this function body; nothing is returned.
    """
    setup_commands = (
        ('set_features', 'TRAIN', fm_train_real),
        ('set_labels', 'TRAIN', label_train_twoclass),
        ('new_classifier', 'PERCEPTRON'),
    )
    for command in setup_commands:
        sg(*command)

    # often does not converge, mind your data!
    sg('train_classifier')
#!/usr/bin/env python from tools.load import LoadMatrix lm=LoadMatrix() data=lm.load_numbers('../data/fm_train_real.dat') label=lm.load_numbers('../data/label_train_twoclass.dat') parameter_list=[[data,label]] def features_io_modular (fm_train_real, label_train_twoclass): import numpy from modshogun import SparseRealFeatures, RealFeatures, MulticlassLabels from modshogun import GaussianKernel from modshogun import LibSVMFile, CSVFile, BinaryFile, HDF5File feats=SparseRealFeatures(fm_train_real) feats2=SparseRealFeatures() f=BinaryFile("tmp/fm_train_sparsereal.bin","w") feats.save(f) f=LibSVMFile("tmp/fm_train_sparsereal.ascii","w") feats.save(f) f=BinaryFile("tmp/fm_train_sparsereal.bin") feats2.load(f) f=LibSVMFile("tmp/fm_train_sparsereal.ascii") feats2.load(f) feats=RealFeatures(fm_train_real) feats2=RealFeatures()
#!/usr/bin/env python from tools.load import LoadMatrix lm = LoadMatrix() fm_train_real = lm.load_numbers('../data/fm_train_real.dat') fm_test_real = lm.load_numbers('../data/fm_test_real.dat') label_train_multiclass = lm.load_labels('../data/label_train_multiclass.dat') parameter_list = [[ fm_train_real, fm_test_real, label_train_multiclass, 1.2, 1.2, 1e-5, 1, 0.001, 1.5 ], [ fm_train_real, fm_test_real, label_train_multiclass, 5, 1.2, 1e-2, 1, 0.001, 2 ]] def mkl_multiclass_modular(fm_train_real, fm_test_real, label_train_multiclass, width, C, epsilon, num_threads, mkl_epsilon, mkl_norm): from shogun.Features import CombinedFeatures, RealFeatures, MulticlassLabels from shogun.Kernel import CombinedKernel, GaussianKernel, LinearKernel, PolyKernel from shogun.Classifier import MKLMulticlass kernel = CombinedKernel() feats_train = CombinedFeatures() feats_test = CombinedFeatures() subkfeats_train = RealFeatures(fm_train_real) subkfeats_test = RealFeatures(fm_test_real)
from tools.load import LoadMatrix
from numpy import ushort

lm = LoadMatrix()
traindat = ushort(lm.load_numbers('../data/fm_train_word.dat'))
testdat = ushort(lm.load_numbers('../data/fm_test_word.dat'))

parameter_list = [[traindat, testdat, 1.2], [traindat, testdat, 1.2]]


def kernel_linear_word_modular(fm_train_word=traindat, fm_test_word=testdat, scale=1.2):
    """Build a linear kernel on word features with an AvgDiag normalizer.

    Args:
        fm_train_word: training matrix wrapped in ``WordFeatures``.
        fm_test_word: test matrix wrapped in ``WordFeatures``.
        scale: argument of the ``AvgDiagKernelNormalizer``.

    Returns:
        The kernel object, last initialised on (train, test).  The train and
        test kernel matrices are computed along the way but not returned.
    """
    from shogun.Kernel import LinearKernel, AvgDiagKernelNormalizer
    from shogun.Features import WordFeatures

    feats_train = WordFeatures(fm_train_word)
    feats_test = WordFeatures(fm_test_word)

    kernel = LinearKernel(feats_train, feats_train)
    kernel.set_normalizer(AvgDiagKernelNormalizer(scale))
    kernel.init(feats_train, feats_train)
    km_train = kernel.get_kernel_matrix()

    kernel.init(feats_train, feats_test)
    km_test = kernel.get_kernel_matrix()

    return kernel


if __name__ == '__main__':
    # print() with a single argument behaves the same on Python 2 and 3;
    # the former print statement was a SyntaxError on Python 3.
    print('LinearWord')
    kernel_linear_word_modular(*parameter_list[0])
#!/usr/bin/env python from tools.load import LoadMatrix lm = LoadMatrix() data = lm.load_numbers('../data/fm_train_real.dat') label = lm.load_numbers('../data/label_train_twoclass.dat') parameter_list = [[data, label]] def features_io_modular(fm_train_real, label_train_twoclass): import numpy from modshogun import SparseRealFeatures, RealFeatures, MulticlassLabels from modshogun import GaussianKernel from modshogun import LibSVMFile, CSVFile, BinaryFile, HDF5File feats = SparseRealFeatures(fm_train_real) feats2 = SparseRealFeatures() f = BinaryFile("tmp/fm_train_sparsereal.bin", "w") feats.save(f) f = LibSVMFile("tmp/fm_train_sparsereal.ascii", "w") feats.save(f) f = BinaryFile("tmp/fm_train_sparsereal.bin") feats2.load(f) f = LibSVMFile("tmp/fm_train_sparsereal.ascii") feats2.load(f) feats = RealFeatures(fm_train_real)
#!/usr/bin/env python
from tools.load import LoadMatrix
from modshogun import *

lm = LoadMatrix()
data = lm.load_numbers('../data/fm_train_real.dat')
labels = lm.load_numbers('../data/label_train_multiclass.dat')

parameter_list = [
    [data, labels, CANVAR_FLDA],
    [data, labels, CLASSIC_FLDA],
]


def preprocessor_fisherlda_modular(data, labels, method):
    """Run a FisherLda preprocessor on real-valued features.

    The features and multiclass labels are wrapped in their Shogun
    containers, a ``FisherLda(method)`` preprocessor is initialised on them
    (third ``init`` argument is 1), and the result of
    ``apply_to_feature_matrix`` on the features is returned.
    """
    from modshogun import CANVAR_FLDA, FisherLda, MulticlassLabels, RealFeatures

    wrapped_features = RealFeatures(data)
    wrapped_labels = MulticlassLabels(labels)

    lda = FisherLda(method)
    lda.init(wrapped_features, wrapped_labels, 1)

    return lda.apply_to_feature_matrix(wrapped_features)


if __name__ == '__main__':
    print('FisherLda')
    preprocessor_fisherlda_modular(*parameter_list[0])
###########################################################################
# kernel can be used to maximize AUC instead of margin in SVMs
###########################################################################
from tools.load import LoadMatrix
from numpy import double

lm = LoadMatrix()
traindat = double(lm.load_numbers("../data/fm_train_real.dat"))
# NOTE: despite its name, ``testdat`` holds the two-class training labels.
testdat = lm.load_labels("../data/label_train_twoclass.dat")

parameter_list = [[traindat, testdat, 1.7], [traindat, testdat, 1.6]]


def kernel_auc_modular(fm_train_real=traindat, label_train_real=testdat, width=1.7):
    """Build an AUCKernel on top of a Gaussian sub-kernel.

    Args:
        fm_train_real: training matrix wrapped in ``RealFeatures``.
        label_train_real: labels passed to ``setup_auc_maximization``.
        width: width of the Gaussian sub-kernel.

    Returns:
        The ``AUCKernel`` object after ``setup_auc_maximization``; its kernel
        matrix is computed once but not returned.
    """
    from shogun.Kernel import GaussianKernel, AUCKernel
    from shogun.Features import RealFeatures, Labels

    feats_train = RealFeatures(fm_train_real)
    subkernel = GaussianKernel(feats_train, feats_train, width)

    kernel = AUCKernel(0, subkernel)
    kernel.setup_auc_maximization(Labels(label_train_real))
    km_train = kernel.get_kernel_matrix()

    return kernel


if __name__ == "__main__":
    # print() with a single argument behaves the same on Python 2 and 3;
    # the former print statement was a SyntaxError on Python 3.
    print("AUC")
    kernel_auc_modular(*parameter_list[0])
###########################################################################
# linear kernel on byte features
###########################################################################
from tools.load import LoadMatrix
from numpy import ubyte

lm = LoadMatrix()
traindat = ubyte(lm.load_numbers('../data/fm_train_byte.dat'))
testdat = ubyte(lm.load_numbers('../data/fm_test_byte.dat'))

parameter_list = [[traindat, testdat], [traindat, testdat]]


def kernel_linear_byte_modular(fm_train_byte=traindat, fm_test_byte=testdat):
    """Compute a linear kernel on byte features and return the kernel.

    The kernel is constructed on (train, train) and its matrix is computed,
    then it is re-initialised on (train, test) and the matrix is computed
    again.  Only the kernel object is returned.
    """
    from shogun.Kernel import LinearKernel
    from shogun.Features import ByteFeatures

    train_feats = ByteFeatures(fm_train_byte)
    test_feats = ByteFeatures(fm_test_byte)

    # Train-vs-train matrix.
    byte_kernel = LinearKernel(train_feats, train_feats)
    train_matrix = byte_kernel.get_kernel_matrix()

    # Train-vs-test matrix.
    byte_kernel.init(train_feats, test_feats)
    test_matrix = byte_kernel.get_kernel_matrix()

    return byte_kernel


if __name__ == '__main__':
    print('LinearByte')
    kernel_linear_byte_modular(*parameter_list[0])
###########################################################################
# anova kernel
###########################################################################
from tools.load import LoadMatrix
from numpy import double

lm = LoadMatrix()
traindat = double(lm.load_numbers('../data/fm_train_real.dat'))
testdat = double(lm.load_numbers('../data/fm_test_real.dat'))

parameter_list = [[traindat, testdat, 2, 10], [traindat, testdat, 5, 10]]


def kernel_anova_modular(fm_train_real=traindat, fm_test_real=testdat, cardinality=2, size_cache=10):
    """Cross-check the two ANOVA kernel implementations on the training data.

    In the portion visible here, an ``ANOVAKernel`` is built on the training
    features and, for every pair of training vectors, ``compute_rec1`` and
    ``compute_rec2`` are compared; pairs whose values differ by more than
    1e-10 are printed.

    NOTE(review): the function appears to continue beyond this excerpt
    (``feats_test`` is created but not used here) -- confirm.
    """
    from shogun.Kernel import ANOVAKernel
    from shogun.Features import RealFeatures

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)

    kernel = ANOVAKernel(feats_train, feats_train, cardinality, size_cache)

    for i in range(0, feats_train.get_num_vectors()):
        for j in range(0, feats_train.get_num_vectors()):
            k1 = kernel.compute_rec1(i, j)
            k2 = kernel.compute_rec2(i, j)
            if abs(k1 - k2) > 1e-10:
                # print() with one pre-formatted string behaves the same on
                # Python 2 and 3; the print statement fails to parse on 3.
                print("|%s|%s|" % (k1, k2))
from tools.load import LoadMatrix
from numpy import ushort
from sg import sg

lm = LoadMatrix()
# The training matrix comes from the *train* file.  It was previously loaded
# from fm_test_word.dat, making the TRAIN and TEST features identical.
trainword = ushort(lm.load_numbers('../data/fm_train_word.dat'))
testword = ushort(lm.load_numbers('../data/fm_test_word.dat'))

parameter_list = [[trainword, testword, 10, 1.4], [trainword, testword, 11, 1.5]]


def kernel_linearword(fm_train_word=trainword, fm_test_word=testword,
                      size_cache=10, scale=1.4):
    """Compute a linear kernel on word features via the static interface.

    Args:
        fm_train_word: matrix registered as TRAIN features.
        fm_test_word: matrix registered as TEST features.
        size_cache: cache-size argument of ``set_kernel``.
        scale: scale argument of ``set_kernel``.

    Returns:
        The kernel matrix requested for ``'TEST'``; the TRAIN matrix is
        computed first and then overwritten.
    """
    sg('set_features', 'TRAIN', fm_train_word)
    sg('set_features', 'TEST', fm_test_word)
    sg('set_kernel', 'LINEAR', 'WORD', size_cache, scale)

    km = sg('get_kernel_matrix', 'TRAIN')
    km = sg('get_kernel_matrix', 'TEST')
    return km


if __name__ == '__main__':
    print('LinearWord')
    kernel_linearword(*parameter_list[0])
from tools.load import LoadMatrix

lm = LoadMatrix()
data = lm.load_numbers('../data/fm_train_real.dat')

parameter_list = [[data, 10], [data, 20]]


def converter_locallylinearembedding_modular(data, k):
    """Run a LocallyLinearEmbedding converter on real-valued features.

    Args:
        data: feature matrix wrapped in ``RealFeatures``.
        k: value passed to the converter's ``set_k``.

    Returns:
        The ``RealFeatures`` object the converter was applied to.

    NOTE(review): the value returned by ``converter.apply`` is discarded and
    the input feature object is returned instead -- confirm this is intended.
    """
    from shogun.Features import RealFeatures
    from shogun.Converter import LocallyLinearEmbedding

    features = RealFeatures(data)

    converter = LocallyLinearEmbedding()
    converter.set_target_dim(1)
    converter.set_k(k)
    converter.apply(features)

    return features


if __name__ == '__main__':
    # print() with a single argument behaves the same on Python 2 and 3;
    # the former print statement was a SyntaxError on Python 3.
    print('LocallyLinearEmbedding')
    converter_locallylinearembedding_modular(*parameter_list[0])
###########################################################################
# anova kernel
###########################################################################
from tools.load import LoadMatrix
from numpy import double

lm = LoadMatrix()
traindat = double(lm.load_numbers('../data/fm_train_real.dat'))
testdat = double(lm.load_numbers('../data/fm_test_real.dat'))

parameter_list = [
    [traindat, testdat, 2, 10],
    [traindat, testdat, 5, 10],
]


def kernel_anova_modular(fm_train_real=traindat, fm_test_real=testdat,
                         cardinality=2, size_cache=10):
    """Compute ANOVA kernel matrices for the training and test data.

    Both ``compute_rec1`` and ``compute_rec2`` are evaluated for every pair
    of training vectors (their values are not compared or kept).  The kernel
    matrix is then computed on (train, train) and, after re-initialising the
    kernel, on (train, test).

    Returns:
        tuple: ``(km_train, km_test, kernel)``.
    """
    from shogun.Kernel import ANOVAKernel
    from shogun.Features import RealFeatures

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    anova = ANOVAKernel(train_feats, train_feats, cardinality, size_cache)

    # Evaluate both implementations for all training pairs; the consistency
    # check that once printed mismatches (> 1e-10) is disabled.
    for row in range(0, train_feats.get_num_vectors()):
        for col in range(0, train_feats.get_num_vectors()):
            rec1_value = anova.compute_rec1(row, col)
            rec2_value = anova.compute_rec2(row, col)

    train_matrix = anova.get_kernel_matrix()

    anova.init(train_feats, test_feats)
    test_matrix = anova.get_kernel_matrix()

    return train_matrix, test_matrix, anova
#!/usr/bin/env python
from tools.load import LoadMatrix
from modshogun import *

lm = LoadMatrix()
data = lm.load_numbers('../data/fm_train_real.dat')
labels = lm.load_numbers('../data/label_train_multiclass.dat')

parameter_list = [
    [data, labels, CANVAR_FLDA],
    [data, labels, CLASSIC_FLDA],
]


def preprocessor_fisherlda_modular(data, labels, method):
    """Run a FisherLda preprocessor on real-valued features.

    The features and multiclass labels are wrapped in their Shogun
    containers, a ``FisherLda(method)`` preprocessor is fitted on them
    (third ``fit`` argument is 1), and the result of
    ``apply_to_feature_matrix`` on the features is returned.
    """
    from modshogun import CANVAR_FLDA, FisherLda, MulticlassLabels, RealFeatures

    wrapped_features = RealFeatures(data)
    wrapped_labels = MulticlassLabels(labels)

    lda = FisherLda(method)
    lda.fit(wrapped_features, wrapped_labels, 1)

    return lda.apply_to_feature_matrix(wrapped_features)


if __name__ == '__main__':
    print('FisherLda')
    preprocessor_fisherlda_modular(*parameter_list[0])
from tools.load import LoadMatrix
import numpy

lm = LoadMatrix()
data = lm.load_numbers("../data/fm_train_real.dat")

parameter_list = [[data]]


def converter_multidimensionalscaling_modular(data):
    """Measure how well a 2-D MDS embedding preserves Euclidean distances.

    The features are embedded with ``MultidimensionalScaling`` (target
    dimension 2, landmark mode off).  The Euclidean distance matrices of the
    original features and of the embedding are computed, and the norm of
    their difference divided by the norm of the original distance matrix is
    returned.
    """
    from shogun.Features import RealFeatures
    from shogun.Converter import MultidimensionalScaling
    from shogun.Distance import EuclidianDistance

    original = RealFeatures(data)
    dist_original = EuclidianDistance()
    dist_original.init(original, original)

    mds = MultidimensionalScaling()
    mds.set_target_dim(2)
    mds.set_landmark(False)
    embedded = mds.apply(original)

    dist_embedded = EuclidianDistance()
    dist_embedded.init(embedded, embedded)

    # The embedded-space matrix is requested first, then the original one.
    matrix_embedded = dist_embedded.get_distance_matrix()
    matrix_original = dist_original.get_distance_matrix()

    residual_norm = numpy.linalg.norm(matrix_embedded - matrix_original)
    return residual_norm / numpy.linalg.norm(matrix_original)
from tools.load import LoadMatrix
# ``ubyte`` is used below but was never imported, which raised a NameError
# as soon as the module was loaded.
from numpy import ubyte
from sg import sg

lm = LoadMatrix()
trainbyte = ubyte(lm.load_numbers("../data/fm_train_byte.dat"))
testbyte = ubyte(lm.load_numbers("../data/fm_test_byte.dat"))

parameter_list = [[trainbyte, testbyte], [trainbyte, testbyte]]


def kernel_linearbyte(fm_train_byte=trainbyte, fm_test_byte=testbyte):
    """Compute a linear kernel on byte features via the static interface.

    Args:
        fm_train_byte: matrix registered as TRAIN features.
        fm_test_byte: matrix registered as TEST features.

    Returns:
        The kernel matrix requested for ``"TEST"``; the TRAIN matrix is
        computed first and then overwritten.
    """
    # NOTE(review): only the TEST features are registered with the extra
    # "RAWBYTE" argument -- confirm whether TRAIN should carry it as well.
    sg("set_features", "TRAIN", fm_train_byte)
    sg("set_features", "TEST", fm_test_byte, "RAWBYTE")
    sg("set_kernel", "LINEAR", "BYTE", 10)

    km = sg("get_kernel_matrix", "TRAIN")
    km = sg("get_kernel_matrix", "TEST")
    return km


if __name__ == "__main__":
    # print() with a single argument behaves the same on Python 2 and 3;
    # the former print statement was a SyntaxError on Python 3.
    print("LinearByte")
    kernel_linearbyte(*parameter_list[0])
from tools.load import LoadMatrix
from sg import sg

lm = LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list = [[traindat, testdat, 1.4, 10, True], [traindat, testdat, 1.5, 11, True]]


def preproc_prunevarsubmean(fm_train_real=traindat, fm_test_real=testdat,
                            width=1.4, size_cache=10, divide_by_std=True):
    """Compute CHI2 kernel matrices on PRUNEVARSUBMEAN-preprocessed data.

    The preprocessor is added and the kernel selected first; then, for TRAIN
    and TEST in turn, the features are set, the preprocessor is attached and
    the kernel matrix is requested.

    Args:
        fm_train_real: matrix registered as TRAIN features.
        fm_test_real: matrix registered as TEST features.
        width: width argument of ``set_kernel``.
        size_cache: cache-size argument of ``set_kernel``.
        divide_by_std: flag passed to ``add_preproc``.

    Returns:
        The kernel matrix requested for ``'TEST'``.
    """
    sg('add_preproc', 'PRUNEVARSUBMEAN', divide_by_std)
    sg('set_kernel', 'CHI2', 'REAL', size_cache, width)

    sg('set_features', 'TRAIN', fm_train_real)
    sg('attach_preproc', 'TRAIN')
    km = sg('get_kernel_matrix', 'TRAIN')

    sg('set_features', 'TEST', fm_test_real)
    sg('attach_preproc', 'TEST')
    km = sg('get_kernel_matrix', 'TEST')

    return km


if __name__ == '__main__':
    # print() with a single argument behaves the same on Python 2 and 3;
    # the former print statement was a SyntaxError on Python 3.
    print('PruneVarSubMean')
    preproc_prunevarsubmean(*parameter_list[0])
###########################################################################
# linear kernel on byte features
###########################################################################
from tools.load import LoadMatrix
from numpy import ubyte

lm = LoadMatrix()
traindat = ubyte(lm.load_numbers('../data/fm_train_byte.dat'))
testdat = ubyte(lm.load_numbers('../data/fm_test_byte.dat'))

parameter_list = [[traindat, testdat], [traindat, testdat]]


def kernel_linear_byte_modular(fm_train_byte=traindat, fm_test_byte=testdat):
    """Compute a linear kernel on byte features and return the kernel.

    Args:
        fm_train_byte: training matrix wrapped in ``ByteFeatures``.
        fm_test_byte: test matrix wrapped in ``ByteFeatures``.

    Returns:
        The kernel object, last initialised on (train, test).  The train and
        test kernel matrices are computed along the way but not returned.
    """
    from shogun.Kernel import LinearKernel
    from shogun.Features import ByteFeatures

    feats_train = ByteFeatures(fm_train_byte)
    feats_test = ByteFeatures(fm_test_byte)

    kernel = LinearKernel(feats_train, feats_train)
    km_train = kernel.get_kernel_matrix()

    kernel.init(feats_train, feats_test)
    km_test = kernel.get_kernel_matrix()

    return kernel


if __name__ == '__main__':
    # print() with a single argument behaves the same on Python 2 and 3;
    # the former print statement was a SyntaxError on Python 3.
    print('LinearByte')
    kernel_linear_byte_modular(*parameter_list[0])