def kernel_linear_modular(train_fname=traindat, test_fname=testdat, scale=1.2):
    """Linear kernel on dense real features loaded from CSV files.

    Applies average-diagonal normalization with the given scale and returns
    (train/train kernel matrix, train/test kernel matrix, kernel object).
    """
    from modshogun import RealFeatures, LinearKernel, AvgDiagKernelNormalizer, CSVFile

    train_feats = RealFeatures(CSVFile(train_fname))
    test_feats = RealFeatures(CSVFile(test_fname))

    lin_kernel = LinearKernel()
    lin_kernel.set_normalizer(AvgDiagKernelNormalizer(scale))

    # Kernel matrix on the training data.
    lin_kernel.init(train_feats, train_feats)
    km_train = lin_kernel.get_kernel_matrix()

    # Cross kernel matrix between training and test data.
    lin_kernel.init(train_feats, test_feats)
    km_test = lin_kernel.get_kernel_matrix()

    return km_train, km_test, lin_kernel
def get_kernel_mat(fm_train_dna, fm_test_dna, N, M, pseudo=1e-1,order=1,gap=0,reverse=False):
    """Build Fisher-kernel (FK) matrices from DNA string data.

    Trains an HMM with N states / M emissions on the training strings,
    derives FK features from a positive/negative HMM pair, and returns
    (km_train, km_test, kernel) where kernel is a LinearKernel over the
    FK feature space.

    NOTE(review): relies on module-level imports of StringCharFeatures,
    StringWordFeatures, HMM, FKFeatures, LinearKernel, DNA, BW_NORMAL and
    np — confirm they are imported earlier in the file.
    """
    # train HMM for positive class
    print "hmm training"
    charfeat=StringCharFeatures(fm_train_dna, DNA)
    #charfeat.io.set_loglevel(MSG_DEBUG)
    hmm_train=StringWordFeatures(charfeat.get_alphabet())
    hmm_train.obtain_from_char(charfeat, order-1, order, gap, reverse)

    pos=HMM(hmm_train, N, M, pseudo)
    pos.baum_welch_viterbi_train(BW_NORMAL)
    # Negative model starts as a copy of the trained positive model.
    neg = HMM(pos)

    print "Kernel training data"
    charfeat=StringCharFeatures(fm_train_dna, DNA)
    wordfeats_train=StringWordFeatures(charfeat.get_alphabet())
    wordfeats_train.obtain_from_char(charfeat, order-1, order, gap, reverse)

    print "Kernel testing data"
    charfeat=StringCharFeatures(fm_test_dna, DNA)
    wordfeats_test=StringWordFeatures(charfeat.get_alphabet())
    wordfeats_test.obtain_from_char(charfeat, order-1, order, gap, reverse)

    print "get kernel on training data"
    pos.set_observations(wordfeats_train)
    neg.set_observations(wordfeats_train)
    feats_train=FKFeatures(10, pos, neg)
    feats_train.set_opt_a(-1) #estimate prior
    print 'getting feature matrix'
    v0 = feats_train.get_feature_vector(0)
    v1 = feats_train.get_feature_vector(1)
    print np.dot(v0, v1)
    kernel=LinearKernel(feats_train, feats_train)
    #kernel=PolyKernel(feats_train, feats_train, *kargs)
    km_train=kernel.get_kernel_matrix()
    print km_train.shape, km_train[0, 1]

    print "get kernel on testing data"
    # Clone the HMMs so the training-side observations stay untouched.
    pos_clone=HMM(pos)
    neg_clone=HMM(neg)
    pos_clone.set_observations(wordfeats_test)
    neg_clone.set_observations(wordfeats_test)
    feats_test=FKFeatures(10, pos_clone, neg_clone)
    feats_test.set_a(feats_train.get_a()) #use prior from training data
    kernel.init(feats_train, feats_test)
    km_test=kernel.get_kernel_matrix()
    return km_train,km_test,kernel
def BuildModel(self, data, labels, options):
    """Parse SVM options from an option string and train a LibSVM model.

    Parameters
    ----------
    data : features object, used for both sides of the kernel.
    labels : training labels.
    options : str
        Option string, e.g. "-k gaussian -c 0.5 -g 0.1 -D 2".

    Returns the trained LibSvm instance; also sets self.C, self.gamma
    and self.kernel as side effects.
    """
    k = re.search("-k ([^\s]+)", options)
    # BUG FIX: the old patterns "-c (\d+)" / "-g (\d+)" matched integers
    # only, silently ignoring fractional values such as "-c 0.5".
    c = re.search(r"-c (\d+(?:\.\d+)?(?:[eE][-+]?\d+)?)", options)
    g = re.search(r"-g (\d+(?:\.\d+)?(?:[eE][-+]?\d+)?)", options)
    self.C = 1.0 if not c else float(c.group(1))
    self.gamma = 0.0 if not g else float(g.group(1))

    if not k or k.group(1) == "gaussian":
        self.kernel = GaussianKernel(data, data, 1)
    elif k.group(1) == "polynomial":
        d = re.search(r'-D (\d+)', options)
        d = 1 if not d else int(d.group(1))
        self.kernel = PolyKernel(data, data, d, True)
    elif k.group(1) == "linear":
        self.kernel = LinearKernel(data, data)
    elif k.group(1) == "hyptan":
        self.kernel = SigmoidKernel(data, data, 2, 1.0, 1.0)
    else:
        # Unknown kernel names fall back to the Gaussian default.
        self.kernel = GaussianKernel(data, data, 1)

    # Create and train the classifier.
    svm = LibSvm(self.C, self.kernel, labels)
    svm.train()
    return svm
def kernel_sparse_linear_modular(fm_train_real=traindat, fm_test_real=testdat, scale=1.1):
    """Linear kernel over sparse real-valued features.

    Uses average-diagonal normalization at the given scale and returns
    (km_train, km_test, kernel).
    """
    from modshogun import SparseRealFeatures
    from modshogun import LinearKernel, AvgDiagKernelNormalizer

    lhs = SparseRealFeatures(fm_train_real)
    rhs = SparseRealFeatures(fm_test_real)

    kern = LinearKernel()
    kern.set_normalizer(AvgDiagKernelNormalizer(scale))

    # train/train matrix first, then the train/test cross matrix.
    kern.init(lhs, lhs)
    km_train = kern.get_kernel_matrix()
    kern.init(lhs, rhs)
    km_test = kern.get_kernel_matrix()

    return km_train, km_test, kern
def RunKPCAShogun():
    """Run Shogun KernelPCA on self.dataset; return elapsed time, or -1 on error.

    Options recognized (popped from `options`): "new_dimensionality",
    "kernel" (required: linear/hyptan/polynomial/gaussian), "degree".

    NOTE(review): reads `self` and `options` from the enclosing scope —
    this looks like a closure inside a benchmark method; confirm at caller.
    """
    totalTimer = Timer()
    try:
        # Load input dataset.
        Log.Info("Loading dataset", self.verbose)
        data = np.genfromtxt(self.dataset, delimiter=',')
        dataFeat = RealFeatures(data.T)

        with totalTimer:
            # Get the new dimensionality, if it is necessary.
            if "new_dimensionality" in options:
                d = int(options.pop("new_dimensionality"))
                if (d > data.shape[1]):
                    Log.Fatal("New dimensionality (" + str(d) + ") cannot be greater " + "than existing dimensionality (" + str(data.shape[1]) + ")!")
                    return -1
            else:
                d = data.shape[1]

            # Get the kernel type and make sure it is valid.
            if "kernel" in options:
                kernel = str(options.pop("kernel"))
            else:
                Log.Fatal("Choose kernel type, valid choices are 'linear'," + " 'hyptan', 'polynomial' and 'gaussian'.")
                return -1

            # BUG FIX: default the polynomial degree to 1. Previously
            # 'degree' was only bound when present in options, so asking
            # for a polynomial kernel without "degree" raised NameError.
            degree = 1
            if "degree" in options:
                degree = int(options.pop("degree"))

            if len(options) > 0:
                Log.Fatal("Unknown parameters: " + str(options))
                raise Exception("unknown parameters")

            if kernel == "polynomial":
                kernel = PolyKernel(dataFeat, dataFeat, degree, True)
            elif kernel == "gaussian":
                kernel = GaussianKernel(dataFeat, dataFeat, 2.0)
            elif kernel == "linear":
                kernel = LinearKernel(dataFeat, dataFeat)
            elif kernel == "hyptan":
                kernel = SigmoidKernel(dataFeat, dataFeat, 2, 1.0, 1.0)
            else:
                # BUG FIX: 'kernel' is a plain string in this branch; the
                # old kernel.group(1) raised AttributeError before
                # Log.Fatal could report the real problem.
                Log.Fatal("Invalid kernel type (" + kernel + "); valid " + "choices are 'linear', 'hyptan', 'polynomial' and 'gaussian'.")
                return -1

            # Perform Kernel Principal Components Analysis.
            model = KernelPCA(kernel)
            model.set_target_dim(d)
            model.init(dataFeat)
            model.apply_to_feature_matrix(dataFeat)
    except Exception as e:
        # Best-effort benchmark harness: any failure is reported as -1.
        return -1

    return totalTimer.ElapsedTime()
def kernel_linear_word_modular(fm_train_word=traindat, fm_test_word=testdat, scale=1.2):
    """Linear kernel on word features with average-diagonal normalization.

    Computes train/train and train/test kernel matrices (discarded, as in
    a smoke test) and returns the kernel object, left initialized on the
    train/test pair.
    """
    from modshogun import LinearKernel, AvgDiagKernelNormalizer
    from modshogun import WordFeatures

    train_feats = WordFeatures(fm_train_word)
    test_feats = WordFeatures(fm_test_word)

    word_kernel = LinearKernel(train_feats, train_feats)
    word_kernel.set_normalizer(AvgDiagKernelNormalizer(scale))

    word_kernel.init(train_feats, train_feats)
    _ = word_kernel.get_kernel_matrix()

    word_kernel.init(train_feats, test_feats)
    _ = word_kernel.get_kernel_matrix()

    return word_kernel
def kernel_linear_modular(train_fname=traindat, test_fname=testdat, scale=1.2):
    """Normalized linear kernel over CSV-loaded real features.

    Returns (km_train, km_test, kernel).
    """
    from modshogun import RealFeatures, LinearKernel, AvgDiagKernelNormalizer, CSVFile

    def _load(fname):
        # CSV file -> dense real-valued feature object.
        return RealFeatures(CSVFile(fname))

    lhs, rhs = _load(train_fname), _load(test_fname)

    kern = LinearKernel()
    kern.set_normalizer(AvgDiagKernelNormalizer(scale))

    kern.init(lhs, lhs)
    km_train = kern.get_kernel_matrix()

    kern.init(lhs, rhs)
    km_test = kern.get_kernel_matrix()

    return km_train, km_test, kern
def kernel_sparse_linear_modular(fm_train_real=traindat, fm_test_real=testdat, scale=1.1):
    """Normalized linear kernel over sparse real features.

    Returns (km_train, km_test, kernel).
    """
    from modshogun import SparseRealFeatures
    from modshogun import LinearKernel, AvgDiagKernelNormalizer

    sparse_train = SparseRealFeatures(fm_train_real)
    sparse_test = SparseRealFeatures(fm_test_real)

    kern = LinearKernel()
    kern.set_normalizer(AvgDiagKernelNormalizer(scale))

    # Compute the train/train and train/test matrices in order.
    matrices = []
    for rhs in (sparse_train, sparse_test):
        kern.init(sparse_train, rhs)
        matrices.append(kern.get_kernel_matrix())

    km_train, km_test = matrices
    return km_train, km_test, kern
def kernel_linear_word_modular(fm_train_word=traindat, fm_test_word=testdat, scale=1.2):
    """Linear kernel over word features, normalized by average diagonal.

    Kernel matrices are computed but not returned; the kernel object
    (last initialized on the train/test pair) is the return value.
    """
    from modshogun import LinearKernel, AvgDiagKernelNormalizer
    from modshogun import WordFeatures

    lhs = WordFeatures(fm_train_word)
    rhs = WordFeatures(fm_test_word)

    kern = LinearKernel(lhs, lhs)
    kern.set_normalizer(AvgDiagKernelNormalizer(scale))

    # Exercise both the self-kernel and the cross-kernel computation.
    for features in (lhs, rhs):
        kern.init(lhs, features)
        kern.get_kernel_matrix()

    return kern
def evaluate4svm(labels, feats, params={'c': 1, 'kernal': 'gauss'}, Nsplit=2): """ Run Cross-validation to evaluate the SVM. Parameters ---------- labels: 2d array Data set labels. feats: array Data set feats. params: dictionary Search scope parameters. Nsplit: int, default = 2 The n for n-fold cross validation. """ c = params.get('c') if params.get('kernal' == 'gauss'): kernal = GaussianKernel() kernal.set_width(80) elif params.get('kernal' == 'sigmoid'): kernal = SigmoidKernel() else: kernal = LinearKernel() split = CrossValidationSplitting(labels, Nsplit) split.build_subsets() accuracy = np.zeros(Nsplit) time_test = np.zeros(accuracy.shape) for i in range(Nsplit): idx_train = split.generate_subset_inverse(i) idx_test = split.generate_subset_indices(i) feats.add_subset(idx_train) labels.add_subset(idx_train) print c, kernal, labels svm = GMNPSVM(c, kernal, labels) _ = svm.train(feats) out = svm.apply(feats_test) evaluator = MulticlassAccuracy() accuracy[i] = evaluator.evaluate(out, labels_test) feats.remove_subset() labels.remove_subset() feats.add_subset(idx_test) labels.add_subset(idx_test) t_start = time.clock() time_test[i] = (time.clock() - t_start) / labels.get_num_labels() feats.remove_subset() labels.remove_subset() return accuracy
def mkl(train_features, train_labels, test_features, test_labels, width=5, C=1.2, epsilon=1e-2, mkl_epsilon=0.001, mkl_norm=2): from modshogun import CombinedKernel, CombinedFeatures from modshogun import GaussianKernel, LinearKernel, PolyKernel from modshogun import MKLMulticlass, MulticlassAccuracy kernel = CombinedKernel() feats_train = CombinedFeatures() feats_test = CombinedFeatures() feats_train.append_feature_obj(train_features) feats_test.append_feature_obj(test_features) subkernel = GaussianKernel(10, width) kernel.append_kernel(subkernel) feats_train.append_feature_obj(train_features) feats_test.append_feature_obj(test_features) subkernel = LinearKernel() kernel.append_kernel(subkernel) feats_train.append_feature_obj(train_features) feats_test.append_feature_obj(test_features) subkernel = PolyKernel(10, 2) kernel.append_kernel(subkernel) kernel.init(feats_train, feats_train) mkl = MKLMulticlass(C, kernel, train_labels) mkl.set_epsilon(epsilon) mkl.set_mkl_epsilon(mkl_epsilon) mkl.set_mkl_norm(mkl_norm) mkl.train() train_output = mkl.apply() kernel.init(feats_train, feats_test) test_output = mkl.apply() evaluator = MulticlassAccuracy() print 'MKL training error is %.4f' % ( (1 - evaluator.evaluate(train_output, train_labels)) * 100) print 'MKL test error is %.4f' % ( (1 - evaluator.evaluate(test_output, test_labels)) * 100)
def mkl_multiclass_modular(fm_train_real, fm_test_real, label_train_multiclass, width, C, epsilon, num_threads, mkl_epsilon, mkl_norm):
    """Multiclass MKL over Gaussian, linear and polynomial subkernels.

    Returns the predicted labels for the test features.
    """
    from modshogun import CombinedFeatures, RealFeatures, MulticlassLabels
    from modshogun import CombinedKernel, GaussianKernel, LinearKernel, PolyKernel
    from modshogun import MKLMulticlass

    combined_kernel = CombinedKernel()
    train_feats = CombinedFeatures()
    test_feats = CombinedFeatures()

    def _add_subkernel(subkernel):
        # Each subkernel operates on its own copy of the real features.
        train_feats.append_feature_obj(RealFeatures(fm_train_real))
        test_feats.append_feature_obj(RealFeatures(fm_test_real))
        combined_kernel.append_kernel(subkernel)

    _add_subkernel(GaussianKernel(10, width))
    _add_subkernel(LinearKernel())
    _add_subkernel(PolyKernel(10, 2))

    combined_kernel.init(train_feats, train_feats)

    labels = MulticlassLabels(label_train_multiclass)

    mkl = MKLMulticlass(C, combined_kernel, labels)
    mkl.set_epsilon(epsilon)
    mkl.parallel.set_num_threads(num_threads)
    mkl.set_mkl_epsilon(mkl_epsilon)
    mkl.set_mkl_norm(mkl_norm)
    mkl.train()

    # Switch the combined kernel to the train/test pair for prediction.
    combined_kernel.init(train_feats, test_feats)
    return mkl.apply().get_labels()
def converter_kernellocallylinearembedding_modular(data_fname, k):
    """Apply kernel LLE (target dimension 1, linear kernel) to CSV data.

    Returns the feature object, or None (after printing a notice) when
    shogun was built without Eigen3 support.
    """
    try:
        from modshogun import RealFeatures, KernelLocallyLinearEmbedding, LinearKernel, CSVFile

        feats = RealFeatures(CSVFile(data_fname))

        embedder = KernelLocallyLinearEmbedding(LinearKernel())
        embedder.set_target_dim(1)
        embedder.set_k(k)
        embedder.apply(feats)

        return feats
    except ImportError:
        print('No Eigen3 available')
def get_kernel_mat(fm_train_dna, fm_test_dna, N, M, pseudo=1e-1, order=1, gap=0, reverse=False):
    """Build Fisher-kernel (FK) matrices from DNA string data.

    Trains an HMM with N states / M emissions on the training strings,
    derives FK features from a positive/negative HMM pair, and returns
    (km_train, km_test, kernel) where kernel is a LinearKernel over the
    FK feature space.

    NOTE(review): relies on module-level imports of StringCharFeatures,
    StringWordFeatures, HMM, FKFeatures, LinearKernel, DNA, BW_NORMAL and
    np — confirm they are imported earlier in the file.
    """
    # train HMM for positive class
    print "hmm training"
    charfeat = StringCharFeatures(fm_train_dna, DNA)
    #charfeat.io.set_loglevel(MSG_DEBUG)
    hmm_train = StringWordFeatures(charfeat.get_alphabet())
    hmm_train.obtain_from_char(charfeat, order - 1, order, gap, reverse)

    pos = HMM(hmm_train, N, M, pseudo)
    pos.baum_welch_viterbi_train(BW_NORMAL)
    # Negative model starts as a copy of the trained positive model.
    neg = HMM(pos)

    print "Kernel training data"
    charfeat = StringCharFeatures(fm_train_dna, DNA)
    wordfeats_train = StringWordFeatures(charfeat.get_alphabet())
    wordfeats_train.obtain_from_char(charfeat, order - 1, order, gap, reverse)

    print "Kernel testing data"
    charfeat = StringCharFeatures(fm_test_dna, DNA)
    wordfeats_test = StringWordFeatures(charfeat.get_alphabet())
    wordfeats_test.obtain_from_char(charfeat, order - 1, order, gap, reverse)

    print "get kernel on training data"
    pos.set_observations(wordfeats_train)
    neg.set_observations(wordfeats_train)
    feats_train = FKFeatures(10, pos, neg)
    feats_train.set_opt_a(-1) #estimate prior
    print 'getting feature matrix'
    v0 = feats_train.get_feature_vector(0)
    v1 = feats_train.get_feature_vector(1)
    print np.dot(v0, v1)
    kernel = LinearKernel(feats_train, feats_train)
    #kernel=PolyKernel(feats_train, feats_train, *kargs)
    km_train = kernel.get_kernel_matrix()
    print km_train.shape, km_train[0, 1]

    print "get kernel on testing data"
    # Clone the HMMs so the training-side observations stay untouched.
    pos_clone = HMM(pos)
    neg_clone = HMM(neg)
    pos_clone.set_observations(wordfeats_test)
    neg_clone.set_observations(wordfeats_test)
    feats_test = FKFeatures(10, pos_clone, neg_clone)
    feats_test.set_a(feats_train.get_a()) #use prior from training data
    kernel.init(feats_train, feats_test)
    km_test = kernel.get_kernel_matrix()
    return km_train, km_test, kernel
def kernel_linear_byte_modular(train_fname=traindat, test_fname=testdat):
    """Linear kernel on byte features loaded from CSV files.

    Computes train/train and train/test kernel matrices (discarded) and
    returns the kernel object, last initialized on the train/test pair.
    """
    from modshogun import LinearKernel, ByteFeatures, CSVFile

    train_feats = ByteFeatures(CSVFile(train_fname))
    test_feats = ByteFeatures(CSVFile(test_fname))

    lin = LinearKernel(train_feats, train_feats)
    lin.get_kernel_matrix()

    lin.init(train_feats, test_feats)
    lin.get_kernel_matrix()

    return lin
def kernel_director_linear_modular(fm_train_real=traindat, fm_test_real=testdat, scale=1.2):
    """Compare a built-in LinearKernel against a Python DirectorKernel.

    Both kernels use average-diagonal normalization at `scale`; returns
    (km_train, dkm_train), the train/train matrices from the native and
    the director (Python-callback) kernel respectively.

    NOTE(review): RealFeatures and numpy are assumed to be imported at
    module level — confirm. Returns None early when shogun was built
    without SWIG director support.
    """
    try:
        from modshogun import DirectorKernel
    except ImportError:
        print("recompile shogun with --enable-swig-directors")
        return

    class DirectorLinearKernel(DirectorKernel):
        # Python-side linear kernel: dot product of the two feature vectors.
        def __init__(self):
            DirectorKernel.__init__(self, True)

        def kernel_function(self, idx_a, idx_b):
            seq1 = self.get_lhs().get_feature_vector(idx_a)
            seq2 = self.get_rhs().get_feature_vector(idx_b)
            return numpy.dot(seq1, seq2)

    from modshogun import LinearKernel, AvgDiagKernelNormalizer
    from modshogun import Time

    feats_train = RealFeatures(fm_train_real)
    #feats_train.io.set_loglevel(MSG_DEBUG)
    # Single-threaded so the native/director timing comparison is fair.
    feats_train.parallel.set_num_threads(1)
    feats_test = RealFeatures(fm_test_real)

    kernel = LinearKernel()
    kernel.set_normalizer(AvgDiagKernelNormalizer(scale))
    kernel.init(feats_train, feats_train)

    dkernel = DirectorLinearKernel()
    dkernel.set_normalizer(AvgDiagKernelNormalizer(scale))
    dkernel.init(feats_train, feats_train)

    #print "km_train"
    t = Time()
    km_train = kernel.get_kernel_matrix()
    #t1=t.cur_time_diff(True)

    #print "dkm_train"
    t = Time()
    dkm_train = dkernel.get_kernel_matrix()
    #t2=t.cur_time_diff(True)

    #print "km_train", km_train
    #print "dkm_train", dkm_train

    return km_train, dkm_train
def BuildModel(self, data, labels, options):
    """Build and train a LibSVM classifier from an options dict.

    Recognized keys (popped from `options`): 'kernel' (required), 'c',
    'gamma', 'degree'. Leftover keys raise an exception. Sets self.C,
    self.gamma and self.kernel; returns the trained LibSvm instance.
    """
    if "kernel" in options:
        k = str(options.pop("kernel"))
    else:
        Log.Fatal("Required parameter 'kernel' not specified!")
        raise Exception("missing parameter")

    # BUG FIX: default C and gamma so the LibSvm construction below cannot
    # hit an unset attribute when 'c'/'gamma' are omitted (previously
    # self.C raised AttributeError unless it had been set elsewhere).
    # Defaults match the option-string variant of BuildModel in this file.
    self.C = 1.0
    self.gamma = 0.0
    if "c" in options:
        self.C = float(options.pop("c"))
    if "gamma" in options:
        self.gamma = float(options.pop("gamma"))

    if k == "gaussian":
        self.kernel = GaussianKernel(data, data, 1)
    elif k == "polynomial":
        # Polynomial degree defaults to 1 when not supplied.
        d = int(options.pop("degree")) if "degree" in options else 1
        self.kernel = PolyKernel(data, data, d, True)
    elif k == "linear":
        self.kernel = LinearKernel(data, data)
    elif k == "hyptan":
        self.kernel = SigmoidKernel(data, data, 2, 1.0, 1.0)
    else:
        # Unrecognized kernel names fall back to the Gaussian default.
        self.kernel = GaussianKernel(data, data, 1)

    if len(options) > 0:
        Log.Fatal("Unknown parameters: " + str(options))
        raise Exception("unknown parameters")

    # Create and train the classifier.
    svm = LibSvm(self.C, self.kernel, labels)
    svm.train()
    return svm
def RunKPCAShogun(q):
    """Run Shogun KernelPCA as a benchmark subprocess target.

    Parses `-d` (target dimensionality) and `-k` (kernel type, with
    optional `-D` polynomial degree) from the enclosing-scope `options`
    string, fits KernelPCA on `self.dataset`, and reports the elapsed
    time — or -1 on any failure — both via the queue `q` and as the
    return value.

    NOTE(review): `self` and `options` come from the enclosing scope —
    this looks like a closure inside a benchmark method; confirm at caller.
    """
    totalTimer = Timer()
    try:
        # Load input dataset.
        Log.Info("Loading dataset", self.verbose)
        data = np.genfromtxt(self.dataset, delimiter=',')
        # Transposed so samples become columns, as Shogun expects.
        dataFeat = RealFeatures(data.T)

        with totalTimer:
            # Get the new dimensionality, if it is necessary.
            dimension = re.search('-d (\d+)', options)
            if not dimension:
                d = data.shape[1]
            else:
                d = int(dimension.group(1))
                if (d > data.shape[1]):
                    Log.Fatal("New dimensionality (" + str(d) + ") cannot be greater " + "than existing dimensionality (" + str(data.shape[1]) + ")!")
                    q.put(-1)
                    return -1

            # Get the kernel type and make sure it is valid.
            kernel = re.search("-k ([^\s]+)", options)
            if not kernel:
                Log.Fatal(
                    "Choose kernel type, valid choices are 'linear'," +
                    " 'hyptan', 'polynomial' and 'gaussian'.")
                q.put(-1)
                return -1
            elif kernel.group(1) == "polynomial":
                # Polynomial degree defaults to 1 when -D is absent.
                degree = re.search('-D (\d+)', options)
                degree = 1 if not degree else int(degree.group(1))
                kernel = PolyKernel(dataFeat, dataFeat, degree, True)
            elif kernel.group(1) == "gaussian":
                kernel = GaussianKernel(dataFeat, dataFeat, 2.0)
            elif kernel.group(1) == "linear":
                kernel = LinearKernel(dataFeat, dataFeat)
            elif kernel.group(1) == "hyptan":
                kernel = SigmoidKernel(dataFeat, dataFeat, 2, 1.0, 1.0)
            else:
                # 'kernel' is still the regex match object in this branch,
                # so group(1) is the unrecognized kernel name.
                Log.Fatal(
                    "Invalid kernel type (" + kernel.group(1) + "); valid " +
                    "choices are 'linear', 'hyptan', 'polynomial' and 'gaussian'."
                )
                q.put(-1)
                return -1

            # Perform Kernel Principal Components Analysis.
            model = KernelPCA(kernel)
            model.set_target_dim(d)
            model.init(dataFeat)
            model.apply_to_feature_matrix(dataFeat)
    except Exception as e:
        # Any failure is reported to the parent process as -1.
        q.put(-1)
        return -1

    time = totalTimer.ElapsedTime()
    q.put(time)
    return time
# NOTE(review): flat script fragment — num_feats, num_vec, size_cache, scale,
# LibSVM, RealFeatures, Labels and random (presumably numpy.random) are
# assumed to be defined/imported earlier in the file; confirm.
C=0.017
epsilon=1e-5
tube_epsilon=1e-2

# Configure one SVM and reuse it across the three iterations below.
svm=LibSVM()
svm.set_C(C, C)  # same regularization weight for both classes
svm.set_epsilon(epsilon)
svm.set_tube_epsilon(tube_epsilon)

for i in range(3):
    # Fresh random train/test data and random +/-1 labels each round.
    data_train=random.rand(num_feats, num_vec)
    data_test=random.rand(num_feats, num_vec)
    feats_train=RealFeatures(data_train)
    feats_test=RealFeatures(data_test)
    labels=Labels(random.rand(num_vec).round()*2-1)

    svm.set_kernel(LinearKernel(size_cache, scale))
    svm.set_labels(labels)

    kernel=svm.get_kernel()
    print("kernel cache size: %s" % (kernel.get_cache_size()))

    # NOTE(review): the kernel is initialized on (test, test) BEFORE
    # training, then re-initialized on (train, test) for prediction —
    # presumably this deliberately exercises kernel-cache handling rather
    # than producing a meaningful model; confirm intent.
    kernel.init(feats_test, feats_test)
    svm.train()

    kernel.init(feats_train, feats_test)
    print(svm.apply().get_labels())

    #kernel.remove_lhs_and_rhs()

#import pdb
#pdb.set_trace()
# The views and conclusions contained in the software and documentation are those # of the authors and should not be interpreted as representing official policies, # either expressed or implied, of the Shogun Development Team. import argparse import logging from contextlib import contextmanager, closing from modshogun import (LibSVMFile, GaussianKernel, MulticlassLibSVM, SerializableHdf5File, LinearKernel) from utils import get_features_and_labels, track_execution LOGGER = logging.getLogger(__file__) KERNELS = { 'linear': lambda feats, width: LinearKernel(feats, feats), 'gaussian': lambda feats, width: GaussianKernel(feats, feats, width), } def parse_arguments(): parser = argparse.ArgumentParser(description="Train a multiclass SVM \ stored in libsvm format") parser.add_argument('--dataset', required=True, type=str, help='Path to training dataset in LibSVM format.') parser.add_argument('--capacity', default=1.0, type=float, help='SVM capacity parameter') parser.add_argument('--width', default=2.1, type=float, help='Width of the Gaussian Kernel to approximate') parser.add_argument('--epsilon', default=0.01, type=float, help='SVMOcas epsilon parameter') parser.add_argument('--kernel', type=str, default='linear',