def classifier_svmlight_modular(fm_train_dna=traindat, fm_test_dna=testdat,
                                label_train_dna=label_traindat, C=1.2,
                                epsilon=1e-5, num_threads=1):
    """Train SVMLight on DNA strings with a weighted-degree string kernel.

    Args:
        fm_train_dna: training strings handed to
            ``StringCharFeatures.set_features``.
        fm_test_dna: test strings handed to
            ``StringCharFeatures.set_features``.
        label_train_dna: training labels, wrapped in ``Labels``.
        C: first argument of the ``SVMLight`` constructor.
        epsilon: value passed to ``svm.set_epsilon``.
        num_threads: value passed to ``svm.parallel.set_num_threads``.

    Returns:
        The kernel object after it has been re-initialised with the
        (train, test) features, or ``None`` when ``SVMLight`` cannot be
        imported.
    """
    from shogun.Features import StringCharFeatures, Labels, DNA
    from shogun.Kernel import WeightedDegreeStringKernel
    try:
        from shogun.Classifier import SVMLight
    except ImportError:
        # Call form of print: identical output on Python 2 for a single
        # argument, and no syntax error on Python 3.
        print('No support for SVMLight available.')
        return

    feats_train = StringCharFeatures(DNA)
    feats_train.set_features(fm_train_dna)
    feats_test = StringCharFeatures(DNA)
    feats_test.set_features(fm_test_dna)

    # The kernel degree is fixed for this example.
    degree = 20
    kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)
    labels = Labels(label_train_dna)

    svm = SVMLight(C, kernel, labels)
    svm.set_epsilon(epsilon)
    svm.parallel.set_num_threads(num_threads)
    svm.train()

    # Switch the kernel to (train, test) before applying the trained machine.
    kernel.init(feats_train, feats_test)
    # The predicted labels are computed but not returned; only the kernel is.
    svm.apply().get_labels()
    return kernel
def classifier_svmlight_linear_term_modular(fm_train_dna=traindna,
                                            fm_test_dna=testdna,
                                            label_train_dna=label_traindna,
                                            degree=3, C=10, epsilon=1e-5,
                                            num_threads=1):
    """Train SVMLight with a custom linear term and predict on test strings.

    Args:
        fm_train_dna: training strings for ``StringCharFeatures``.
        fm_test_dna: test strings for ``StringCharFeatures``.
        label_train_dna: training labels, wrapped in ``Labels``.
        degree: degree passed to ``WeightedDegreeStringKernel``.
        C: first argument of the ``SVMLight`` constructor.
        epsilon: value passed to ``set_epsilon``.
        num_threads: value passed to ``parallel.set_num_threads``.

    Returns:
        Tuple ``(labels, kernel)``: the labels obtained from
        ``apply().get_labels()`` and the kernel object.
    """
    from shogun.Features import DNA, Labels, StringCharFeatures
    from shogun.Kernel import WeightedDegreeStringKernel
    from shogun.Classifier import SVMLight

    train_features = StringCharFeatures(DNA)
    train_features.set_features(fm_train_dna)
    test_features = StringCharFeatures(DNA)
    test_features.set_features(fm_test_dna)

    wd_kernel = WeightedDegreeStringKernel(train_features, train_features,
                                           degree)
    train_labels = Labels(label_train_dna)

    classifier = SVMLight(C, wd_kernel, train_labels)
    classifier.set_qpsize(3)

    # Fixed ten-entry vector; its negation is installed as the linear term.
    coefficients = numpy.array([1, 2, 3, 4, 5, 6, 7, 8, 7, 6],
                               dtype=numpy.double)
    classifier.set_linear_term(-coefficients)

    classifier.set_epsilon(epsilon)
    classifier.parallel.set_num_threads(num_threads)
    classifier.train()

    # Re-initialise the kernel on (train, test) before predicting.
    wd_kernel.init(train_features, test_features)
    predictions = classifier.apply().get_labels()
    return predictions, wd_kernel
def classifier_svmlight_linear_term_modular(fm_train_dna=traindna,
                                            fm_test_dna=testdna,
                                            label_train_dna=label_traindna,
                                            degree=3, C=10, epsilon=1e-5,
                                            num_threads=1):
    """Fit SVMLight with an explicit linear term, then label the test data.

    Args:
        fm_train_dna: training strings for ``StringCharFeatures``.
        fm_test_dna: test strings for ``StringCharFeatures``.
        label_train_dna: training labels, wrapped in ``BinaryLabels``.
        degree: degree passed to ``WeightedDegreeStringKernel``.
        C: first argument of the ``SVMLight`` constructor.
        epsilon: value passed to ``set_epsilon``.
        num_threads: value passed to ``parallel.set_num_threads``.

    Returns:
        Tuple ``(labels, kernel)``: the labels obtained from
        ``apply().get_labels()`` and the kernel object.
    """
    from shogun.Features import BinaryLabels, DNA, StringCharFeatures
    from shogun.Kernel import WeightedDegreeStringKernel
    from shogun.Classifier import SVMLight

    dna_train = StringCharFeatures(DNA)
    dna_train.set_features(fm_train_dna)
    dna_test = StringCharFeatures(DNA)
    dna_test.set_features(fm_test_dna)

    string_kernel = WeightedDegreeStringKernel(dna_train, dna_train, degree)
    targets = BinaryLabels(label_train_dna)

    machine = SVMLight(C, string_kernel, targets)
    machine.set_qpsize(3)

    # The linear term is the negation of this fixed ten-entry vector.
    weights = numpy.array([1, 2, 3, 4, 5, 6, 7, 8, 7, 6], dtype=numpy.double)
    machine.set_linear_term(-weights)

    machine.set_epsilon(epsilon)
    machine.parallel.set_num_threads(num_threads)
    machine.train()

    # Point the kernel at (train, test) before asking for predictions.
    string_kernel.init(dna_train, dna_test)
    out = machine.apply().get_labels()
    return out, string_kernel
def classifier_svmlight_batch_linadd_modular(fm_train_dna, fm_test_dna,
                                             label_train_dna, degree, C,
                                             epsilon, num_threads):
    """Train SVMLight and predict with and without batch computation.

    Args:
        fm_train_dna: training strings for ``StringCharFeatures``.
        fm_test_dna: test strings for ``StringCharFeatures``.
        label_train_dna: training labels, wrapped in ``Labels``.
        degree: degree passed to ``WeightedDegreeStringKernel``. Earlier
            revisions overwrote this with a hard-coded 20; the caller's
            value is now honoured.
        C: first argument of the ``SVMLight`` constructor.
        epsilon: value passed to ``svm.set_epsilon``.
        num_threads: value passed to ``svm.parallel.set_num_threads``.

    Returns:
        Tuple ``(labels, svm)``: the labels from the second (batch-enabled)
        ``apply()`` call and the trained machine, or ``None`` when
        ``SVMLight`` cannot be imported.
    """
    from shogun.Features import StringCharFeatures, Labels, DNA
    from shogun.Kernel import WeightedDegreeStringKernel, MSG_DEBUG
    try:
        from shogun.Classifier import SVMLight
    except ImportError:
        # Call form of print: same output on Python 2, valid on Python 3.
        print('No support for SVMLight available.')
        return

    feats_train = StringCharFeatures(DNA)
    # For verbose output: feats_train.io.set_loglevel(MSG_DEBUG)
    feats_train.set_features(fm_train_dna)
    feats_test = StringCharFeatures(DNA)
    feats_test.set_features(fm_test_dna)

    kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)
    labels = Labels(label_train_dna)

    svm = SVMLight(C, kernel, labels)
    svm.set_epsilon(epsilon)
    svm.parallel.set_num_threads(num_threads)
    svm.train()

    # Re-initialise the kernel on (train, test) before predicting.
    kernel.init(feats_train, feats_test)

    # First pass with batch computation and linadd switched off; its result
    # is intentionally discarded, as in the original example.
    svm.set_batch_computation_enabled(False)
    svm.set_linadd_enabled(False)
    svm.apply().get_labels()

    # Second pass with batch computation switched on; this one is returned.
    svm.set_batch_computation_enabled(True)
    labels = svm.apply().get_labels()
    return labels, svm
def classifier_svmlight_batch_linadd_modular(fm_train_dna, fm_test_dna,
                                             label_train_dna, degree, C,
                                             epsilon, num_threads):
    """Train SVMLight and predict with and without batch computation.

    Args:
        fm_train_dna: training strings for ``StringCharFeatures``.
        fm_test_dna: test strings for ``StringCharFeatures``.
        label_train_dna: training labels, wrapped in ``BinaryLabels``.
        degree: degree passed to ``WeightedDegreeStringKernel``. Earlier
            revisions overwrote this with a hard-coded 20; the caller's
            value is now honoured.
        C: first argument of the ``SVMLight`` constructor.
        epsilon: value passed to ``svm.set_epsilon``.
        num_threads: value passed to ``svm.parallel.set_num_threads``.

    Returns:
        Tuple ``(labels, svm)``: the labels from the second (batch-enabled)
        ``apply()`` call and the trained machine, or ``None`` when
        ``SVMLight`` cannot be imported.
    """
    from shogun.Features import StringCharFeatures, BinaryLabels, DNA
    from shogun.Kernel import WeightedDegreeStringKernel, MSG_DEBUG
    try:
        from shogun.Classifier import SVMLight
    except ImportError:
        print('No support for SVMLight available.')
        return

    feats_train = StringCharFeatures(DNA)
    # For verbose output: feats_train.io.set_loglevel(MSG_DEBUG)
    feats_train.set_features(fm_train_dna)
    feats_test = StringCharFeatures(DNA)
    feats_test.set_features(fm_test_dna)

    kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)
    labels = BinaryLabels(label_train_dna)

    svm = SVMLight(C, kernel, labels)
    svm.set_epsilon(epsilon)
    svm.parallel.set_num_threads(num_threads)
    svm.train()

    # Re-initialise the kernel on (train, test) before predicting.
    kernel.init(feats_train, feats_test)

    # First pass with batch computation and linadd switched off; its result
    # is intentionally discarded, as in the original example.
    svm.set_batch_computation_enabled(False)
    svm.set_linadd_enabled(False)
    svm.apply().get_labels()

    # Second pass with batch computation switched on; this one is returned.
    svm.set_batch_computation_enabled(True)
    labels = svm.apply().get_labels()
    return labels, svm
class ShogunPredictor(object):
    """Basic single-task promoter model using string kernels."""

    def __init__(self, degree=4, shifts=32, kernel_cache=10000, cost=1.0):
        """Store the model hyper-parameters; nothing is trained here.

        Args:
            degree: stored as ``degree``, ``degree_wdk`` and
                ``degree_spectrum``.
            shifts: forwarded to ``create_promoter_kernel`` by ``train``.
            kernel_cache: forwarded to ``create_promoter_kernel`` by ``train``.
            cost: first argument of the ``SVMLight`` constructor in ``train``.
        """
        # TODO: clean up degree
        self.degree = degree
        self.degree_wdk = degree
        self.degree_spectrum = degree
        self.shifts = shifts
        self.kernel_cache = kernel_cache
        self.cost = cost
        self.center_offset = 50
        self.center_pos = 1200
        # Note: 10e-2 equals 0.1.
        self.epsilon = 10e-2
        self.num_threads = 4

    def train(self, data, labels):
        """Build the promoter kernel, train SVMLight and return ``self``.

        Args:
            data: examples handed to ``create_promoter_kernel``.
            labels: labels handed to ``create_labels``.

        Returns:
            This predictor, with the trained machine stored in ``self.svm``.
        """
        kernel = create_promoter_kernel(data, self.center_offset,
                                        self.center_pos, self.degree_wdk,
                                        self.degree_spectrum, self.shifts,
                                        kernel_cache=self.kernel_cache)

        # Call form of print: same output on Python 2, valid on Python 3.
        print("len(labels) = %i" % (len(labels)))
        lab = create_labels(labels)
        self.svm = SVMLight(self.cost, kernel, lab)

        # show debugging output
        self.svm.io.enable_progress()
        self.svm.io.set_loglevel(MSG_DEBUG)

        # optimization settings
        self.svm.parallel.set_num_threads(self.num_threads)
        self.svm.set_epsilon(self.epsilon)

        self.svm.train()
        return self

    def predict(self, data):
        """Return ``get_values()`` of the trained machine applied to ``data``.

        Args:
            data: examples handed to ``create_promoter_features``.
        """
        feat = create_promoter_features(data, self.center_offset,
                                        self.center_pos)
        return self.svm.apply(feat).get_values()
def serialization_svmlight_modular(num, dist, width, C):
    """Pickle a trained SVMLight to disk, reload it and compare predictions.

    Args:
        num: number of points drawn per class (``randn(2, num)``).
        dist: offset subtracted from / added to the two point clouds.
        width: width passed to ``GaussianKernel``.
        C: first argument of the ``SVMLight`` constructor.

    Returns:
        ``True`` when every prediction of the reloaded machine is within
        0.000001 of the original machine's prediction.

    Raises:
        AssertionError: if any pair of predictions differs by more than that.
    """
    from shogun.Features import RealFeatures, BinaryLabels
    from shogun.Kernel import GaussianKernel
    from shogun.Classifier import SVMLight
    from numpy import concatenate, ones
    from numpy.random import randn
    import sys
    import bz2
    try:
        import cPickle as pickle
    except ImportError:
        import pickle as pickle

    def save(filename, myobj):
        """
        save object to file using pickle

        @param filename: name of destination file
        @type filename: str
        @param myobj: object to save (has to be pickleable)
        @type myobj: obj
        """
        try:
            f = bz2.BZ2File(filename, "wb")
        except IOError as details:
            sys.stderr.write("File " + filename + " cannot be written\n")
            # write() needs a string, not the exception object itself.
            sys.stderr.write(str(details))
            return

        # Close the file even when pickling fails.
        try:
            pickle.dump(myobj, f, protocol=2)
        finally:
            f.close()

    def load(filename):
        """
        Load from filename using pickle

        @param filename: name of file to load from
        @type filename: str
        """
        try:
            f = bz2.BZ2File(filename, "rb")
        except IOError as details:
            sys.stderr.write("File " + filename + " cannot be read\n")
            sys.stderr.write(str(details))
            return

        # NOTE: unpickling is only safe on trusted files; the only file read
        # here is the one this function has just written.
        try:
            return pickle.load(f)
        finally:
            f.close()

    ##################################################
    # set up toy data and svm
    traindata_real = concatenate((randn(2, num) - dist,
                                  randn(2, num) + dist), axis=1)
    testdata_real = concatenate((randn(2, num) - dist,
                                 randn(2, num) + dist), axis=1)

    trainlab = concatenate((-ones(num), ones(num)))
    testlab = concatenate((-ones(num), ones(num)))

    feats_train = RealFeatures(traindata_real)
    feats_test = RealFeatures(testdata_real)
    kernel = GaussianKernel(feats_train, feats_train, width)
    # For verbose output, import MSG_DEBUG from shogun.IO and call
    # kernel.io.set_loglevel(MSG_DEBUG) / svm.io.set_loglevel(MSG_DEBUG).

    labels = BinaryLabels(trainlab)

    svm = SVMLight(C, kernel, labels)
    svm.train()

    ##################################################
    # serialize to file
    fn = "serialized_svm.bz2"
    save(fn, svm)

    ##################################################
    # unserialize and sanity check
    svm2 = load(fn)
    svm2.train()

    out = svm.apply(feats_test).get_labels()
    out2 = svm2.apply(feats_test).get_labels()

    # assert outputs are close; the tolerance applies to the absolute
    # difference (the comparison used to sit inside abs()).
    for expected, restored in zip(out, out2):
        assert abs(expected - restored) < 0.000001

    return True
def serialization_string_kernels_modular(n_data, num_shifts, size):
    """
    serialize svm with string kernels

    Trains SVMLight on a combined kernel (one weighted-degree position
    kernel plus two weighted comm-word string kernels), saves the machine
    with ``save``, reloads it with ``load`` and checks that both machines
    give the same predictions on the test features.

    Args:
        n_data: argument passed to ``generate_random_data`` for both the
            training and the test set.
        num_shifts: value every entry of the shift vector is set to.
        size: first argument of each kernel constructor.

    Returns:
        Tuple ``(out, out2)``: predictions of the original and of the
        reloaded machine.

    Raises:
        AssertionError: if any pair of predictions differs by 0.000001 or
            more.
    """

    ##################################################
    # set up toy data and svm
    train_xt, train_lt = generate_random_data(n_data)
    test_xt, test_lt = generate_random_data(n_data)

    feats_train = construct_features(train_xt)
    feats_test = construct_features(test_xt)

    # One shift entry per position of the first training example.
    max_len = len(train_xt[0])
    kernel_wdk = WeightedDegreePositionStringKernel(size, 5)
    shifts_vector = numpy.ones(max_len, dtype=numpy.int32) * num_shifts
    kernel_wdk.set_shifts(shifts_vector)

    ########
    # set up spectrum
    use_sign = False
    kernel_spec_1 = WeightedCommWordStringKernel(size, use_sign)
    kernel_spec_2 = WeightedCommWordStringKernel(size, use_sign)

    ########
    # combined kernel
    kernel = CombinedKernel()
    kernel.append_kernel(kernel_wdk)
    kernel.append_kernel(kernel_spec_1)
    kernel.append_kernel(kernel_spec_2)

    # init kernel
    labels = BinaryLabels(train_lt)

    svm = SVMLight(1.0, kernel, labels)
    # svm.io.set_loglevel(MSG_DEBUG)
    svm.train(feats_train)

    ##################################################
    # serialize to file
    fn = "serialized_svm.bz2"
    save(fn, svm)

    ##################################################
    # unserialize and sanity check
    svm2 = load(fn)

    out = svm.apply(feats_test).get_labels()
    out2 = svm2.apply(feats_test).get_labels()

    # assert outputs are close; the tolerance applies to the absolute
    # difference (the comparison used to sit inside abs()).
    for expected, restored in zip(out, out2):
        assert abs(expected - restored) < 0.000001

    return out, out2
def serialization_string_kernels_modular(n_data, num_shifts, size):
    """
    serialize svm with string kernels

    Trains SVMLight on a combined kernel (one weighted-degree position
    kernel plus two weighted comm-word string kernels), saves the machine
    with ``save``, reloads it with ``load`` and checks that both machines
    give the same predictions on the test features.

    Args:
        n_data: argument passed to ``generate_random_data`` for both the
            training and the test set.
        num_shifts: value every entry of the shift vector is set to.
        size: first argument of each kernel constructor.

    Returns:
        Tuple ``(out, out2)``: predictions of the original and of the
        reloaded machine.

    Raises:
        AssertionError: if any pair of predictions differs by 0.000001 or
            more.
    """

    ##################################################
    # set up toy data and svm
    train_xt, train_lt = generate_random_data(n_data)
    test_xt, test_lt = generate_random_data(n_data)

    feats_train = construct_features(train_xt)
    feats_test = construct_features(test_xt)

    # One shift entry per position of the first training example.
    max_len = len(train_xt[0])
    kernel_wdk = WeightedDegreePositionStringKernel(size, 5)
    shifts_vector = numpy.ones(max_len, dtype=numpy.int32) * num_shifts
    kernel_wdk.set_shifts(shifts_vector)

    ########
    # set up spectrum
    use_sign = False
    kernel_spec_1 = WeightedCommWordStringKernel(size, use_sign)
    kernel_spec_2 = WeightedCommWordStringKernel(size, use_sign)

    ########
    # combined kernel
    kernel = CombinedKernel()
    kernel.append_kernel(kernel_wdk)
    kernel.append_kernel(kernel_spec_1)
    kernel.append_kernel(kernel_spec_2)

    # init kernel
    labels = BinaryLabels(train_lt)

    svm = SVMLight(1.0, kernel, labels)
    # svm.io.set_loglevel(MSG_DEBUG)
    svm.train(feats_train)

    ##################################################
    # serialize to file
    fn = "serialized_svm.bz2"
    save(fn, svm)

    ##################################################
    # unserialize and sanity check
    svm2 = load(fn)

    out = svm.apply(feats_test).get_labels()
    out2 = svm2.apply(feats_test).get_labels()

    # assert outputs are close; the tolerance applies to the absolute
    # difference (the comparison used to sit inside abs()).
    for expected, restored in zip(out, out2):
        assert abs(expected - restored) < 0.000001

    return out, out2
def serialization_svmlight_modular(num, dist, width, C):
    """Pickle a trained SVMLight to disk, reload it and compare predictions.

    Progress messages and the objective values before and after the round
    trip are printed along the way.

    Args:
        num: number of points drawn per class (``randn(2, num)``).
        dist: offset subtracted from / added to the two point clouds.
        width: width passed to ``GaussianKernel``.
        C: first argument of the ``SVMLight`` constructor.

    Returns:
        ``True`` when every prediction of the reloaded machine is within
        0.000001 of the original machine's prediction.

    Raises:
        AssertionError: if any pair of predictions differs by more than that.
    """
    from shogun.Features import RealFeatures, BinaryLabels
    from shogun.Kernel import GaussianKernel
    from shogun.Classifier import SVMLight
    from numpy import concatenate, ones
    from numpy.random import randn
    import sys
    import bz2
    try:
        import cPickle as pickle
    except ImportError:
        import pickle as pickle

    def save(filename, myobj):
        """
        save object to file using pickle

        @param filename: name of destination file
        @type filename: str
        @param myobj: object to save (has to be pickleable)
        @type myobj: obj
        """
        try:
            f = bz2.BZ2File(filename, 'wb')
        except IOError as details:
            sys.stderr.write('File ' + filename + ' cannot be written\n')
            # write() needs a string, not the exception object itself.
            sys.stderr.write(str(details))
            return

        # Close the file even when pickling fails.
        try:
            pickle.dump(myobj, f, protocol=2)
        finally:
            f.close()

    def load(filename):
        """
        Load from filename using pickle

        @param filename: name of file to load from
        @type filename: str
        """
        try:
            f = bz2.BZ2File(filename, 'rb')
        except IOError as details:
            sys.stderr.write('File ' + filename + ' cannot be read\n')
            sys.stderr.write(str(details))
            return

        # NOTE: unpickling is only safe on trusted files; the only file read
        # here is the one this function has just written.
        try:
            return pickle.load(f)
        finally:
            f.close()

    ##################################################
    # set up toy data and svm
    traindata_real = concatenate((randn(2, num) - dist,
                                  randn(2, num) + dist), axis=1)
    testdata_real = concatenate((randn(2, num) - dist,
                                 randn(2, num) + dist), axis=1)

    trainlab = concatenate((-ones(num), ones(num)))
    testlab = concatenate((-ones(num), ones(num)))

    feats_train = RealFeatures(traindata_real)
    feats_test = RealFeatures(testdata_real)
    kernel = GaussianKernel(feats_train, feats_train, width)
    # For verbose output, import MSG_DEBUG from shogun.IO and call
    # kernel.io.set_loglevel(MSG_DEBUG) / svm.io.set_loglevel(MSG_DEBUG).

    labels = BinaryLabels(trainlab)

    svm = SVMLight(C, kernel, labels)
    svm.train()

    ##################################################
    # serialize to file
    fn = "serialized_svm.bz2"
    print("serializing SVM to file", fn)
    save(fn, svm)

    ##################################################
    # unserialize and sanity check
    print("unserializing SVM")
    svm2 = load(fn)

    print("comparing objectives")
    svm2.train()

    print("objective before serialization:", svm.get_objective())
    print("objective after serialization:", svm2.get_objective())

    print("comparing predictions")
    out = svm.apply(feats_test).get_labels()
    out2 = svm2.apply(feats_test).get_labels()

    # assert outputs are close; the tolerance applies to the absolute
    # difference (the comparison used to sit inside abs()).
    for expected, restored in zip(out, out2):
        assert abs(expected - restored) < 0.000001

    print("all checks passed.")
    return True
class ShogunPredictor(object):
    """
    basic promoter model using string kernels
    """

    def __init__(self, param):
        """Keep a reference to the parameter mapping.

        ``train`` and ``predict`` read the keys ``"shifts"``, ``"degree"``,
        ``"center_offset"``, ``"center_pos"``, ``"kernel_cache"`` and
        ``"cost"`` from it.
        """
        self.param = param

    def train(self, data, labels):
        """
        model training

        Builds a combined kernel (one centre kernel plus two spectrum
        kernels), trains SVMLight on it, stores the machine in ``self.svm``
        and returns ``self``.
        """
        # Integer division keeps the cache share an int on Python 3 as well
        # (assumes kernel_cache is an integer -- TODO confirm).
        cache_share = self.param["kernel_cache"] // 3

        # centered WDK/WDK-shift
        if self.param["shifts"] == 0:
            kernel_center = WeightedDegreeStringKernel(self.param["degree"])
        else:
            kernel_center = WeightedDegreePositionStringKernel(
                10, self.param["degree"])
            shifts_vector = numpy.ones(
                self.param["center_offset"] * 2,
                dtype=numpy.int32) * self.param["shifts"]
            kernel_center.set_shifts(shifts_vector)

        # NOTE(review): applied to both centre-kernel variants; confirm this
        # matches the layout of the original file.
        kernel_center.set_cache_size(cache_share)

        # border spectrum kernels
        size = cache_share
        use_sign = False
        kernel_left = WeightedCommWordStringKernel(size, use_sign)
        kernel_right = WeightedCommWordStringKernel(size, use_sign)

        # assemble combined kernel
        kernel = CombinedKernel()
        kernel.append_kernel(kernel_center)
        kernel.append_kernel(kernel_left)
        kernel.append_kernel(kernel_right)

        # building features
        feat = create_features(data, self.param["center_offset"],
                               self.param["center_pos"])

        # init combined kernel
        kernel.init(feat, feat)

        # Call form of print: same output on Python 2, valid on Python 3.
        print("len(labels) = %i" % (len(labels)))
        lab = BinaryLabels(numpy.double(labels))
        self.svm = SVMLight(self.param["cost"], kernel, lab)

        # show debugging output
        self.svm.io.enable_progress()
        self.svm.io.set_loglevel(MSG_DEBUG)

        # optimization settings
        num_threads = 2
        self.svm.parallel.set_num_threads(num_threads)
        # Note: 10e-8 equals 1e-7.
        self.svm.set_epsilon(10e-8)

        self.svm.train()
        return self

    def predict(self, data):
        """
        model prediction

        Returns ``get_values()`` of the trained machine applied to features
        built from ``data``.
        """
        feat = create_features(data, self.param["center_offset"],
                               self.param["center_pos"])
        out = self.svm.apply(feat).get_values()
        return out
class ShogunPredictor(object):
    """
    basic promoter model using string kernels
    """

    def __init__(self, param):
        """Keep a reference to the parameter mapping.

        ``train`` and ``predict`` read the keys ``"shifts"``, ``"degree"``,
        ``"center_offset"``, ``"center_pos"``, ``"kernel_cache"`` and
        ``"cost"`` from it.
        """
        self.param = param

    def train(self, data, labels):
        """
        model training

        Builds a combined kernel (one centre kernel plus two spectrum
        kernels), trains SVMLight on it, stores the machine in ``self.svm``
        and returns ``self``.
        """
        # Integer division keeps the cache share an int on Python 3 as well
        # (assumes kernel_cache is an integer -- TODO confirm).
        cache_share = self.param["kernel_cache"] // 3

        # centered WDK/WDK-shift
        if self.param["shifts"] == 0:
            kernel_center = WeightedDegreeStringKernel(self.param["degree"])
        else:
            kernel_center = WeightedDegreePositionStringKernel(
                10, self.param["degree"])
            shifts_vector = numpy.ones(
                self.param["center_offset"] * 2,
                dtype=numpy.int32) * self.param["shifts"]
            kernel_center.set_shifts(shifts_vector)

        # NOTE(review): applied to both centre-kernel variants; confirm this
        # matches the layout of the original file.
        kernel_center.set_cache_size(cache_share)

        # border spectrum kernels
        size = cache_share
        use_sign = False
        kernel_left = WeightedCommWordStringKernel(size, use_sign)
        kernel_right = WeightedCommWordStringKernel(size, use_sign)

        # assemble combined kernel
        kernel = CombinedKernel()
        kernel.append_kernel(kernel_center)
        kernel.append_kernel(kernel_left)
        kernel.append_kernel(kernel_right)

        # building features
        feat = create_features(data, self.param["center_offset"],
                               self.param["center_pos"])

        # init combined kernel
        kernel.init(feat, feat)

        # Call form of print: same output on Python 2, valid on Python 3.
        print("len(labels) = %i" % (len(labels)))
        lab = BinaryLabels(numpy.double(labels))
        self.svm = SVMLight(self.param["cost"], kernel, lab)

        # show debugging output
        self.svm.io.enable_progress()
        self.svm.io.set_loglevel(MSG_DEBUG)

        # optimization settings
        num_threads = 2
        self.svm.parallel.set_num_threads(num_threads)
        # Note: 10e-8 equals 1e-7.
        self.svm.set_epsilon(10e-8)

        self.svm.train()
        return self

    def predict(self, data):
        """
        model prediction

        Returns ``get_values()`` of the trained machine applied to features
        built from ``data``.
        """
        feat = create_features(data, self.param["center_offset"],
                               self.param["center_pos"])
        out = self.svm.apply(feat).get_values()
        return out