def svm_light ():
	print 'SVMLight'

	from shogun.Features import StringCharFeatures, Labels, DNA
	from shogun.Kernel import WeightedDegreeStringKernel
	try:
		from shogun.Classifier import SVMLight
	except ImportError:
		print 'No support for SVMLight available.'
		return

	feats_train=StringCharFeatures(DNA)
	feats_train.set_features(fm_train_dna)
	feats_test=StringCharFeatures(DNA)
	feats_test.set_features(fm_test_dna)
	degree=20

	kernel=WeightedDegreeStringKernel(feats_train, feats_train, degree)

	C=1.2
	epsilon=1e-5
	num_threads=1
	labels=Labels(label_train_dna)

	svm=SVMLight(C, kernel, labels)
	svm.set_epsilon(epsilon)
	svm.parallel.set_num_threads(num_threads)
	svm.train()

	kernel.init(feats_train, feats_test)
	svm.classify().get_labels()
def classifier_svmlight_modular (fm_train_dna=traindat,fm_test_dna=testdat,label_train_dna=label_traindat,C=1.2,epsilon=1e-5,num_threads=1):
	from shogun.Features import StringCharFeatures, Labels, DNA
	from shogun.Kernel import WeightedDegreeStringKernel
	try:
		from shogun.Classifier import SVMLight
	except ImportError:
		print 'No support for SVMLight available.'
		return

	feats_train=StringCharFeatures(DNA)
	feats_train.set_features(fm_train_dna)
	feats_test=StringCharFeatures(DNA)
	feats_test.set_features(fm_test_dna)
	degree=20

	kernel=WeightedDegreeStringKernel(feats_train, feats_train, degree)

	labels=Labels(label_train_dna)

	svm=SVMLight(C, kernel, labels)
	svm.set_epsilon(epsilon)
	svm.parallel.set_num_threads(num_threads)
	svm.train()

	kernel.init(feats_train, feats_test)
	svm.apply().get_labels()
	return kernel
def classifier_svmlight_linear_term_modular(fm_train_dna=traindna,fm_test_dna=testdna, \
                                                label_train_dna=label_traindna,degree=3, \
                                                C=10,epsilon=1e-5,num_threads=1):
    """SVMLight with a custom linear term in the QP; returns (test outputs, kernel)."""
    from shogun.Features import StringCharFeatures, BinaryLabels, DNA
    from shogun.Kernel import WeightedDegreeStringKernel
    from shogun.Classifier import SVMLight

    # DNA string features for training and test data.
    feats_train = StringCharFeatures(DNA)
    feats_train.set_features(fm_train_dna)
    feats_test = StringCharFeatures(DNA)
    feats_test.set_features(fm_test_dna)

    kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)
    labels = BinaryLabels(label_train_dna)

    svm = SVMLight(C, kernel, labels)
    svm.set_qpsize(3)
    # Non-uniform (negated) linear term of the QP; one entry per training example.
    svm.set_linear_term(-numpy.array([1,2,3,4,5,6,7,8,7,6], dtype=numpy.double))
    svm.set_epsilon(epsilon)
    svm.parallel.set_num_threads(num_threads)
    svm.train()

    # Classify the test features.
    kernel.init(feats_train, feats_test)
    out = svm.apply().get_labels()
    return out, kernel
# ---- Esempio n. 4 (score: 0) — example separator from the source collection ----
def create_kernel(examples, param):
    """
    kernel factory

    @param examples: list/array of examples
    @type examples: list
    @param param: parameter object
    @type param: Parameter

    @return subclass of shogun Kernel object
    @rtype: Kernel
    """

    # first create feature object of correct type
    feat = create_features(examples, param)

    if param.kernel == "WeightedDegreeStringKernel":
        kernel = WeightedDegreeStringKernel(feat, feat, param.wdk_degree)
        kernel.set_cache_size(200)

    elif param.kernel == "LinearKernel":
        kernel = LinearKernel(feat, feat)

    elif param.kernel == "PolyKernel":
        kernel = PolyKernel(feat, feat, 1, False)

    elif param.kernel == "GaussianKernel":
        kernel = GaussianKernel(feat, feat, param.sigma)

    elif param.kernel == "WeightedDegreeRBFKernel":
        size_cache = 200
        nof_properties = 20
        sigma = param.base_similarity
        kernel = WeightedDegreeRBFKernel(feat, feat, sigma, param.wdk_degree, nof_properties, size_cache)

    elif param.kernel == "Promoter":
        kernel = create_promoter_kernel(examples, param.flags)

    else:
        # include the offending name for easier debugging (matches the
        # sibling factory create_empty_kernel)
        raise Exception("Unknown kernel type:" + str(param.kernel))

    # BUG FIX: the original guarded only the "cache_size" lookup with
    # hasattr(param, "flags") and then dereferenced param.flags
    # unconditionally for "debug", crashing for parameter objects
    # without a flags attribute.  Both lookups are now guarded.
    if hasattr(param, "flags"):
        if "cache_size" in param.flags:
            kernel.set_cache_size(param.flags["cache_size"])
        if "debug" in param.flags:
            kernel.io.set_loglevel(shogun.Kernel.MSG_DEBUG)

    return kernel
def kernel_weighted_degree_string_modular (fm_train_dna=traindat,fm_test_dna=testdat,degree=20):
	"""Compute train/test kernel matrices for a WDK with explicit degree weights."""
	from shogun.Features import StringCharFeatures, DNA
	from shogun.Kernel import WeightedDegreeStringKernel, MSG_DEBUG
	from numpy import arange,double

	feats_train=StringCharFeatures(fm_train_dna, DNA)
	#feats_train.io.set_loglevel(MSG_DEBUG)
	feats_test=StringCharFeatures(fm_test_dna, DNA)

	kernel=WeightedDegreeStringKernel(feats_train, feats_train, degree)

	# Linearly decaying weights 1..degree (reversed) normalized to sum to one.
	raw=arange(1,degree+1,dtype=double)
	kernel.set_wd_weights(raw[::-1]/sum(raw))
	#from numpy import ones,float64,int32
	#kernel.set_position_weights(ones(len(fm_train_dna[0]), dtype=float64))

	# Train-vs-train matrix first, then re-init for train-vs-test.
	km_train=kernel.get_kernel_matrix()
	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()

	# this is how to serialize the kernel:
	#import pickle
	#pickle.dump(kernel, file('kernel_obj.dump','w'), protocol=2)
	#k=pickle.load(file('kernel_obj.dump','r'))

	return km_train, km_test, kernel
def weighted_degree_string ():
	print 'WeightedDegreeString'
	from shogun.Features import StringCharFeatures, DNA
	from shogun.Kernel import WeightedDegreeStringKernel

	feats_train=StringCharFeatures(fm_train_dna, DNA)
	feats_test=StringCharFeatures(fm_test_dna, DNA)
	degree=20

	kernel=WeightedDegreeStringKernel(feats_train, feats_train, degree)

	#weights=arange(1,degree+1,dtype=double)[::-1]/ \
	#	sum(arange(1,degree+1,dtype=double))
	#kernel.set_wd_weights(weights)

	km_train=kernel.get_kernel_matrix()
	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
# ---- Esempio n. 7 (score: 0) — example separator from the source collection ----
def create_empty_promoter_kernel(param):
    """
    creates an uninitialized promoter kernel

    @param param: dict with keys "shifts", "degree", "center_offset",
                  "kernel_cache"
    """

    # center kernel: plain WDK, or WDK-shift when a shift width is requested
    if param["shifts"] == 0:
        kernel_center = WeightedDegreeStringKernel(param["degree"])
    else:
        kernel_center = WeightedDegreePositionStringKernel(10, param["degree"])
        shifts = numpy.ones(param["center_offset"]*2, dtype=numpy.int32)*param["shifts"]
        kernel_center.set_shifts(shifts)

    # split the kernel cache evenly between the three sub-kernels
    third = param["kernel_cache"]/3
    kernel_center.set_cache_size(third)

    # left/right border spectrum kernels
    use_sign = False
    kernel_left = WeightedCommWordStringKernel(third, use_sign)
    kernel_right = WeightedCommWordStringKernel(third, use_sign)

    # assemble combined kernel
    kernel = CombinedKernel()
    kernel.append_kernel(kernel_center)
    kernel.append_kernel(kernel_left)
    kernel.append_kernel(kernel_right)

    return kernel
def do_batch_linadd ():
	print 'SVMlight batch'

	from shogun.Features import StringCharFeatures, Labels, DNA
	from shogun.Kernel import WeightedDegreeStringKernel
	try:
		from shogun.Classifier import SVMLight
	except ImportError:
		print 'No support for SVMLight available.'
		return

	feats_train=StringCharFeatures(DNA)
	feats_train.set_features(fm_train_dna)
	feats_test=StringCharFeatures(DNA)
	feats_test.set_features(fm_test_dna)
	degree=20

	kernel=WeightedDegreeStringKernel(feats_train, feats_train, degree)

	C=1
	epsilon=1e-5
	num_threads=2
	labels=Labels(label_train_dna)

	svm=SVMLight(C, kernel, labels)
	svm.set_epsilon(epsilon)
	svm.parallel.set_num_threads(num_threads)
	svm.train()

	kernel.init(feats_train, feats_test)

	#print 'SVMLight Objective: %f num_sv: %d' % \
	#	(svm.get_objective(), svm.get_num_support_vectors())
	svm.set_batch_computation_enabled(False)
	svm.set_linadd_enabled(False)
	svm.classify().get_labels()

	svm.set_batch_computation_enabled(True)
	svm.classify().get_labels()
def classifier_svmlight_batch_linadd_modular(fm_train_dna, fm_test_dna,
		label_train_dna, degree, C, epsilon, num_threads):
	"""Train SVMLight with a weighted-degree kernel and classify the test data
	twice (batch computation off, then on); returns (labels, svm).

	fm_train_dna/fm_test_dna: lists of DNA strings
	label_train_dna: array of +/-1 training labels
	degree: weighted-degree kernel order
	C, epsilon, num_threads: SVM hyper-parameters
	"""
	from shogun.Features import StringCharFeatures, BinaryLabels, DNA
	from shogun.Kernel import WeightedDegreeStringKernel, MSG_DEBUG
	try:
		from shogun.Classifier import SVMLight
	except ImportError:
		print('No support for SVMLight available.')
		return

	feats_train=StringCharFeatures(DNA)
	#feats_train.io.set_loglevel(MSG_DEBUG)
	feats_train.set_features(fm_train_dna)
	feats_test=StringCharFeatures(DNA)
	feats_test.set_features(fm_test_dna)

	# BUG FIX: the original re-assigned degree=20 right before this call,
	# silently discarding the caller-supplied 'degree' argument; the
	# parameter is now honored.
	kernel=WeightedDegreeStringKernel(feats_train, feats_train, degree)

	labels=BinaryLabels(label_train_dna)

	svm=SVMLight(C, kernel, labels)
	svm.set_epsilon(epsilon)
	svm.parallel.set_num_threads(num_threads)
	svm.train()

	kernel.init(feats_train, feats_test)

	#print('SVMLight Objective: %f num_sv: %d' \
	#	% (svm.get_objective(), svm.get_num_support_vectors()))
	svm.set_batch_computation_enabled(False)
	svm.set_linadd_enabled(False)
	svm.apply().get_labels()

	svm.set_batch_computation_enabled(True)
	labels = svm.apply().get_labels()
	return labels, svm
# ---- Esempio n. 10 (score: 0) — example separator from the source collection ----
def create_empty_kernel(param):
    """
    kernel factory

    @param param: parameter object
    @type param: Parameter

    @return subclass of shogun Kernel object
    @rtype: Kernel
    """

    if param.kernel == "WeightedDegreeStringKernel":
        kernel = WeightedDegreeStringKernel(param.wdk_degree)

    elif param.kernel == "LinearKernel":
        kernel = LinearKernel()

    elif param.kernel == "PolyKernel":
        kernel = PolyKernel(10, 1, False)

    elif param.kernel == "GaussianKernel":
        kernel = GaussianKernel(10, param.sigma)

    elif param.kernel == "WeightedDegreeRBFKernel":
        size_cache = 50
        nof_properties = 5  # 20
        sigma = param.transform
        kernel = WeightedDegreeRBFKernel(size_cache, sigma, param.wdk_degree, nof_properties)

    else:
        raise Exception("Unknown kernel type:" + param.kernel)

    # BUG FIX: the original guarded only the "cache_size" lookup with
    # hasattr(param, "flags"); the subsequent "debug" lookup dereferenced
    # param.flags unconditionally and crashed for parameter objects
    # without a flags attribute.  Both lookups are now guarded.
    if hasattr(param, "flags"):
        if "cache_size" in param.flags:
            kernel.set_cache_size(param.flags["cache_size"])
        if "debug" in param.flags:
            kernel.io.set_loglevel(shogun.Kernel.MSG_DEBUG)

    return kernel
# ---- Esempio n. 11 (score: 0) — example separator from the source collection ----
    def train(self, data, labels):
        """
        model training 
        """

        # centered WDK/WDK-shift
        if self.param["shifts"] == 0:
            kernel_center = WeightedDegreeStringKernel(self.param["degree"])
        else:
            kernel_center = WeightedDegreePositionStringKernel(10, self.param["degree"])
            shifts_vector = numpy.ones(self.param["center_offset"]*2, dtype=numpy.int32)*self.param["shifts"]
            kernel_center.set_shifts(shifts_vector)

        kernel_center.set_cache_size(self.param["kernel_cache"]/3)

        # border spetrum kernels
        size = self.param["kernel_cache"]/3
        use_sign = False
        kernel_left = WeightedCommWordStringKernel(size, use_sign)
        kernel_right = WeightedCommWordStringKernel(size, use_sign)
        
        # assemble combined kernel
        kernel = CombinedKernel()
        kernel.append_kernel(kernel_center)
        kernel.append_kernel(kernel_left)
        kernel.append_kernel(kernel_right)

        ## building features 
        feat = create_features(data, self.param["center_offset"], self.param["center_pos"])
        
        # init combined kernel
        kernel.init(feat, feat)

        print "len(labels) = %i" % (len(labels))
        lab = BinaryLabels(numpy.double(labels))
        self.svm = SVMLight(self.param["cost"], kernel, lab)

        # show debugging output
        self.svm.io.enable_progress()
        self.svm.io.set_loglevel(MSG_DEBUG)

        # optimization settings
        num_threads = 2
        self.svm.parallel.set_num_threads(num_threads)
        self.svm.set_epsilon(10e-8)

        self.svm.train()

        return self
# ---- Esempio n. 12 (score: 0) — example separator from the source collection ----
class svm_splice_model(object):
    """Wraps a pre-trained splice-site SVM built on a weighted-degree string kernel.

    NOTE: uses Python 2-only tuple parameter unpacking in __init__'s signature.
    """
    def __init__(self, order, traindat, alphas, b, (window_left, offset,
                                                    window_right), consensus):

        # Training strings as DNA char features; kept on self for later use.
        f = StringCharFeatures(traindat, DNA)
        wd_kernel = WeightedDegreeStringKernel(f, f, int(order))
        wd_kernel.io.set_target_to_stderr()

        # Rebuild the SVM directly from the stored alphas and bias; every
        # training example is listed as a support-vector index.
        self.svm = SVM(wd_kernel, alphas,
                       numpy.arange(len(alphas), dtype=numpy.int32), b)
        self.svm.io.set_target_to_stderr()
        self.svm.parallel.set_num_threads(self.svm.parallel.get_num_cpus())
        # Disable linadd/batch shortcuts so outputs are computed directly.
        self.svm.set_linadd_enabled(False)
        self.svm.set_batch_computation_enabled(False)

        # Scoring window around the splice site.
        self.window_left = int(window_left)
        self.window_right = int(window_right)

        self.consensus = consensus
        self.wd_kernel = wd_kernel
        self.traindat = f
        self.offset = offset
# Build string features and labels from the module-level examples/labels.
feat = StringCharFeatures(DNA)
feat.set_features(examples)
lab = Labels(numpy.array(labels))

N = subset_size



##################################################################
# internal modification
##################################################################

# Task indicator: first half of the examples belong to task 0, second to task 1.
task_vector = [0]*(N/2)
task_vector.extend([1]*(N/2))

base_wdk = WeightedDegreeStringKernel(feat, feat, 1)


normalizer = MultitaskKernelNormalizer(task_vector)

#wdk.set_task_vector(task_vector) #, task_vector)

# Within-task similarity 4.0, between-task similarity 1.0.
for i in xrange(2):
    for j in xrange(2):

        if i==j:
            normalizer.set_task_similarity(i,j, 4.0)
        else:
            normalizer.set_task_similarity(i,j, 1.0)

#		      'CGACGGCCGGGGGGGCGTA']  # truncated fragment: tail of a DNA test-sequence list lost when the example was pasted
# Test labels for the ten example sequences: five negatives, five positives.
label_test_dna=numpy.array(5*[-1.0] + 5*[1.0])


print 'SVMLight'

from shogun.Features import StringCharFeatures, Labels, DNA
from shogun.Kernel import WeightedDegreeStringKernel
from shogun.Classifier import SVMLight

# DNA string features for the train and test sets.
feats_train=StringCharFeatures(DNA)
feats_train.set_features(fm_train_dna)
feats_test=StringCharFeatures(DNA)
feats_test.set_features(fm_test_dna)

kernel=WeightedDegreeStringKernel(feats_train, feats_train, degree)

C=10
epsilon=1e-5
num_threads=1
labels=Labels(label_train_dna)

svm=SVMLight(C, kernel, labels)
svm.set_qpsize(3)
# Custom (negated) linear term for the QP; one entry per training example.
svm.set_linear_term(-numpy.array([1,2,3,4,5,6,7,8,7,6], dtype=numpy.double));
svm.set_epsilon(epsilon)
svm.parallel.set_num_threads(num_threads)
svm.train()

# Classify the test features.
kernel.init(feats_train, feats_test)
out = svm.classify().get_labels()
# Force one more example into the positive class.
labels[19] = 1

feat = StringCharFeatures(DNA)
feat.set_features(examples)
lab = Labels(numpy.array(labels))

N = subset_size

##################################################################
# internal modification
##################################################################

# Task indicator: first half of the examples -> task 0, second half -> task 1.
task_vector = [0] * (N / 2)
task_vector.extend([1] * (N / 2))

base_wdk = WeightedDegreeStringKernel(feat, feat, 1)

normalizer = MultitaskKernelNormalizer(task_vector)

#wdk.set_task_vector(task_vector) #, task_vector)

# Within-task similarity 4.0, between-task similarity 1.0.
for i in xrange(2):
    for j in xrange(2):

        if i == j:
            normalizer.set_task_similarity(i, j, 4.0)
        else:
            normalizer.set_task_similarity(i, j, 1.0)

base_wdk.set_normalizer(normalizer)
# Take the first 2000 examples/labels from the thaliana dataset.
d = dat["thaliana"]
subset_size = 2000

examples = [i.example for i in d[0:subset_size]]
labels = [i.label for i in d[0:subset_size]]

# Force a handful of examples into the positive class.
labels[2] = 1
labels[12] = 1
labels[15] = 1
labels[8] = 1
labels[19] = 1


# Order-1 weighted-degree kernel over the example strings.
feat = StringCharFeatures(DNA)
feat.set_features(examples)
wdk = WeightedDegreeStringKernel(feat, feat, 1)
lab = Labels(numpy.array(labels))


# Train a plain SVMLight (C=1) and report its objective.
svm = SVMLight(1, wdk, lab)
svm.train()

svm.set_shrinking_enabled(False)

print "simple svm", svm.get_objective()

print "len(examples)", len(examples)

print "##############"

# ---- Esempio n. 17 (score: 0) — example separator from the source collection ----
def solver_mtk_shogun(C, all_xt, all_lt, task_indicator, M, L, eps,
                      target_obj):
    """
    implementation using multitask kernel

    @param C: regularization constant
    @param all_xt: training examples (DNA strings or real-valued vectors)
    @param all_lt: training labels
    @param task_indicator: task id for each example
    @param M: task-similarity matrix (drives the kernel normalizer)
    @param L: matrix of the same shape as M (only sanity-checked here)
    @param eps: SVM optimization epsilon
    @param target_obj: objective value handed to the solver

    @return: (objectives, train_times) as recorded during training
    """

    xt = numpy.array(all_xt)
    lt = numpy.array(all_lt)
    tt = numpy.array(task_indicator, dtype=numpy.int32)
    tsm = numpy.array(M)

    print "task_sim:", tsm

    num_tasks = L.shape[0]

    # sanity checks
    assert len(xt) == len(lt) == len(tt)
    assert M.shape == L.shape
    assert num_tasks == len(set(tt))

    # set up shogun objects: string features + WDK for string data,
    # dense real features + linear kernel otherwise
    if type(xt[0]) == numpy.string_:
        feat = StringCharFeatures(DNA)
        xt = [str(a) for a in xt]
        feat.set_features(xt)
        base_kernel = WeightedDegreeStringKernel(feat, feat, 8)
    else:
        feat = RealFeatures(xt.T)
        base_kernel = LinearKernel(feat, feat)

    lab = Labels(lt)

    # set up normalizer with pairwise task similarities taken from M
    normalizer = MultitaskKernelNormalizer(tt.tolist())

    for i in xrange(num_tasks):
        for j in xrange(num_tasks):
            normalizer.set_task_similarity(i, j, M[i, j])

    print "num of unique tasks: ", normalizer.get_num_unique_tasks(
        task_indicator)

    # set up kernel
    base_kernel.set_cache_size(2000)
    base_kernel.set_normalizer(normalizer)
    base_kernel.init_normalizer()

    # set up svm
    svm = SVMLight()  #LibSVM()

    svm.set_epsilon(eps)
    #print "reducing num threads to one"
    #svm.parallel.set_num_threads(1)
    #print "using one thread"

    # how often do we like to compute objective etc
    svm.set_record_interval(0)
    svm.set_target_objective(target_obj)

    # disable linadd/batch shortcuts so objectives can be recorded directly
    svm.set_linadd_enabled(False)
    svm.set_batch_computation_enabled(False)
    svm.io.set_loglevel(MSG_DEBUG)
    #SET THREADS TO 1

    svm.set_C(C, C)
    svm.set_bias_enabled(False)

    # prepare for training
    svm.set_labels(lab)
    svm.set_kernel(base_kernel)

    # train svm
    svm.train()

    train_times = svm.get_training_times()
    # solver reports dual objectives with flipped sign; undo that here
    objectives = [-obj for obj in svm.get_dual_objectives()]

    if False:  # disabled: primal-objective cross-check

        # get model parameters
        sv_idx = svm.get_support_vectors()
        sparse_alphas = svm.get_alphas()

        assert len(sv_idx) == len(sparse_alphas)

        # compute dense alpha (remove label)
        alphas = numpy.zeros(len(xt))
        for id_sparse, id_dense in enumerate(sv_idx):
            alphas[id_dense] = sparse_alphas[id_sparse] * lt[id_dense]

        # print alphas
        W = alphas_to_w(alphas, xt, lt, task_indicator, M)
        primal_obj = compute_primal_objective(
            W.reshape(W.shape[0] * W.shape[1]), C, all_xt, all_lt,
            task_indicator, L)
        objectives.append(primal_obj)
        train_times.append(train_times[-1] + 100)

    return objectives, train_times
# Inspect the loaded examples.
print "len(examples)", len(examples)
print "string length", len(examples[0])

# Force a handful of labels into the positive class.
labels[2] = 1
labels[12] = 1
labels[15] = 1
labels[8] = 1
labels[19] = 1

feat = StringCharFeatures(DNA)
feat.set_features(examples)

# Round-trip the feature object through serialization.
helper.save("/tmp/feat", feat)
feat2 = helper.load("/tmp/feat")

wdk = WeightedDegreeStringKernel(feat, feat, 1)

# Round-trip the kernel through serialization as well.
print "PY: saving kernel"
wdk.io.set_loglevel(MSG_DEBUG)
helper.save("/tmp/awesome", wdk)
#print wdk.toString()
#print "PY: kernel saved, loading kernel"
wdk2 = helper.load("/tmp/awesome")
print "PY: kernel loaded"

#wdk2 = WeightedDegreeStringKernel(feat2, feat2, 1)

lab = Labels(numpy.array(labels))

# Train an SVM on the deserialized kernel to verify it still works.
svm = SVMLight(1, wdk2, lab)
#print "saving SVM"
# ---- Esempio n. 19 (score: 0) — example separator from the source collection ----
#############################################
#    compute pre-svm
#############################################

examples_presvm = [i.example for i in d[0:subset_size]]
labels_presvm = [i.label for i in d[0:subset_size]]

# Force a handful of labels into the positive class.
labels_presvm[2] = 1
labels_presvm[12] = 1
labels_presvm[15] = 1
labels_presvm[8] = 1
labels_presvm[19] = 1

# Order-1 weighted-degree kernel over the pre-SVM example strings.
feat_presvm = StringCharFeatures(DNA)
feat_presvm.set_features(examples_presvm)
wdk_presvm = WeightedDegreeStringKernel(feat_presvm, feat_presvm, 1)
lab_presvm = Labels(numpy.array(labels_presvm))

# Train the same problem with SVMLight and LibSVM ...
presvm = SVMLight(1, wdk_presvm, lab_presvm)
presvm.train()

presvm2 = LibSVM(1, wdk_presvm, lab_presvm)
presvm2.train()

print "svmlight", presvm.get_objective()
print "libsvm", presvm2.get_objective()

# ... and check both solvers reach (nearly) the same objective.
assert (abs(presvm.get_objective() - presvm2.get_objective()) <= 0.001)

print "simple svm", presvm.get_objective()