def svm_train_classify(trimat_tr, seqids_tr, labels_tr, fullmat_te, options, icv):

	sys.stderr.write('..kernel building..\n')

	kernel=CustomKernel()
	kernel.set_triangle_kernel_matrix_from_triangle(trimat_tr)

	sys.stderr.write('..svm learning..\n')
	svm = svm_learn(kernel, labels_tr, options.svmC, options.epsilon, options.weight)

	global g_svm_bias
	g_svm_bias = svm.get_bias()

	if (options.alphaprefix != "") and (len(seqids_tr) > 0):
		save_cv_taining_result(svm, options, seqids_tr, icv)

	sys.stderr.write('..svm classifying..\n')
	kernel.set_full_kernel_matrix_from_full(fullmat_te)

	###################################################
	#for testing
	#alphas = svm.get_alphas()
	#svids  = svm.get_support_vectors()

	#for j in xrange(len(preds)):
	#	p = svm.get_bias()
	#	for i in xrange(len(alphas)):
	#		p += (alphas[i]*fullmat_te[int(svids[i]),j])

	#	print preds[j], p

	#sys.exit(0)
	###################################################

	return svm.classify().get_labels().tolist()
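# A worked version of the commented-out check above. This is a sketch, not
# part of the original pipeline: it recomputes each test decision value by
# hand from the learned alphas and support-vector indices, so it can be
# compared against svm.classify(). Assumes fullmat_te is the
# (n_train x n_test) matrix passed to svm_train_classify, and that shogun's
# get_alphas() returns the signed alphas (alpha_i * y_i).
def manual_decision_values(svm, fullmat_te, n_test):
	alphas = svm.get_alphas()
	svids = svm.get_support_vectors()
	decisions = []
	for j in xrange(n_test):
		p = svm.get_bias()
		for i in xrange(len(alphas)):
			p += alphas[i] * fullmat_te[int(svids[i]), j]
		decisions.append(p)
	return decisions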
Example #2
def runSVM(options, args):
    """
	set global variable
	"""
    if (options.ktype == 1 or options.ktype == 5
            or options.ktype == 6) and (options.kmerlen <= 8):
        global g_kmers
        global g_rcmap

        g_kmers = generate_kmers(options.kmerlen)
        g_rcmap = generate_rcmap_table(options.kmerlen, g_kmers)

    print 'Read genome sequence.\n'
    genome = preprocessGenome(args[0], options.subs)

    print 'Get sliding window.\n'
    seqs, sids = sliding_window(genome, options.window, options.step)
    print 'Get features and kernel.\n'

    if options.ktype == 1:
        get_features = get_spectrum_features
        get_kernel = get_spectrum_kernel
    elif options.ktype == 2:
        get_features = get_weighted_spectrum_features
        get_kernel = get_weighted_spectrum_kernel
    elif options.ktype == 3:
        get_features = get_char_features
        get_kernel = get_wd_kernel
    elif options.ktype == 5:
        get_features = get_char_features
        get_kernel = get_gaussian_kernel
    elif options.ktype == 6:
        get_features = get_char_features
        get_kernel = get_linear_kernel
    elif options.ktype != 4:
        # guard: anything else would leave get_features/get_kernel unbound
        raise ValueError('unsupported ktype: %d' % options.ktype)

    if options.ktype == 4:
        print 'This is a custom kernel.\n'
        npos = len(sids)
        nneg = 0
        fullmat = get_full_matrix(options.matrixFile, npos, nneg)
        kernel = CustomKernel()
        kernel.set_full_kernel_matrix_from_full(fullmat)
    else:
        feats = get_features(seqs, options)
        kernel = get_kernel(feats, options)

    print '\nSVM training.\n'
    svm = svm_learn(kernel, options)

    processSVMOutput(svm, sids, options)
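# A minimal sketch of driving runSVM, not from the original source. The
# attribute names below are only the ones this snippet itself reads (ktype,
# kmerlen, subs, window, step, matrixFile); svm_learn and the feature
# builders will consume further options not shown here. All values are
# illustrative assumptions.
class FakeOptions(object):
    ktype = 1         # spectrum kernel
    kmerlen = 6       # <= 8, so the k-mer/reverse-complement tables are built
    subs = None       # passed through to preprocessGenome
    window = 1000     # sliding-window width
    step = 100        # sliding-window step
    matrixFile = ''   # only read when ktype == 4

#runSVM(FakeOptions(), ['genome.fa'])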
def kernel_combined_custom_poly_modular(fm_train_real=traindat,
                                        fm_test_real=testdat,
                                        fm_label_twoclass=label_traindat):
    from shogun.Features import CombinedFeatures, RealFeatures, Labels
    from shogun.Kernel import CombinedKernel, PolyKernel, CustomKernel
    from shogun.Classifier import LibSVM

    kernel = CombinedKernel()
    feats_train = CombinedFeatures()

    tfeats = RealFeatures(fm_train_real)
    tkernel = PolyKernel(10, 3)
    tkernel.init(tfeats, tfeats)
    K = tkernel.get_kernel_matrix()
    kernel.append_kernel(CustomKernel(K))

    subkfeats_train = RealFeatures(fm_train_real)
    feats_train.append_feature_obj(subkfeats_train)
    subkernel = PolyKernel(10, 2)
    kernel.append_kernel(subkernel)

    kernel.init(feats_train, feats_train)

    labels = Labels(fm_label_twoclass)
    svm = LibSVM(1.0, kernel, labels)
    svm.train()

    kernel = CombinedKernel()
    feats_pred = CombinedFeatures()

    pfeats = RealFeatures(fm_test_real)
    tkernel = PolyKernel(10, 3)
    tkernel.init(tfeats, pfeats)
    K = tkernel.get_kernel_matrix()
    kernel.append_kernel(CustomKernel(K))

    subkfeats_test = RealFeatures(fm_test_real)
    feats_pred.append_feature_obj(subkfeats_test)
    subkernel = PolyKernel(10, 2)
    kernel.append_kernel(subkernel)
    kernel.init(feats_train, feats_pred)

    svm.set_kernel(kernel)
    svm.classify()
    # note: at this point `kernel` is the combined train x test kernel
    km_test = kernel.get_kernel_matrix()
    return km_test, kernel
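# The asymmetry in the prediction half above is the crux of reusing a
# CustomKernel at test time: the custom subkernel must hold the
# (n_train x n_test) cross matrix, so the poly kernel is initialised with
# train features on the left and test features on the right, and the plain
# poly subkernel gets the same orientation via
# kernel.init(feats_train, feats_pred). A hedged usage sketch with random
# stand-ins (shapes are illustrative assumptions; labels must be +/-1 doubles):
#
#   from numpy import sign
#   from numpy.random import rand
#   km, k = kernel_combined_custom_poly_modular(rand(5, 20), rand(5, 10),
#                                               sign(rand(20) - 0.5))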
def classifier_custom_kernel_modular (C=1,dim=7):
	from shogun.Features import RealFeatures, BinaryLabels
	from shogun.Kernel import CustomKernel
	from shogun.Classifier import LibSVM

	from numpy import diag,ones,sign
	from numpy.random import rand,seed

	seed((C,dim))

	lab=sign(2*rand(dim) - 1)
	data=rand(dim, dim)
	symdata=data*data.T + diag(ones(dim))

	kernel=CustomKernel()
	# use the symmetrised, diagonally loaded matrix as the kernel
	kernel.set_full_kernel_matrix_from_full(symdata)
	labels=BinaryLabels(lab)
	svm=LibSVM(C, kernel, labels)
	svm.train()
	predictions=svm.apply()
	out=predictions.get_labels()
	return svm,out
Example #5
def classifier_custom_kernel_modular(C=1, dim=7):
    from shogun.Features import RealFeatures, Labels
    from shogun.Kernel import CustomKernel
    from shogun.Classifier import LibSVM

    from numpy import diag, ones, sign
    from numpy.random import rand, seed

    seed((C, dim))

    lab = sign(2 * rand(dim) - 1)
    data = rand(dim, dim)
    symdata = data * data.T + diag(ones(dim))

    kernel = CustomKernel()
    # use the symmetrised, diagonally loaded matrix as the kernel
    kernel.set_full_kernel_matrix_from_full(symdata)
    labels = Labels(lab)
    svm = LibSVM(C, kernel, labels)
    svm.train()
    predictions = svm.apply()
    out = predictions.get_labels()
    return svm, out
def custom ():
	print 'Custom'
	from numpy.random import rand
	from numpy import array
	from shogun.Features import RealFeatures
	from shogun.Kernel import CustomKernel

	dim=7
	data=rand(dim, dim)
	feats=RealFeatures(data)
	symdata=data+data.T
	lowertriangle=array([symdata[(x,y)] for x in xrange(symdata.shape[1])
		for y in xrange(symdata.shape[0]) if y<=x])

	kernel=CustomKernel()

	kernel.set_triangle_kernel_matrix_from_triangle(lowertriangle)
	km_triangletriangle=kernel.get_kernel_matrix()

	kernel.set_triangle_kernel_matrix_from_full(symdata)
	km_fulltriangle=kernel.get_kernel_matrix()

	kernel.set_full_kernel_matrix_from_full(data)
	km_fullfull=kernel.get_kernel_matrix()
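# A standalone sanity check in the same spirit as custom() above. A sketch
# only; it assumes CustomKernel's internal float32 storage stays within
# allclose's default tolerance. Both triangle setters should reproduce the
# same symmetric matrix.
def check_triangle_roundtrip(dim=5):
	from numpy import array, allclose
	from numpy.random import rand
	from shogun.Kernel import CustomKernel

	sym = rand(dim, dim)
	sym = sym + sym.T
	lower = array([sym[(x, y)] for x in xrange(dim)
		for y in xrange(dim) if y <= x])

	kernel = CustomKernel()
	kernel.set_triangle_kernel_matrix_from_triangle(lower)
	km_a = kernel.get_kernel_matrix()
	kernel.set_triangle_kernel_matrix_from_full(sym)
	km_b = kernel.get_kernel_matrix()
	return allclose(km_a, km_b) and allclose(km_a, sym)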
def mkl_binclass_modular(fm_train_real=traindat,
                         fm_test_real=testdat,
                         fm_label_twoclass=label_traindat):
    # imports added for self-containment, following the shogun modular
    # layout used by the other examples here
    from shogun.Features import RealFeatures, CombinedFeatures, BinaryLabels
    from shogun.Kernel import CombinedKernel, CustomKernel, PolyKernel
    from shogun.Classifier import MKLClassification

    ##################################
    # set up and train

    # create some poly train/test matrix
    tfeats = RealFeatures(fm_train_real)
    tkernel = PolyKernel(10, 3)
    tkernel.init(tfeats, tfeats)
    K_train = tkernel.get_kernel_matrix()

    pfeats = RealFeatures(fm_test_real)
    tkernel.init(tfeats, pfeats)
    K_test = tkernel.get_kernel_matrix()

    # create combined train features
    feats_train = CombinedFeatures()
    feats_train.append_feature_obj(RealFeatures(fm_train_real))

    # and corresponding combined kernel
    kernel = CombinedKernel()
    kernel.append_kernel(CustomKernel(K_train))
    kernel.append_kernel(PolyKernel(10, 2))
    kernel.init(feats_train, feats_train)

    # train mkl
    labels = BinaryLabels(fm_label_twoclass)
    mkl = MKLClassification()

    # which norm to use for MKL (also try 2 or 3)
    mkl.set_mkl_norm(1)

    # set cost (neg, pos)
    mkl.set_C(1, 1)

    # set kernel and labels
    mkl.set_kernel(kernel)
    mkl.set_labels(labels)

    # train
    mkl.train()
    #w=kernel.get_subkernel_weights()
    #kernel.set_subkernel_weights(w)

    ##################################
    # test

    # create combined test features
    feats_pred = CombinedFeatures()
    feats_pred.append_feature_obj(RealFeatures(fm_test_real))

    # and corresponding combined kernel
    kernel = CombinedKernel()
    kernel.append_kernel(CustomKernel(K_test))
    kernel.append_kernel(PolyKernel(10, 2))
    kernel.init(feats_train, feats_pred)

    # and classify
    mkl.set_kernel(kernel)
    out = mkl.apply()
    return out, kernel
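# Hedged usage sketch: mkl_binclass_modular leans on module-level
# traindat/testdat/label_traindat defaults; random stand-ins of an
# illustrative shape would exercise it just as well:
#
#   from numpy import sign
#   from numpy.random import rand
#   out, k = mkl_binclass_modular(rand(4, 30), rand(4, 12),
#                                 sign(rand(30) - 0.5))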
# fragment: assumes `wdk` (an initialised shogun kernel), `N`,
# `relate_tasks`, `labels` and `lab` are defined earlier in the script;
# imports added for clarity
import numpy
from openopt import QP  # OpenOpt's QP wrapper around cvxopt
from shogun.Kernel import CustomKernel
from shogun.Classifier import LibSVM

km = wdk.get_kernel_matrix()

# rescale every entry by how strongly the two tasks are related
for i in xrange(N):
    for j in xrange(N):
        km[i, j] = km[i, j] * relate_tasks(i, j)

print km

# precompute the dual's Q matrix from the shogun kernel: Q_ij = y_i*km_ij*y_j
y = numpy.array(labels)
K = numpy.transpose(y.flatten() * (km * y.flatten()).transpose())
f = -numpy.ones(N)
C = 1.0

# Important!! QP does not accept ndarray as a type, it must be an array
p = QP(K, f, Aeq=y, beq=0, lb=numpy.zeros(N), ub=C * numpy.ones(N))
r = p.solve('cvxopt_qp', iprint=0)

print "externally modified kernel. objective:", r.ff

ck = CustomKernel()
ck.set_full_kernel_matrix_from_full(km)

svm = LibSVM(1, ck, lab)
svm.train()

print "externally modified kernel. objective:", svm.get_objective()


Example #11
def statistics_quadratic_time_mmd (m,dim,difference):
	from shogun.Features import RealFeatures
	from shogun.Features import MeanShiftDataGenerator
	from shogun.Kernel import GaussianKernel, CustomKernel
	from shogun.Statistics import QuadraticTimeMMD
	from shogun.Statistics import BOOTSTRAP, MMD2_SPECTRUM, MMD2_GAMMA, BIASED, UNBIASED
	from shogun.Mathematics import Statistics, IntVector, RealVector, Math
	from numpy import array, int32, random

	# init seed for reproducibility
	Math.init_random(1)

	# number of examples kept low in order to make things fast

	# streaming data generator for mean shift distributions
	gen_p=MeanShiftDataGenerator(0, dim);
	gen_q=MeanShiftDataGenerator(difference, dim);

	# stream some data from generator
	feat_p=gen_p.get_streamed_features(m);
	feat_q=gen_q.get_streamed_features(m);

	# set kernel a-priori. usually one would do some kernel selection. See
	# other examples for this.
	width=10;
	kernel=GaussianKernel(10, width);

	# create quadratic time mmd instance. Note that this constructor
	# copies p and q and does not reference them
	mmd=QuadraticTimeMMD(kernel, feat_p, feat_q);

	# perform test: compute p-value and test if null-hypothesis is rejected for
	# a test level of 0.05
	alpha=0.05;
	
	# using bootstrapping (slow, not the most reliable way. Consider pre-
	# computing the kernel when using it, see below).
	# Also, in practice, use at least 250 iterations
	mmd.set_null_approximation_method(BOOTSTRAP);
	mmd.set_bootstrap_iterations(3);
	p_value_boot=mmd.perform_test();
	# reject if p-value is smaller than test level
	#print "bootstrap: p!=q: ", p_value_boot<alpha

	# using spectrum method. Use at least 250 samples from null.
	# This is consistent but sometimes breaks, always monitor type I error.
	# See tutorial for number of eigenvalues to use .
	# Only works with BIASED statistic
	mmd.set_statistic_type(BIASED);
	mmd.set_null_approximation_method(MMD2_SPECTRUM);
	mmd.set_num_eigenvalues_spectrum(3);
	# (sic: this method name is misspelled in shogun's own API)
	mmd.set_num_samples_sepctrum(250);
	p_value_spectrum=mmd.perform_test();
	# reject if p-value is smaller than test level
	#print "spectrum: p!=q: ", p_value_spectrum<alpha

	# using gamma method. This is a quick hack, which works most of the time
	# but is NOT guaranteed to. See tutorial for details.
	# Only works with BIASED statistic
	mmd.set_statistic_type(BIASED);
	mmd.set_null_approximation_method(MMD2_GAMMA);
	p_value_gamma=mmd.perform_test();
	# reject if p-value is smaller than test level
	#print "gamma: p!=q: ", p_value_gamma<alpha

	# compute type I and II error (use many more trials in practice).
	# Type I error is not necessary if one uses bootstrapping. We do it here
	# anyway, but note that this is an efficient way of computing it.
	# Also note that testing has to happen on
	# different data than kernel selection, but the linear time mmd does this
	# implicitly and we used a fixed kernel here.
	mmd.set_null_approximation_method(BOOTSTRAP);
	mmd.set_bootstrap_iterations(5);
	num_trials=5;
	type_I_errors=RealVector(num_trials);
	type_II_errors=RealVector(num_trials);
	inds=int32(array([x for x in range(2*m)])) # numpy
	p_and_q=mmd.get_p_and_q();

	# use a precomputed kernel to be faster
	kernel.init(p_and_q, p_and_q);
	precomputed=CustomKernel(kernel);
	mmd.set_kernel(precomputed);
	for i in range(num_trials):
		# permuting p and q together effectively means p=q, so rejecting
		# (p-value below alpha) is a type I error
		inds=random.permutation(inds) # numpy permutation
		precomputed.add_row_subset(inds);
		precomputed.add_col_subset(inds);
		type_I_errors[i]=mmd.perform_test()<alpha;
		precomputed.remove_row_subset();
		precomputed.remove_col_subset();

		# on the original data, failing to reject gives the type II error
		type_II_errors[i]=mmd.perform_test()>alpha;

	return type_I_errors.get(),type_II_errors.get(),p_value_boot,p_value_spectrum,p_value_gamma
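# Hedged usage sketch (parameters kept tiny, like the iteration counts in the
# body, purely for speed; real runs need many more samples and iterations):
#
#   type_I, type_II, p_boot, p_spec, p_gamma = \
#       statistics_quadratic_time_mmd(30, 2, 0.5)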
Example #12
def kernel_custom_modular(dim=7):
    from numpy.random import rand, seed
    from numpy import array, float32
    from shogun.Features import RealFeatures
    from shogun.Kernel import CustomKernel

    seed(17)
    data = rand(dim, dim)
    feats = RealFeatures(data)
    symdata = data + data.T
    lowertriangle = array([
        symdata[(x, y)] for x in range(symdata.shape[1])
        for y in range(symdata.shape[0]) if y <= x
    ])

    kernel = CustomKernel()

    # once with float64's
    kernel.set_triangle_kernel_matrix_from_triangle(lowertriangle)
    km_triangletriangle = kernel.get_kernel_matrix()

    kernel.set_triangle_kernel_matrix_from_full(symdata)
    km_fulltriangle = kernel.get_kernel_matrix()

    kernel.set_full_kernel_matrix_from_full(symdata)
    km_fullfull = kernel.get_kernel_matrix()

    # now once with float32's
    data = array(data, dtype=float32)

    kernel.set_triangle_kernel_matrix_from_triangle(lowertriangle)
    km_triangletriangle = kernel.get_kernel_matrix()

    kernel.set_triangle_kernel_matrix_from_full(symdata)
    km_fulltriangle = kernel.get_kernel_matrix()

    kernel.set_full_kernel_matrix_from_full(symdata)
    km_fullfull = kernel.get_kernel_matrix()
    return km_fullfull, kernel
Example #14
    def _train(self, train_data, param):
        """
        training procedure using training examples and labels
        
        @param train_data: Data relevant to SVM training
        @type train_data: dict<str, list<instances> >
        @param param: Parameters for the training procedure
        @type param: ParameterSvm
        """
        
                
        # merge data sets
        data = PreparedMultitaskData(train_data, shuffle=False)

                
        # create shogun data objects
        base_wdk = shogun_factory.create_kernel(data.examples, param)
        kernel_matrix = base_wdk.get_kernel_matrix()
        lab = shogun_factory.create_labels(data.labels)
        

        # fetch taxonomy from parameter object
        taxonomy = param.taxonomy.data

        # fetch all nodes of the taxonomy
        nodes = taxonomy.get_all_nodes()

        ########################################################
        print "creating a kernel for each node:"
        ########################################################

        # assemble combined kernel
        from shogun.Kernel import CombinedKernel, CustomKernel

        combined_kernel = CombinedKernel()

        # indicator to which task each example belongs
        task_vector = data.task_vector_names

        for node in nodes:

            print "creating kernel for ", node.name

            # fetch sub-tree
            leaf_names = [leaf.name for leaf in node.get_leaves()]

            print "masking all entries other than:", leaf_names

            # init matrix
            kernel_matrix_node = numpy.zeros(kernel_matrix.shape)

            # fill matrix for node
            for (i, task_lhs) in enumerate(task_vector):
                for (j, task_rhs) in enumerate(task_vector):

                    # only copy values if both tasks are present in subtree
                    if task_lhs in leaf_names and task_rhs in leaf_names:
                        kernel_matrix_node[i, j] = kernel_matrix[i, j]

            # create custom kernel
            kernel_node = CustomKernel()
            kernel_node.set_full_kernel_matrix_from_full(kernel_matrix_node)

            # append custom kernel to CombinedKernel
            combined_kernel.append_kernel(kernel_node)

            print "------"

        print "subkernel weights:", combined_kernel.get_subkernel_weights()

        svm = None

        print "using MKL:", (param.transform >= 1.0)

        if param.transform >= 1.0:

            num_threads = 4

            svm = MKLClassification()

            svm.set_mkl_norm(param.transform)
            # alternative solvers: ST_DIRECT, ST_NEWTON, ST_CPLEX
            svm.set_solver_type(ST_GLPK)

            svm.set_C(param.cost, param.cost)

            svm.set_kernel(combined_kernel)
            svm.set_labels(lab)

            svm.parallel.set_num_threads(num_threads)
            #svm.set_linadd_enabled(False)
            #svm.set_batch_computation_enabled(False)

            svm.train()

            print "subkernel weights (after):", combined_kernel.get_subkernel_weights()
            
        else:

            # create SVM (disable unsupported optimizations)
            svm = SVMLight(param.cost, combined_kernel, lab)
            svm.set_linadd_enabled(False)
            svm.set_batch_computation_enabled(False)

            svm.train()

        ########################################################
        print "svm objective:"
        print svm.get_objective()
        ########################################################

        # wrap up predictors
        svms = {}

        # use a reference to the same svm several times
        for task_id in train_data.keys():
            svms[task_id] = svm

        return svms
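# Note on the masking loop in _train above: the per-node double loop is
# equivalent to scaling the kernel matrix by an outer product of membership
# indicators. A vectorized sketch, assuming task_vector and leaf_names as in
# the loop:
#
#   member = numpy.array([t in leaf_names for t in task_vector], dtype=float)
#   kernel_matrix_node = kernel_matrix * numpy.outer(member, member)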
from numpy import *
from numpy.random import rand
from shogun.Features import RealFeatures, Labels
from shogun.Kernel import CustomKernel
from shogun.Classifier import LibSVM

C=1
dim=7

lab=sign(2*rand(dim) - 1)
data=rand(dim, dim)
symdata=data*data.T

kernel=CustomKernel()
# use the symmetrised matrix computed above as the kernel
kernel.set_full_kernel_matrix_from_full(symdata)
labels=Labels(lab)
svm=LibSVM(C, kernel, labels)
svm.train()
out=svm.classify().get_labels()