def classifier_domainadaptationsvm_modular(fm_train_dna=traindna, fm_test_dna=testdna,
                                           label_train_dna=label_traindna,
                                           label_test_dna=label_testdna,
                                           fm_train_dna2=traindna2, fm_test_dna2=testdna2,
                                           label_train_dna2=label_traindna2,
                                           label_test_dna2=label_testdna2, C=1, degree=3):
    """Train an SVM on one DNA data set, then a DomainAdaptationSVM on a second.

    A plain SVMLight with a weighted-degree string kernel is trained on the
    first data set (``fm_train_dna`` / ``label_train_dna``).  That machine is
    then handed to ``DomainAdaptationSVM`` as the previously obtained solution
    while training on the second data set (``fm_train_dna2`` /
    ``label_train_dna2``).  The adapted machine is applied to ``fm_test_dna2``.

    Previously the second stage was rebuilt from the *first* data set, so the
    ``*_dna2`` arguments had no effect; they are now actually used.

    ``label_test_dna`` and ``label_test_dna2`` are accepted but not read.
    ``C`` is passed to both machines, ``degree`` to both kernels.

    Returns the result of ``dasvm.apply_binary`` on the second test features.
    """
    # --- stage 1: SVM on the first data set --------------------------------
    feats_train = StringCharFeatures(fm_train_dna, DNA)
    # Built as in the original code; not consumed by the adaptation step.
    feats_test = StringCharFeatures(fm_test_dna, DNA)
    kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)
    labels = BinaryLabels(label_train_dna)
    svm = SVMLight(C, kernel, labels)
    svm.train()

    # --- stage 2: domain adaptation on the second data set -----------------
    feats_train2 = StringCharFeatures(fm_train_dna2, DNA)
    feats_test2 = StringCharFeatures(fm_test_dna2, DNA)
    kernel2 = WeightedDegreeStringKernel(feats_train2, feats_train2, degree)
    labels2 = BinaryLabels(label_train_dna2)

    # we regularize against the previously obtained solution
    dasvm = DomainAdaptationSVM(C, kernel2, labels2, svm, 1.0)
    dasvm.train()

    out = dasvm.apply_binary(feats_test2)

    return out  # TODO: also return dasvm
def classifier_svmlight_modular (fm_train_dna=traindat,fm_test_dna=testdat,label_train_dna=label_traindat,C=1.2,epsilon=1e-5,num_threads=1,degree=20):
    """Train SVMLight with a weighted-degree string kernel on DNA strings.

    ``C`` goes to the SVMLight constructor, ``epsilon`` to ``set_epsilon`` and
    ``num_threads`` to ``parallel.set_num_threads``.  ``degree`` is the kernel
    degree; it used to be hard-coded to 20 and keeps that value as default.

    After training, the kernel is re-initialised on (train, test) features and
    the machine is applied once; the predicted labels are discarded.

    Returns the kernel object, or ``None`` when SVMLight cannot be imported.
    """
    from shogun.Features import StringCharFeatures, Labels, DNA
    from shogun.Kernel import WeightedDegreeStringKernel
    try:
        from shogun.Classifier import SVMLight
    except ImportError:
        # Parenthesised single-argument form works on Python 2 and 3 alike.
        print('No support for SVMLight available.')
        return

    feats_train = StringCharFeatures(DNA)
    feats_train.set_features(fm_train_dna)
    feats_test = StringCharFeatures(DNA)
    feats_test.set_features(fm_test_dna)

    kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)

    labels = Labels(label_train_dna)

    svm = SVMLight(C, kernel, labels)
    svm.set_epsilon(epsilon)
    svm.parallel.set_num_threads(num_threads)
    svm.train()

    kernel.init(feats_train, feats_test)
    # Result intentionally unused, as in the original: only the kernel is returned.
    svm.apply().get_labels()
    return kernel
def kernel_weighted_degree_string_modular (fm_train_dna=traindat,fm_test_dna=testdat,degree=20):
    """Evaluate a weighted-degree string kernel on DNA train and test strings.

    The kernel is created on the training features and given explicit
    weights: the values ``degree, degree-1, ..., 1`` divided by their sum.
    The kernel matrix is read once for (train, train) and, after
    re-initialising the kernel, once for (train, test).

    Returns ``(km_train, km_test, kernel)``.
    """
    from shogun.Features import StringCharFeatures, DNA
    from shogun.Kernel import WeightedDegreeStringKernel, MSG_DEBUG
    from numpy import arange, double

    train_features = StringCharFeatures(fm_train_dna, DNA)
    test_features = StringCharFeatures(fm_test_dna, DNA)

    wd_kernel = WeightedDegreeStringKernel(train_features, train_features, degree)

    # Ascending 1..degree; reversed for the numerator, summed for the divisor.
    orders = arange(1, degree + 1, dtype=double)
    wd_kernel.set_wd_weights(orders[::-1] / sum(orders))

    train_matrix = wd_kernel.get_kernel_matrix()

    wd_kernel.init(train_features, test_features)
    test_matrix = wd_kernel.get_kernel_matrix()

    return train_matrix, test_matrix, wd_kernel
# Example no. 4
# 0
def classifier_svmlight_linear_term_modular(fm_train_dna=traindna,fm_test_dna=testdna, \
                                                label_train_dna=label_traindna,degree=3, \
                                                C=10,epsilon=1e-5,num_threads=1):
    """Train SVMLight with a custom linear term and predict on test strings.

    A weighted-degree string kernel of the given ``degree`` is built on the
    training DNA features.  Before training, the machine gets a QP size of 3
    and a fixed ten-element linear term (the negated vector
    1, 2, 3, 4, 5, 6, 7, 8, 7, 6).

    Returns ``(predicted_labels, kernel)`` where the kernel has been
    re-initialised on (train, test) features.
    """
    from shogun.Features import StringCharFeatures, Labels, DNA
    from shogun.Kernel import WeightedDegreeStringKernel
    from shogun.Classifier import SVMLight

    train_features = StringCharFeatures(DNA)
    train_features.set_features(fm_train_dna)
    test_features = StringCharFeatures(DNA)
    test_features.set_features(fm_test_dna)

    wd_kernel = WeightedDegreeStringKernel(train_features, train_features,
                                           degree)
    train_labels = Labels(label_train_dna)

    classifier = SVMLight(C, wd_kernel, train_labels)
    classifier.set_qpsize(3)
    linear_term = -numpy.array([1, 2, 3, 4, 5, 6, 7, 8, 7, 6],
                               dtype=numpy.double)
    classifier.set_linear_term(linear_term)
    classifier.set_epsilon(epsilon)
    classifier.parallel.set_num_threads(num_threads)
    classifier.train()

    # Switch the kernel to train-vs-test before predicting.
    wd_kernel.init(train_features, test_features)
    predictions = classifier.apply().get_labels()
    return predictions, wd_kernel
def create_empty_promoter_kernel(param):
    """
    creates an uninitialized promoter kernel

    The result is a CombinedKernel with three sub-kernels appended in this
    order: a centre kernel and two WeightedCommWordStringKernel instances
    (left and right border).  No features are attached.

    @param param: mapping read for the keys "shifts", "degree",
                  "center_offset" (only when shifts != 0) and "kernel_cache"
    @return: the assembled CombinedKernel
    """

    # Each of the three sub-kernels gets a third of the cache budget.  Floor
    # division keeps the size an integer even under true division (Python 3
    # or `from __future__ import division`).
    cache_share = param["kernel_cache"] // 3

    # centered WDK/WDK-shift
    if param["shifts"] == 0:
        kernel_center = WeightedDegreeStringKernel(param["degree"])
    else:
        kernel_center = WeightedDegreePositionStringKernel(10, param["degree"])
        # one shift value per position; vector length is 2 * center_offset
        shifts_vector = numpy.ones(param["center_offset"] * 2,
                                   dtype=numpy.int32) * param["shifts"]
        kernel_center.set_shifts(shifts_vector)

    kernel_center.set_cache_size(cache_share)

    # border spectrum kernels
    use_sign = False
    kernel_left = WeightedCommWordStringKernel(cache_share, use_sign)
    kernel_right = WeightedCommWordStringKernel(cache_share, use_sign)

    # assemble combined kernel
    kernel = CombinedKernel()
    kernel.append_kernel(kernel_center)
    kernel.append_kernel(kernel_left)
    kernel.append_kernel(kernel_right)

    return kernel
# Example no. 6
# 0
class svm_splice_model(object):
    """LibSVM machine with a weighted-degree string kernel, built from
    already-known alphas and bias rather than trained here."""

    def __init__(self, order, traindat, alphas, b, window, consensus):
        """Assemble the machine from stored model parameters.

        @param order: kernel degree (converted with int())
        @param traindat: DNA strings used as both sides of the kernel; every
                         string is registered as a support vector
                         (indices 0 .. len(alphas)-1)
        @param alphas: coefficients passed to set_alphas
        @param b: bias passed to set_bias
        @param window: 3-sequence (window_left, offset, window_right).  This
                       used to be a tuple parameter unpacked in the signature,
                       which Python 3 rejects (PEP 3113); positional callers
                       are unaffected.
        @param consensus: stored unchanged on the instance
        """
        window_left, offset, window_right = window

        f = StringCharFeatures(traindat, DNA)
        wd_kernel = WeightedDegreeStringKernel(f, f, int(order))
        wd_kernel.io.set_target_to_stdout()

        self.svm = LibSVM()
        self.svm.set_kernel(wd_kernel)
        self.svm.set_alphas(alphas)
        self.svm.set_support_vectors(
            numpy.arange(len(alphas), dtype=numpy.int32))
        self.svm.set_bias(b)
        self.svm.io.set_target_to_stdout()
        # use as many threads as the machine reports CPUs
        self.svm.parallel.set_num_threads(self.svm.parallel.get_num_cpus())
        self.svm.set_linadd_enabled(True)
        self.svm.set_batch_computation_enabled(True)

        self.window_left = int(window_left)
        self.window_right = int(window_right)

        self.consensus = consensus
        self.wd_kernel = wd_kernel
        self.traindat = f
        self.offset = offset
# Example no. 7
# 0
    def train(self, data, labels):
        """
        model training

        Builds a CombinedKernel (centre kernel plus left/right
        WeightedCommWordStringKernel), initialises it on features created
        from `data`, and trains an SVMLight stored in `self.svm`.

        Reads from `self.param`: "shifts", "degree", "center_offset",
        "kernel_cache", "center_pos" and "cost".

        @param data: examples handed to create_features
        @param labels: training labels; converted with numpy.double
        @return: self
        """

        # A third of the cache budget per sub-kernel.  Floor division keeps
        # the value integral under true division (Python 3).
        cache_share = self.param["kernel_cache"] // 3

        # centered WDK/WDK-shift
        if self.param["shifts"] == 0:
            kernel_center = WeightedDegreeStringKernel(self.param["degree"])
        else:
            kernel_center = WeightedDegreePositionStringKernel(
                10, self.param["degree"])
            shifts_vector = numpy.ones(
                self.param["center_offset"] * 2,
                dtype=numpy.int32) * self.param["shifts"]
            kernel_center.set_shifts(shifts_vector)

        kernel_center.set_cache_size(cache_share)

        # border spectrum kernels
        use_sign = False
        kernel_left = WeightedCommWordStringKernel(cache_share, use_sign)
        kernel_right = WeightedCommWordStringKernel(cache_share, use_sign)

        # assemble combined kernel
        kernel = CombinedKernel()
        kernel.append_kernel(kernel_center)
        kernel.append_kernel(kernel_left)
        kernel.append_kernel(kernel_right)

        ## building features
        feat = create_features(data, self.param["center_offset"],
                               self.param["center_pos"])

        # init combined kernel
        kernel.init(feat, feat)

        # single-argument print() behaves the same on Python 2 and 3
        print("len(labels) = %i" % (len(labels)))
        lab = BinaryLabels(numpy.double(labels))
        self.svm = SVMLight(self.param["cost"], kernel, lab)

        # show debugging output
        self.svm.io.enable_progress()
        self.svm.io.set_loglevel(MSG_DEBUG)

        # optimization settings
        num_threads = 2
        self.svm.parallel.set_num_threads(num_threads)
        self.svm.set_epsilon(10e-8)

        self.svm.train()

        return self
def create_kernel(examples, param):
    """
    kernel factory

    Creates features from the examples and returns a kernel initialised on
    them, selected by the string `param.kernel`.  Afterwards the optional
    flags "cache_size" and "debug" are applied.

    @param examples: list/array of examples
    @type examples: list
    @param param: parameter object; `param.flags` is optional except for the
                  "Promoter" kernel, which passes it on
    @type param: Parameter

    @return subclass of shogun Kernel object
    @rtype: Kernel
    @raise Exception: if `param.kernel` names no supported kernel
    """

    # first create feature object of correct type
    feat = create_features(examples, param)

    if param.kernel == "WeightedDegreeStringKernel":
        kernel = WeightedDegreeStringKernel(feat, feat, param.wdk_degree)
        kernel.set_cache_size(200)

    elif param.kernel == "LinearKernel":
        kernel = LinearKernel(feat, feat)

    elif param.kernel == "PolyKernel":
        kernel = PolyKernel(feat, feat, 1, False)

    elif param.kernel == "GaussianKernel":
        kernel = GaussianKernel(feat, feat, param.sigma)

    elif param.kernel == "WeightedDegreeRBFKernel":
        size_cache = 200
        nof_properties = 20
        sigma = param.base_similarity
        kernel = WeightedDegreeRBFKernel(feat, feat, sigma, param.wdk_degree,
                                         nof_properties, size_cache)

    elif param.kernel == "Promoter":
        kernel = create_promoter_kernel(examples, param.flags)

    else:
        raise Exception("Unknown kernel type.")

    # `flags` is optional.  The original guarded only the cache_size lookup
    # with hasattr(), so a param without flags crashed on the debug lookup.
    flags = getattr(param, "flags", None)
    if flags is not None:
        if "cache_size" in flags:
            kernel.set_cache_size(flags["cache_size"])

        if "debug" in flags:
            kernel.io.set_loglevel(shogun.Kernel.MSG_DEBUG)

    return kernel
def create_empty_kernel(param):
    """
    kernel factory

    Returns a kernel selected by the string `param.kernel`, constructed
    without features.  Afterwards the optional flags "cache_size" and
    "debug" are applied.

    @param param: parameter object; `param.flags` is optional
    @type param: Parameter

    @return subclass of shogun Kernel object
    @rtype: Kernel
    @raise Exception: if `param.kernel` names no supported kernel
    """

    if param.kernel == "WeightedDegreeStringKernel":
        kernel = WeightedDegreeStringKernel(param.wdk_degree)

    elif param.kernel == "LinearKernel":
        kernel = LinearKernel()

    elif param.kernel == "PolyKernel":
        kernel = PolyKernel(10, 1, False)

    elif param.kernel == "GaussianKernel":
        kernel = GaussianKernel(10, param.sigma)

    elif param.kernel == "WeightedDegreeRBFKernel":
        size_cache = 50
        nof_properties = 5  #20
        sigma = param.transform
        kernel = WeightedDegreeRBFKernel(size_cache, sigma, param.wdk_degree,
                                         nof_properties)

    else:
        raise Exception("Unknown kernel type:" + param.kernel)

    # `flags` is optional.  The original guarded only the cache_size lookup
    # with hasattr(), so a param without flags crashed on the debug lookup.
    flags = getattr(param, "flags", None)
    if flags is not None:
        if "cache_size" in flags:
            kernel.set_cache_size(flags["cache_size"])

        if "debug" in flags:
            kernel.io.set_loglevel(shogun.Kernel.MSG_DEBUG)

    return kernel
# Example no. 10
# 0
def classifier_svmlight_batch_linadd_modular(fm_train_dna, fm_test_dna,
                                             label_train_dna, degree, C,
                                             epsilon, num_threads):
    """Train SVMLight on DNA strings and predict with batch computation on.

    A weighted-degree string kernel of the given ``degree`` is trained on the
    training strings.  The machine is then applied to the test strings twice:
    first with batch computation and linadd both disabled (result discarded),
    then with batch computation enabled (result returned).

    ``degree`` used to be overwritten with 20 inside the function, so the
    caller's value was ignored; it is now honoured.

    Returns ``(labels, svm)``, or ``None`` when SVMLight cannot be imported.
    """
    from shogun.Features import StringCharFeatures, Labels, DNA
    from shogun.Kernel import WeightedDegreeStringKernel, MSG_DEBUG
    try:
        from shogun.Classifier import SVMLight
    except ImportError:
        # Parenthesised single-argument form works on Python 2 and 3 alike.
        print('No support for SVMLight available.')
        return

    feats_train = StringCharFeatures(DNA)
    feats_train.set_features(fm_train_dna)
    feats_test = StringCharFeatures(DNA)
    feats_test.set_features(fm_test_dna)

    kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)

    labels = Labels(label_train_dna)

    svm = SVMLight(C, kernel, labels)
    svm.set_epsilon(epsilon)
    svm.parallel.set_num_threads(num_threads)
    svm.train()

    kernel.init(feats_train, feats_test)

    # First pass: plain evaluation, output not kept.
    svm.set_batch_computation_enabled(False)
    svm.set_linadd_enabled(False)
    svm.apply().get_labels()

    # Second pass: batch computation; linadd stays disabled from above.
    svm.set_batch_computation_enabled(True)
    labels = svm.apply().get_labels()
    return labels, svm
labels[19] = 1

feat = StringCharFeatures(DNA)
feat.set_features(examples)
lab = Labels(numpy.array(labels))

N = subset_size

##################################################################
# internal modification
##################################################################

# First half of the examples is task 0, second half task 1.  Floor division
# is required: a list cannot be repeated a float number of times, which is
# what `/` yields under true division.
task_vector = [0] * (N // 2) + [1] * (N // 2)

base_wdk = WeightedDegreeStringKernel(feat, feat, 1)

normalizer = MultitaskKernelNormalizer(task_vector)

# Similarity 4.0 within a task, 1.0 between the two tasks.
# `range` instead of the Python-2-only `xrange`; two iterations either way.
for i in range(2):
    for j in range(2):
        normalizer.set_task_similarity(i, j, 4.0 if i == j else 1.0)

base_wdk.set_normalizer(normalizer)
# Example no. 12
# 0
def solver_mtk_shogun(C, all_xt, all_lt, task_indicator, M, L, eps,
                      target_obj):
    """
    implementation using multitask kernel

    Trains an SVMLight (bias disabled) on a base kernel wrapped in a
    MultitaskKernelNormalizer whose pairwise task similarities come from M.
    String examples get a weighted-degree string kernel of degree 8; anything
    else is treated as real-valued features with a linear kernel.

    @param C: passed as set_C(C, C)
    @param all_xt: examples
    @param all_lt: labels, same length as all_xt
    @param task_indicator: task id per example, same length as all_xt
    @param M: task similarity matrix, indexed as M[i, j]; must have L's shape
    @param L: matrix whose first dimension gives the number of tasks
    @param eps: passed to set_epsilon
    @param target_obj: passed to set_target_objective

    @return: (objectives, train_times), where objectives are the negated
             dual objectives reported by the SVM
    """

    xt = numpy.array(all_xt)
    lt = numpy.array(all_lt)
    tt = numpy.array(task_indicator, dtype=numpy.int32)
    tsm = numpy.array(M)

    # "%s" formatting reproduces the output of the old two-argument print
    # statement on both Python 2 and 3.
    print("task_sim: %s" % (tsm,))

    num_tasks = L.shape[0]

    # sanity checks
    assert len(xt) == len(lt) == len(tt)
    assert M.shape == L.shape
    assert num_tasks == len(set(tt))

    # set up shogun objects
    if isinstance(xt[0], numpy.string_):
        feat = StringCharFeatures(DNA)
        xt = [str(a) for a in xt]
        feat.set_features(xt)
        base_kernel = WeightedDegreeStringKernel(feat, feat, 8)
    else:
        feat = RealFeatures(xt.T)
        base_kernel = LinearKernel(feat, feat)

    lab = Labels(lt)

    # set up normalizer
    normalizer = MultitaskKernelNormalizer(tt.tolist())

    for i in range(num_tasks):
        for j in range(num_tasks):
            normalizer.set_task_similarity(i, j, M[i, j])

    print("num of unique tasks:  %s"
          % (normalizer.get_num_unique_tasks(task_indicator),))

    # set up kernel
    base_kernel.set_cache_size(2000)
    base_kernel.set_normalizer(normalizer)
    base_kernel.init_normalizer()

    # set up svm
    svm = SVMLight()  #LibSVM()

    svm.set_epsilon(eps)

    # how often do we like to compute objective etc
    svm.set_record_interval(0)
    svm.set_target_objective(target_obj)

    svm.set_linadd_enabled(False)
    svm.set_batch_computation_enabled(False)
    svm.io.set_loglevel(MSG_DEBUG)

    svm.set_C(C, C)
    svm.set_bias_enabled(False)

    # prepare for training
    svm.set_labels(lab)
    svm.set_kernel(base_kernel)

    # train svm
    svm.train()

    train_times = svm.get_training_times()
    objectives = [-obj for obj in svm.get_dual_objectives()]

    # Disabled: appends the primal objective of the final solution as one
    # extra data point.  Kept for reference; never executed.
    if False:

        # get model parameters
        sv_idx = svm.get_support_vectors()
        sparse_alphas = svm.get_alphas()

        assert len(sv_idx) == len(sparse_alphas)

        # compute dense alpha (remove label)
        alphas = numpy.zeros(len(xt))
        for id_sparse, id_dense in enumerate(sv_idx):
            alphas[id_dense] = sparse_alphas[id_sparse] * lt[id_dense]

        W = alphas_to_w(alphas, xt, lt, task_indicator, M)
        primal_obj = compute_primal_objective(
            W.reshape(W.shape[0] * W.shape[1]), C, all_xt, all_lt,
            task_indicator, L)
        objectives.append(primal_obj)
        train_times.append(train_times[-1] + 100)

    return objectives, train_times
# Example no. 13
# 0
#############################################
#    compute pre-svm
#############################################

# Train SVMLight and LibSVM on the same kernel and labels and check that
# their objectives agree to within 0.001.

examples_presvm = [i.example for i in d[0:subset_size]]
labels_presvm = [i.label for i in d[0:subset_size]]

# force these five examples to label 1
labels_presvm[2] = 1
labels_presvm[12] = 1
labels_presvm[15] = 1
labels_presvm[8] = 1
labels_presvm[19] = 1

feat_presvm = StringCharFeatures(DNA)
feat_presvm.set_features(examples_presvm)
wdk_presvm = WeightedDegreeStringKernel(feat_presvm, feat_presvm, 1)
lab_presvm = Labels(numpy.array(labels_presvm))

presvm = SVMLight(1, wdk_presvm, lab_presvm)
presvm.train()

presvm2 = LibSVM(1, wdk_presvm, lab_presvm)
presvm2.train()

# "%s" formatting reproduces the old two-argument print statements on both
# Python 2 and 3.
print("svmlight %s" % (presvm.get_objective(),))
print("libsvm %s" % (presvm2.get_objective(),))

assert (abs(presvm.get_objective() - presvm2.get_objective()) <= 0.001)

# prints the SVMLight objective again under a different label
print("simple svm %s" % (presvm.get_objective(),))