Code example #1
def classifier_svmlight_modular (fm_train_dna=traindat,fm_test_dna=testdat,label_train_dna=label_traindat,C=1.2,epsilon=1e-5,num_threads=1):
	from shogun.Features import StringCharFeatures, Labels, DNA
	from shogun.Kernel import WeightedDegreeStringKernel
	try:
		from shogun.Classifier import SVMLight
	except ImportError:
		print 'No support for SVMLight available.'
		return

	feats_train=StringCharFeatures(DNA)
	feats_train.set_features(fm_train_dna)
	feats_test=StringCharFeatures(DNA)
	feats_test.set_features(fm_test_dna)
	degree=20

	kernel=WeightedDegreeStringKernel(feats_train, feats_train, degree)

	labels=Labels(label_train_dna)

	svm=SVMLight(C, kernel, labels)
	svm.set_epsilon(epsilon)
	svm.parallel.set_num_threads(num_threads)
	svm.train()

	kernel.init(feats_train, feats_test)
	svm.apply().get_labels()
	return kernel
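A minimal sketch of how this function might be called, assuming the legacy shogun "modular" Python bindings are installed; the DNA strings and labels below are made-up placeholders for the traindat/testdat/label_traindat defaults that the full example file normally loads from its data directory:

import numpy

# all strings must have equal length for the weighted degree kernel
traindat = ['ACGTACGTACGTACGTACGTACGT', 'TTTTAAAACCCCGGGGTTTTAAAA',
            'ACGTTTTTACGTACGTTTTTACGT', 'AAAACCCCGGGGTTTTAAAACCCC']
testdat = ['ACGTACGTACGTTTTTACGTACGT', 'CCCCAAAATTTTGGGGCCCCAAAA']
label_traindat = numpy.array([1.0, -1.0, 1.0, -1.0])

kernel = classifier_svmlight_modular(traindat, testdat, label_traindat, C=1.2)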
Code example #2
File: kmersvm_train.py  Project: aasoni/igm-research
def svm_learn(kernel, labels, options):
    """train SVM using SVMLight or LibSVM

	Arguments:
	kernel -- kernel object from Shogun toolbox
	labels -- list of labels
	options -- object containing option data 

	Return:
	trained svm object 
	"""

    try:
        svm = SVMLight(options.svmC, kernel,
                       Labels(numpy.array(labels, dtype=numpy.double)))
    except NameError:
        svm = LibSVM(options.svmC, kernel,
                     Labels(numpy.array(labels, dtype=numpy.double)))

    if options.quiet == False:
        svm.io.set_loglevel(MSG_INFO)
        svm.io.set_target_to_stderr()

    svm.set_epsilon(options.epsilon)
    svm.parallel.set_num_threads(1)
    if options.weight != 1.0:
        svm.set_C(options.svmC, options.svmC * options.weight)
    svm.train()

    if options.quiet == False:
        svm.io.set_loglevel(MSG_ERROR)

    return svm
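For reference, a hypothetical way to drive this function; the attribute names on options (svmC, epsilon, weight, quiet) are inferred from the body above, and in the real script they come from its command-line parser rather than being built by hand:

from argparse import Namespace

# placeholder option object mimicking the parsed command-line options
options = Namespace(svmC=1.0, epsilon=1e-5, weight=1.0, quiet=True)
svm = svm_learn(kernel, labels, options)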
Code example #3
def classifier_svmlight_linear_term_modular(fm_train_dna=traindna,fm_test_dna=testdna, \
                                                label_train_dna=label_traindna,degree=3, \
                                                C=10,epsilon=1e-5,num_threads=1):

    import numpy
    from shogun.Features import StringCharFeatures, Labels, DNA
    from shogun.Kernel import WeightedDegreeStringKernel
    from shogun.Classifier import SVMLight

    feats_train = StringCharFeatures(DNA)
    feats_train.set_features(fm_train_dna)
    feats_test = StringCharFeatures(DNA)
    feats_test.set_features(fm_test_dna)

    kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)

    labels = Labels(label_train_dna)

    svm = SVMLight(C, kernel, labels)
    svm.set_qpsize(3)
    svm.set_linear_term(
        -numpy.array([1, 2, 3, 4, 5, 6, 7, 8, 7, 6], dtype=numpy.double))
    svm.set_epsilon(epsilon)
    svm.parallel.set_num_threads(num_threads)
    svm.train()

    kernel.init(feats_train, feats_test)
    out = svm.apply().get_labels()
    return out, kernel
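Compared with example #1, the interesting calls here are set_qpsize and set_linear_term: set_qpsize(3) limits the size of the working set SVMLight optimizes in each iteration, and set_linear_term replaces the default constant linear part of the SVM objective with a per-example vector, which is expected to have one entry per training sequence. That custom linear term is what makes this the "linear term" variant of the SVMLight example.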
Code example #4
def svm_light ():
	print 'SVMLight'

	from shogun.Features import StringCharFeatures, Labels, DNA
	from shogun.Kernel import WeightedDegreeStringKernel
	try:
		from shogun.Classifier import SVMLight
	except ImportError:
		print 'No support for SVMLight available.'
		return

	feats_train=StringCharFeatures(DNA)
	feats_train.set_features(fm_train_dna)
	feats_test=StringCharFeatures(DNA)
	feats_test.set_features(fm_test_dna)
	degree=20

	kernel=WeightedDegreeStringKernel(feats_train, feats_train, degree)

	C=1.2
	epsilon=1e-5
	num_threads=1
	labels=Labels(label_train_dna)

	svm=SVMLight(C, kernel, labels)
	svm.set_epsilon(epsilon)
	svm.parallel.set_num_threads(num_threads)
	svm.train()

	kernel.init(feats_train, feats_test)
	svm.classify().get_labels()
Code example #5
def classifier_svmlight_linear_term_modular(fm_train_dna=traindna,fm_test_dna=testdna, \
                                                label_train_dna=label_traindna,degree=3, \
                                                C=10,epsilon=1e-5,num_threads=1):
    
    import numpy
    from shogun.Features import StringCharFeatures, BinaryLabels, DNA
    from shogun.Kernel import WeightedDegreeStringKernel
    from shogun.Classifier import SVMLight
    
    feats_train=StringCharFeatures(DNA)
    feats_train.set_features(fm_train_dna)
    feats_test=StringCharFeatures(DNA)
    feats_test.set_features(fm_test_dna)
    
    kernel=WeightedDegreeStringKernel(feats_train, feats_train, degree)
    
    labels=BinaryLabels(label_train_dna)
    
    svm=SVMLight(C, kernel, labels)
    svm.set_qpsize(3)
    svm.set_linear_term(-numpy.array([1,2,3,4,5,6,7,8,7,6], dtype=numpy.double));
    svm.set_epsilon(epsilon)
    svm.parallel.set_num_threads(num_threads)
    svm.train()
    
    kernel.init(feats_train, feats_test)
    out = svm.apply().get_labels()
    return out,kernel
Code example #6
def svm_learn(kernel, labels, options):
	"""train SVM using SVMLight or LibSVM

	Arguments:
	kernel -- kernel object from Shogun toolbox
	labels -- list of labels
	options -- object containing option data 

	Return:
	trained svm object 
	"""

	try: 
		svm=SVMLight(options.svmC, kernel, Labels(numpy.array(labels, dtype=numpy.double)))
	except NameError:
		svm=LibSVM(options.svmC, kernel, Labels(numpy.array(labels, dtype=numpy.double)))

	if options.quiet == False:
		svm.io.set_loglevel(MSG_INFO)
		svm.io.set_target_to_stderr()

	svm.set_epsilon(options.epsilon)
	svm.parallel.set_num_threads(1)
	if options.weight != 1.0:
		svm.set_C(options.svmC, options.svmC*options.weight)
	svm.train()

	if options.quiet == False:
		svm.io.set_loglevel(MSG_ERROR)

	return svm
Code example #7
File: model.py  Project: kuod/genomeutils
class ShogunPredictor(object):
    """
    basic single-task promoter model using string kernels
    """

    def __init__(self, degree=4, shifts=32, kernel_cache=10000, cost=1.0):
        #TODO: clean up degree
        self.degree = degree
        self.degree_wdk = degree
        self.degree_spectrum = degree
        self.shifts = shifts
        self.kernel_cache = kernel_cache
        self.cost = cost
        self.center_offset = 50
        self.center_pos = 1200
        self.epsilon = 10e-2
        self.num_threads = 4


    def train(self, data, labels):

        kernel = create_promoter_kernel(data, self.center_offset, self.center_pos, self.degree_wdk, self.degree_spectrum, self.shifts, kernel_cache=self.kernel_cache)

        print "len(labels) = %i" % (len(labels))
        lab = create_labels(labels)
        self.svm = SVMLight(self.cost, kernel, lab)

        # show debugging output
        self.svm.io.enable_progress()
        self.svm.io.set_loglevel(MSG_DEBUG)

        # optimization settings
        num_threads = self.num_threads
        self.svm.parallel.set_num_threads(num_threads)
        self.svm.set_epsilon(self.epsilon)

        self.svm.train()

        return self


    def predict(self, data):

        feat = create_promoter_features(data, self.center_offset, self.center_pos)
        out = self.svm.apply(feat).get_values()

        return out
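A hypothetical usage of this class; create_promoter_kernel, create_labels and create_promoter_features are helpers from the surrounding project that are not shown in this snippet, and the sequence/label variables are placeholders:

model = ShogunPredictor(degree=4, shifts=32, cost=1.0)
model = model.train(train_sequences, train_labels)   # train() returns self
scores = model.predict(test_sequences)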
Code example #8
def svm_learn(kernel, labels, svmC, epsilon, weight):
	"""
	"""
	try: 
		svm=SVMLight(svmC, kernel, Labels(numpy.array(labels, dtype=numpy.double)))
	except NameError:
		print 'No support for SVMLight available.'
		return

	svm.io.set_loglevel(MSG_INFO)
	svm.io.set_target_to_stderr()

	svm.set_epsilon(epsilon)
	svm.parallel.set_num_threads(1)
	if weight != 1.0:
		svm.set_C(svmC, svmC*weight)
	svm.train()
	svm.io.set_loglevel(MSG_ERROR)

	return svm
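The weight argument implements simple class rebalancing: when weight differs from 1.0, set_C is given two separate regularization constants, so errors on one class (presumably the positive class, mirroring SVMLight's -j cost factor) are penalized weight times more heavily than errors on the other.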
Code example #9
File: cksvmcv2.py  Project: aasoni/igm-research
def svm_learn(kernel, labels, svmC, epsilon, weight):
    """
	"""
    try:
        svm = SVMLight(svmC, kernel,
                       Labels(numpy.array(labels, dtype=numpy.double)))
    except NameError:
        print 'No support for SVMLight available.'
        return

    svm.io.set_loglevel(MSG_INFO)
    svm.io.set_target_to_stderr()

    svm.set_epsilon(epsilon)
    svm.parallel.set_num_threads(1)
    if weight != 1.0:
        svm.set_C(svmC, svmC * weight)
    svm.train()
    svm.io.set_loglevel(MSG_ERROR)

    return svm
Code example #10
def classifier_svmlight_batch_linadd_modular(fm_train_dna, fm_test_dna,
                                             label_train_dna, degree, C,
                                             epsilon, num_threads):

    from shogun.Features import StringCharFeatures, Labels, DNA
    from shogun.Kernel import WeightedDegreeStringKernel, MSG_DEBUG
    try:
        from shogun.Classifier import SVMLight
    except ImportError:
        print 'No support for SVMLight available.'
        return

    feats_train = StringCharFeatures(DNA)
    #feats_train.io.set_loglevel(MSG_DEBUG)
    feats_train.set_features(fm_train_dna)
    feats_test = StringCharFeatures(DNA)
    feats_test.set_features(fm_test_dna)
    degree = 20

    kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)

    labels = Labels(label_train_dna)

    svm = SVMLight(C, kernel, labels)
    svm.set_epsilon(epsilon)
    svm.parallel.set_num_threads(num_threads)
    svm.train()

    kernel.init(feats_train, feats_test)

    #print 'SVMLight Objective: %f num_sv: %d' % \
    #	(svm.get_objective(), svm.get_num_support_vectors())
    svm.set_batch_computation_enabled(False)
    svm.set_linadd_enabled(False)
    svm.apply().get_labels()

    svm.set_batch_computation_enabled(True)
    labels = svm.apply().get_labels()
    return labels, svm
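The point of this example is the effect of the linadd and batch-computation optimizations: for string kernels such as the weighted degree kernel, Shogun can expand the trained SVM into an explicit weight representation over substrings ("linadd") and evaluate many test sequences in one batch, which is usually much faster than summing kernel values over all support vectors for each test example. The code therefore computes the outputs once with both optimizations disabled and once more with batch computation re-enabled, so the two code paths can be compared.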
Code example #11
def do_batch_linadd ():
	print 'SVMlight batch'

	from shogun.Features import StringCharFeatures, Labels, DNA
	from shogun.Kernel import WeightedDegreeStringKernel
	try:
		from shogun.Classifier import SVMLight
	except ImportError:
		print 'No support for SVMLight available.'
		return

	feats_train=StringCharFeatures(DNA)
	feats_train.set_features(fm_train_dna)
	feats_test=StringCharFeatures(DNA)
	feats_test.set_features(fm_test_dna)
	degree=20

	kernel=WeightedDegreeStringKernel(feats_train, feats_train, degree)

	C=1
	epsilon=1e-5
	num_threads=2
	labels=Labels(label_train_dna)

	svm=SVMLight(C, kernel, labels)
	svm.set_epsilon(epsilon)
	svm.parallel.set_num_threads(num_threads)
	svm.train()

	kernel.init(feats_train, feats_test)

	#print 'SVMLight Objective: %f num_sv: %d' % \
	#	(svm.get_objective(), svm.get_num_support_vectors())
	svm.set_batch_computation_enabled(False)
	svm.set_linadd_enabled(False)
	svm.classify().get_labels()

	svm.set_batch_computation_enabled(True)
	svm.classify().get_labels()
Code example #12
def classifier_svmlight_batch_linadd_modular(fm_train_dna, fm_test_dna,
		label_train_dna, degree, C, epsilon, num_threads):

	from shogun.Features import StringCharFeatures, BinaryLabels, DNA
	from shogun.Kernel import WeightedDegreeStringKernel, MSG_DEBUG
	try:
		from shogun.Classifier import SVMLight
	except ImportError:
		print('No support for SVMLight available.')
		return

	feats_train=StringCharFeatures(DNA)
	#feats_train.io.set_loglevel(MSG_DEBUG)
	feats_train.set_features(fm_train_dna)
	feats_test=StringCharFeatures(DNA)
	feats_test.set_features(fm_test_dna)
	degree=20

	kernel=WeightedDegreeStringKernel(feats_train, feats_train, degree)

	labels=BinaryLabels(label_train_dna)

	svm=SVMLight(C, kernel, labels)
	svm.set_epsilon(epsilon)
	svm.parallel.set_num_threads(num_threads)
	svm.train()

	kernel.init(feats_train, feats_test)

	#print('SVMLight Objective: %f num_sv: %d' % (svm.get_objective(), svm.get_num_support_vectors()))
	svm.set_batch_computation_enabled(False)
	svm.set_linadd_enabled(False)
	svm.apply().get_labels()

	svm.set_batch_computation_enabled(True)
	labels = svm.apply().get_labels()
	return labels, svm
Code example #13

print 'SVMLight'

import numpy
from shogun.Features import StringCharFeatures, Labels, DNA
from shogun.Kernel import WeightedDegreeStringKernel
from shogun.Classifier import SVMLight

feats_train=StringCharFeatures(DNA)
feats_train.set_features(fm_train_dna)
feats_test=StringCharFeatures(DNA)
feats_test.set_features(fm_test_dna)

kernel=WeightedDegreeStringKernel(feats_train, feats_train, degree)

C=10
epsilon=1e-5
num_threads=1
labels=Labels(label_train_dna)

svm=SVMLight(C, kernel, labels)
svm.set_qpsize(3)
svm.set_linear_term(-numpy.array([1,2,3,4,5,6,7,8,7,6], dtype=numpy.double));
svm.set_epsilon(epsilon)
svm.parallel.set_num_threads(num_threads)
svm.train()

kernel.init(feats_train, feats_test)
out = svm.classify().get_labels()

Code example #14
def solver_mtk_shogun(C, all_xt, all_lt, task_indicator, M, L, eps,
                      target_obj):
    """
    implementation using multitask kernel
    """

    xt = numpy.array(all_xt)
    lt = numpy.array(all_lt)
    tt = numpy.array(task_indicator, dtype=numpy.int32)
    tsm = numpy.array(M)

    print "task_sim:", tsm

    num_tasks = L.shape[0]

    # sanity checks
    assert len(xt) == len(lt) == len(tt)
    assert M.shape == L.shape
    assert num_tasks == len(set(tt))

    # set up shogun objects
    if type(xt[0]) == numpy.string_:
        feat = StringCharFeatures(DNA)
        xt = [str(a) for a in xt]
        feat.set_features(xt)
        base_kernel = WeightedDegreeStringKernel(feat, feat, 8)
    else:
        feat = RealFeatures(xt.T)
        base_kernel = LinearKernel(feat, feat)

    lab = Labels(lt)

    # set up normalizer
    normalizer = MultitaskKernelNormalizer(tt.tolist())

    for i in xrange(num_tasks):
        for j in xrange(num_tasks):
            normalizer.set_task_similarity(i, j, M[i, j])

    print "num of unique tasks: ", normalizer.get_num_unique_tasks(
        task_indicator)

    # set up kernel
    base_kernel.set_cache_size(2000)
    base_kernel.set_normalizer(normalizer)
    base_kernel.init_normalizer()

    # set up svm
    svm = SVMLight()  #LibSVM()

    svm.set_epsilon(eps)
    #print "reducing num threads to one"
    #svm.parallel.set_num_threads(1)
    #print "using one thread"

    # how often do we like to compute objective etc
    svm.set_record_interval(0)
    svm.set_target_objective(target_obj)

    svm.set_linadd_enabled(False)
    svm.set_batch_computation_enabled(False)
    svm.io.set_loglevel(MSG_DEBUG)
    #SET THREADS TO 1

    svm.set_C(C, C)
    svm.set_bias_enabled(False)

    # prepare for training
    svm.set_labels(lab)
    svm.set_kernel(base_kernel)

    # train svm
    svm.train()

    train_times = svm.get_training_times()
    objectives = [-obj for obj in svm.get_dual_objectives()]

    if False:

        # get model parameters
        sv_idx = svm.get_support_vectors()
        sparse_alphas = svm.get_alphas()

        assert len(sv_idx) == len(sparse_alphas)

        # compute dense alpha (remove label)
        alphas = numpy.zeros(len(xt))
        for id_sparse, id_dense in enumerate(sv_idx):
            alphas[id_dense] = sparse_alphas[id_sparse] * lt[id_dense]

        # print alphas
        W = alphas_to_w(alphas, xt, lt, task_indicator, M)
        primal_obj = compute_primal_objective(
            W.reshape(W.shape[0] * W.shape[1]), C, all_xt, all_lt,
            task_indicator, L)
        objectives.append(primal_obj)
        train_times.append(train_times[-1] + 100)

    return objectives, train_times
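The multitask structure enters only through the kernel normalizer: MultitaskKernelNormalizer rescales each base-kernel entry for a pair of examples by the task-similarity value M[task_i, task_j], so a single SVMLight trained on the pooled data shares information across tasks according to M. Note that set_record_interval and set_target_objective are not part of every Shogun build; this example appears to rely on an SVMLight extended to record training times and dual objectives, which the function reads back via get_training_times and get_dual_objectives.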
Code example #15
class ShogunPredictor(object):
    """
    basic promoter model using string kernels
    """
    def __init__(self, param):
        self.param = param

    def train(self, data, labels):
        """
        model training 
        """

        # centered WDK/WDK-shift
        if self.param["shifts"] == 0:
            kernel_center = WeightedDegreeStringKernel(self.param["degree"])
        else:
            kernel_center = WeightedDegreePositionStringKernel(
                10, self.param["degree"])
            shifts_vector = numpy.ones(
                self.param["center_offset"] * 2,
                dtype=numpy.int32) * self.param["shifts"]
            kernel_center.set_shifts(shifts_vector)

        kernel_center.set_cache_size(self.param["kernel_cache"] / 3)

        # border spectrum kernels
        size = self.param["kernel_cache"] / 3
        use_sign = False
        kernel_left = WeightedCommWordStringKernel(size, use_sign)
        kernel_right = WeightedCommWordStringKernel(size, use_sign)

        # assemble combined kernel
        kernel = CombinedKernel()
        kernel.append_kernel(kernel_center)
        kernel.append_kernel(kernel_left)
        kernel.append_kernel(kernel_right)

        ## building features
        feat = create_features(data, self.param["center_offset"],
                               self.param["center_pos"])

        # init combined kernel
        kernel.init(feat, feat)

        print "len(labels) = %i" % (len(labels))
        lab = BinaryLabels(numpy.double(labels))
        self.svm = SVMLight(self.param["cost"], kernel, lab)

        # show debugging output
        self.svm.io.enable_progress()
        self.svm.io.set_loglevel(MSG_DEBUG)

        # optimization settings
        num_threads = 2
        self.svm.parallel.set_num_threads(num_threads)
        self.svm.set_epsilon(10e-8)

        self.svm.train()

        return self

    def predict(self, data):
        """
        model prediction 
        """

        feat = create_features(data, self.param["center_offset"],
                               self.param["center_pos"])
        out = self.svm.apply(feat).get_values()

        return out
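The design here combines three string kernels over different parts of the promoter window: a weighted degree kernel (optionally with positional shifts) for the centered region and two spectrum-style WeightedCommWordStringKernel kernels for the left and right flanks. CombinedKernel sums the three, so create_features is expected to return a matching CombinedFeatures object with one feature set per sub-kernel before kernel.init(feat, feat) is called.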
Code example #16
File: promoter_kernel.py  Project: kuod/genomeutils
class ShogunPredictor(object):
    """
    basic promoter model using string kernels
    """

    def __init__(self, param):
        self.param = param


    def train(self, data, labels):
        """
        model training 
        """

        # centered WDK/WDK-shift
        if self.param["shifts"] == 0:
            kernel_center = WeightedDegreeStringKernel(self.param["degree"])
        else:
            kernel_center = WeightedDegreePositionStringKernel(10, self.param["degree"])
            shifts_vector = numpy.ones(self.param["center_offset"]*2, dtype=numpy.int32)*self.param["shifts"]
            kernel_center.set_shifts(shifts_vector)

        kernel_center.set_cache_size(self.param["kernel_cache"]/3)

        # border spectrum kernels
        size = self.param["kernel_cache"]/3
        use_sign = False
        kernel_left = WeightedCommWordStringKernel(size, use_sign)
        kernel_right = WeightedCommWordStringKernel(size, use_sign)
        
        # assemble combined kernel
        kernel = CombinedKernel()
        kernel.append_kernel(kernel_center)
        kernel.append_kernel(kernel_left)
        kernel.append_kernel(kernel_right)

        ## building features 
        feat = create_features(data, self.param["center_offset"], self.param["center_pos"])
        
        # init combined kernel
        kernel.init(feat, feat)

        print "len(labels) = %i" % (len(labels))
        lab = BinaryLabels(numpy.double(labels))
        self.svm = SVMLight(self.param["cost"], kernel, lab)

        # show debugging output
        self.svm.io.enable_progress()
        self.svm.io.set_loglevel(MSG_DEBUG)

        # optimization settings
        num_threads = 2
        self.svm.parallel.set_num_threads(num_threads)
        self.svm.set_epsilon(10e-8)

        self.svm.train()

        return self


    def predict(self, data):
        """
        model prediction 
        """
        
        feat = create_features(data, self.param["center_offset"], self.param["center_pos"])
        out = self.svm.apply(feat).get_values()

        return out