def classifier_svmlight_linear_term_modular(fm_train_dna=traindna, fm_test_dna=testdna,
                                            label_train_dna=label_traindna, degree=3,
                                            C=10, epsilon=1e-5, num_threads=1):
    """Train SVMLight with a custom linear term on DNA strings and predict.

    Builds DNA string features for the training and test sequences, trains
    an SVMLight machine on a weighted degree string kernel of the given
    ``degree`` and applies it to the test sequences.

    Args:
        fm_train_dna: training DNA strings.
        fm_test_dna: test DNA strings.
        label_train_dna: labels of the training strings.
        degree: degree passed to the weighted degree string kernel.
        C: regularisation constant passed to SVMLight.
        epsilon: value passed to ``set_epsilon``.
        num_threads: number of threads the machine may use.

    Returns:
        Tuple ``(out, kernel)``: the predicted labels and the kernel, which
        is left initialised on (train, test) features.

    Exits the interpreter with status 0 when SVMLight cannot be imported.
    """
    import sys

    import numpy
    from modshogun import StringCharFeatures, BinaryLabels, DNA
    from modshogun import WeightedDegreeStringKernel
    try:
        from modshogun import SVMLight
    except ImportError:
        print("SVMLight is not available")
        # sys.exit() instead of the site-provided exit(): the latter is an
        # interactive helper and is not guaranteed to exist in every run mode.
        sys.exit(0)

    feats_train = StringCharFeatures(DNA)
    feats_train.set_features(fm_train_dna)
    feats_test = StringCharFeatures(DNA)
    feats_test.set_features(fm_test_dna)

    kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)

    labels = BinaryLabels(label_train_dna)

    svm = SVMLight(C, kernel, labels)
    svm.set_qpsize(3)
    # Hard-coded ten-entry linear term; presumably one entry per training
    # example -- TODO confirm against the data this is called with.
    linear_term = -numpy.array([1, 2, 3, 4, 5, 6, 7, 8, 7, 6], dtype=numpy.double)
    svm.set_linear_term(linear_term)
    svm.set_epsilon(epsilon)
    svm.parallel.set_num_threads(num_threads)
    svm.train()

    # Re-initialise the kernel on (train, test) before predicting.
    kernel.init(feats_train, feats_test)
    out = svm.apply().get_labels()
    return out, kernel
def classifier_svmlight_modular(fm_train_dna=traindat,
                                fm_test_dna=testdat,
                                label_train_dna=label_traindat,
                                C=1.2,
                                epsilon=1e-5,
                                num_threads=1):
    """Train SVMLight on a degree-20 weighted degree string kernel.

    The trained machine is applied once with the kernel re-initialised on
    (train, test) features; the predicted labels are computed but not
    returned.

    Returns:
        The kernel object, or ``None`` when SVMLight cannot be imported.
    """
    from modshogun import StringCharFeatures, BinaryLabels, DNA
    from modshogun import WeightedDegreeStringKernel
    try:
        from modshogun import SVMLight
    except ImportError:
        print('No support for SVMLight available.')
        return None

    wd_degree = 20

    train_features = StringCharFeatures(DNA)
    train_features.set_features(fm_train_dna)
    test_features = StringCharFeatures(DNA)
    test_features.set_features(fm_test_dna)

    wd_kernel = WeightedDegreeStringKernel(
        train_features, train_features, wd_degree)
    train_labels = BinaryLabels(label_train_dna)

    classifier = SVMLight(C, wd_kernel, train_labels)
    classifier.set_epsilon(epsilon)
    classifier.parallel.set_num_threads(num_threads)
    classifier.train()

    # Switch the kernel to (train, test) pairs, then run the prediction.
    wd_kernel.init(train_features, test_features)
    classifier.apply().get_labels()
    return wd_kernel
Esempio n. 3
0
def classifier_domainadaptationsvm_modular(fm_train_dna=traindna, fm_test_dna=testdna,
                                           label_train_dna=label_traindna,
                                           label_test_dna=label_testdna,
                                           fm_train_dna2=traindna2, fm_test_dna2=testdna2,
                                           label_train_dna2=label_traindna2,
                                           label_test_dna2=label_testdna2, C=1, degree=3):
    """Train a domain-adaptation SVM on top of a previously trained SVMLight.

    Stage one trains SVMLight on the first domain (``fm_train_dna`` /
    ``label_train_dna``). Stage two trains a ``DomainAdaptationSVM`` on the
    second domain (``fm_train_dna2`` / ``label_train_dna2``), passing the
    stage-one machine in, and predicts on ``fm_test_dna2``.

    ``fm_test_dna``, ``label_test_dna`` and ``label_test_dna2`` are accepted
    for interface compatibility but are not used.

    Relies on ``StringCharFeatures``, ``DNA``, ``WeightedDegreeStringKernel``,
    ``BinaryLabels``, ``SVMLight`` and ``DomainAdaptationSVM`` being available
    at module scope.

    Returns:
        The result of ``dasvm.apply_binary`` on the second-domain test features.
    """
    # Stage 1: machine trained on the first (source) domain.
    feats_train = StringCharFeatures(fm_train_dna, DNA)
    kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)
    labels = BinaryLabels(label_train_dna)
    svm = SVMLight(C, kernel, labels)
    svm.train()
    #svm.io.set_loglevel(MSG_DEBUG)

    # Stage 2: second-domain data. These were previously built from the
    # first-domain arguments, which left every *2 parameter unused.
    feats_train2 = StringCharFeatures(fm_train_dna2, DNA)
    feats_test2 = StringCharFeatures(fm_test_dna2, DNA)
    kernel2 = WeightedDegreeStringKernel(feats_train2, feats_train2, degree)
    labels2 = BinaryLabels(label_train_dna2)

    # we regularize against the previously obtained solution
    dasvm = DomainAdaptationSVM(C, kernel2, labels2, svm, 1.0)
    dasvm.train()

    out = dasvm.apply_binary(feats_test2)

    return out #,dasvm TODO
def classifier_svmlight_linear_term_modular(fm_train_dna=traindna, fm_test_dna=testdna,
                                            label_train_dna=label_traindna, degree=3,
                                            C=10, epsilon=1e-5, num_threads=1):
    """Fit SVMLight with an explicit linear term and classify test DNA.

    Training and test sequences are wrapped as DNA string features, an
    SVMLight machine is trained on a weighted degree string kernel of the
    requested ``degree``, and the machine is applied to the test data.

    Args:
        fm_train_dna: training DNA strings.
        fm_test_dna: test DNA strings.
        label_train_dna: labels of the training strings.
        degree: degree passed to the weighted degree string kernel.
        C: regularisation constant passed to SVMLight.
        epsilon: value passed to ``set_epsilon``.
        num_threads: number of threads the machine may use.

    Returns:
        Tuple ``(out, kernel)`` of predicted labels and the kernel (left
        initialised on train/test features).

    Exits the interpreter with status 0 when SVMLight cannot be imported.
    """
    import sys

    import numpy
    from modshogun import StringCharFeatures, BinaryLabels, DNA
    from modshogun import WeightedDegreeStringKernel
    try:
        from modshogun import SVMLight
    except ImportError:
        print("SVMLight is not available")
        # Use sys.exit(); the bare exit() helper is injected by the site
        # module for interactive use and should not be relied on in programs.
        sys.exit(0)

    feats_train = StringCharFeatures(DNA)
    feats_train.set_features(fm_train_dna)
    feats_test = StringCharFeatures(DNA)
    feats_test.set_features(fm_test_dna)

    kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)

    labels = BinaryLabels(label_train_dna)

    svm = SVMLight(C, kernel, labels)
    svm.set_qpsize(3)
    # Fixed ten-entry linear term; presumably sized to the number of
    # training examples -- TODO confirm with the caller's data.
    svm.set_linear_term(
        -numpy.array([1, 2, 3, 4, 5, 6, 7, 8, 7, 6], dtype=numpy.double))
    svm.set_epsilon(epsilon)
    svm.parallel.set_num_threads(num_threads)
    svm.train()

    # Predictions are made with the kernel re-initialised on (train, test).
    kernel.init(feats_train, feats_test)
    out = svm.apply().get_labels()
    return out, kernel
def classifier_svmlight_modular(fm_train_dna=traindat, fm_test_dna=testdat,
                                label_train_dna=label_traindat, C=1.2,
                                epsilon=1e-5, num_threads=1):
    """Fit SVMLight on DNA strings using a weighted degree kernel of degree 20.

    After training, the kernel is re-initialised on (train, test) features
    and the machine is applied once; the resulting labels are discarded.

    Returns:
        The kernel, or ``None`` if SVMLight is not importable.
    """
    from modshogun import StringCharFeatures, BinaryLabels, DNA
    from modshogun import WeightedDegreeStringKernel
    try:
        from modshogun import SVMLight
    except ImportError:
        print('No support for SVMLight available.')
        return None

    train_feats = StringCharFeatures(DNA)
    train_feats.set_features(fm_train_dna)
    test_feats = StringCharFeatures(DNA)
    test_feats.set_features(fm_test_dna)

    kernel_degree = 20
    string_kernel = WeightedDegreeStringKernel(
        train_feats, train_feats, kernel_degree)

    machine = SVMLight(C, string_kernel, BinaryLabels(label_train_dna))
    machine.set_epsilon(epsilon)
    machine.parallel.set_num_threads(num_threads)
    machine.train()

    # Point the kernel at the test features before applying the machine.
    string_kernel.init(train_feats, test_feats)
    machine.apply().get_labels()
    return string_kernel
def kernel_weighted_degree_string_modular(fm_train_dna=traindat,
                                          fm_test_dna=testdat,
                                          degree=20):
    """Compute weighted degree string kernel matrices for DNA sequences.

    The kernel weights run linearly from ``degree`` down to 1 and are
    normalised by the sum ``1 + 2 + ... + degree``.

    Returns:
        Tuple ``(km_train, km_test, kernel)``: the train/train kernel matrix,
        the train/test kernel matrix, and the kernel object (left initialised
        on train/test features).
    """
    from modshogun import StringCharFeatures, DNA
    from modshogun import WeightedDegreeStringKernel, MSG_DEBUG
    from numpy import arange, double

    train_features = StringCharFeatures(fm_train_dna, DNA)
    test_features = StringCharFeatures(fm_test_dna, DNA)

    wd_kernel = WeightedDegreeStringKernel(
        train_features, train_features, degree)

    # Descending weights degree..1, scaled so that they sum to one.
    orders = arange(1, degree + 1, dtype=double)
    wd_kernel.set_wd_weights(orders[::-1] / sum(orders))

    train_matrix = wd_kernel.get_kernel_matrix()
    wd_kernel.init(train_features, test_features)
    test_matrix = wd_kernel.get_kernel_matrix()

    # Note carried over from the original: the kernel object can be
    # serialized with pickle (dump/load, protocol=2).
    return train_matrix, test_matrix, wd_kernel
def kernel_weighted_degree_string_modular(fm_train_dna=traindat, fm_test_dna=testdat, degree=20):
    """Build a weighted degree string kernel and return its kernel matrices.

    Weights decrease linearly from ``degree`` to 1 and are divided by the
    sum of ``1..degree`` before being set on the kernel.

    Returns:
        ``(km_train, km_test, kernel)`` -- the kernel matrix on train/train
        features, the kernel matrix on train/test features, and the kernel.
    """
    from modshogun import StringCharFeatures, DNA
    from modshogun import WeightedDegreeStringKernel, MSG_DEBUG
    from numpy import arange, double

    dna_train = StringCharFeatures(fm_train_dna, DNA)
    dna_test = StringCharFeatures(fm_test_dna, DNA)

    kernel = WeightedDegreeStringKernel(dna_train, dna_train, degree)

    ascending = arange(1, degree + 1, dtype=double)
    normaliser = sum(ascending)
    kernel.set_wd_weights(ascending[::-1] / normaliser)

    km_train = kernel.get_kernel_matrix()

    # Same kernel, now evaluated between training and test sequences.
    kernel.init(dna_train, dna_test)
    km_test = kernel.get_kernel_matrix()

    # Note carried over from the original: the kernel can be serialized
    # with pickle.dump / pickle.load using protocol=2.
    return km_train, km_test, kernel
Esempio n. 8
0
def embed(file='data/mml.txt'):
    """Embed the strings stored in ``file`` (one per line) into two dimensions.

    Each line is right-stripped, wrapped as DNA string features, and a
    kernel distance based on a weighted degree string kernel is fed to
    multidimensional scaling with target dimension 2.

    Returns:
        Tuple ``(feature_matrix, strings)``: the embedded feature matrix and
        the list of strings that were read.
    """
    with open(file) as handle:
        strings = [line.rstrip() for line in handle]

    features = StringCharFeatures(strings, DNA)

    wd_kernel = WeightedDegreeStringKernel(10)
    distance = KernelDistance(1.0, wd_kernel)
    distance.init(features, features)

    mds = MultidimensionalScaling()
    mds.set_target_dim(2)
    embedding = mds.embed_distance(distance)
    return embedding.get_feature_matrix(), strings
def classifier_svmlight_batch_linadd_modular(
    fm_train_dna, fm_test_dna, label_train_dna, degree, C, epsilon, num_threads
):
    """Train SVMLight on DNA strings and predict with batch computation.

    Args:
        fm_train_dna: training DNA strings.
        fm_test_dna: test DNA strings.
        label_train_dna: labels of the training strings.
        degree: degree of the weighted degree string kernel.
        C: regularisation constant passed to SVMLight.
        epsilon: value passed to ``set_epsilon``.
        num_threads: number of threads the machine may use.

    Returns:
        Tuple ``(labels, svm)`` with the labels predicted while batch
        computation is enabled and the trained machine, or ``None`` when
        SVMLight cannot be imported.
    """
    from modshogun import StringCharFeatures, BinaryLabels, DNA
    from modshogun import WeightedDegreeStringKernel

    try:
        from modshogun import SVMLight
    except ImportError:
        print("No support for SVMLight available.")
        return None

    feats_train = StringCharFeatures(DNA)
    feats_train.set_features(fm_train_dna)
    feats_test = StringCharFeatures(DNA)
    feats_test.set_features(fm_test_dna)

    # The caller-supplied degree is used as given; it was previously
    # overwritten with a hard-coded 20, silently ignoring the argument.
    kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)

    labels = BinaryLabels(label_train_dna)

    svm = SVMLight(C, kernel, labels)
    svm.set_epsilon(epsilon)
    svm.parallel.set_num_threads(num_threads)
    svm.train()

    kernel.init(feats_train, feats_test)

    # First pass with batch computation and linadd both disabled; the
    # result is discarded (presumably exercising the plain code path).
    svm.set_batch_computation_enabled(False)
    svm.set_linadd_enabled(False)
    svm.apply().get_labels()

    # Second pass with batch computation enabled provides the return value.
    svm.set_batch_computation_enabled(True)
    labels = svm.apply().get_labels()
    return labels, svm
Esempio n. 10
0
def runShogunSVMDNAWDNoPositionKernel(train_xt, train_lt, test_xt):
    """Run LibSVM with a weighted degree (non-position) string kernel.

    Relies on the module-level names ``DEGREE`` (kernel degree) and ``SVMC``
    (regularisation constant), plus the Shogun and numpy names used below.

    Args:
        train_xt: training DNA strings.
        train_lt: labels of the training strings.
        test_xt: test DNA strings.

    Returns:
        Tuple ``(out1, out2)``: predicted labels on the training data and on
        the test data.
    """
    # Set up features and kernel.
    feats_train = StringCharFeatures(train_xt, DNA)
    feats_test = StringCharFeatures(test_xt, DNA)

    kernel = WeightedDegreeStringKernel(feats_train, feats_train, DEGREE)
    kernel.io.set_loglevel(MSG_DEBUG)

    # Weights fall linearly from DEGREE to 1, normalised by sum(1..DEGREE).
    orders = arange(1, DEGREE + 1, dtype=double)
    weights = orders[::-1] / sum(orders)
    kernel.set_wd_weights(weights)

    labels = BinaryLabels(train_lt)

    # Train the model. print() with a single argument behaves the same on
    # Python 2 and Python 3, unlike the former print statement.
    print("Ready to train!")
    svm = LibSVM(SVMC, kernel, labels)
    svm.io.set_loglevel(MSG_DEBUG)
    svm.train()

    # Predict on training data first, then on the test data.
    print("Making predictions!")
    out1 = svm.apply(feats_train).get_labels()
    kernel.init(feats_train, feats_test)
    out2 = svm.apply(feats_test).get_labels()

    return out1, out2
def classifier_svmlight_batch_linadd_modular(fm_train_dna, fm_test_dna,
                                             label_train_dna, degree, C,
                                             epsilon, num_threads):
    """Fit SVMLight on DNA strings and classify using batch computation.

    Args:
        fm_train_dna: training DNA strings.
        fm_test_dna: test DNA strings.
        label_train_dna: labels of the training strings.
        degree: degree of the weighted degree string kernel.
        C: regularisation constant passed to SVMLight.
        epsilon: value passed to ``set_epsilon``.
        num_threads: number of threads the machine may use.

    Returns:
        ``(labels, svm)`` -- labels predicted with batch computation enabled
        and the trained machine; ``None`` if SVMLight is not importable.
    """
    from modshogun import StringCharFeatures, BinaryLabels, DNA
    from modshogun import WeightedDegreeStringKernel
    try:
        from modshogun import SVMLight
    except ImportError:
        print('No support for SVMLight available.')
        return None

    feats_train = StringCharFeatures(DNA)
    feats_train.set_features(fm_train_dna)
    feats_test = StringCharFeatures(DNA)
    feats_test.set_features(fm_test_dna)

    # Honour the ``degree`` argument; the earlier code reset it to 20 here,
    # so whatever the caller passed had no effect.
    kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)

    labels = BinaryLabels(label_train_dna)

    svm = SVMLight(C, kernel, labels)
    svm.set_epsilon(epsilon)
    svm.parallel.set_num_threads(num_threads)
    svm.train()

    kernel.init(feats_train, feats_test)

    # Run once without batch computation and linadd (result unused) ...
    svm.set_batch_computation_enabled(False)
    svm.set_linadd_enabled(False)
    svm.apply().get_labels()

    # ... then with batch computation enabled for the returned labels.
    svm.set_batch_computation_enabled(True)
    labels = svm.apply().get_labels()
    return labels, svm
Esempio n. 12
0
def embed(file='mml.pickle', N=500):
    """Embed a random sample of positively labelled strings into 2-D.

    Loads ``file`` through the module-level ``load`` helper, which is
    expected to return a mapping with ``'examples'`` and ``'labels'``
    entries. ``N`` examples whose label is greater than zero are drawn at
    random and embedded with multidimensional scaling on a kernel distance
    derived from a weighted degree string kernel.

    Args:
        file: path handed to ``load``.
        N: number of positive examples to sample.

    Returns:
        Tuple ``(feature_matrix, strings)``: the embedded feature matrix and
        the sampled strings.

    Raises:
        ValueError: from ``random.sample`` when fewer than ``N`` positive
            examples are available.
    """
    print('%s reading %s' % (datetime.datetime.now(), file))
    file_contents = load(file)
    examples = file_contents['examples']
    print('%s there are %d strings in %s' % (datetime.datetime.now(), len(examples), file))

    positives = numpy.where(numpy.array(file_contents['labels']) > 0)[0]
    # random.sample requires a real sequence on Python 3; a numpy array is
    # rejected there, so hand it a list of the indices instead.
    selected_idxs = random.sample(list(positives), N)
    strings = [examples[i] for i in selected_idxs]

    features = StringCharFeatures(strings, DNA)
    kernel = WeightedDegreeStringKernel(10)
    distance = KernelDistance(1.0, kernel)
    distance.init(features, features)
    converter = MultidimensionalScaling()
    converter.set_target_dim(2)
    return converter.embed_distance(distance).get_feature_matrix(), strings
Esempio n. 13
0
def embed(file='mml.pickle'):
    """Embed the first 1000 positively labelled strings of ``file`` into 2-D.

    Loads ``file`` through the module-level ``load`` helper, which is
    expected to return a mapping with ``'examples'`` and ``'labels'``
    entries. Examples are taken in order while their label is greater than
    0.0, up to a cap of 1000, and embedded with multidimensional scaling on
    a kernel distance derived from a weighted degree string kernel.

    Returns:
        Tuple ``(feature_matrix, strings)``: the embedded feature matrix and
        the selected strings.
    """
    limit = 1000
    strings = []

    print('%s reading %s' % (datetime.datetime.now(), file))
    file_contents = load(file)
    examples = file_contents['examples']
    print('%s there are %d strings in %s' % (
        datetime.datetime.now(), len(examples), file))

    for i, label in enumerate(file_contents['labels']):
        # Stop scanning once the cap is reached; nothing more can be added.
        if len(strings) >= limit:
            break
        if label > 0.0:
            strings.append(examples[i])

    features = StringCharFeatures(strings, DNA)
    kernel = WeightedDegreeStringKernel(10)
    distance = KernelDistance(1.0, kernel)
    distance.init(features, features)
    converter = MultidimensionalScaling()
    converter.set_target_dim(2)
    return converter.embed_distance(distance).get_feature_matrix(), strings
Esempio n. 14
0
class svm_splice_model(object):
	def __init__(self, order, traindat, alphas, b, window, consensus):
		"""Build a kernel machine over DNA strings from stored SVM parameters.

		Args:
			order: degree of the weighted degree string kernel (cast to int).
			traindat: DNA strings set as the features on both kernel sides.
			alphas: coefficients passed to ``KernelMachine``; the index
				array passed with them is ``0..len(alphas)-1``.
			b: bias passed to ``KernelMachine``.
			window: 3-tuple ``(window_left, offset, window_right)``.
			consensus: stored unchanged on the instance.
		"""
		# Tuple parameters in a signature are a syntax error on Python 3
		# (PEP 3113); the tuple is taken as one argument and unpacked here,
		# which is identical for positional callers.
		window_left, offset, window_right = window

		f=StringCharFeatures(DNA)
		f.set_features(traindat)
		wd_kernel = WeightedDegreeStringKernel(f,f, int(order))
		wd_kernel.io.set_target_to_stderr()

		self.svm=KernelMachine(wd_kernel, alphas, numpy.arange(len(alphas), dtype=numpy.int32), b)
		self.svm.io.set_target_to_stderr()
		# Use as many threads as the machine reports CPUs.
		self.svm.parallel.set_num_threads(self.svm.parallel.get_num_cpus())
		self.svm.set_linadd_enabled(False)
		self.svm.set_batch_computation_enabled(False)

		self.window_left=int(window_left)
		self.window_right=int(window_right)

		self.consensus=consensus
		self.wd_kernel=wd_kernel
		self.traindat=f
		self.offset=offset