예제 #1
0
def classifier_domainadaptationsvm_modular (fm_train_dna=traindna,fm_test_dna=testdna, \
                                                label_train_dna=label_traindna, \
                                               label_test_dna=label_testdna,fm_train_dna2=traindna2,fm_test_dna2=testdna2, \
                                               label_train_dna2=label_traindna2,label_test_dna2=label_testdna2,C=1,degree=3):




	feats_train = StringCharFeatures(fm_train_dna, DNA)
	feats_test = StringCharFeatures(fm_test_dna, DNA)
	kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)
	labels = BinaryLabels(label_train_dna)
	svm = SVMLight(C, kernel, labels)
	svm.train()
	#svm.io.set_loglevel(MSG_DEBUG)

	#####################################

	#print("obtaining DA SVM from previously trained SVM")

	feats_train2 = StringCharFeatures(fm_train_dna, DNA)
	feats_test2 = StringCharFeatures(fm_test_dna, DNA)
	kernel2 = WeightedDegreeStringKernel(feats_train, feats_train, degree)
	labels2 = BinaryLabels(label_train_dna)

	# we regularize against the previously obtained solution
	dasvm = DomainAdaptationSVM(C, kernel2, labels2, svm, 1.0)
	dasvm.train()

	out = dasvm.apply_binary(feats_test2)

	return out #,dasvm TODO
def classifier_svmlight_modular(fm_train_dna=traindat,
                                fm_test_dna=testdat,
                                label_train_dna=label_traindat,
                                C=1.2,
                                epsilon=1e-5,
                                num_threads=1):
    from modshogun import StringCharFeatures, BinaryLabels, DNA
    from modshogun import WeightedDegreeStringKernel
    try:
        from modshogun import SVMLight
    except ImportError:
        print('No support for SVMLight available.')
        return

    feats_train = StringCharFeatures(DNA)
    feats_train.set_features(fm_train_dna)
    feats_test = StringCharFeatures(DNA)
    feats_test.set_features(fm_test_dna)
    degree = 20

    kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)

    labels = BinaryLabels(label_train_dna)

    svm = SVMLight(C, kernel, labels)
    svm.set_epsilon(epsilon)
    svm.parallel.set_num_threads(num_threads)
    svm.train()

    kernel.init(feats_train, feats_test)
    svm.apply().get_labels()
    return kernel
def classifier_svmlight_linear_term_modular (fm_train_dna=traindna,fm_test_dna=testdna, \
                                                label_train_dna=label_traindna,degree=3, \
                                                C=10,epsilon=1e-5,num_threads=1):

    from modshogun import StringCharFeatures, BinaryLabels, DNA
    from modshogun import WeightedDegreeStringKernel
    try:
        from modshogun import SVMLight
    except ImportError:
        print("SVMLight is not available")
        exit(0)

    feats_train = StringCharFeatures(DNA)
    feats_train.set_features(fm_train_dna)
    feats_test = StringCharFeatures(DNA)
    feats_test.set_features(fm_test_dna)

    kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)

    labels = BinaryLabels(label_train_dna)

    svm = SVMLight(C, kernel, labels)
    svm.set_qpsize(3)
    svm.set_linear_term(
        -numpy.array([1, 2, 3, 4, 5, 6, 7, 8, 7, 6], dtype=numpy.double))
    svm.set_epsilon(epsilon)
    svm.parallel.set_num_threads(num_threads)
    svm.train()

    kernel.init(feats_train, feats_test)
    out = svm.apply().get_labels()
    return out, kernel
예제 #4
0
파일: wdsvm.py 프로젝트: zhouyu/polyAcode
def svm_process(args_tuple):
    X_train, Y_train, X_test, Y_test, d, c = args_tuple

    kernel = WeightedDegreePositionStringKernel(X_train, X_train, d)
    kernel.set_shifts(np.ones(SEQ_LEN, dtype=np.int32))
    kernel.set_position_weights(np.ones(SEQ_LEN, dtype=np.float64))
    kernel.init(X_train, X_train)

    model = SVMLight(c, kernel, Y_train)
    model.train()

    Y_test_pred = model.apply(X_test).get_labels()
    Y_test_dist = model.apply(X_test).get_values()
    Y_test_proba = np.exp(Y_test_dist) / (1 + np.exp(Y_test_dist))

    accuracy = np.where(Y_test_pred - Y_test == 0)[0].size * 1.0 / Y_test.size

    return (accuracy, Y_test_proba)
def classifier_svmlight_batch_linadd_modular(fm_train_dna, fm_test_dna,
                                             label_train_dna, degree, C,
                                             epsilon, num_threads):

    from modshogun import StringCharFeatures, BinaryLabels, DNA
    from modshogun import WeightedDegreeStringKernel, MSG_DEBUG
    try:
        from modshogun import SVMLight
    except ImportError:
        print('No support for SVMLight available.')
        return

    feats_train = StringCharFeatures(DNA)
    #feats_train.io.set_loglevel(MSG_DEBUG)
    feats_train.set_features(fm_train_dna)
    feats_test = StringCharFeatures(DNA)
    feats_test.set_features(fm_test_dna)
    degree = 20

    kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)

    labels = BinaryLabels(label_train_dna)

    svm = SVMLight(C, kernel, labels)
    svm.set_epsilon(epsilon)
    svm.parallel.set_num_threads(num_threads)
    svm.train()

    kernel.init(feats_train, feats_test)

    #print('SVMLight Objective: %f num_sv: %d' % \)
    #	(svm.get_objective(), svm.get_num_support_vectors())
    svm.set_batch_computation_enabled(False)
    svm.set_linadd_enabled(False)
    svm.apply().get_labels()

    svm.set_batch_computation_enabled(True)
    labels = svm.apply().get_labels()
    return labels, svm
def serialization_string_kernels_modular(n_data, num_shifts, size):
    """
    serialize svm with string kernels
    """

    ##################################################
    # set up toy data and svm
    train_xt, train_lt = generate_random_data(n_data)
    test_xt, test_lt = generate_random_data(n_data)

    feats_train = construct_features(train_xt)
    feats_test = construct_features(test_xt)

    max_len = len(train_xt[0])
    kernel_wdk = WeightedDegreePositionStringKernel(size, 5)
    shifts_vector = numpy.ones(max_len, dtype=numpy.int32) * num_shifts
    kernel_wdk.set_shifts(shifts_vector)

    ########
    # set up spectrum
    use_sign = False
    kernel_spec_1 = WeightedCommWordStringKernel(size, use_sign)
    kernel_spec_2 = WeightedCommWordStringKernel(size, use_sign)

    ########
    # combined kernel
    kernel = CombinedKernel()
    kernel.append_kernel(kernel_wdk)
    kernel.append_kernel(kernel_spec_1)
    kernel.append_kernel(kernel_spec_2)

    # init kernel
    labels = BinaryLabels(train_lt)

    svm = SVMLight(1.0, kernel, labels)
    #svm.io.set_loglevel(MSG_DEBUG)
    svm.train(feats_train)

    ##################################################
    # serialize to file

    fn = "serialized_svm.bz2"
    #print("serializing SVM to file", fn)
    save(fn, svm)

    ##################################################
    # unserialize and sanity check

    #print("unserializing SVM")
    svm2 = load(fn)

    #print("comparing predictions")
    out = svm.apply(feats_test).get_labels()
    out2 = svm2.apply(feats_test).get_labels()

    # assert outputs are close
    for i in range(len(out)):
        assert abs(out[i] - out2[i] < 0.000001)

    #print("all checks passed.")

    return out, out2
예제 #7
0
def serialization_svmlight_modular(num, dist, width, C):
    from modshogun import MSG_DEBUG
    from modshogun import RealFeatures, BinaryLabels, DNA, Alphabet
    from modshogun import WeightedDegreeStringKernel, GaussianKernel
    try:
        from modshogun import SVMLight
    except ImportError:
        print("SVMLight not available")
        exit(0)
    from numpy import concatenate, ones
    from numpy.random import randn, seed

    import sys
    import types
    import random
    import bz2
    import pickle
    import inspect

    def save(filename, myobj):
        """
        save object to file using pickle

        @param filename: name of destination file
        @type filename: str
        @param myobj: object to save (has to be pickleable)
        @type myobj: obj
        """

        try:
            f = bz2.BZ2File(filename, 'wb')
        except IOError as details:
            sys.stderr.write('File ' + filename + ' cannot be written\n')
            sys.stderr.write(details)
            return

        pickle.dump(myobj, f, protocol=2)
        f.close()

    def load(filename):
        """
        Load from filename using pickle

        @param filename: name of file to load from
        @type filename: str
        """

        try:
            f = bz2.BZ2File(filename, 'rb')
        except IOError as details:
            sys.stderr.write('File ' + filename + ' cannot be read\n')
            sys.stderr.write(details)
            return

        myobj = pickle.load(f)
        f.close()
        return myobj

    ##################################################
    # set up toy data and svm

    traindata_real = concatenate((randn(2, num) - dist, randn(2, num) + dist),
                                 axis=1)
    testdata_real = concatenate((randn(2, num) - dist, randn(2, num) + dist),
                                axis=1)

    trainlab = concatenate((-ones(num), ones(num)))
    testlab = concatenate((-ones(num), ones(num)))

    feats_train = RealFeatures(traindata_real)
    feats_test = RealFeatures(testdata_real)
    kernel = GaussianKernel(feats_train, feats_train, width)
    #kernel.io.set_loglevel(MSG_DEBUG)

    labels = BinaryLabels(trainlab)

    svm = SVMLight(C, kernel, labels)
    svm.train()
    #svm.io.set_loglevel(MSG_DEBUG)

    ##################################################
    # serialize to file

    fn = "serialized_svm.bz2"
    #print("serializing SVM to file", fn)
    save(fn, svm)

    ##################################################
    # unserialize and sanity check

    #print("unserializing SVM")
    svm2 = load(fn)

    #print("comparing objectives")

    svm2.train()

    #print("objective before serialization:", svm.get_objective())
    #print("objective after serialization:", svm2.get_objective())

    #print("comparing predictions")

    out = svm.apply(feats_test).get_labels()
    out2 = svm2.apply(feats_test).get_labels()

    # assert outputs are close
    for i in range(len(out)):
        assert abs(out[i] - out2[i] < 0.000001)

    #print("all checks passed.")

    return True