def runShogunSVRWDKernel(train_xt, train_lt, test_xt, svm_c=1, svr_param=0.1):
    """Train an epsilon-SVR on a combined string kernel and serialize it.

    The kernel is a CombinedKernel holding a
    WeightedDegreePositionStringKernel and a WeightedCommWordStringKernel.
    After training, predictions are computed and the trained machine is
    written to the path held in the module-level name FNEPSILON.

    Args:
        train_xt: training sequences; the length of the first sequence
            determines the length of the shifts vector.
        train_lt: training targets, wrapped in RegressionLabels.
        test_xt: test sequences.
        svm_c: first argument forwarded to LibSVR (presumably the
            regularisation constant C -- confirm against the Shogun docs).
        svr_param: second argument forwarded to LibSVR.

    Returns:
        Tuple (out1_epsilon, out2_epsilon, kernel): the labels obtained from
        apply() without arguments, the labels obtained from
        apply(feats_test), and the combined kernel object.
    """
    feats_train = construct_features(train_xt)
    feats_test = construct_features(test_xt)

    # Weighted-degree-position kernel: one shift entry per sequence position.
    max_len = len(train_xt[0])
    kernel_wdk = WeightedDegreePositionStringKernel(SIZE, 5)
    shifts_vector = np.ones(max_len, dtype=np.int32) * NUMSHIFTS
    kernel_wdk.set_shifts(shifts_vector)

    # Spectrum-style kernel.
    use_sign = False
    kernel_spec_1 = WeightedCommWordStringKernel(SIZE, use_sign)

    # Combine both sub-kernels.
    kernel = CombinedKernel()
    kernel.append_kernel(kernel_wdk)
    kernel.append_kernel(kernel_spec_1)

    labels = RegressionLabels(train_lt)

    svr_epsilon = LibSVR(svm_c, svr_param, kernel, labels, LIBSVR_EPSILON_SVR)
    print("Ready to train!")
    svr_epsilon.train(feats_train)

    print("Making predictions!")
    kernel.init(feats_train, feats_test)
    # NOTE(review): apply() is called without features *after* the kernel has
    # been re-initialised on (train, test); confirm whether out1_epsilon is
    # really meant to hold training-set predictions.
    out1_epsilon = svr_epsilon.apply().get_labels()
    out2_epsilon = svr_epsilon.apply(feats_test).get_labels()

    # Serialize the trained machine; the context manager guarantees the file
    # handle is released even if save() raises.
    with open(FNEPSILON, 'w+') as model_file:
        svr_epsilon.save(model_file)

    return out1_epsilon, out2_epsilon, kernel
def runShogunSVRWDKernel(train_xt, train_lt, test_xt, svm_c=1, svr_param=0.1):
    """Fit an epsilon-SVR with a combined string kernel and save it to disk.

    A WeightedDegreePositionStringKernel and a WeightedCommWordStringKernel
    are appended to a CombinedKernel, a LibSVR machine of type
    LIBSVR_EPSILON_SVR is trained on the training features, predictions are
    produced, and the machine is saved to the file named by FNEPSILON.

    Args:
        train_xt: training sequences; len(train_xt[0]) sizes the shifts
            vector.
        train_lt: training targets passed to RegressionLabels.
        test_xt: test sequences.
        svm_c: first positional argument given to LibSVR.
        svr_param: second positional argument given to LibSVR.

    Returns:
        Tuple (out1_epsilon, out2_epsilon, kernel).
    """
    feats_train = construct_features(train_xt)
    feats_test = construct_features(test_xt)

    # Weighted-degree-position sub-kernel with a constant shift per position.
    kernel_wdk = WeightedDegreePositionStringKernel(SIZE, 5)
    max_len = len(train_xt[0])
    kernel_wdk.set_shifts(np.ones(max_len, dtype=np.int32) * NUMSHIFTS)

    # Spectrum sub-kernel (use_sign disabled).
    kernel_spec_1 = WeightedCommWordStringKernel(SIZE, False)

    kernel = CombinedKernel()
    for sub_kernel in (kernel_wdk, kernel_spec_1):
        kernel.append_kernel(sub_kernel)

    labels = RegressionLabels(train_lt)

    svr_epsilon = LibSVR(svm_c, svr_param, kernel, labels, LIBSVR_EPSILON_SVR)
    print("Ready to train!")
    svr_epsilon.train(feats_train)

    print("Making predictions!")
    kernel.init(feats_train, feats_test)
    # NOTE(review): the argument-less apply() runs after the kernel was
    # re-initialised with the test features on the right-hand side; verify
    # that out1_epsilon is intended to differ from out2_epsilon.
    out1_epsilon = svr_epsilon.apply().get_labels()
    out2_epsilon = svr_epsilon.apply(feats_test).get_labels()

    # Write the model; `with` closes the handle even when save() fails.
    with open(FNEPSILON, 'w+') as model_file:
        svr_epsilon.save(model_file)

    return out1_epsilon, out2_epsilon, kernel
def runShogunSVMDNAWDKernel(train_xt, train_lt, test_xt):
    """Train a LibSVM on DNA strings with a weighted-degree-position kernel.

    Args:
        train_xt: training sequences, wrapped as StringCharFeatures over DNA.
        train_lt: training labels, wrapped in BinaryLabels.
        test_xt: test sequences, wrapped as StringCharFeatures over DNA.

    Returns:
        Tuple (out1, out2, out1DecisionValues, out2DecisionValues): the
        label arrays for the training and test features, followed by the
        objects returned by svm.apply() from which those arrays were read.
    """
    train_features = StringCharFeatures(train_xt, DNA)
    test_features = StringCharFeatures(test_xt, DNA)

    wd_kernel = WeightedDegreePositionStringKernel(
        train_features, train_features, DEGREE)
    wd_kernel.io.set_loglevel(MSG_DEBUG)

    # Shifts and position weights both get one entry per sequence position.
    seq_len = len(train_xt[0])
    wd_kernel.set_shifts(NUMSHIFTS * ones(seq_len, dtype=int32))
    wd_kernel.set_position_weights(ones(seq_len, dtype=float64))

    train_labels = BinaryLabels(train_lt)

    print("Ready to train!")
    classifier = LibSVM(SVMC, wd_kernel, train_labels)
    classifier.io.set_loglevel(MSG_DEBUG)
    classifier.train()

    print("Making predictions!")
    # Training-set predictions are taken before the kernel is switched over
    # to the (train, test) pairing.
    train_result = classifier.apply(train_features)
    train_out = train_result.get_labels()

    wd_kernel.init(train_features, test_features)
    test_result = classifier.apply(test_features)
    test_out = test_result.get_labels()

    return train_out, test_out, train_result, test_result
Beispiel #4
0
def kernel_weighted_degree_position_string_modular(fm_train_dna=traindat,
                                                   fm_test_dna=testdat,
                                                   degree=20):
    """Compute train and test kernel matrices for a WD-position kernel.

    Args:
        fm_train_dna: training DNA strings (defaults to module-level
            traindat).
        fm_test_dna: test DNA strings (defaults to module-level testdat).
        degree: degree argument passed to the kernel constructor.

    Returns:
        Tuple (km_train, km_test, kernel): the kernel matrix on
        (train, train), the kernel matrix after re-initialising on
        (train, test), and the kernel object itself.
    """
    # MSG_DEBUG and zeros are imported but not used below; kept so the set of
    # names required from each module is unchanged.
    from modshogun import StringCharFeatures, DNA
    from modshogun import WeightedDegreePositionStringKernel, MSG_DEBUG
    from numpy import zeros, ones, float64, int32

    train_features = StringCharFeatures(fm_train_dna, DNA)
    test_features = StringCharFeatures(fm_test_dna, DNA)

    wd_kernel = WeightedDegreePositionStringKernel(
        train_features, train_features, degree)

    # One shift (fixed at 10) and one unit weight per sequence position.
    n_positions = len(fm_train_dna[0])
    wd_kernel.set_shifts(10 * ones(n_positions, dtype=int32))
    wd_kernel.set_position_weights(ones(n_positions, dtype=float64))

    train_matrix = wd_kernel.get_kernel_matrix()
    wd_kernel.init(train_features, test_features)
    test_matrix = wd_kernel.get_kernel_matrix()

    return train_matrix, test_matrix, wd_kernel
def kernel_weighted_degree_position_string_modular(fm_train_dna=traindat,
                                                   fm_test_dna=testdat,
                                                   degree=20):
    """Build a weighted-degree-position string kernel and evaluate it.

    The kernel is first constructed on the training features against
    themselves, then re-initialised against the test features.

    Args:
        fm_train_dna: training DNA strings (default: traindat).
        fm_test_dna: test DNA strings (default: testdat).
        degree: degree handed to WeightedDegreePositionStringKernel.

    Returns:
        (km_train, km_test, kernel) -- both kernel matrices and the kernel.
    """
    # Unused names (MSG_DEBUG, zeros) are still imported, as in the original.
    from modshogun import StringCharFeatures, DNA
    from modshogun import WeightedDegreePositionStringKernel, MSG_DEBUG
    from numpy import zeros, ones, float64, int32

    dna_train = StringCharFeatures(fm_train_dna, DNA)
    dna_test = StringCharFeatures(fm_test_dna, DNA)
    wdp = WeightedDegreePositionStringKernel(dna_train, dna_train, degree)

    first_len = len(fm_train_dna[0])
    shift_vec = 10 * ones(first_len, dtype=int32)
    weight_vec = ones(first_len, dtype=float64)
    wdp.set_shifts(shift_vec)
    wdp.set_position_weights(weight_vec)

    matrices = [wdp.get_kernel_matrix()]
    # Switch the right-hand side to the test features before the second pass.
    wdp.init(dna_train, dna_test)
    matrices.append(wdp.get_kernel_matrix())

    return matrices[0], matrices[1], wdp
Beispiel #6
0
def svm_process(args_tuple):
    """Train an SVMLight model on a WD-position kernel and score test data.

    Takes a single tuple so the function can be used with map-style
    dispatchers that pass one argument per task.

    Args:
        args_tuple: 6-tuple (X_train, Y_train, X_test, Y_test, d, c) where
            X_train / X_test are feature objects accepted by the kernel,
            Y_train is the label object passed to SVMLight, Y_test is an
            array supporting subtraction and ``.size``, d is the kernel
            degree argument and c is the first SVMLight argument.

    Returns:
        Tuple (accuracy, Y_test_proba): the fraction of test predictions
        equal to Y_test, and the logistic transform of the values returned
        by get_values().
    """
    X_train, Y_train, X_test, Y_test, d, c = args_tuple

    kernel = WeightedDegreePositionStringKernel(X_train, X_train, d)
    kernel.set_shifts(np.ones(SEQ_LEN, dtype=np.int32))
    kernel.set_position_weights(np.ones(SEQ_LEN, dtype=np.float64))
    kernel.init(X_train, X_train)

    model = SVMLight(c, kernel, Y_train)
    model.train()

    # Run the model once and read both labels and raw values from the same
    # result instead of evaluating the test set twice.
    predictions = model.apply(X_test)
    Y_test_pred = predictions.get_labels()
    Y_test_dist = predictions.get_values()

    # Logistic sigmoid written via tanh: exp(x) / (1 + exp(x)) overflows to
    # inf / inf = NaN for large positive x, whereas tanh saturates cleanly.
    Y_test_proba = 0.5 * (1.0 + np.tanh(0.5 * Y_test_dist))

    n_correct = np.count_nonzero(Y_test_pred - Y_test == 0)
    accuracy = n_correct * 1.0 / Y_test.size

    return (accuracy, Y_test_proba)
Beispiel #7
0
def svm_process(args_tuple):
    """Fit SVMLight with a weighted-degree-position kernel and evaluate it.

    Args:
        args_tuple: (X_train, Y_train, X_test, Y_test, d, c). The first
            four are the training/test features and labels; d is passed to
            the kernel constructor and c to SVMLight.

    Returns:
        (accuracy, Y_test_proba) -- share of exact label matches on the test
        set, and the sigmoid of the values from get_values().
    """
    X_train, Y_train, X_test, Y_test, d, c = args_tuple

    kernel = WeightedDegreePositionStringKernel(X_train, X_train, d)
    kernel.set_shifts(np.ones(SEQ_LEN, dtype=np.int32))
    kernel.set_position_weights(np.ones(SEQ_LEN, dtype=np.float64))
    kernel.init(X_train, X_train)

    model = SVMLight(c, kernel, Y_train)
    model.train()

    # A single apply() call feeds both outputs; the original evaluated the
    # test set twice to obtain them.
    test_output = model.apply(X_test)
    Y_test_pred = test_output.get_labels()
    Y_test_dist = test_output.get_values()

    # Overflow-safe sigmoid: the naive exp(x) / (1 + exp(x)) yields NaN once
    # exp(x) overflows, so use the equivalent tanh formulation.
    Y_test_proba = 0.5 * (1.0 + np.tanh(0.5 * Y_test_dist))

    matches = np.count_nonzero(Y_test_pred - Y_test == 0)
    accuracy = matches * 1.0 / Y_test.size

    return (accuracy, Y_test_proba)
def runShogunSVRWDKernel(train_xt, train_lt, test_xt, svm_c=1):
    """Train an epsilon-SVR on protein strings with a WD-position kernel.

    Args:
        train_xt: training sequences, wrapped as StringCharFeatures over
            PROTEIN.
        train_lt: training targets, wrapped in RegressionLabels.
        test_xt: test sequences, wrapped as StringCharFeatures over PROTEIN.
        svm_c: first argument forwarded to LibSVR.

    Returns:
        Tuple (out1_epsilon, out2_epsilon, kernel): label arrays for the
        training and test features, plus the kernel object.
    """
    train_features = StringCharFeatures(train_xt, PROTEIN)
    test_features = StringCharFeatures(test_xt, PROTEIN)

    wd_kernel = WeightedDegreePositionStringKernel(
        train_features, train_features, DEGREE)
    wd_kernel.io.set_loglevel(MSG_DEBUG)

    # Shifts and position weights are sized by the first training sequence.
    seq_len = len(train_xt[0])
    wd_kernel.set_shifts(NUMSHIFTS * ones(seq_len, dtype=int32))
    wd_kernel.set_position_weights(ones(seq_len, dtype=float64))

    targets = RegressionLabels(train_lt)

    print("Ready to train!")
    regressor = LibSVR(svm_c, SVRPARAM, wd_kernel, targets, LIBSVR_EPSILON_SVR)
    regressor.io.set_loglevel(MSG_DEBUG)
    regressor.train()

    print("Making predictions!")
    # Training predictions come first, while the kernel is still set up on
    # (train, train); it is then re-initialised for the test features.
    train_out = regressor.apply(train_features).get_labels()
    wd_kernel.init(train_features, test_features)
    test_out = regressor.apply(test_features).get_labels()

    return train_out, test_out, wd_kernel
def serialization_string_kernels_modular(n_data, num_shifts, size):
    """Round-trip an SVM with string kernels through serialization.

    Trains an SVMLight machine on random toy data with a CombinedKernel
    (one WeightedDegreePositionStringKernel plus two
    WeightedCommWordStringKernel instances), saves it to
    "serialized_svm.bz2" via the save() helper, reloads it via load(), and
    checks that the original and reloaded machines predict the same labels
    on fresh random test data.

    Args:
        n_data: forwarded to generate_random_data() for both the training
            and the test set.
        num_shifts: constant used to fill the kernel's shifts vector.
        size: first argument passed to each kernel constructor.

    Returns:
        Tuple (out, out2): test labels from the original machine and from
        the reloaded machine.

    Raises:
        AssertionError: if any pair of predictions differs by 1e-6 or more.
    """
    # Toy data; the test labels are not needed for the comparison.
    train_xt, train_lt = generate_random_data(n_data)
    test_xt, _ = generate_random_data(n_data)

    feats_train = construct_features(train_xt)
    feats_test = construct_features(test_xt)

    # Weighted-degree-position kernel with a constant shift per position.
    max_len = len(train_xt[0])
    kernel_wdk = WeightedDegreePositionStringKernel(size, 5)
    shifts_vector = numpy.ones(max_len, dtype=numpy.int32) * num_shifts
    kernel_wdk.set_shifts(shifts_vector)

    # Two spectrum kernels.
    use_sign = False
    kernel_spec_1 = WeightedCommWordStringKernel(size, use_sign)
    kernel_spec_2 = WeightedCommWordStringKernel(size, use_sign)

    kernel = CombinedKernel()
    kernel.append_kernel(kernel_wdk)
    kernel.append_kernel(kernel_spec_1)
    kernel.append_kernel(kernel_spec_2)

    labels = BinaryLabels(train_lt)

    svm = SVMLight(1.0, kernel, labels)
    svm.train(feats_train)

    # Serialize to file, then load it back.
    fn = "serialized_svm.bz2"
    save(fn, svm)
    svm2 = load(fn)

    out = svm.apply(feats_test).get_labels()
    out2 = svm2.apply(feats_test).get_labels()

    # The absolute difference must be small. The original wrote
    # abs(a - b < eps), which takes abs() of a boolean and lets any negative
    # difference pass.
    for original, restored in zip(out, out2):
        assert abs(original - restored) < 0.000001

    return out, out2
def serialization_string_kernels_modular(n_data, num_shifts, size):
    """Serialize an SVM with string kernels and verify the reloaded copy.

    An SVMLight machine is trained on random data using a CombinedKernel
    built from a WeightedDegreePositionStringKernel and two
    WeightedCommWordStringKernel instances. The machine is written to
    "serialized_svm.bz2" with save(), read back with load(), and both
    machines are applied to the same random test features.

    Args:
        n_data: argument for generate_random_data() (train and test).
        num_shifts: value every entry of the shifts vector is set to.
        size: first constructor argument for all three sub-kernels.

    Returns:
        Tuple (out, out2): labels predicted by the original and by the
        deserialized machine.

    Raises:
        AssertionError: if corresponding predictions differ by 1e-6 or more.
    """
    # Random toy data; only the training labels are used.
    train_xt, train_lt = generate_random_data(n_data)
    test_xt, _ = generate_random_data(n_data)

    feats_train = construct_features(train_xt)
    feats_test = construct_features(test_xt)

    max_len = len(train_xt[0])
    kernel_wdk = WeightedDegreePositionStringKernel(size, 5)
    kernel_wdk.set_shifts(
        numpy.ones(max_len, dtype=numpy.int32) * num_shifts)

    # Spectrum kernels (use_sign disabled).
    use_sign = False
    kernel_spec_1 = WeightedCommWordStringKernel(size, use_sign)
    kernel_spec_2 = WeightedCommWordStringKernel(size, use_sign)

    kernel = CombinedKernel()
    for sub_kernel in (kernel_wdk, kernel_spec_1, kernel_spec_2):
        kernel.append_kernel(sub_kernel)

    labels = BinaryLabels(train_lt)

    svm = SVMLight(1.0, kernel, labels)
    svm.train(feats_train)

    # Save and immediately reload the trained machine.
    fn = "serialized_svm.bz2"
    save(fn, svm)
    svm2 = load(fn)

    out = svm.apply(feats_test).get_labels()
    out2 = svm2.apply(feats_test).get_labels()

    # Compare |a - b| against the tolerance. The original parenthesisation,
    # abs(a - b < eps), compared the signed difference and so accepted
    # arbitrarily large negative deviations.
    for original, restored in zip(out, out2):
        assert abs(original - restored) < 0.000001

    return out, out2