Example #1
0
def training_run(options):
    """Train a combined-kernel SVM on motif data and emit the HTML report.

    Positive and negative sequences are scanned with ``MotifFinder``.  Every
    motif contributes one weighted-degree-position string kernel and the motif
    positions contribute one Gaussian kernel.  A ``LibSVM`` is trained on the
    combined kernel, then the analyses selected in ``options`` are collected
    and handed to ``htmlize``.

    @param options: object providing the attributes read here:
        ``window_width``, ``replace``, ``positives``, ``pgff``, ``negatives``,
        ``ngff``, ``output_path``, ``contrib``, ``logos``, ``query`` and
        ``output_html``
    @return: None; the results are passed to ``htmlize``
    """
    settings = MotifFinderSettings(kirmes_ini.MOTIF_LENGTH,
                                   options.window_width, options.replace)
    positives = MotifFinder(finder_settings=settings)
    positives.setFastaFile(options.positives)
    positives.setMotifs(options.pgff)
    pmotifs, ppositions = positives.getResults()
    negatives = MotifFinder(finder_settings=settings)
    negatives.setFastaFile(options.negatives)
    negatives.setMotifs(options.ngff)
    nmotifs, npositions = negatives.getResults()

    wds_kparams = kirmes_ini.WDS_KERNEL_PARAMETERS
    wds_svm = EasySVM.EasySVM(wds_kparams)
    # The example counts are taken from the first motif's entry list.
    # list() keeps the indexing valid when values() returns a view (Python 3).
    # NOTE(review): presumably every motif holds the same number of entries
    # -- confirm against MotifFinder.getResults().
    num_positives = len(list(pmotifs.values())[0])
    num_negatives = len(list(nmotifs.values())[0])
    # Creating Kernel Objects
    kernel = CombinedKernel()
    features = CombinedFeatures()
    kernel_array = []
    # sorted() yields a deterministic motif order on Python 2 and 3 alike
    motifs = sorted(pmotifs.keys())
    # Adding Kmer Kernels
    for motif in motifs:
        # positives first, negatives second: must match the label order below
        all_examples = pmotifs[motif] + nmotifs[motif]
        motif_features = wds_svm.createFeatures(all_examples)
        wds_kernel = WeightedDegreePositionStringKernel(
            motif_features, motif_features, wds_kparams["degree"])
        wds_kernel.set_shifts(
            wds_kparams["shift"] * ones(wds_kparams["seqlength"], dtype=int32))
        features.append_feature_obj(motif_features)
        kernel_array.append(wds_kernel)
        kernel.append_kernel(wds_kernel)
    rbf_svm = EasySVM.EasySVM(kirmes_ini.RBF_KERNEL_PARAMETERS)
    positions = array(ppositions + npositions, dtype=float64).T
    position_features = rbf_svm.createFeatures(positions)
    features.append_feature_obj(position_features)
    # +1 for every positive example followed by -1 for every negative one
    motif_labels = append(ones(num_positives), -ones(num_negatives))
    complete_labels = Labels(motif_labels)
    rbf_kernel = GaussianKernel(position_features, position_features,
                                kirmes_ini.RBF_KERNEL_PARAMETERS["width"])
    kernel_array.append(rbf_kernel)
    kernel.append_kernel(rbf_kernel)
    # Kernel init
    kernel.init(features, features)
    kernel.set_cache_size(kirmes_ini.K_CACHE_SIZE)
    svm = LibSVM(kirmes_ini.K_COMBINED_C, kernel, complete_labels)
    svm.parallel.set_num_threads(kirmes_ini.K_NUM_THREADS)
    # Training
    svm.train()
    if not os.path.exists(options.output_path):
        os.mkdir(options.output_path)
    # Each requested analysis contributes one entry to the report dictionary
    html = {}
    if options.contrib:
        html["contrib"] = contrib(svm, kernel, motif_labels, kernel_array, motifs)
    if options.logos:
        html["poims"] = poims(svm, kernel, kernel_array, motifs, options.output_path)
    if options.query:
        html["query"] = evaluate(options, svm, kernel, features, motifs)
    htmlize(html, options.output_html)
def create_empty_promoter_kernel(param):
    """
    Create an uninitialized combined promoter kernel.

    The returned kernel holds three sub-kernels, appended in this order: a
    center kernel (``WeightedDegreeStringKernel`` when ``param["shifts"]`` is
    0, otherwise ``WeightedDegreePositionStringKernel`` with a shift vector)
    followed by two ``WeightedCommWordStringKernel`` instances for the left
    and right borders.  No features are attached here.

    @param param: mapping with the keys "shifts", "degree" and
        "kernel_cache"; "center_offset" is also read when "shifts" is not 0
    @return: the assembled CombinedKernel
    """

    # A third of the cache budget per sub-kernel.  Floor division keeps the
    # value integral on Python 3 too (for ints it equals Python 2's "/").
    cache_share = param["kernel_cache"] // 3

    # centered WDK/WDK-shift
    if param["shifts"] == 0:
        kernel_center = WeightedDegreeStringKernel(param["degree"])
    else:
        # NOTE(review): the hard-coded 10 is presumably an initial cache
        # size that set_cache_size() below replaces -- confirm.
        kernel_center = WeightedDegreePositionStringKernel(10, param["degree"])
        # one shift value for each of the 2 * center_offset positions
        shifts_vector = numpy.ones(param["center_offset"] * 2,
                                   dtype=numpy.int32) * param["shifts"]
        kernel_center.set_shifts(shifts_vector)

    kernel_center.set_cache_size(cache_share)

    # border spectrum kernels
    use_sign = False
    kernel_left = WeightedCommWordStringKernel(cache_share, use_sign)
    kernel_right = WeightedCommWordStringKernel(cache_share, use_sign)

    # assemble combined kernel
    kernel = CombinedKernel()
    kernel.append_kernel(kernel_center)
    kernel.append_kernel(kernel_left)
    kernel.append_kernel(kernel_right)

    return kernel
Example #3
0
    def init_sensor(self, kernel, svs):
        """Build the kernel object and training features for this sensor.

        @param kernel: mapping with the keys "name" ('spectrum' or 'wdshift')
            and "order"; 'wdshift' additionally reads "shift"
        @param svs: sequences handed to StringCharFeatures with the DNA
            alphabet
        @return: tuple (kernel, features); both are also stored on
            self.kernel and self.train_features
        @raise ValueError: if kernel["name"] is not a supported kernel name
        """
        kname = kernel['name']
        # Fail fast, before any feature object is built.  A real exception is
        # required here: raising a plain string is itself a TypeError.
        if kname not in ('spectrum', 'wdshift'):
            raise ValueError(
                "Currently, only wdshift and spectrum kernels supported")

        f = StringCharFeatures(svs, DNA)

        if kname == 'spectrum':
            # convert the character features into word features
            wf = StringWordFeatures(f.get_alphabet())
            wf.obtain_from_char(f, kernel['order'] - 1, kernel['order'], 0,
                                False)

            pre = SortWordString()
            pre.init(wf)
            wf.add_preprocessor(pre)
            wf.apply_preprocessor()
            f = wf

            k = CommWordStringKernel(0, False)
            k.set_use_dict_diagonal_optimization(kernel['order'] < 8)
            # the preprocessor is kept on the instance only for this kernel
            self.preproc = pre

        else:  # kname == 'wdshift'
            max_len = f.get_max_vector_length()
            k = WeightedDegreePositionStringKernel(0, kernel['order'])
            k.set_normalizer(IdentityKernelNormalizer())
            # the same shift for every position
            k.set_shifts(kernel['shift'] *
                         numpy.ones(max_len, dtype=numpy.int32))
            # uniform position weights of 1 / max_len
            k.set_position_weights(
                1.0 / max_len * numpy.ones(max_len, dtype=numpy.float64))

        self.kernel = k
        self.train_features = f

        return (self.kernel, self.train_features)
def kernel_weighted_degree_position_string_modular(fm_train_dna=traindat, fm_test_dna=testdat, degree=20):
    """Compute train and test kernel matrices with a shifted WD-position kernel.

    The kernel is built on the training features with a shift of 10 and a
    position weight of 1 for each of the ``len(fm_train_dna[0])`` positions.

    @param fm_train_dna: training sequences (DNA alphabet)
    @param fm_test_dna: test sequences (DNA alphabet)
    @param degree: degree passed to WeightedDegreePositionStringKernel
    @return: tuple (train kernel matrix, test kernel matrix, kernel)
    """
    from shogun.Features import StringCharFeatures, DNA
    from shogun.Kernel import WeightedDegreePositionStringKernel, MSG_DEBUG
    from numpy import zeros, ones, float64, int32

    train_features = StringCharFeatures(fm_train_dna, DNA)
    # for verbose output: train_features.io.set_loglevel(MSG_DEBUG)
    test_features = StringCharFeatures(fm_test_dna, DNA)

    wdp_kernel = WeightedDegreePositionStringKernel(
        train_features, train_features, degree)

    # length of the first training sequence sizes both per-position vectors
    seq_len = len(fm_train_dna[0])
    wdp_kernel.set_shifts(10 * ones(seq_len, dtype=int32))
    wdp_kernel.set_position_weights(ones(seq_len, dtype=float64))

    # train-vs-train matrix first, then re-init against the test features
    train_matrix = wdp_kernel.get_kernel_matrix()
    wdp_kernel.init(train_features, test_features)
    test_matrix = wdp_kernel.get_kernel_matrix()
    return train_matrix, test_matrix, wdp_kernel
def kernel_weighted_degree_position_string_modular(fm_train_dna=traindat, fm_test_dna=testdat, degree=20):
    """Compute train and test kernel matrices with a WD-position string kernel.

    No shifts or position weights are set explicitly in this variant.

    @param fm_train_dna: training sequences (DNA alphabet)
    @param fm_test_dna: test sequences (DNA alphabet)
    @param degree: degree passed to WeightedDegreePositionStringKernel
    @return: tuple (train kernel matrix, test kernel matrix, kernel)
    """
    from shogun.Features import StringCharFeatures, DNA
    from shogun.Kernel import WeightedDegreePositionStringKernel, MSG_DEBUG
    from numpy import zeros, ones, float64, int32

    lhs = StringCharFeatures(fm_train_dna, DNA)
    # for verbose output: lhs.io.set_loglevel(MSG_DEBUG)
    rhs = StringCharFeatures(fm_test_dna, DNA)

    wdp = WeightedDegreePositionStringKernel(lhs, lhs, degree)

    # Optional explicit settings, left disabled here:
    #   wdp.set_shifts(zeros(len(fm_train_dna[0]), dtype=int32))
    #   wdp.set_position_weights(ones(len(fm_train_dna[0]), dtype=float64))

    # matrix on the training data, taken before switching the right-hand side
    on_train = wdp.get_kernel_matrix()
    wdp.init(lhs, rhs)
    on_test = wdp.get_kernel_matrix()
    return on_train, on_test, wdp
Example #6
0
    def train(self, data, labels):
        """
        Train an SVMLight model on a combined promoter kernel.

        The combined kernel holds a center kernel (weighted degree, or
        weighted degree with shifts when ``self.param["shifts"]`` is not 0)
        and two ``WeightedCommWordStringKernel`` border kernels.  The trained
        machine is stored in ``self.svm``.

        Reads the ``self.param`` keys "shifts", "degree", "kernel_cache",
        "center_offset", "center_pos" and "cost".

        @param data: input handed to create_features
        @param labels: label values; converted with numpy.double and wrapped
            in BinaryLabels
        @return: self
        """

        # A third of the cache budget per sub-kernel.  Floor division keeps
        # the value integral on Python 3 too (for ints it equals Python 2's
        # "/").
        cache_share = self.param["kernel_cache"] // 3

        # centered WDK/WDK-shift
        if self.param["shifts"] == 0:
            kernel_center = WeightedDegreeStringKernel(self.param["degree"])
        else:
            # NOTE(review): the hard-coded 10 is presumably an initial cache
            # size that set_cache_size() below replaces -- confirm.
            kernel_center = WeightedDegreePositionStringKernel(
                10, self.param["degree"])
            # one shift value for each of the 2 * center_offset positions
            shifts_vector = numpy.ones(
                self.param["center_offset"] * 2,
                dtype=numpy.int32) * self.param["shifts"]
            kernel_center.set_shifts(shifts_vector)

        kernel_center.set_cache_size(cache_share)

        # border spectrum kernels
        use_sign = False
        kernel_left = WeightedCommWordStringKernel(cache_share, use_sign)
        kernel_right = WeightedCommWordStringKernel(cache_share, use_sign)

        # assemble combined kernel
        kernel = CombinedKernel()
        kernel.append_kernel(kernel_center)
        kernel.append_kernel(kernel_left)
        kernel.append_kernel(kernel_right)

        ## building features
        feat = create_features(data, self.param["center_offset"],
                               self.param["center_pos"])

        # init combined kernel
        kernel.init(feat, feat)

        # print() with a single argument behaves the same on Python 2 and 3
        print("len(labels) = %i" % len(labels))
        lab = BinaryLabels(numpy.double(labels))
        self.svm = SVMLight(self.param["cost"], kernel, lab)

        # show debugging output
        self.svm.io.enable_progress()
        self.svm.io.set_loglevel(MSG_DEBUG)

        # optimization settings
        num_threads = 2
        self.svm.parallel.set_num_threads(num_threads)
        # note: 10e-8 is 1e-7
        self.svm.set_epsilon(10e-8)

        self.svm.train()

        return self
Example #7
0
def create_kernel(kname, kparam, feats_train):
    """Construct the kernel selected by ``kname`` on the training features.

    @param kname: one of 'gauss', 'linear', 'poly', 'wd', 'spec', 'cumspec',
        'spec2', 'cumspec2', 'localalign' or 'localimprove'
    @param kparam: mapping of kernel parameters; which keys are read depends
        on ``kname`` ('width', 'scale', 'degree', 'inhomogene', 'normal',
        'seqlength', 'shift', 'length', 'indeg', 'outdeg')
    @param feats_train: training features, used for both sides of the kernel;
        for 'spec2' and 'cumspec2' it is indexed with 'f0' and 'f1'
    @return: the constructed kernel, with its cache size set to 32
    @raise ValueError: if ``kname`` is not one of the names listed above
    """

    if kname == 'gauss':
        kernel = GaussianKernel(feats_train, feats_train, kparam['width'])
    elif kname == 'linear':
        kernel = LinearKernel(feats_train, feats_train)
        kernel.set_normalizer(AvgDiagKernelNormalizer(kparam['scale']))
    elif kname == 'poly':
        kernel = PolyKernel(feats_train, feats_train, kparam['degree'],
                            kparam['inhomogene'], kparam['normal'])
    elif kname == 'wd':
        # Weighted degree kernel with shifts; a plain
        # WeightedDegreeStringKernel(feats_train, feats_train,
        # kparam['degree']) was the earlier alternative.
        kernel = WeightedDegreePositionStringKernel(feats_train, feats_train,
                                                    kparam['degree'])
        kernel.set_normalizer(
            AvgDiagKernelNormalizer(float(kparam['seqlength'])))
        # the same shift for each of the 'seqlength' positions
        kernel.set_shifts(kparam['shift'] *
                          numpy.ones(kparam['seqlength'], dtype=numpy.int32))
    elif kname == 'spec':
        kernel = CommUlongStringKernel(feats_train, feats_train)
    elif kname == 'cumspec':
        kernel = WeightedCommWordStringKernel(feats_train, feats_train)
        kernel.set_weights(numpy.ones(kparam['degree']))
    elif kname == 'spec2':
        # one spectrum kernel per feature set ('f0' and 'f1')
        kernel = CombinedKernel()
        k0 = CommWordStringKernel(feats_train['f0'], feats_train['f0'])
        k0.io.disable_progress()
        kernel.append_kernel(k0)
        k1 = CommWordStringKernel(feats_train['f1'], feats_train['f1'])
        k1.io.disable_progress()
        kernel.append_kernel(k1)
    elif kname == 'cumspec2':
        # one weighted spectrum kernel per feature set ('f0' and 'f1')
        kernel = CombinedKernel()
        k0 = WeightedCommWordStringKernel(feats_train['f0'], feats_train['f0'])
        k0.set_weights(numpy.ones(kparam['degree']))
        k0.io.disable_progress()
        kernel.append_kernel(k0)
        k1 = WeightedCommWordStringKernel(feats_train['f1'], feats_train['f1'])
        k1.set_weights(numpy.ones(kparam['degree']))
        k1.io.disable_progress()
        kernel.append_kernel(k1)
    elif kname == 'localalign':
        kernel = LocalAlignmentStringKernel(feats_train, feats_train)
    elif kname == 'localimprove':
        kernel = LocalityImprovedStringKernel(feats_train, feats_train,
                                              kparam['length'],
                                              kparam['indeg'], kparam['outdeg'])
    else:
        # Without this, the code below would hit an unbound ``kernel`` and
        # fail with a misleading UnboundLocalError.
        raise ValueError('Unknown kernel %s' % kname)

    kernel.set_cache_size(32)
    return kernel
Example #8
0
def serialization_string_kernels_modular(n_data, num_shifts, size):
    """
    Serialize an SVM with string kernels and check it survives a round trip.

    An SVMLight is trained on a combined kernel (one weighted degree position
    kernel plus two weighted spectrum kernels), written to
    "serialized_svm.bz2" in the working directory, loaded back, and both
    machines are applied to the same test features.

    @param n_data: argument passed to generate_random_data for the train and
        the test set
    @param num_shifts: shift value used for every position of the shift vector
    @param size: first constructor argument of all three sub-kernels
    @return: tuple (labels from the original SVM, labels from the reloaded SVM)
    @raise AssertionError: if any pair of outputs differs by 0.000001 or more
    """

    ##################################################
    # set up toy data and svm
    train_xt, train_lt = generate_random_data(n_data)
    test_xt, test_lt = generate_random_data(n_data)

    feats_train = construct_features(train_xt)
    feats_test = construct_features(test_xt)

    # the first training sequence determines the length of the shift vector
    max_len = len(train_xt[0])
    kernel_wdk = WeightedDegreePositionStringKernel(size, 5)
    shifts_vector = numpy.ones(max_len, dtype=numpy.int32) * num_shifts
    kernel_wdk.set_shifts(shifts_vector)

    ########
    # set up spectrum
    use_sign = False
    kernel_spec_1 = WeightedCommWordStringKernel(size, use_sign)
    kernel_spec_2 = WeightedCommWordStringKernel(size, use_sign)

    ########
    # combined kernel
    kernel = CombinedKernel()
    kernel.append_kernel(kernel_wdk)
    kernel.append_kernel(kernel_spec_1)
    kernel.append_kernel(kernel_spec_2)

    # init kernel
    labels = BinaryLabels(train_lt)

    svm = SVMLight(1.0, kernel, labels)
    svm.train(feats_train)

    ##################################################
    # serialize to file

    fn = "serialized_svm.bz2"
    save(fn, svm)

    ##################################################
    # unserialize and sanity check

    svm2 = load(fn)

    out = svm.apply(feats_test).get_labels()
    out2 = svm2.apply(feats_test).get_labels()

    # assert outputs are close; abs() must wrap the difference only, not the
    # comparison, otherwise any out2 value larger than out would pass
    for original, restored in zip(out, out2):
        assert abs(original - restored) < 0.000001

    return out, out2
def serialization_string_kernels_modular(n_data, num_shifts, size):
    """
    Train an SVM with string kernels, save it, reload it and compare outputs.

    The combined kernel consists of a weighted degree position kernel and two
    weighted spectrum kernels.  The trained SVMLight is saved to
    "serialized_svm.bz2" in the working directory and loaded again; the
    original and the reloaded machine are then applied to the test features.

    @param n_data: argument passed to generate_random_data for the train and
        the test set
    @param num_shifts: shift value used for every position of the shift vector
    @param size: first constructor argument of all three sub-kernels
    @return: tuple (labels from the original SVM, labels from the reloaded SVM)
    @raise AssertionError: if any pair of outputs differs by 0.000001 or more
    """

    ##################################################
    # set up toy data and svm
    train_xt, train_lt = generate_random_data(n_data)
    test_xt, test_lt = generate_random_data(n_data)

    feats_train = construct_features(train_xt)
    feats_test = construct_features(test_xt)

    # shift vector length follows the first training sequence
    max_len = len(train_xt[0])
    kernel_wdk = WeightedDegreePositionStringKernel(size, 5)
    shifts_vector = numpy.ones(max_len, dtype=numpy.int32) * num_shifts
    kernel_wdk.set_shifts(shifts_vector)

    ########
    # set up spectrum
    use_sign = False
    kernel_spec_1 = WeightedCommWordStringKernel(size, use_sign)
    kernel_spec_2 = WeightedCommWordStringKernel(size, use_sign)

    ########
    # combined kernel
    kernel = CombinedKernel()
    kernel.append_kernel(kernel_wdk)
    kernel.append_kernel(kernel_spec_1)
    kernel.append_kernel(kernel_spec_2)

    # init kernel
    labels = BinaryLabels(train_lt)

    svm = SVMLight(1.0, kernel, labels)
    svm.train(feats_train)

    ##################################################
    # serialize to file

    fn = "serialized_svm.bz2"
    save(fn, svm)

    ##################################################
    # unserialize and sanity check

    svm2 = load(fn)

    out = svm.apply(feats_test).get_labels()
    out2 = svm2.apply(feats_test).get_labels()

    # assert outputs are close: take abs() of the difference and compare the
    # result (the parenthesis used to enclose the comparison itself)
    for i in range(len(out)):
        assert abs(out[i] - out2[i]) < 0.000001

    return out, out2
Example #10
0
def _collect_motifs(settings, fasta_file, gff_file):
    """Run a MotifFinder on one FASTA/GFF pair and return its getResults()."""
    finder = MotifFinder(finder_settings=settings)
    finder.setFastaFile(fasta_file)
    finder.setMotifs(gff_file)
    return finder.getResults()


def training_run(options):
    """Conduct a training run and render the requested HTML report.

    One weighted-degree-position string kernel per motif and one Gaussian
    kernel over the motif positions are joined into a combined kernel, on
    which a ``LibSVM`` is trained.  The analyses enabled in ``options`` are
    then collected and passed to ``htmlize``.

    @param options: object providing the attributes read here:
        ``window_width``, ``replace``, ``positives``, ``pgff``, ``negatives``,
        ``ngff``, ``output_path``, ``contrib``, ``logos``, ``query`` and
        ``output_html``
    @return: None; the results are passed to ``htmlize``
    """
    settings = MotifFinderSettings(kirmes_ini.MOTIF_LENGTH,
                                   options.window_width, options.replace)
    pmotifs, ppositions = _collect_motifs(settings, options.positives,
                                          options.pgff)
    nmotifs, npositions = _collect_motifs(settings, options.negatives,
                                          options.ngff)

    wds_kparams = kirmes_ini.WDS_KERNEL_PARAMETERS
    wds_svm = EasySVM.EasySVM(wds_kparams)
    # Example counts come from the first motif's entry list; list() keeps the
    # indexing valid when values() returns a view (Python 3).
    # NOTE(review): presumably every motif holds the same number of entries
    # -- confirm against MotifFinder.getResults().
    num_positives = len(list(pmotifs.values())[0])
    num_negatives = len(list(nmotifs.values())[0])
    #Creating Kernel Objects
    kernel = CombinedKernel()
    features = CombinedFeatures()
    kernel_array = []
    # sorted() gives a deterministic motif order on Python 2 and 3 alike
    motifs = sorted(pmotifs.keys())
    #Adding Kmer Kernels
    for motif in motifs:
        # positives first, negatives second: must match the label order below
        all_examples = pmotifs[motif] + nmotifs[motif]
        motif_features = wds_svm.createFeatures(all_examples)
        wds_kernel = WeightedDegreePositionStringKernel(
            motif_features, motif_features, wds_kparams['degree'])
        wds_kernel.set_shifts(wds_kparams['shift'] *
                              ones(wds_kparams['seqlength'], dtype=int32))
        features.append_feature_obj(motif_features)
        kernel_array.append(wds_kernel)
        kernel.append_kernel(wds_kernel)
    rbf_svm = EasySVM.EasySVM(kirmes_ini.RBF_KERNEL_PARAMETERS)
    positions = array(ppositions + npositions, dtype=float64).T
    position_features = rbf_svm.createFeatures(positions)
    features.append_feature_obj(position_features)
    # +1 for every positive example followed by -1 for every negative one
    motif_labels = append(ones(num_positives), -ones(num_negatives))
    complete_labels = Labels(motif_labels)
    rbf_kernel = GaussianKernel(position_features, position_features,
                                kirmes_ini.RBF_KERNEL_PARAMETERS['width'])
    kernel_array.append(rbf_kernel)
    kernel.append_kernel(rbf_kernel)
    #Kernel init
    kernel.init(features, features)
    kernel.set_cache_size(kirmes_ini.K_CACHE_SIZE)
    svm = LibSVM(kirmes_ini.K_COMBINED_C, kernel, complete_labels)
    svm.parallel.set_num_threads(kirmes_ini.K_NUM_THREADS)
    #Training
    svm.train()
    if not os.path.exists(options.output_path):
        os.mkdir(options.output_path)
    # Each requested analysis contributes one entry to the report dictionary
    html = {}
    if options.contrib:
        html["contrib"] = contrib(svm, kernel, motif_labels, kernel_array,
                                  motifs)
    if options.logos:
        html["poims"] = poims(svm, kernel, kernel_array, motifs,
                              options.output_path)
    if options.query:
        html["query"] = evaluate(options, svm, kernel, features, motifs)
    htmlize(html, options.output_html)