예제 #1
0
def evaluate(options, svm, kernel, features, motifs):
    """Classify the query sequences with an already trained combined kernel.

    A ``MotifFinder`` is run on the FASTA file ``options.query`` with the
    motif definitions from ``options.qgff``. One feature object per entry
    of ``motifs`` (in the given order) followed by one position feature
    object is appended to a ``CombinedFeatures`` container, ``kernel`` is
    initialised with ``features`` and these query features, and
    ``svm.classify()`` provides one output value per query example.

    As a side effect a tab-separated table with the columns ``#example``,
    ``output`` and ``split`` (always ``0``) is printed to standard output.

    Returns:
        A string with one newline-terminated line per output value, made of
        the gene name, the word ``positive`` (output ``>= 0``) or
        ``negative``, and the output value, separated by tabs.

    Raises:
        AssertionError: if the query results contain no entry for one or
            more of ``motifs``. The missing motifs are printed first.
    """
    query = MotifFinder(finder_settings=MotifFinderSettings(
        kirmes_ini.MOTIF_LENGTH, options.window_width))
    query.setFastaFile(options.query)
    query.setMotifs(options.qgff)
    qmotifs, qpositions = query.getResults()
    feats_query = CombinedFeatures()
    wds_svm = EasySVM.EasySVM(kirmes_ini.WDS_KERNEL_PARAMETERS)

    # Checked explicitly rather than with ``assert``: assert statements are
    # removed when Python runs with -O, which would skip the diagnostics and
    # surface as a bare KeyError in the loop below.
    missing = set(motifs).difference(qmotifs.keys())
    if missing:
        print("The motif positions in the query sequence are incomplete, "
              "there are no positions for:")
        print(missing)
        # AssertionError is kept so callers that catch it keep working.
        raise AssertionError(
            "query sequences lack positions for motifs: %r" % (missing,))

    for motif in motifs:
        feats_query.append_feature_obj(wds_svm.createFeatures(qmotifs[motif]))
    query_positions = array(qpositions, dtype=float64).T
    rbf_svm = EasySVM.EasySVM(kirmes_ini.RBF_KERNEL_PARAMETERS)
    feats_query.append_feature_obj(rbf_svm.createFeatures(query_positions))
    kernel.init(features, feats_query)
    out = svm.classify().get_labels()
    qgenes = query.getGenes()

    # Collect the report lines and join once instead of growing a string.
    report_lines = []
    print("#example\toutput\tsplit")
    for i, score in enumerate(out):
        classif = "\tpositive\t" if score >= 0 else "\tnegative\t"
        report_lines.append(qgenes[i] + classif + str(score) + "\n")
        print(str(i) + "\t" + str(score) + "\t0")
    return "".join(report_lines)
예제 #2
0
def evaluate(options, svm, kernel, features, motifs):
    """Score query examples against a trained SVM and report the results.

    The query FASTA file (``options.query``) and its motif definitions
    (``options.qgff``) are processed by a ``MotifFinder``. For every motif
    in ``motifs`` a feature object is built from the query results, then a
    position feature object is added last; the combined query features are
    passed to ``kernel.init`` together with ``features`` before
    ``svm.classify()`` is called.

    Side effect: prints a tab-separated ``#example / output / split`` table
    to standard output (the split column is always ``0``).

    Returns:
        One newline-terminated line per output value: gene name,
        ``positive`` or ``negative`` (``positive`` when the output is
        ``>= 0``) and the raw output, separated by tabs.

    Raises:
        AssertionError: when ``motifs`` contains entries that are absent
            from the query results; the absent motifs are printed first.
    """
    finder_settings = MotifFinderSettings(kirmes_ini.MOTIF_LENGTH,
                                          options.window_width)
    query = MotifFinder(finder_settings=finder_settings)
    query.setFastaFile(options.query)
    query.setMotifs(options.qgff)
    qmotifs, qpositions = query.getResults()
    feats_query = CombinedFeatures()
    wds_svm = EasySVM.EasySVM(kirmes_ini.WDS_KERNEL_PARAMETERS)

    # Do not rely on ``assert`` for this validation: under ``python -O`` the
    # statement is compiled away and the failure would only show up as a
    # KeyError without the explanatory output.
    absent = set(motifs).difference(qmotifs.keys())
    if absent:
        print("The motif positions in the query sequence are incomplete, "
              "there are no positions for:")
        print(absent)
        # Same exception type as before, so existing handlers still match.
        raise AssertionError(
            "no query positions for motifs: %r" % (absent,))

    for motif in motifs:
        motif_feats = wds_svm.createFeatures(qmotifs[motif])
        feats_query.append_feature_obj(motif_feats)

    query_positions = array(qpositions, dtype=float64).T
    rbf_svm = EasySVM.EasySVM(kirmes_ini.RBF_KERNEL_PARAMETERS)
    feats_query.append_feature_obj(rbf_svm.createFeatures(query_positions))

    kernel.init(features, feats_query)
    out = svm.classify().get_labels()
    qgenes = query.getGenes()

    print("#example\toutput\tsplit")
    lines = []
    for idx, value in enumerate(out):
        if value >= 0:
            verdict = "\tpositive\t"
        else:
            verdict = "\tnegative\t"
        # Build the pieces in a list; a single join avoids repeated copies.
        lines.append(qgenes[idx] + verdict + str(value) + "\n")
        print(str(idx) + "\t" + str(value) + "\t0")
    return "".join(lines)
예제 #3
0
def training_run(options):
    """Train the combined SVM on positive/negative sets and emit the report.

    Motif results are collected for ``options.positives`` /
    ``options.pgff`` and ``options.negatives`` / ``options.ngff``. For each
    motif of the positive set (sorted) a weighted-degree position string
    kernel is built on the concatenated positive and negative examples, and
    a Gaussian kernel on the motif positions is added last. Labels are
    ``+1`` for positives and ``-1`` for negatives; the example counts are
    taken from the first value of each motif dictionary.

    After training, ``options.output_path`` is created if it does not
    exist, and the optional report sections (``contrib``, ``poims``,
    ``query``) selected via ``options`` are handed to ``htmlize`` together
    with ``options.output_html``.

    The function does not return a value.
    """
    finder_cfg = MotifFinderSettings(
        kirmes_ini.MOTIF_LENGTH, options.window_width, options.replace)

    pos_finder = MotifFinder(finder_settings=finder_cfg)
    pos_finder.setFastaFile(options.positives)
    pos_finder.setMotifs(options.pgff)
    pmotifs, ppositions = pos_finder.getResults()

    neg_finder = MotifFinder(finder_settings=finder_cfg)
    neg_finder.setFastaFile(options.negatives)
    neg_finder.setMotifs(options.ngff)
    nmotifs, npositions = neg_finder.getResults()

    wds_params = kirmes_ini.WDS_KERNEL_PARAMETERS
    wds_svm = EasySVM.EasySVM(wds_params)
    n_pos = len(pmotifs.values()[0])
    n_neg = len(nmotifs.values()[0])

    # Containers for the combined kernel and its matching feature objects.
    kernel = CombinedKernel()
    features = CombinedFeatures()
    kernel_array = []
    motifs = sorted(pmotifs.keys())

    # One weighted-degree kernel per motif, positives first, then negatives.
    for motif in motifs:
        motif_features = wds_svm.createFeatures(pmotifs[motif] + nmotifs[motif])
        wds_kernel = WeightedDegreePositionStringKernel(
            motif_features, motif_features, wds_params["degree"])
        shifts = wds_params["shift"] * ones(wds_params["seqlength"], dtype=int32)
        wds_kernel.set_shifts(shifts)
        features.append_feature_obj(motif_features)
        kernel_array.append(wds_kernel)
        kernel.append_kernel(wds_kernel)

    # Gaussian kernel over the motif positions goes in last.
    rbf_params = kirmes_ini.RBF_KERNEL_PARAMETERS
    rbf_svm = EasySVM.EasySVM(rbf_params)
    positions = array(ppositions + npositions, dtype=float64).T
    position_features = rbf_svm.createFeatures(positions)
    features.append_feature_obj(position_features)
    motif_labels = append(ones(n_pos), -ones(n_neg))
    complete_labels = Labels(motif_labels)
    rbf_kernel = GaussianKernel(
        position_features, position_features, rbf_params["width"])
    kernel_array.append(rbf_kernel)
    kernel.append_kernel(rbf_kernel)

    # Initialise the kernel on the training features and train the SVM.
    kernel.init(features, features)
    kernel.set_cache_size(kirmes_ini.K_CACHE_SIZE)
    svm = LibSVM(kirmes_ini.K_COMBINED_C, kernel, complete_labels)
    svm.parallel.set_num_threads(kirmes_ini.K_NUM_THREADS)
    svm.train()

    if not os.path.exists(options.output_path):
        os.mkdir(options.output_path)

    # Only the sections requested on the command line end up in the report.
    report = {}
    if options.contrib:
        report["contrib"] = contrib(
            svm, kernel, motif_labels, kernel_array, motifs)
    if options.logos:
        report["poims"] = poims(
            svm, kernel, kernel_array, motifs, options.output_path)
    if options.query:
        report["query"] = evaluate(options, svm, kernel, features, motifs)
    htmlize(report, options.output_html)
예제 #4
0
def training_run(options):
    """Run SVM training on the configured sequence sets and build the report.

    Steps, in order:

    1. Collect motif results for the positive (``options.positives``,
       ``options.pgff``) and negative (``options.negatives``,
       ``options.ngff``) FASTA inputs using shared finder settings.
    2. For every motif key of the positive results, in sorted order, create
       features from the positive plus negative examples and add a
       weighted-degree position string kernel for them.
    3. Add a Gaussian kernel on the transposed motif positions.
    4. Train a ``LibSVM`` on labels ``+1`` (positives) / ``-1`` (negatives).
    5. Create ``options.output_path`` when missing and pass the requested
       report sections to ``htmlize``.

    Nothing is returned; results are delivered through ``htmlize``.
    """
    settings = MotifFinderSettings(kirmes_ini.MOTIF_LENGTH,
                                   options.window_width,
                                   options.replace)

    def _find(fasta_file, motif_file):
        # Configure a fresh finder and hand back its (motifs, positions).
        finder = MotifFinder(finder_settings=settings)
        finder.setFastaFile(fasta_file)
        finder.setMotifs(motif_file)
        return finder.getResults()

    pmotifs, ppositions = _find(options.positives, options.pgff)
    nmotifs, npositions = _find(options.negatives, options.ngff)

    wds_kparams = kirmes_ini.WDS_KERNEL_PARAMETERS
    wds_svm = EasySVM.EasySVM(wds_kparams)
    positive_count = len(pmotifs.values()[0])
    negative_count = len(nmotifs.values()[0])

    # Combined kernel plus the feature container that mirrors its layout.
    kernel = CombinedKernel()
    features = CombinedFeatures()
    kernel_array = []
    motifs = sorted(pmotifs.keys())

    # String kernels: one per motif.
    for motif in motifs:
        examples = pmotifs[motif] + nmotifs[motif]
        motif_features = wds_svm.createFeatures(examples)
        degree = wds_kparams['degree']
        wds_kernel = WeightedDegreePositionStringKernel(motif_features,
                                                        motif_features,
                                                        degree)
        shift_vector = wds_kparams['shift'] * ones(wds_kparams['seqlength'],
                                                   dtype=int32)
        wds_kernel.set_shifts(shift_vector)
        features.append_feature_obj(motif_features)
        kernel_array.append(wds_kernel)
        kernel.append_kernel(wds_kernel)

    # Position kernel: Gaussian kernel on the motif positions.
    rbf_svm = EasySVM.EasySVM(kirmes_ini.RBF_KERNEL_PARAMETERS)
    all_positions = array(ppositions + npositions, dtype=float64)
    position_features = rbf_svm.createFeatures(all_positions.T)
    features.append_feature_obj(position_features)
    motif_labels = append(ones(positive_count), -ones(negative_count))
    complete_labels = Labels(motif_labels)
    rbf_width = kirmes_ini.RBF_KERNEL_PARAMETERS['width']
    rbf_kernel = GaussianKernel(position_features, position_features,
                                rbf_width)
    kernel_array.append(rbf_kernel)
    kernel.append_kernel(rbf_kernel)

    # Kernel initialisation and training.
    kernel.init(features, features)
    kernel.set_cache_size(kirmes_ini.K_CACHE_SIZE)
    svm = LibSVM(kirmes_ini.K_COMBINED_C, kernel, complete_labels)
    svm.parallel.set_num_threads(kirmes_ini.K_NUM_THREADS)
    svm.train()

    output_dir = options.output_path
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    # Assemble whichever report sections were requested.
    html = {}
    if options.contrib:
        html["contrib"] = contrib(svm, kernel, motif_labels, kernel_array,
                                  motifs)
    if options.logos:
        html["poims"] = poims(svm, kernel, kernel_array, motifs, output_dir)
    if options.query:
        html["query"] = evaluate(options, svm, kernel, features, motifs)
    htmlize(html, options.output_html)