Example #1
0
def stEvalInfo(d1, d2, bg):
    """ Summarize the overlap between d1 & d2 against background bg.

        Return a tab-separated string with three fields:
        the number of keys in d1, the number of keys in the
        intersection of d1 and d2 (utils_graph.intersectLists),
        and the p-value from utils_stats.prob3 called with
        (size of bg, size of d1, size of d2, size of match). """

    shared = utils_graph.intersectLists([d1, d2])

    bg_size = len(bg.keys())
    d1_size = len(d1.keys())
    d2_size = len(d2.keys())
    shared_size = len(shared.keys())

    score = utils_stats.prob3(bg_size, d1_size, d2_size, shared_size)

    fields = [str(d1_size), str(shared_size), str(score)]
    return '\t'.join(fields)
Example #2
0
def getEnrichedClusters(gmeans_clusters, target_sets, background_genes, pval_cut):
    """ Find clusters whose overlap with a target set scores below pval_cut.

        gmeans_clusters is {} of cluster_id to {} of genes.
        target_sets is {} of cat to {} of genes.
        background_genes is {} keyed by gene; every cluster gene and
        every target set gene must be one of its keys.

        Every (target set, cluster) pair is scored with utils_stats.prob3
        using (background size, cluster size, target size, match size).
        A pair is kept only when that score is strictly less than pval_cut.

        Return {} of cluster_id to {} of cat to
        [match_genes, cluster_per, target_per, pval], where match_genes is
        the result of utils_graph.intersectLists on the cluster and the
        target set, and cluster_per / target_per are the percentages
        (truncated to int) of the cluster / target set covered by the match.
        Clusters with no qualifying target set are absent from the result.

        If any gene is missing from background_genes, a message is printed
        and the process exits through sys.exit(0).
    """

    # check to make sure everything is in background
    # NOTE(review): exit status 0 normally signals success; it is kept
    # here so existing callers/scripts see the same status -- confirm
    # whether a non-zero status is wanted.
    for cluster in gmeans_clusters:
        for gene in gmeans_clusters[cluster]:
            if gene not in background_genes:
                print('cluster gene not in background genes')
                sys.exit(0)
    for target in target_sets:
        for gene in target_sets[target]:
            if gene not in background_genes:
                print('target set gene not in background genes')
                sys.exit(0)

    ret_clusters = {}
    background_len = len(background_genes)
    for a_set in target_sets:
        target_genes = target_sets[a_set]
        target_len = len(target_genes)
        for cluster in gmeans_clusters:
            cluster_genes = gmeans_clusters[cluster]
            cluster_len = len(cluster_genes)
            match_genes = utils_graph.intersectLists([cluster_genes,
                                                       target_genes])
            match_len = len(match_genes.keys())
            pval = utils_stats.prob3(background_len,
                                     cluster_len,
                                     target_len,
                                     match_len)
            if pval < pval_cut:
                # 100.0 forces float division on Python 2 as well;
                # int() truncates toward zero, as before.
                cluster_per = int(100.0 * match_len / cluster_len)
                target_per = int(100.0 * match_len / target_len)
                ret_clusters.setdefault(cluster, {})[a_set] = [
                    match_genes, cluster_per, target_per, pval]
    return ret_clusters
            if hhe_vp2hp.has_key(vp):
                hhe = utils_graph.intersectLists([hhe_vp2hp[vp], all_hps])

                hhe_len = len(hhe.keys())
                preds = pred2vp2hp[predtype][vp]
                preds_len = len(preds.keys())
                match = utils_graph.intersectLists([hhe, preds])
                match_len = len(match.keys())
                precision = int(round(float(100) * float(match_len) / float(preds_len)))
                if hhe_len > 0:
                    recall = int(round(float(100) * float(match_len) / float(hhe_len)))
                else:
                    recall = "NA"
                random_precision = int(round(float(100) * float(hhe_len) / float(len(all_hps.keys()))))
                if match_len != 0:
                    pval = utils_stats.prob3(len(all_hps.keys()), preds_len, hhe_len, match_len)
                else:
                    pval = "No Matches"
                fout.write(
                    predtype
                    + "\t"
                    + vp
                    + "\t"
                    + str(hhe_len)
                    + "\t"
                    + str(preds_len)
                    + "\t"
                    + str(match_len)
                    + "\t"
                    + str(precision)
                    + "\t"