Beispiel #1
0
def run_all(gbfile, groupfile, transfile, tagFiles):
    """Run the Fisher and binomial tests over every functional group.

    Reads the GenBank annotations from ``gbfile`` and the SNPs from
    ``tagFiles``, maps the SNPs onto the annotations, builds per-gene site
    and SNP counts, loads the functional groups from ``groupfile`` /
    ``transfile``, and then hands everything to ``run_tests`` once per test
    function.  Each run is preceded by a line naming the test and followed
    by a blank line on stdout.  Nothing is returned.
    """
    # NOTE(review): open question carried over from the original author --
    # does this filter out replicates that appear in every tag?
    annodb, al, dna = analyze.read_genbank_annots(gbfile)
    gene_snps = analyze.map_snps(analyze.read_tag_files(tagFiles), al, dna)

    # Per-gene nonsyn vs. syn. counts: first for sites, then for SNPs.
    site_counts = genes_sites_dict(annodb)
    snp_counts = analyze.get_gene_na_ns(gene_snps)

    gene_ids = annodb.keys()
    groups = load_func_assoc(groupfile, transfile)

    labelled_tests = (
        ("Fisher test", fisher_test),
        ("Binomial Test", binomial_test),
    )
    for label, test in labelled_tests:
        # Single-argument parenthesised print emits the same text under the
        # Python 2 print statement and the Python 3 print function.
        print(label)
        run_tests(site_counts, snp_counts, gene_ids, groups, test_func=test)
        print("")
Beispiel #2
0
def hypergeom_cmd(gbfile, groupfile, transfile, tagFiles, N=30):
    """Print functional groups that overlap the top ``N`` ranked genes.

    The ranked list comes from ``phenoseq_top_genes(gbfile, tagFiles)`` and
    the groups from ``load_func_assoc(groupfile, transfile)``.  For every
    group sharing at least one gene with the first ``N`` ranked entries, a
    p-value is obtained from ``p_value`` and multiplied by the total number
    of groups.  NaN p-values are skipped with a warning.

    Output is one comma-separated line per group, in ascending order of the
    scaled p-value: scaled p-value, group name, group size, and the group's
    genes joined by single spaces.  Nothing is returned.
    """
    ranked = phenoseq_top_genes(gbfile, tagFiles)
    pathway_dict = load_func_assoc(groupfile, transfile)
    # Each ranked entry is unpacked as a pair; only its second item is kept.
    top_hits = [gene for (_, gene) in ranked[:N]]

    # Loop invariants: neither collection is modified below.
    n_top = len(top_hits)
    n_pathways = len(pathway_dict)

    rows = []
    for pathway, entry in pathway_dict.items():
        # The gene list is the second item of each dictionary value.
        members = entry[1]
        overlap = sum(1 for gene in members if gene in top_hits)
        if not overlap:
            continue

        pval = p_value(len(members), overlap, n_top)
        if isnan(pval):
            warnings.warn('ignoring invalid NaN pvalues...')
            continue

        # Scale by the number of groups tested (the product is not capped).
        rows.append((n_pathways * pval, pathway, len(members), members))

    for scaled_p, pathway, size, members in sorted(rows):
        fields = (scaled_p, pathway, size, " ".join(members))
        print(",".join(str(field) for field in fields))
Beispiel #3
0
def run_all(gbfile, groupfile, transfile, tagFiles):
    """Apply each statistical test to the functional groups and print results.

    Steps, in order: read annotations from ``gbfile``; read SNPs from
    ``tagFiles`` and map them onto the annotations; compute per-gene site
    counts and per-gene SNP counts; load functional groups from
    ``groupfile`` and ``transfile``; call ``run_tests`` with ``fisher_test``
    and then with ``binomial_test``.  The test's label is printed before
    each run and an empty line after it.  Returns ``None``.
    """
    # NOTE(review): unresolved question from the original code -- does this
    # filter out replicates that appear in every tag?
    annodb, al, dna = analyze.read_genbank_annots(gbfile)
    tag_snps = analyze.read_tag_files(tagFiles)
    mapped = analyze.map_snps(tag_snps, al, dna)

    # Nonsyn vs. syn. sites per gene.
    site_counts = genes_sites_dict(annodb)
    # Nonsyn vs. syn. SNPs per gene.
    snp_counts = analyze.get_gene_na_ns(mapped)

    shared_args = (
        site_counts,
        snp_counts,
        annodb.keys(),
        load_func_assoc(groupfile, transfile),
    )

    tests = [("Fisher test", fisher_test), ("Binomial Test", binomial_test)]
    for title, func in tests:
        # Parenthesised single-argument prints produce identical output
        # whether print is a statement (Python 2) or a function (Python 3).
        print(title)
        run_tests(*shared_args, test_func=func)
        print("")
Beispiel #4
0
def hypergeom_cmd(gbfile, groupfile, transfile, tagFiles, N=30):
    """Report functional groups sharing genes with the ``N`` top-ranked genes.

    Genes are ranked by ``phenoseq_top_genes(gbfile, tagFiles)``; groups are
    loaded with ``load_func_assoc(groupfile, transfile)``.  Groups with no
    gene among the first ``N`` ranked entries are ignored.  For the rest,
    ``p_value(group size, overlap size, top-list size)`` is computed and
    multiplied by the number of groups; a NaN result triggers a warning and
    the group is dropped.

    Prints one line per remaining group, sorted ascending by the scaled
    p-value, formatted as ``p,name,size,gene1 gene2 ...``.  Returns ``None``.
    """
    top_genes = phenoseq_top_genes(gbfile, tagFiles)
    pathway_dict = load_func_assoc(groupfile, transfile)
    # Keep the second item of each of the first N ranked pairs.
    shortlist = [pair_gene for (_score, pair_gene) in top_genes[:N]]
    shortlist_size = len(shortlist)
    group_count = len(pathway_dict)

    hits = []
    for group_name, group_value in pathway_dict.items():
        # Index 1 of each dictionary value holds the group's gene list.
        group_genes = group_value[1]
        shared = [g for g in group_genes if g in shortlist]
        if len(shared) == 0:
            continue

        pval = p_value(len(group_genes), len(shared), shortlist_size)
        if not isnan(pval):
            # Multiply by the number of groups; the product is left uncapped.
            hits.append(
                (group_count * pval, group_name, len(group_genes), group_genes))
        else:
            warnings.warn('ignoring invalid NaN pvalues...')

    hits.sort()
    for record in hits:
        scaled_p, group_name, group_size, group_genes = record
        columns = [scaled_p, group_name, group_size, " ".join(group_genes)]
        print(",".join([str(col) for col in columns]))