def missing_plot(cgp_missing, out_path, gencode):
    """Plot the missing genes/transcripts rescued by Comparative Augustus.

    Runs ``cgp_missing`` through ``convert_dicts_to_dataframe`` with
    ``norm=False`` and forwards the two returned values to
    ``plot_lib.side_by_side_unequal_barplot``.

    :param cgp_missing: input handed to ``convert_dicts_to_dataframe``
        (presumably nested count dicts -- confirm against the caller).
    :param out_path: forwarded unchanged to the plotting routine.
    :param gencode: annotation set name; embedded in the title and the
        output name.
    """
    frame, labels = convert_dicts_to_dataframe(cgp_missing, norm=False)
    plot_name = "{}_{}_cgp_consensus".format(gencode, "missing_genes")
    plot_title = (
        "Breakdown of the number of missing genes/transcripts rescued by Comparative Augustus\n"
        "to the consensus gene set derived from the annotation set {}"
    ).format(gencode)
    plot_lib.side_by_side_unequal_barplot(
        frame, labels, out_path, plot_name, plot_title)
def join_genes_plot(cgp_join_genes, out_path, gencode):
    """Plot how many CGP consensus transcripts join TMR transcripts.

    Runs ``cgp_join_genes`` through ``convert_dicts_to_dataframe`` with
    ``norm=False`` and forwards the two returned values to
    ``plot_lib.side_by_side_unequal_barplot``.

    :param cgp_join_genes: input handed to ``convert_dicts_to_dataframe``
        (presumably nested count dicts -- confirm against the caller).
    :param out_path: forwarded unchanged to the plotting routine.
    :param gencode: annotation set name; embedded in the title and the
        output name.
    """
    frame, labels = convert_dicts_to_dataframe(cgp_join_genes, norm=False)
    plot_name = "{}_{}_cgp_consensus".format(gencode, "join_genes")
    plot_title = (
        "How many CGP consensus transcripts join TMR transcripts in a supported fashion\n"
        "to the consensus gene set derived from the annotation set {}"
    ).format(gencode)
    plot_lib.side_by_side_unequal_barplot(
        frame, labels, out_path, plot_name, plot_title)
Ejemplo n.º 3
0
def gene_fail_plot(gene_fail_evals, out_path, gencode, biotype):
    """Plot the genes of one biotype that failed consensus finding.

    Converts ``gene_fail_evals`` with ``convert_dicts_to_dataframe`` (using
    that helper's default ``norm``) and draws the result with
    ``plot_lib.stacked_unequal_barplot``, labelling the y axis
    "Number of genes".

    :param gene_fail_evals: input handed to ``convert_dicts_to_dataframe``.
    :param out_path: forwarded unchanged to the plotting routine.
    :param gencode: annotation set name; embedded in the title and the
        output name.
    :param biotype: biotype label; embedded in the title and the output name.
    """
    frame, labels = convert_dicts_to_dataframe(gene_fail_evals)
    plot_name = "{}_{}_{}_consensus".format(gencode, biotype, "GeneFail")
    plot_title = (
        "Breakdown of {} genes that failed consensus finding\nfrom annotation set {}"
    ).format(biotype, gencode)
    plot_lib.stacked_unequal_barplot(
        frame, labels, out_path, plot_name, plot_title,
        ylabel="Number of genes")
Ejemplo n.º 4
0
def transcript_gene_plot(evals, out_path, gencode, mode, biotype):
    """Plot the consensus-finding categories for genes or transcripts.

    Converts ``evals`` with ``convert_dicts_to_dataframe`` (``norm=True``),
    obtains a total from ``find_total(evals)`` for the title, picks a colour
    palette, and draws the result with ``plot_lib.stacked_barplot``.

    :param evals: input handed to ``convert_dicts_to_dataframe`` and
        ``find_total``.
    :param out_path: forwarded unchanged to the plotting routine.
    :param gencode: annotation set name; embedded in the title and the
        output name.
    :param mode: embedded in the title and the output name; the value
        ``"genes"`` always selects the default palette.
    :param biotype: embedded in the title and the output name; only
        ``"protein_coding"`` can select the triple palette.
    """
    frame, labels = convert_dicts_to_dataframe(evals, norm=True)
    n_total = find_total(evals)
    template = "Breakdown of {:,} {} {} categorized by consensus finding\nfrom annotation set {}"
    plot_title = template.format(n_total, biotype, mode, gencode)
    plot_name = "{}_{}_{}_consensus".format(gencode, biotype, mode)
    # The triple palette applies only to protein_coding plots outside
    # "genes" mode; every other combination uses the default palette.
    if mode != "genes" and biotype == "protein_coding":
        colors = etc.config.triple_palette
    else:
        colors = etc.config.palette
    plot_lib.stacked_barplot(
        frame, labels, out_path, plot_name, plot_title,
        color_palette=colors)
def replace_plot(cgp_replace, out_path, gencode):
    """Plot transMap/augustusTMR consensus transcripts replaced by augustusCGP.

    Runs ``cgp_replace`` through ``convert_dicts_to_dataframe`` with
    ``norm=False`` and draws the result with
    ``plot_lib.side_by_side_unequal_barplot``, labelling the y axis "Count".

    :param cgp_replace: input handed to ``convert_dicts_to_dataframe``
        (presumably nested count dicts -- confirm against the caller).
    :param out_path: forwarded unchanged to the plotting routine.
    :param gencode: annotation set name; embedded in the title and the
        output name.
    """
    frame, labels = convert_dicts_to_dataframe(cgp_replace, norm=False)
    plot_name = "{}_{}_cgp_consensus".format(gencode, "transcript_replacement")
    plot_title = (
        "Breakdown of the number of transMap/augustusTMR consensus transcripts replaced by augustusCGP\n"
        "from the consensus gene set derived from the annotation set {}"
    ).format(gencode)
    plot_lib.side_by_side_unequal_barplot(
        frame, labels, out_path, plot_name, plot_title, ylabel="Count")
def addition_plot(cgp_additions, out_path, gencode):
    """Plot new genes/transcripts introduced by Comparative Augustus.

    Runs ``cgp_additions`` through ``convert_dicts_to_dataframe`` with
    ``norm=False`` and draws the result with
    ``plot_lib.side_by_side_unequal_barplot``, labelling the y axis "Count".

    :param cgp_additions: input handed to ``convert_dicts_to_dataframe``
        (presumably nested count dicts -- confirm against the caller).
    :param out_path: forwarded unchanged to the plotting routine.
    :param gencode: annotation set name; embedded in the title and the
        output name.
    """
    frame, labels = convert_dicts_to_dataframe(cgp_additions, norm=False)
    plot_name = "{}_{}_cgp_consensus".format(gencode, "gene_addition")
    plot_title = (
        "Breakdown of the number of new genes/transcripts introduced by Comparative Augustus\n"
        "to the consensus gene set derived from the annotation set {}"
    ).format(gencode)
    plot_lib.side_by_side_unequal_barplot(
        frame, labels, out_path, plot_name, plot_title, ylabel="Count")
Ejemplo n.º 7
0
def biotype_stacked_plot(counter, out_path, gencode, mode):
    """Plot the biotype breakdown of the final consensus set.

    Converts ``counter`` with ``convert_dicts_to_dataframe`` (using that
    helper's default ``norm``) and draws the result with
    ``plot_lib.stacked_unequal_barplot``.

    :param counter: input handed to ``convert_dicts_to_dataframe``.
    :param out_path: forwarded unchanged to the plotting routine.
    :param gencode: annotation set name; embedded in the title and the
        output name.
    :param mode: used verbatim in the output name and lower-cased in the
        title and the y-axis label.
    """
    frame, labels = convert_dicts_to_dataframe(counter)
    if gencode == "GencodePseudoGeneVM7":
        # For this annotation set every space in a category label becomes a
        # line break (presumably to keep long labels readable -- confirm).
        labels = [label.replace(" ", "\n") for label in labels]
    mode_lc = mode.lower()
    template = "Biotype breakdown in final {} set derived\nfrom annotation set {}"
    plot_title = template.format(mode_lc, gencode)
    plot_name = "{}_{}_{}_consensus".format(gencode, mode, "biotypeStackedPlot")
    axis_label = "Number of {}s".format(mode_lc)
    plot_lib.stacked_unequal_barplot(
        frame, labels, out_path, plot_name, plot_title, ylabel=axis_label)
def consensus_stats_plot(consensus_stats, out_path, gencode):
    """Plot the origins of the final consensus set, once per level.

    For each of the levels "Transcript" and "Gene", pulls that level's entry
    out of every value in ``consensus_stats``, converts the result with
    ``convert_dicts_to_dataframe`` (``norm=False``) and draws it with
    ``plot_lib.stacked_unequal_barplot``. Two plots are produced per call.

    :param consensus_stats: mapping whose values are indexable by
        "Transcript" and "Gene"; key order is preserved in the plot input.
    :param out_path: forwarded unchanged to the plotting routine.
    :param gencode: annotation set name; embedded in the title and the
        output name.
    """
    # TODO: make this pandas-multi-indexable and do everything in pandas.
    base_title = (
        "Breakdown of the origins of the final consensus {} set\n"
        "to the consensus gene set derived from the annotation set {}")
    # The loop variable is deliberately not called ``categories``: that name
    # is rebound below to the category labels returned by the converter.
    for level in ("Transcript", "Gene"):
        # ``.items()`` exists on both Python 2 and Python 3 mappings, whereas
        # ``.iteritems()`` raises AttributeError on Python 3.
        data = OrderedDict(
            (key, stats[level]) for key, stats in consensus_stats.items())
        results, categories = convert_dicts_to_dataframe(data, norm=False)
        title = base_title.format(level, gencode)
        out_name = "{}_{}_{}_cgp_consensus".format(
            level, gencode, "consensus_overall_metrics")
        plot_lib.stacked_unequal_barplot(results,
                                         categories,
                                         out_path,
                                         out_name,
                                         title,
                                         ylabel="Number of {}".format(level))