def missing_plot(cgp_missing, out_path, gencode):
    """
    Draw the side-by-side bar plot of missing genes/transcripts rescued by Comparative Augustus.

    cgp_missing is converted with convert_dicts_to_dataframe (norm=False) and the result is
    handed to plot_lib.side_by_side_unequal_barplot together with out_path, an output name
    built from gencode, and a title that names the annotation set gencode.
    """
    data, groups = convert_dicts_to_dataframe(cgp_missing, norm=False)
    title_template = ("Breakdown of the number of missing genes/transcripts rescued by Comparative Augustus\n"
                      "to the consensus gene set derived from the annotation set {}")
    plot_name = "{}_{}_cgp_consensus".format(gencode, "missing_genes")
    plot_lib.side_by_side_unequal_barplot(data, groups, out_path, plot_name,
                                          title_template.format(gencode))
def join_genes_plot(cgp_join_genes, out_path, gencode):
    """
    Draw the side-by-side bar plot of CGP consensus transcripts that join TMR transcripts.

    cgp_join_genes is converted with convert_dicts_to_dataframe (norm=False); the converted
    data and its categories are plotted by plot_lib.side_by_side_unequal_barplot into out_path
    under an output name built from gencode.
    """
    data, groups = convert_dicts_to_dataframe(cgp_join_genes, norm=False)
    title_template = ("How many CGP consensus transcripts join TMR transcripts in a supported fashion\n"
                      "to the consensus gene set derived from the annotation set {}")
    plot_name = "{}_{}_cgp_consensus".format(gencode, "join_genes")
    plot_lib.side_by_side_unequal_barplot(data, groups, out_path, plot_name,
                                          title_template.format(gencode))
def gene_fail_plot(gene_fail_evals, out_path, gencode, biotype):
    """
    Draw the stacked bar plot of genes of one biotype that failed consensus finding.

    gene_fail_evals is converted with convert_dicts_to_dataframe using its default
    normalization setting, then plotted by plot_lib.stacked_unequal_barplot with the
    y axis labelled "Number of genes". The title and output name both embed biotype and gencode.
    """
    data, groups = convert_dicts_to_dataframe(gene_fail_evals)
    plot_title = "Breakdown of {} genes that failed consensus finding\nfrom annotation set {}".format(
        biotype, gencode)
    plot_name = "{}_{}_{}_consensus".format(gencode, biotype, "GeneFail")
    plot_lib.stacked_unequal_barplot(data, groups, out_path, plot_name, plot_title,
                                     ylabel="Number of genes")
def transcript_gene_plot(evals, out_path, gencode, mode, biotype):
    """
    Draw the stacked bar plot of consensus-finding categories for one biotype and mode.

    evals is converted with convert_dicts_to_dataframe (norm=True). The total reported in the
    title comes from find_total(evals) and is printed with thousands separators.
    etc.config.triple_palette is used only when mode is not "genes" and biotype is
    "protein_coding"; every other combination uses etc.config.palette.
    """
    data, groups = convert_dicts_to_dataframe(evals, norm=True)
    n_total = find_total(evals)
    plot_title = "Breakdown of {:,} {} {} categorized by consensus finding\nfrom annotation set {}".format(
        n_total, biotype, mode, gencode)
    plot_name = "{}_{}_{}_consensus".format(gencode, biotype, mode)
    if mode != "genes" and biotype == "protein_coding":
        colors = etc.config.triple_palette
    else:
        colors = etc.config.palette
    plot_lib.stacked_barplot(data, groups, out_path, plot_name, plot_title, color_palette=colors)
def replace_plot(cgp_replace, out_path, gencode):
    """
    Draw the side-by-side bar plot of transMap/augustusTMR transcripts replaced by augustusCGP.

    cgp_replace is converted with convert_dicts_to_dataframe (norm=False) and plotted by
    plot_lib.side_by_side_unequal_barplot with the y axis labelled "Count". The output name
    and the title both embed gencode.
    """
    data, groups = convert_dicts_to_dataframe(cgp_replace, norm=False)
    title_template = ("Breakdown of the number of transMap/augustusTMR consensus transcripts replaced by augustusCGP\n"
                      "from the consensus gene set derived from the annotation set {}")
    plot_name = "{}_{}_cgp_consensus".format(gencode, "transcript_replacement")
    plot_lib.side_by_side_unequal_barplot(data, groups, out_path, plot_name,
                                          title_template.format(gencode), ylabel="Count")
def addition_plot(cgp_additions, out_path, gencode):
    """
    Draw the side-by-side bar plot of new genes/transcripts introduced by Comparative Augustus.

    cgp_additions is converted with convert_dicts_to_dataframe (norm=False) and plotted by
    plot_lib.side_by_side_unequal_barplot with the y axis labelled "Count". The output name
    and the title both embed gencode.
    """
    data, groups = convert_dicts_to_dataframe(cgp_additions, norm=False)
    title_template = ("Breakdown of the number of new genes/transcripts introduced by Comparative Augustus\n"
                      "to the consensus gene set derived from the annotation set {}")
    plot_name = "{}_{}_cgp_consensus".format(gencode, "gene_addition")
    plot_lib.side_by_side_unequal_barplot(data, groups, out_path, plot_name,
                                          title_template.format(gencode), ylabel="Count")
def biotype_stacked_plot(counter, out_path, gencode, mode):
    """
    Draw the stacked bar plot of the biotype breakdown in the final consensus set.

    counter is converted with convert_dicts_to_dataframe using its default normalization
    setting. mode is lower-cased for the title and the y-axis label ("Number of <mode>s")
    but used unchanged in the output name.
    """
    data, groups = convert_dicts_to_dataframe(counter)
    if gencode == "GencodePseudoGeneVM7":
        # For this annotation set every space in a category label becomes a line break.
        groups = [label.replace(" ", "\n") for label in groups]
    mode_lower = mode.lower()
    plot_title = "Biotype breakdown in final {} set derived\nfrom annotation set {}".format(
        mode_lower, gencode)
    plot_name = "{}_{}_{}_consensus".format(gencode, mode, "biotypeStackedPlot")
    y_label = "Number of {}s".format(mode_lower)
    plot_lib.stacked_unequal_barplot(data, groups, out_path, plot_name, plot_title, ylabel=y_label)
def consensus_stats_plot(consensus_stats, out_path, gencode):
    """
    Draw one stacked bar plot per level ("Transcript", then "Gene") of the consensus origins.

    consensus_stats is a mapping whose values are indexed by "Transcript" and "Gene".
    For each level, the per-key entries are gathered into an OrderedDict (preserving the
    iteration order of consensus_stats), converted with convert_dicts_to_dataframe
    (norm=False), and plotted by plot_lib.stacked_unequal_barplot into out_path. The level
    name appears in the title, the output name and the y-axis label.
    """
    # TODO: do everything in pandas. Flattening consensus_stats to
    # {(outer_key, inner_key): values} would make it pandas-multi-indexable.
    base_title = ("Breakdown of the origins of the final consensus {} set\n"
                  "to the consensus gene set derived from the annotation set {}")
    # The levels live in their own tuple so that the categories returned by
    # convert_dicts_to_dataframe below never rebind the sequence being iterated.
    for cat in ("Transcript", "Gene"):
        # .items() works on both Python 2 and Python 3 mappings (iteritems is Python 2 only).
        data = OrderedDict((name, stats[cat]) for name, stats in consensus_stats.items())
        results, categories = convert_dicts_to_dataframe(data, norm=False)
        title = base_title.format(cat, gencode)
        out_name = "{}_{}_{}_cgp_consensus".format(cat, gencode, "consensus_overall_metrics")
        plot_lib.stacked_unequal_barplot(results, categories, out_path, out_name, title,
                                         ylabel="Number of {}".format(cat))