Beispiel #1
0
def main(cmdline=None):
    """Bin protein coding gene quantifications and plot detection histograms.

    For every file listed in ``args.filenames`` the CSV is loaded, any
    'gene_name' column is discarded, and the rows are restricted to the gene
    ids returned by ``protein_coding_gene_ids``.  The binned table is written
    to ``genes-detected_<basename>.csv`` in the current directory and the
    histogram figure is collected under the key ``<basename>.png``.  The
    collected figures are finally handed to ``save_fixed_height``.

    Args:
        cmdline: argument list forwarded to ``parser.parse_args``; ``None``
            lets argparse fall back to its default (``sys.argv``).
    """
    args = make_parser().parse_args(cmdline)

    protein_coding = protein_coding_gene_ids(load_gtf_cache(args.gtf_cache))

    figures = OrderedDict()
    for path in args.filenames:
        quantifications = pandas.read_csv(path, header=0, index_col=0)
        if 'gene_name' in quantifications.columns:
            # Keep every column except the gene name annotation.
            kept = [
                name for name in quantifications.columns
                if name != 'gene_name'
            ]
            quantifications = quantifications[kept]
        protein_quantifications = quantifications.loc[protein_coding]

        # Output names are derived from the input file name without its
        # directory or extension.
        basename = os.path.splitext(os.path.basename(path))[0]
        png_name = basename + '.png'
        csv_name = 'genes-detected_' + basename + '.csv'

        binned = bin_library_quantification(
            protein_quantifications, args.quantification)
        binned.to_csv(csv_name)

        figures[png_name] = plot_gene_detection_histogram(
            binned,
            basename,
            show_genes_detected=not args.hide_detected_sum)

    save_fixed_height(figures)
def main(cmdline=None):
    """Bin protein coding gene quantifications and plot detection histograms.

    Each CSV in ``args.filenames`` is loaded, stripped of any 'gene_name'
    column and restricted to the protein coding gene ids.  The binned result
    is written to ``genes-detected_<basename>.csv`` and the histogram figure
    is stored under ``<basename>.png`` before all figures are passed to
    ``save_fixed_height``.

    The quantification type is taken from ``args.quantification`` when the
    parser defines that option and it is set; otherwise 'FPKM' is used.

    Args:
        cmdline: argument list forwarded to ``parser.parse_args``; ``None``
            lets argparse fall back to its default (``sys.argv``).
    """
    parser = make_parser()
    args = parser.parse_args(cmdline)

    # Honour a parser-provided quantification type instead of hard-coding
    # it; parsers without the option keep the historical 'FPKM' behaviour.
    quantification = getattr(args, 'quantification', None) or 'FPKM'

    annotation = load_gtf_cache(args.gtf_cache)
    protein_coding = protein_coding_gene_ids(annotation)

    toplot = OrderedDict()
    for filename in args.filenames:
        all_quantifications = pandas.read_csv(filename, header=0, index_col=0)
        if 'gene_name' in all_quantifications.columns:
            # Keep every column except the gene name annotation.
            columns = [
                c for c in all_quantifications.columns if c != 'gene_name'
            ]
            all_quantifications = all_quantifications[columns]
        protein_quantifications = all_quantifications.loc[protein_coding]

        # Output names are derived from the input file name without its
        # directory or extension.
        _, filename = os.path.split(filename)
        basename, _ = os.path.splitext(filename)
        png_name = basename + '.png'
        csv_name = 'genes-detected_' + basename + '.csv'

        binned_quantifications = bin_library_quantification(
            protein_quantifications, quantification)
        binned_quantifications.to_csv(csv_name)

        f = plot_gene_detection_histogram(
            binned_quantifications,
            basename,
            show_genes_detected=not args.hide_detected_sum)
        toplot[png_name] = f

    save_fixed_height(toplot)
def make_experiment_by_library_coverage_plots(experiments, coverage,
                                              output_format, bare):
    """Build one coverage figure per experiment covering its libraries.

    Every row yielded by ``experiments.iterrows()`` produces a figure from
    ``make_coverage_plot`` using the ``coverage`` entries selected by that
    row's 'replicates' value.  Figures are keyed by
    ``<experiment>.coverage.<output_format>`` and then passed to
    ``save_fixed_height``.

    Args:
        experiments: table whose index is the experiment name and whose
            'replicates' column selects entries of ``coverage``.
        coverage: object indexed with each row's 'replicates' value.
        output_format: extension appended to each image name.
        bare: accepted for signature compatibility; not used here.
    """
    figures = OrderedDict()

    for name, row in experiments.iterrows():
        replicates = row['replicates']
        target = name + '.coverage.' + output_format
        figures[target] = make_coverage_plot(name, coverage[replicates])

    save_fixed_height(figures)
def make_combined_median_normalized_summary(experiments, coverage,
                                            output_format, bare):
    """Coverage plot showing the median +/-sd of all libraries pooled together

    The 'replicates' entries of every experiment are concatenated into one
    list of library ids and plotted with a single call to
    ``make_median_normalized_summary``.  The figure is labelled with, and
    its file named after, the last entry of ``experiments.index``.

    Args:
        experiments: pandas.DataFrame indexed by experiment name with a
            'replicates' column holding an iterable of library ids per row.
        coverage: forwarded unchanged to ``make_median_normalized_summary``.
        output_format: extension appended to the image name.
        bare: forwarded to ``make_median_normalized_summary``; when true the
            image name uses the '.bare.' suffix.

    Returns:
        None.  When ``experiments`` has no rows nothing is plotted or saved.
    """
    assert isinstance(experiments, pandas.DataFrame)

    # Without any experiment there is neither a library to plot nor a name
    # for the output file; previously this path failed with an unbound
    # ``experiment`` variable.
    if len(experiments.index) == 0:
        return

    tosave = OrderedDict()
    library_ids = []
    for experiment in experiments.index:
        library_ids.extend(experiments['replicates'][experiment])

    # ``experiment`` deliberately keeps the value from the final loop
    # iteration: the combined plot is named after the last experiment.
    f = make_median_normalized_summary(experiment, library_ids, coverage, bare)
    if bare:
        plot_suffix = '.median-normalized.coverage.bare.'
    else:
        plot_suffix = '.median-normalized.coverage.'

    image_name = experiment + plot_suffix + output_format
    f.savefig(image_name)
    tosave[image_name] = f
    save_fixed_height(tosave)
def make_by_experiment_median_summary(experiments, coverage, output_format,
                                      bare):
    """Coverage plot showing the median +/-sd of all libraries for an experiment

    One figure is created per entry of ``experiments.index``; the drawing
    itself is delegated to ``add_median_plot``.  Figures are keyed by
    ``<experiment>.median.coverage.<output_format>`` and handed to
    ``save_fixed_height`` while the plotting style is still active.

    Args:
        experiments: table whose index holds the experiment names; forwarded
            to ``add_median_plot``.
        coverage: forwarded unchanged to ``add_median_plot``.
        output_format: extension appended to each image name.
        bare: forwarded unchanged to ``add_median_plot``.
    """
    # Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*'
    # and 3.8 removed the old names, so prefer the new name when it exists
    # and fall back to the historical one on older releases.
    style_name = 'seaborn-v0_8-dark-palette'
    if style_name not in pyplot.style.available:
        style_name = 'seaborn-dark-palette'

    tosave = OrderedDict()
    with pyplot.style.context(style_name):
        for experiment in experiments.index:
            f = pyplot.figure(dpi=100)
            ax = f.add_subplot(1, 1, 1)

            add_median_plot(ax, experiments, experiment, coverage, bare)

            ax.set_title('Median coverage for {}'.format(experiment))
            ax.set_xlabel("position quantile (5' to 3')")
            ax.set_ylabel('Read depth')
            # Anchor the legend just outside the upper right of the axes.
            ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0)
            image_name = experiment + '.median.coverage.' + output_format
            tosave[image_name] = f

        save_fixed_height(tosave)
def make_per_experiment_median_normalized_summary(experiments, coverage,
                                                  output_format, bare):
    """Coverage plot showing the median +/-sd of all libraries for each experiment

    For every entry of ``experiments.index`` a figure is requested from
    ``make_median_normalized_summary`` using that experiment's 'replicates'
    value, its first axes is retitled with underscores in the experiment
    name replaced by spaces, and the figure is saved under
    ``<experiment><suffix><output_format>``.  All figures are then passed
    to ``save_fixed_height``.

    Args:
        experiments: pandas.DataFrame indexed by experiment name with a
            'replicates' column.
        coverage: forwarded unchanged to ``make_median_normalized_summary``.
        output_format: extension appended to each image name.
        bare: forwarded to ``make_median_normalized_summary``; when true the
            image names use the '.bare.' suffix.
    """
    assert isinstance(experiments, pandas.DataFrame)

    suffix = ('.median-normalized.coverage.bare.' if bare
              else '.median-normalized.coverage.')

    figures = OrderedDict()
    for name in experiments.index:
        replicates = experiments['replicates'][name]
        figure = make_median_normalized_summary(name, replicates, coverage,
                                                bare)

        first_axes = figure.get_axes()[0]
        readable_name = name.replace('_', ' ')
        first_axes.set_title(
            'Median normalized coverage for {}'.format(readable_name))

        target = name + suffix + output_format
        figure.savefig(target)
        figures[target] = figure

    save_fixed_height(figures)