Ejemplo n.º 1
0
def size_dist(inputs, paths_in, paths_out):
    """Plot read-length distributions: one comparison figure plus one per sample.

    For every name in ``inputs['files']`` the pickled ``read_distribution``
    found under ``<path_analysis><name>/readQC/`` is loaded and converted with
    ``ribo_util.dict_to_df``.  All samples are drawn together and saved to
    ``<path_figures>Comparison/size_dist/sizedist_<names>.pdf``; each sample
    is then drawn alone and saved to ``<path_figures><name>/sizedist.pdf``,
    and its plotted values are written to ``size_plot_values.csv`` next to
    the pickle.

    Args:
        inputs: Mapping providing ``'files'``, an iterable of sample-name
            strings (they are concatenated into paths and file names).
        paths_in: Unused; kept for interface compatibility.
        paths_out: Mapping providing the ``'path_analysis'`` and
            ``'path_figures'`` string prefixes.  Paths are built by plain
            concatenation, so the prefixes presumably end with a path
            separator -- confirm against the caller.  Output directories are
            not created here.
    """
    files = inputs['files']
    path_figure = paths_out['path_figures']

    sns.set_style("white")

    # Load and convert every sample exactly once; both the comparison figure
    # and the per-sample figures are drawn from the same frames.
    samples = []
    for fname in files:
        path_analysis = paths_out['path_analysis'] + fname + '/readQC/'
        data = ribo_util.unPickle(path_analysis + 'read_distribution')
        df = ribo_util.dict_to_df(data, 'Length', 'fraction of total')
        samples.append((fname, path_analysis, df))

    # Every sample name followed by '_' is part of the comparison file name.
    naming = ''.join(fname + '_' for fname, _, _ in samples)

    # --- Comparison figure: all samples on one set of axes -----------------
    plt.figure(figsize=(5, 3))
    for fname, _, df in samples:
        plt.plot(df, label=fname)
    plt.title('Size Distribution')
    plt.xlabel("Read Length")
    plt.ylabel("Percent of Reads")
    if samples:
        # Legend is anchored outside the axes so it never covers the curves.
        plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)

    plt.savefig(path_figure + 'Comparison/size_dist' + '/sizedist_' + naming +
                '.pdf',
                dpi=400,
                bbox_inches="tight")
    plt.show()

    # --- One figure (and one CSV) per sample --------------------------------
    for fname, path_analysis, df in samples:
        fig = plt.figure(figsize=(5, 3))
        plt.plot(df, label=fname)
        plt.title('Size Distribution')
        plt.xlabel("Read Length")
        plt.ylabel("Percent of Reads")
        plt.legend(loc='upper right')

        plt.savefig(path_figure + fname + '/sizedist.pdf',
                    dpi=400,
                    bbox_inches="tight")

        # dict_to_df presumably already returns a DataFrame; the wrap is kept
        # so the CSV export works for any DataFrame-compatible return value.
        pd.DataFrame(df).to_csv(path_analysis + 'size_plot_values.csv')

        # Close (not just clear) the figure: pyplot keeps every open figure
        # alive, so clearing alone leaks one figure per sample.
        plt.close(fig)
Ejemplo n.º 2
0
def plot_avggene(inputs, paths_in, paths_out, settings, settings_plot):
    """Plot and save average-gene profiles and heatmaps for each sample.

    For every name in ``inputs['files']`` four pickles are read from
    ``<path_analysis><name>/avggenes/`` (start/stop profiles ``*_all`` and
    start/stop heatmaps ``*_HM``).  A 2x2 figure is drawn -- line plots of the
    start and stop profiles on the top row, heatmaps below -- and saved to
    ``<path_figures><name>/avggene_<settings>.pdf``.  The profile tables are
    also exported as ``start_all.csv`` / ``stop_all.csv`` next to the pickles.
    All figures are shown once at the end.

    Args:
        inputs: Mapping providing ``'files'``, an iterable of sample-name
            strings.
        paths_in: Unused; kept for interface compatibility.
        paths_out: Mapping providing the ``'path_analysis'`` and
            ``'path_figures'`` string prefixes (joined by concatenation).
        settings: Mapping providing ``'minlength'``, ``'maxlength'``,
            ``'length_in_ORF'``, ``'length_out_ORF'``, ``'density_type'``,
            ``'next_gene'``, ``'equal_weight'`` and ``'threshold'``; they are
            combined into the tag embedded in the pickle and figure names.
            ``'density_type'`` and ``'equal_weight'`` must be strings.
        settings_plot: Mapping providing ``'HM_max'`` (upper colour limit of
            the heatmaps) and ``'ymax'`` (upper y-limit of the line plots;
            ``0`` means "auto-scale each sample to its own maximum").
    """
    files = inputs['files']
    hmmax = settings_plot['HM_max']
    ymax = settings_plot['ymax']

    path_figure = paths_out['path_figures']

    # Tag shared by the input pickles and the output figure name.  The field
    # order presumably mirrors the naming used when the pickles were written
    # -- do not reorder without checking the producer.
    name_settings = (
        str(settings['length_in_ORF']) + '_'
        + str(settings['length_out_ORF']) + '_'
        + str(settings['next_gene']) + '_'
        + str(settings['threshold']) + '_'
        + settings['density_type'] + '_'
        + settings['equal_weight'] + '_'
        + str(settings['minlength']) + '_'
        + str(settings['maxlength']) + '_'
    )

    for fname in files:
        sns.set_style("white")
        plt.figure(figsize=(20, 5))
        path_analysis = paths_out['path_analysis'] + fname + '/avggenes/'

        start_base = path_analysis + 'avg_start_' + name_settings
        stop_base = path_analysis + 'avg_stop_' + name_settings

        data_start = ribo_util.unPickle(start_base + '_all')
        data_stop = ribo_util.unPickle(stop_base + '_all')
        data_startHM = ribo_util.unPickle(start_base + '_HM')
        data_stopHM = ribo_util.unPickle(stop_base + '_HM')

        # The x-range of the line plots spans one unit per stored entry.
        xmax = len(data_start.keys())

        data_start = ribo_util.dict_to_df(data_start, 'Position', 'Reads')
        data_stop = ribo_util.dict_to_df(data_stop, 'Position', 'Reads')
        data_startHM = ribo_util.heatmapdict_to_df(data_startHM, 'Length',
                                                   'Position', 'composition')
        data_stopHM = ribo_util.heatmapdict_to_df(data_stopHM, 'Length',
                                                  'Position', 'composition')

        # Reverse the row order of the heatmap tables before drawing.
        data_startHM = data_startHM.reindex(index=data_startHM.index[::-1])
        data_stopHM = data_stopHM.reindex(index=data_stopHM.index[::-1])

        data_start.to_csv(path_analysis + 'start_all.csv')
        data_stop.to_csv(path_analysis + 'stop_all.csv')

        # Resolve the y-limit per sample.  The configured value is left
        # untouched so that ymax == 0 auto-scales *every* sample rather than
        # freezing the limit computed for the first one.
        if ymax == 0:
            max_start = data_start["Reads"].max()
            max_stop = data_stop["Reads"].max()
            y_limit = max_start if max_start > max_stop else max_stop
        else:
            y_limit = ymax

        # Top row: start (panel 1) and stop (panel 2) profiles.
        profiles = (('Start', data_start), ('Stop', data_stop))
        for panel, (graph, data) in enumerate(profiles, start=1):
            plt.subplot(2, 2, panel)
            plt.plot(
                data,
                sns.xkcd_rgb["dark grey"],
            )
            plt.title(fname + ' ' + graph)
            plt.ylabel("Reads")
            plt.ylim(0, y_limit)
            plt.xlim(0, xmax)
            sns.despine()

        # Bottom row: start (panel 3) and stop (panel 4) heatmaps.
        for panel, dataHM in enumerate((data_startHM, data_stopHM), start=3):
            plt.subplot(2, 2, panel)
            plot = sns.heatmap(dataHM,
                               cmap="ocean_r",
                               vmin=0,
                               vmax=hmmax,
                               cbar=False)

            # Thin the tick labels: every 10th on x, every 4th on y.
            plt.setp(plot.get_xticklabels(), visible=False)
            plt.setp(plot.get_xticklabels()[0::10], visible=True)
            plt.setp(plot.get_yticklabels(), visible=False)
            plt.setp(plot.get_yticklabels()[0::4], visible=True)

        # Save inside the loop: savefig writes the *current* figure, so
        # saving after the loop would only ever store the last sample.
        plt.savefig(path_figure + fname + '/avggene_' + name_settings + '.pdf',
                    dpi=400)

    plt.show()
Ejemplo n.º 3
0
def plot_avggene_end(inputs, paths_in, paths_out, settings):
    """Show average-gene profiles and heatmaps from the ``*_end`` pickles.

    For every name in ``inputs['files']`` the pickles ``avg_start_all_end``,
    ``avg_stop_all_end``, ``avg_start_HM_end`` and ``avg_stop_HM_end`` are
    read from ``<path_analysis><name>/``.  A 2x2 figure is drawn per sample
    (line plots on top, heatmaps below) and the profile tables are exported
    as ``start_all.csv`` / ``stop_all.csv`` in the same directory.  Figures
    are only shown, not saved.

    Args:
        inputs: Mapping providing ``'files'``, an iterable of sample-name
            strings.
        paths_in: Unused; kept for interface compatibility.
        paths_out: Mapping providing the ``'path_analysis'`` string prefix.
        settings: Mapping providing ``'shift'`` (read but not used here) and
            ``'HM_max'`` (upper colour limit of the heatmaps).
    """
    sample_names = inputs['files']
    # Not used below; the lookup is kept so a missing key still raises.
    shift = settings['shift']
    heat_ceiling = settings['HM_max']

    for sample in sample_names:
        plt.figure(figsize=(20, 5))
        sample_dir = paths_out['path_analysis'] + sample + '/'

        raw_start = ribo_util.unPickle(sample_dir + 'avg_start_all_end')
        raw_stop = ribo_util.unPickle(sample_dir + 'avg_stop_all_end')
        raw_start_heat = ribo_util.unPickle(sample_dir + 'avg_start_HM_end')
        raw_stop_heat = ribo_util.unPickle(sample_dir + 'avg_stop_HM_end')

        # Right edge of the line plots: one unit per stored entry.
        x_limit = len(raw_start.keys())

        start_profile = ribo_util.dict_to_df(raw_start, 'Position', 'Reads')
        stop_profile = ribo_util.dict_to_df(raw_stop, 'Position', 'Reads')
        start_heat = ribo_util.heatmapdict_to_df(
            raw_start_heat, 'Length', 'Position', 'composition')
        stop_heat = ribo_util.heatmapdict_to_df(
            raw_stop_heat, 'Length', 'Position', 'composition')

        start_profile.to_csv(sample_dir + 'start_all.csv')
        stop_profile.to_csv(sample_dir + 'stop_all.csv')

        # Both line plots share a y-axis scaled to the taller profile.
        peak_start = start_profile["Reads"].max()
        peak_stop = stop_profile["Reads"].max()
        y_limit = peak_start if peak_start > peak_stop else peak_stop

        # Panels 1-2: start and stop profiles.
        line_panels = (('Start', start_profile), ('Stop', stop_profile))
        for slot, (label, profile) in enumerate(line_panels, start=1):
            plt.subplot(2, 2, slot)
            plt.plot(
                profile,
                sns.xkcd_rgb["dark grey"],
            )
            plt.title(sample + ' ' + label)
            plt.ylabel("Reads")
            plt.ylim(0, y_limit)
            plt.xlim(0, x_limit)
            sns.despine()

        # Panels 3-4: start and stop heatmaps.
        for slot, heat in enumerate((start_heat, stop_heat), start=3):
            plt.subplot(2, 2, slot)
            axes = sns.heatmap(heat,
                               cmap="ocean_r",
                               vmin=0,
                               vmax=heat_ceiling,
                               cbar=False)

            # Hide all tick labels, then re-enable every 10th (x) / 4th (y).
            for fetch_labels, stride in ((axes.get_xticklabels, 10),
                                         (axes.get_yticklabels, 4)):
                plt.setp(fetch_labels(), visible=False)
                plt.setp(fetch_labels()[0::stride], visible=True)

    plt.show()