Beispiel #1
0
def saveFig(fig_fileprefix, bbox=True):
    """Save the current pyplot figure into the plot directory.

    One file ``<getPlotDir()>/<fig_fileprefix>.<ext>`` is written for each of
    the extensions ``svg`` and ``png`` selected by the module-level
    ``FIG_TYPE`` (the value ``'both'`` selects both of them).

    Args:
        fig_fileprefix: File name without extension, relative to the
            directory returned by ``getPlotDir()``.
        bbox: When true, ``bbox_inches='tight'`` is passed to ``savefig``.
    """
    PL.rcParams['svg.fonttype'] = 'none'
    extra_kwargs = {'bbox_inches': 'tight'} if bbox else {}
    for ftype in ('svg', 'png'):
        if FIG_TYPE != 'both' and FIG_TYPE != ftype:
            continue
        target = getPlotDir() + '/' + fig_fileprefix + '.' + ftype
        PL.savefig(target, **extra_kwargs)
def plotPercScatterAnalysis(data, label='test', y_axis='Percent Non-Null Reads',
                            plot_scatters=False, plot_regr_lines=False,
                            scatter_mh_lens=(), mh_lens=(9,)):
    """Fit a linear trend of ``y_axis`` against ``'MH Dist'`` per MH length.

    For each length in ``mh_lens`` the rows of ``data`` whose ``'MH Len'``
    equals that length are selected, restricted to
    ``0 <= 'MH Dist' < 30 - mh_len``, and a least-squares line plus the
    Pearson correlation are computed on that subset.

    Args:
        data: pandas DataFrame with the columns ``'MH Len'``, ``'MH Dist'``
            and ``y_axis``.
        label: Title of the combined regression figure; the part after the
            last ``'/'`` is also used in the output file names.
        y_axis: Name of the column used as the dependent variable.
        plot_scatters: When true, draw and save one scatter figure for every
            length that is also listed in ``scatter_mh_lens``.
        plot_regr_lines: When true, draw and save one figure holding the
            fitted lines of all lengths up to 15.
        scatter_mh_lens: Lengths for which a scatter figure is wanted.
        mh_lens: Lengths to analyse.

    Returns:
        dict mapping each length to ``(x_pts, y_pts, r)``: ``x_pts`` is
        ``[min, max]`` of ``'MH Dist'`` in the fitted subset, ``y_pts`` holds
        the fitted line evaluated at those two distances, and ``r`` is the
        Pearson correlation coefficient.
    """
    x_axis_label = 'Distance between nearest ends of\nmicrohomologous sequences'
    y_axis_label = 'Percent of mutated reads of corresponding\nMH-mediated deletion'

    regr_lines = {}
    for mh_len in mh_lens:
        mh_data = data.loc[data['MH Len'] == mh_len]
        in_range = (mh_data['MH Dist'] >= 0) & (mh_data['MH Dist'] < (30 - mh_len))
        mh_rdata = mh_data.loc[in_range]

        dists, values = mh_rdata['MH Dist'], mh_rdata[y_axis]
        regr = linear_model.LinearRegression()
        # Fit on plain (n_samples, 1) arrays so that predicting from a nested
        # list below does not clash with DataFrame feature names.
        regr.fit(mh_rdata[['MH Dist']].values, mh_rdata[[y_axis]].values)
        # pearsonr is documented for 1-D samples; single columns (Series)
        # give a scalar coefficient on every SciPy version.
        corr = scipy.stats.pearsonr(dists, values)[0]
        x_pts = [dists.min(), dists.max()]
        # predict() requires a 2-D (n_samples, n_features) input, hence [[x]].
        y_pts = [regr.predict([[x]])[0] for x in x_pts]
        regr_lines[mh_len] = (x_pts, y_pts, corr)

        if plot_scatters and mh_len in scatter_mh_lens:
            PL.figure(figsize=(5, 5))
            # The scatter shows every row of this length, not only the
            # distance-restricted subset the line was fitted on.
            PL.plot(mh_data['MH Dist'], mh_data[y_axis], '.', alpha=0.4)
            PL.plot(x_pts, y_pts, 'dodgerblue', linewidth=3)

            PL.xlabel(x_axis_label, fontsize=14)
            PL.ylabel(y_axis_label, fontsize=14)
            PL.tick_params(labelsize=14)
            PL.xlim((0, 20))
            PL.title('Microhomology of length %d (r=%.2f)' % (mh_len, corr), fontsize=14)
            PL.show(block=False)
            saveFig('mh_scatter_len%d_%s' % (mh_len, label.split('/')[-1]))

    if plot_regr_lines:
        PL.figure()
        for mh_len in mh_lens:
            if mh_len > 15:
                continue
            x_pts, y_pts, corr = regr_lines[mh_len]
            lsty = '--' if mh_len < 9 else '-'
            PL.plot(x_pts, y_pts, linewidth=2, linestyle=lsty,
                    label='MH length %d (R=%.1f)' % (mh_len, corr))
        PL.title(label, fontsize=18)
        PL.xlabel(x_axis_label, fontsize=14)
        PL.ylabel(y_axis_label, fontsize=14)

        PL.tick_params(labelsize=18)
        PL.legend()
        PL.ylim((0, 100))
        PL.show(block=False)
        # saveFig prepends the plot directory itself, so only the bare file
        # prefix is passed (passing the directory too would double the path).
        saveFig('mh_scatter_all_len_%s' % label.split('/')[-1])
    return regr_lines
Beispiel #3
0
def plotVerticalHistSummary(all_result_outputs,
                            label='',
                            data_label='',
                            y_label='',
                            plot_label='',
                            hist_width=1000,
                            hist_bins=100,
                            oligo_id_str='Oligo ID',
                            val_str='Cut Rate',
                            total_reads_str='Total Reads'):
    """Plot side-by-side horizontal histograms of one value per sample.

    The per-sample tables are inner-joined on ``oligo_id_str`` so that every
    histogram covers the same set of rows, then one histogram of ``val_str``
    is drawn per sample, shifted by ``hist_width`` along the x axis. The
    figure is saved as ``<plot dir>/<plot_label>_<label>.png`` with spaces in
    ``label`` replaced by underscores.

    Args:
        all_result_outputs: Sequence of items ``x`` where
            ``x[0][data_label][0]`` is a pandas DataFrame and ``x[1]`` is
            passed to ``shortDirLabel`` to obtain the sample name.
        label: Figure title, also used in the output file name.
        data_label: Key selecting the table within each result.
        y_label: Label of the y axis.
        plot_label: Prefix of the output file name.
        hist_width: Horizontal offset between consecutive histograms.
        hist_bins: Number of bins of each histogram.
        oligo_id_str: Name of the join column.
        val_str: Name of the column that is histogrammed.
        total_reads_str: Not used by this function.
    """
    datas = [x[0][data_label][0] for x in all_result_outputs]
    sample_names = [shortDirLabel(x[1]) for x in all_result_outputs]

    # Fold the join over all tables instead of hard-wiring the first two, so
    # a single sample works too. Columns of the first table keep their
    # names; those of the k-th table (k >= 2) get the suffix ' k'.
    merged_data = datas[0]
    for sample_no, data in enumerate(datas[1:], start=2):
        merged_data = pd.merge(merged_data,
                               data,
                               how='inner',
                               on=oligo_id_str,
                               suffixes=['', ' %d' % sample_no])

    def column_suffix(i):
        """Return the merge suffix of the i-th (0-based) sample's columns."""
        return ' %d' % (i + 1) if i > 0 else ''

    xpos = [x * hist_width for x in range(len(sample_names))]

    PL.figure(figsize=(12, 8))
    for i, offset in enumerate(xpos):
        PL.hist(merged_data[val_str + column_suffix(i)],
                bins=hist_bins,
                bottom=offset,
                orientation='horizontal')
    PL.xticks(xpos, sample_names, rotation='vertical')
    PL.ylabel(y_label)
    PL.title(label)
    PL.show(block=False)
    PL.savefig(getPlotDir() + '/%s_%s.png' %
               (plot_label, label.replace(' ', '_')),
               bbox_inches='tight')
Beispiel #4
0
def plotBoxPlotSummary(all_result_outputs,
                       label='',
                       data_label='',
                       y_label='',
                       plot_label=''):
    """Draw one box plot per sample and save the figure as a PNG.

    Args:
        all_result_outputs: Sequence of items ``x`` where
            ``x[0][data_label]`` is indexable: element ``0`` exposes
            ``.values`` (the data to plot) and element ``1`` is an integer
            shown in parentheses after the sample name; ``x[1]`` is passed
            to ``shortDirLabel`` to obtain the sample name.
        label: Figure title; its sanitized form is part of the file name.
        data_label: Key selecting the data within each result.
        y_label: Label of the y axis.
        plot_label: Prefix of the output file name.
    """
    box_data = []
    for output in all_result_outputs:
        box_data.append(output[0][data_label][0].values)

    tick_labels = []
    for output in all_result_outputs:
        count = output[0][data_label][1]
        tick_labels.append(shortDirLabel(output[1]) + ' (%d)' % count)

    PL.figure(figsize=(12, 8))
    # One boxplot call per sample, each placed at its own x position.
    for position, values in enumerate(box_data):
        PL.boxplot([values],
                   positions=[position],
                   showfliers=True,
                   sym='.',
                   widths=0.8)
    PL.xticks(range(len(tick_labels)), tick_labels, rotation='vertical')
    PL.ylabel(y_label)
    PL.title(label)
    PL.show(block=False)

    plot_dir = getPlotDir()
    file_name = '/%s_%s.png' % (plot_label, sanitizeLabel(label))
    PL.savefig(plot_dir + file_name, bbox_inches='tight')
Beispiel #5
0
def plotCorrelations(all_result_outputs,
                     label='',
                     data_label='',
                     y_label='',
                     plot_label='',
                     plot_scatters=False,
                     oligo_id_str='Oligo ID',
                     val_str='Cut Rate',
                     total_reads_str='Total Reads',
                     scatter_samples=None,
                     sdims=(0, 0),
                     scatter_fig=None,
                     add_leg=True):
    """Plot pairwise correlations of one value between samples.

    The per-sample tables are inner-joined on ``oligo_id_str``; Pearson and
    Spearman correlations of ``val_str`` are then computed for every ordered
    pair of samples. With ``plot_scatters`` false the two correlation
    matrices are drawn as heat maps and saved as
    ``<plot dir>/<plot_label>_<sanitized label>.png``. With ``plot_scatters``
    true, scatter subplots are drawn instead and saved through
    ``saveFig(plot_label)``.

    Args:
        all_result_outputs: Sequence of items ``x`` where
            ``x[0][data_label][0]`` is a pandas DataFrame and ``x[1]`` is
            passed to ``shortDirLabel`` to obtain the sample name.
        label: Used (sanitized) in the heat-map file name.
        data_label: Key selecting the table within each result.
        y_label: Text used in the subplot / heat-map titles.
        plot_label: Output file prefix.
        plot_scatters: Selects scatter subplots instead of heat maps.
        oligo_id_str: Name of the join column.
        val_str: Name of the column that is correlated.
        total_reads_str: Name of the read-count column used for the
            thresholded scatter series.
        scatter_samples: Optional dict mapping ``(name1, name2)`` to a
            subplot index; when empty or ``None`` every pair is plotted on
            an ``N x N`` grid.
        sdims: Subplot grid ``(rows, cols)`` used when ``scatter_samples``
            is non-empty.
        scatter_fig: Existing figure to draw the scatters into; a new figure
            is created when ``None``.
        add_leg: Whether to add a legend to each scatter subplot.
    """
    if scatter_samples is None:
        scatter_samples = {}

    datas = [x[0][data_label][0] for x in all_result_outputs]
    sample_names = [shortDirLabel(x[1]) for x in all_result_outputs]

    # Fold the join over all tables instead of hard-wiring the first two, so
    # a single sample works too. Columns of the first table keep their
    # names; those of the k-th table (k >= 2) get the suffix ' k'.
    merged_data = datas[0]
    for sample_no, data in enumerate(datas[1:], start=2):
        merged_data = pd.merge(merged_data,
                               data,
                               how='inner',
                               on=oligo_id_str,
                               suffixes=['', ' %d' % sample_no])

    def suffix(i):
        """Return the merge suffix of the i-th (0-based) sample's columns."""
        return ' %d' % (i + 1) if i > 0 else ''

    N = len(sample_names)
    if plot_scatters:
        if scatter_fig is None:
            PL.figure()
        else:
            PL.figure(scatter_fig.number)
        s_dims = (N, N) if not scatter_samples else sdims

    pcorrs, scorrs = np.zeros((N, N)), np.zeros((N, N))
    for i, label1 in enumerate(sample_names):
        dvs1 = merged_data[val_str + suffix(i)]
        for j, label2 in enumerate(sample_names):
            dvs2 = merged_data[val_str + suffix(j)]
            pcorrs[i, j] = pearsonr(dvs1, dvs2)[0]
            scorrs[i, j] = spearmanr(dvs1, dvs2)[0]
            if not plot_scatters:
                continue

            if (label1, label2) in scatter_samples:
                idx = scatter_samples[(label1, label2)]
            elif not scatter_samples:
                idx = i * N + j + 1
            else:
                continue

            PL.subplot(s_dims[0], s_dims[1], idx)
            trs1 = merged_data[total_reads_str + suffix(i)].values
            trs2 = merged_data[total_reads_str + suffix(j)].values
            for thr in [20, 50, 100, 500, 1000]:
                # Keep rows where both samples have at least `thr` reads.
                keep = (trs1 >= thr) & (trs2 >= thr)
                xs, ys = dvs1.values[keep], dvs2.values[keep]
                # Overwrites the all-rows Pearson value; after the loop the
                # entry holds the value for the highest threshold, which is
                # what the subplot title reports.
                pcorrs[i, j] = pearsonr(xs, ys)[0]
                PL.plot(xs, ys, '.', label='>%d Reads' % thr)

            PL.plot([0, 100], [0, 100], 'k--')
            # NOTE(review): axis labels are fixed strings, independent of
            # label1/label2 - confirm this is intended for all callers.
            PL.xlabel('K562 Replicate A')
            PL.ylabel('K562 Replicate B')
            if add_leg:
                PL.legend()
            PL.title('%s (%.2f)' % (y_label, pcorrs[i, j]))

    if plot_scatters:
        PL.subplots_adjust(left=0.05,
                           right=0.95,
                           top=0.9,
                           bottom=0.1,
                           hspace=0.4)
        PL.show(block=False)
        saveFig(plot_label)
    else:
        PL.figure()
        panels = [(1, pcorrs, 'Pearson'), (2, scorrs, 'Spearman')]
        for panel_no, matrix, corr_name in panels:
            PL.subplot(1, 2, panel_no)
            PL.imshow(matrix,
                      cmap='hot',
                      vmin=0.0,
                      vmax=1.0,
                      interpolation='nearest')
            PL.xticks(range(N), sample_names, rotation='vertical')
            PL.yticks(range(N), sample_names)
            PL.title(y_label + ': ' + corr_name)
            PL.colorbar()
        PL.show(block=False)
        PL.savefig(getPlotDir() + '/%s_%s.png' %
                   (plot_label, sanitizeLabel(label)),
                   bbox_inches='tight')