def saveFig(fig_fileprefix, bbox=True):
    """Save the current figure into the plot directory in the configured format(s).

    Writes ``<plot dir>/<fig_fileprefix>.<ext>`` for each of ``svg`` and ``png``
    selected by the module-level ``FIG_TYPE``: ``'both'`` selects both
    extensions, otherwise only the extension equal to ``FIG_TYPE`` is written.

    Args:
        fig_fileprefix: File name without extension, relative to the directory
            returned by ``getPlotDir()``.
        bbox: When true, ``bbox_inches='tight'`` is passed to ``PL.savefig``.
    """
    # 'none' asks matplotlib to keep SVG text as text rather than outlines.
    PL.rcParams['svg.fonttype'] = 'none'
    save_kwargs = {'bbox_inches': 'tight'} if bbox else {}
    for extension in ('svg', 'png'):
        if FIG_TYPE not in ('both', extension):
            continue
        PL.savefig(getPlotDir() + '/' + fig_fileprefix + '.' + extension, **save_kwargs)
def plotPercScatterAnalysis(data, label='test', y_axis='Percent Non-Null Reads', plot_scatters=False,
                            plot_regr_lines=False, scatter_mh_lens=[], mh_lens=[9]):
    """Fit, and optionally plot, linear trends of ``y_axis`` against MH distance.

    For every microhomology length in ``mh_lens`` the rows of ``data`` with
    that ``'MH Len'`` are selected. A linear regression of ``y_axis`` on
    ``'MH Dist'`` is fitted on the rows with ``0 <= 'MH Dist' < 30 - mh_len``
    and the Pearson correlation is computed on the same rows.

    Args:
        data: DataFrame with the columns ``'MH Len'``, ``'MH Dist'`` and
            ``y_axis``.
        label: Title of the combined regression plot; the part after the last
            ``'/'`` is appended to the output file names.
        y_axis: Name of the response column.
        plot_scatters: When true, draw (and save) a scatter plot with the
            fitted line for each length that is also in ``scatter_mh_lens``.
        plot_regr_lines: When true, draw (and save) one figure with the fitted
            lines of all lengths not greater than 15.
        scatter_mh_lens: Lengths for which scatter plots are drawn. Only read.
        mh_lens: Lengths to analyse. Only read.

    Returns:
        dict mapping each length to ``(x_pts, y_pts, r)``: ``x_pts`` is
        ``[min, max]`` of the fitted ``'MH Dist'`` values, ``y_pts`` the
        regression predictions at those two points and ``r`` element 0 of the
        ``scipy.stats.pearsonr`` result.
    """
    regr_lines = {}
    for mh_len in mh_lens:
        mh_data = data.loc[data['MH Len'] == mh_len]
        # Only distances in [0, 30 - mh_len) enter the fit; the scatter plot
        # below still shows every row of this length.
        in_fit_range = (mh_data['MH Dist'] >= 0) & (mh_data['MH Dist'] < (30 - mh_len))
        mh_rdata = mh_data.loc[in_fit_range]

        regr = linear_model.LinearRegression()
        rx, ry = mh_rdata[['MH Dist']], mh_rdata[[y_axis]]
        regr.fit(rx, ry)
        corr = scipy.stats.pearsonr(rx, ry)

        # rx.min()/rx.max() are one-element Series labelled 'MH Dist'; .iloc
        # makes the positional access explicit instead of relying on the
        # integer-key fallback of Series.__getitem__.
        min_x, max_x = rx.min().iloc[0], rx.max().iloc[0]
        x_pts = [min_x, max_x]
        # NOTE(review): predict() receives a bare scalar and pearsonr() above
        # receives single-column DataFrames; both depend on the installed
        # scikit-learn / SciPy accepting such input - confirm against the
        # pinned versions.
        y_pts = [regr.predict(x)[0] for x in x_pts]
        regr_lines[mh_len] = (x_pts, y_pts, corr[0])

        if plot_scatters and mh_len in scatter_mh_lens:
            PL.figure(figsize=(5, 5))
            PL.plot(mh_data['MH Dist'], mh_data[y_axis], '.', alpha=0.4)
            PL.plot(x_pts, y_pts, 'dodgerblue', linewidth=3)
            PL.xlabel('Distance between nearest ends of\nmicrohomologous sequences', fontsize=14)
            PL.ylabel('Percent of mutated reads of corresponding\nMH-mediated deletion', fontsize=14)
            PL.tick_params(labelsize=14)
            PL.xlim((0, 20))
            PL.title('Microhomology of length %d (r=%.2f)' % (mh_len, corr[0]), fontsize=14)
            PL.show(block=False)
            saveFig('mh_scatter_len%d_%s' % (mh_len, label.split('/')[-1]))

    if plot_regr_lines:
        PL.figure()
        for mh_len in mh_lens:
            if mh_len > 15:
                continue
            x_pts, y_pts, r_value = regr_lines[mh_len]
            # Short microhomologies (< 9) are drawn dashed, longer ones solid.
            lsty = '--' if mh_len < 9 else '-'
            PL.plot(x_pts, y_pts, linewidth=2, linestyle=lsty,
                    label='MH length %d (R=%.1f)' % (mh_len, r_value))
        PL.title(label, fontsize=18)
        PL.xlabel('Distance between nearest ends of\nmicrohomologous sequences', fontsize=14)
        PL.ylabel('Percent of mutated reads of corresponding\nMH-mediated deletion', fontsize=14)
        PL.tick_params(labelsize=18)
        PL.legend()
        PL.ylim((0, 100))
        PL.show(block=False)
        # saveFig() prepends the plot directory itself, so only the bare file
        # prefix is passed; passing the directory as well duplicated it in the
        # output path.
        saveFig('mh_scatter_all_len_%s' % label.split('/')[-1])
    return regr_lines
def plotVerticalHistSummary(all_result_outputs, label='', data_label='', y_label='', plot_label='',
                            hist_width=1000, hist_bins=100, oligo_id_str='Oligo ID', val_str='Cut Rate',
                            total_reads_str='Total Reads'):
    """Draw one horizontal-bar histogram per sample, side by side, and save it as PNG.

    The per-sample tables are inner-joined on ``oligo_id_str`` so that every
    histogram is built from the same set of rows. The histogram of sample
    ``i`` is offset by ``i * hist_width`` along the x axis and the x ticks at
    these offsets carry the sample names.

    Args:
        all_result_outputs: Sequence of items where ``item[0][data_label][0]``
            is the sample's DataFrame and ``item[1]`` is passed to
            ``shortDirLabel`` to obtain the sample name. At least two items
            are required by the initial merge.
        label: Figure title; also used (spaces replaced by ``'_'``) in the
            output file name.
        data_label: Key selecting the table within each result.
        y_label: Label of the y axis.
        plot_label: Prefix of the output file name.
        hist_width: Horizontal offset between consecutive histograms.
        hist_bins: ``bins`` argument forwarded to ``PL.hist``.
        oligo_id_str: Name of the join column.
        val_str: Name of the value column that is histogrammed.
        total_reads_str: Not used by this function.
    """
    frames = [entry[0][data_label][0] for entry in all_result_outputs]
    sample_names = [shortDirLabel(entry[1]) for entry in all_result_outputs]

    # Columns of the first sample keep their names; clashing columns of the
    # k-th sample (k >= 2) receive the suffix ' k'.
    merged = pd.merge(frames[0], frames[1], how='inner', on=oligo_id_str, suffixes=['', ' 2'])
    for sample_no, frame in enumerate(frames[2:], start=3):
        merged = pd.merge(merged, frame, how='inner', on=oligo_id_str, suffixes=['', ' %d' % sample_no])

    def column_suffix(sample_idx):
        """Return the merge suffix of the zero-based sample index."""
        if sample_idx > 0:
            return ' %d' % (sample_idx + 1)
        return ''

    offsets = [idx * hist_width for idx in range(len(sample_names))]
    PL.figure(figsize=(12, 8))
    for idx, offset in enumerate(offsets):
        values = merged[val_str + column_suffix(idx)]
        PL.hist(values, bins=hist_bins, bottom=offset, orientation='horizontal')
    PL.xticks(offsets, sample_names, rotation='vertical')
    PL.ylabel(y_label)
    PL.title(label)
    PL.show(block=False)

    plot_dir = getPlotDir()
    file_name = '/%s_%s.png' % (plot_label, label.replace(' ', '_'))
    PL.savefig(plot_dir + file_name, bbox_inches='tight')
def plotBoxPlotSummary(all_result_outputs, label='', data_label='', y_label='', plot_label=''):
    """Draw one box plot per sample and save the figure as PNG.

    Args:
        all_result_outputs: Sequence of items where
            ``item[0][data_label][0].values`` holds the sample's values,
            ``item[0][data_label][1]`` is a number shown in brackets after the
            sample name, and ``item[1]`` is passed to ``shortDirLabel`` to
            obtain that name.
        label: Figure title; its ``sanitizeLabel`` form is used in the output
            file name.
        data_label: Key selecting the data within each result.
        y_label: Label of the y axis.
        plot_label: Prefix of the output file name.
    """
    per_sample_values = [entry[0][data_label][0].values for entry in all_result_outputs]
    tick_labels = [
        shortDirLabel(entry[1]) + ' (%d)' % entry[0][data_label][1]
        for entry in all_result_outputs
    ]

    PL.figure(figsize=(12, 8))
    # Each sample gets its own boxplot call, placed at its list index.
    for position, values in enumerate(per_sample_values):
        PL.boxplot([values], positions=[position], showfliers=True, sym='.', widths=0.8)
    PL.xticks(range(len(tick_labels)), tick_labels, rotation='vertical')
    PL.ylabel(y_label)
    PL.title(label)
    PL.show(block=False)

    plot_dir = getPlotDir()
    file_name = '/%s_%s.png' % (plot_label, sanitizeLabel(label))
    PL.savefig(plot_dir + file_name, bbox_inches='tight')
def plotCorrelations(all_result_outputs, label='', data_label='', y_label='', plot_label='', plot_scatters=False,
                     oligo_id_str='Oligo ID', val_str='Cut Rate', total_reads_str='Total Reads',
                     scatter_samples={}, sdims=(0, 0), scatter_fig=None, add_leg=True):
    """Compare samples pairwise, as scatter plots or as correlation heatmaps.

    The per-sample tables are inner-joined on ``oligo_id_str``; Pearson and
    Spearman correlations of ``val_str`` are then computed for every ordered
    pair of samples.

    * ``plot_scatters`` true: each selected pair gets a subplot in which the
      points are plotted once per read threshold (20, 50, 100, 500, 1000),
      keeping only rows whose ``total_reads_str`` reaches the threshold in
      both samples. The figure is saved through ``saveFig(plot_label)``.
    * ``plot_scatters`` false: the Pearson and Spearman matrices are shown as
      two heatmaps and saved as
      ``<plot dir>/<plot_label>_<sanitizeLabel(label)>.png``.

    Args:
        all_result_outputs: Sequence of items where ``item[0][data_label][0]``
            is the sample's DataFrame and ``item[1]`` is passed to
            ``shortDirLabel`` to obtain the sample name. At least two items
            are required by the initial merge.
        label: Used only for the heatmap file name.
        data_label: Key selecting the table within each result.
        y_label: Text used in the subplot / heatmap titles.
        plot_label: File name prefix of the saved figure.
        plot_scatters: Selects scatter mode (true) or heatmap mode (false).
        oligo_id_str: Name of the join column.
        val_str: Name of the compared value column.
        total_reads_str: Name of the read-count column used for thresholding.
        scatter_samples: Maps ``(name_a, name_b)`` to a subplot index; pairs
            that are not listed are skipped. When empty, every pair is
            plotted on an N x N grid. Only read.
        sdims: ``(rows, cols)`` of the subplot grid when ``scatter_samples``
            is non-empty.
        scatter_fig: Existing figure to draw the scatters into; a new figure
            is created when ``None``.
        add_leg: When true, a legend is added to every scatter subplot.
    """
    frames = [entry[0][data_label][0] for entry in all_result_outputs]
    sample_names = [shortDirLabel(entry[1]) for entry in all_result_outputs]

    # Columns of the first sample keep their names; clashing columns of the
    # k-th sample (k >= 2) receive the suffix ' k'.
    merged = pd.merge(frames[0], frames[1], how='inner', on=oligo_id_str, suffixes=['', ' 2'])
    for sample_no, frame in enumerate(frames[2:], start=3):
        merged = pd.merge(merged, frame, how='inner', on=oligo_id_str, suffixes=['', ' %d' % sample_no])

    def column_suffix(sample_idx):
        """Return the merge suffix of the zero-based sample index."""
        if sample_idx > 0:
            return ' %d' % (sample_idx + 1)
        return ''

    n_samples = len(sample_names)
    if plot_scatters:
        if scatter_fig is None:
            PL.figure()
        else:
            PL.figure(scatter_fig.number)
        if len(scatter_samples) == 0:
            grid_dims = (n_samples, n_samples)
        else:
            grid_dims = sdims

    pcorrs = np.zeros((n_samples, n_samples))
    scorrs = np.zeros((n_samples, n_samples))
    for row, row_name in enumerate(sample_names):
        for col, col_name in enumerate(sample_names):
            row_vals = merged[val_str + column_suffix(row)]
            col_vals = merged[val_str + column_suffix(col)]
            oligo_ids = merged[oligo_id_str]
            pcorrs[row, col] = pearsonr(row_vals, col_vals)[0]
            scorrs[row, col] = spearmanr(row_vals, col_vals)[0]
            if not plot_scatters:
                continue

            pair = (row_name, col_name)
            if pair in scatter_samples:
                panel = scatter_samples[pair]
            elif len(scatter_samples) == 0:
                panel = row * n_samples + col + 1
            else:
                # An explicit selection was given and this pair is not in it.
                continue

            PL.subplot(grid_dims[0], grid_dims[1], panel)
            row_reads = merged[total_reads_str + column_suffix(row)]
            col_reads = merged[total_reads_str + column_suffix(col)]
            for min_reads in (20, 50, 100, 500, 1000):
                xs, ys = [], []
                for val_a, val_b, reads_a, reads_b, _oligo in zip(row_vals, col_vals, row_reads,
                                                                  col_reads, oligo_ids):
                    if reads_a >= min_reads and reads_b >= min_reads:
                        xs.append(val_a)
                        ys.append(val_b)
                # Overwrites the unthresholded value, so the title below shows
                # the correlation at the last (highest) threshold.
                pcorrs[row, col] = pearsonr(xs, ys)[0]
                PL.plot(xs, ys, '.', label='>%d Reads' % min_reads)
            PL.plot([0, 100], [0, 100], 'k--')
            PL.xlabel('K562 Replicate A')
            PL.ylabel('K562 Replicate B')
            if add_leg:
                PL.legend()
            PL.title('%s (%.2f)' % (y_label, pcorrs[row, col]))

    if plot_scatters:
        PL.subplots_adjust(left=0.05, right=0.95, top=0.9, bottom=0.1, hspace=0.4)
        PL.show(block=False)
        saveFig(plot_label)
    else:
        PL.figure()
        heatmaps = ((pcorrs, ': Pearson'), (scorrs, ': Spearman'))
        for panel, (matrix, title_suffix) in enumerate(heatmaps, start=1):
            PL.subplot(1, 2, panel)
            PL.imshow(matrix, cmap='hot', vmin=0.0, vmax=1.0, interpolation='nearest')
            PL.xticks(range(n_samples), sample_names, rotation='vertical')
            PL.yticks(range(n_samples), sample_names)
            PL.title(y_label + title_suffix)
            PL.colorbar()
        PL.show(block=False)
        plot_dir = getPlotDir()
        file_name = '/%s_%s.png' % (plot_label, sanitizeLabel(label))
        PL.savefig(plot_dir + file_name, bbox_inches='tight')