def run():
    """Drive the radiation / BRCAness indel-exposure figure.

    Loads the pickled treatment table, reads the SNV and indel exposure
    matrices of the SignatureAnalyzer extraction, derives the sample groups
    and hands them to ``do_plot``.
    """
    config_params()
    np.random.seed(12345)

    # Use a context manager so the gzip handle is closed after unpickling.
    # NOTE(review): pickle must only be used on trusted, locally produced files.
    with gzip.open(Conf['treatment_FDA_drug']) as handle:
        treated_samples_full = pickle.load(handle)

    base_path = 'data/hartwig/signatures/extraction/results/SignatureAnalyzer/'

    # SNV exposures of the Pan_full extraction (input of the HR-deficiency
    # selection below).
    exp_snv = load_exposures(
        base_path + 'snvs/exposures/Pan_full/Pan_full.exposures.tsv')

    # Indel exposures of the Pan extraction (the values that get plotted).
    exp = load_exposures(
        base_path + 'indels/exposures/Pan/Pan.exposures.tsv')

    treatment = 'RADIATION'
    # Passed to select_treated_not_treated together with the treatment;
    # presumably the drug classes that disqualify a sample from the
    # "untreated" group -- confirm against that helper.
    breaks = [
        'Unknown',
        'Topoisomerase Inhibitor',
        'Anthracycline Topoisomerase Inhibitor',
        'Alkylating Drug',
        'Platinum-based Drug',
        'Poly(ADP-Ribose) Polymerase Inhibitor',
        'Miscellanious',
        'Nucleoside Analog Antiviral',
        'TOPOII',
        'Nuclear therapy',
    ]

    samples_braca, samples_lowbraca = select_HRdeficient_samples(exp_snv)
    only_treated, not_breaked = select_treated_not_treated(
        treated_samples_full, treatment, breaks, exp)

    do_plot(samples_lowbraca, samples_braca, not_breaked, only_treated, exp)
def do_plot(exposures_pan, sig, MMR_notaffected_not_tzm_variants,
            MMR_affected_not_tzm_variants, MMR_notaffected_tzm_variants,
            MMR_affected_tzm_variants):
    """Strip plot of one signature's exposure across four sample groups.

    Args:
        exposures_pan: exposure table indexed by signature (rows) with one
            column per sample; it is sliced as ``exposures_pan[samples].loc[sig]``.
        sig: row label of the signature to plot.
        MMR_notaffected_not_tzm_variants: samples of group 0.
        MMR_affected_not_tzm_variants: samples of group 1.
        MMR_notaffected_tzm_variants: samples of group 2.
        MMR_affected_tzm_variants: samples of group 3.

    The figure is written to ``figures/TZM_treated.svg``.
    """
    config_params(6.5)
    fig, ax = plt.subplots(1, 1, figsize=(1.25, 1.5))

    sns.stripplot(
        data=[
            exposures_pan[MMR_notaffected_not_tzm_variants].loc[sig],
            exposures_pan[MMR_affected_not_tzm_variants].loc[sig],
            exposures_pan[MMR_notaffected_tzm_variants].loc[sig].tolist(),
            exposures_pan[MMR_affected_tzm_variants].loc[sig].tolist(),
        ],
        jitter=0.3,
        s=2,
        lw=0.5,
        color='#800080ff')

    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)

    plt.xticks(
        [0, 1, 2, 3],
        [
            # Group 0 is the *not affected* group: it used to carry the same
            # "MMR-def" text as group 1 (copy-paste slip).
            'TZM untreated, no MMR-def (n={})'.format(
                len(MMR_notaffected_not_tzm_variants)),
            'TZM untreated, MMR-def (n={})'.format(
                len(MMR_affected_not_tzm_variants)),
            'TZM treated, no MMR-def (n={})'.format(
                len(MMR_notaffected_tzm_variants)),
            'TZM treated, MMR-def (n={})'.format(
                len(MMR_affected_tzm_variants)),
        ],
        rotation=90)
    plt.ylabel('TMZ related SBS')

    plt.savefig('figures/TZM_treated.svg')
    plt.close()
def specific_treatments_bar(drug):
    """Stacked single-column bar of treated-sample counts for one drug class.

    Args:
        drug: key into the ``typeDrug`` pickle; its sub-keys are the specific
            treatments whose pan-cancer ``'YES'`` sample lists are counted.

    Writes ``figures/2A_<drug>_barplot.svg`` and ``.png`` where ``<drug>`` is
    the lower-cased name with spaces replaced by underscores.
    """
    config_params(5)

    # Context managers close the gzip handles once the pickles are read.
    # NOTE(review): pickle must only be used on trusted, locally produced files.
    with gzip.open('data/clinical_data/hartwig_typeDrug.pckl.gz') as handle:
        typeDrug = pickle.load(handle)
    with gzip.open(Conf['treatment_specific_drug']) as handle:
        samples_tract_specific_FDA = pickle.load(handle)

    dplot_treat = defaultdict(int)
    for k in typeDrug[drug].keys():
        dplot_treat[k] = len(samples_tract_specific_FDA[k]['Pan']['YES'])

    # Largest group at the bottom of the stack.
    sorted_keys = sorted(dplot_treat, key=dplot_treat.get, reverse=True)

    fig, ax = plt.subplots(1, 1, figsize=(0.3, 1))
    bottom = 0
    for treat in sorted_keys:
        n_treated = dplot_treat[treat]
        ax.bar(0, n_treated, width=1, bottom=bottom)
        # Count is written 150 data units below the top of its segment.
        ax.text(-0.1, n_treated + bottom - 150, str(n_treated))
        bottom += n_treated

    plt.xticks([0], fontsize=5)
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)

    file_stem = drug.lower().replace(' ', '_')
    plt.savefig('figures/2A_{}_barplot.svg'.format(file_stem))
    plt.savefig('figures/2A_{}_barplot.png'.format(file_stem), dpi=600)
    plt.close()
def plot_distribution_tissue_origin():
    """Horizontal bar chart of sample counts per primary tumour location.

    Reads the metadata table at ``Conf['path_metadata']``, normalises the
    ``primaryTumorLocation`` labels, and draws one bar for every location
    that has more than 20 samples and a colour in ``return_colors()``.
    Saves ``figures/1B.svg`` / ``figures/1B.png`` and shows the figure.
    """
    metadata_path = Conf['path_metadata']
    palette = return_colors()

    # read Hartwig metadata
    pat = pd.read_csv(metadata_path, sep='\t')
    pat['primaryTumorLocation'] = pat['primaryTumorLocation'].replace(
        'Bone/soft tissue', 'Bone/Soft tissue')

    # fix primary location: blanks and slashes become dashes, then a few
    # whole-value renames are applied one after the other
    fixed = pat['primaryTumorLocation'].apply(
        lambda raw: str(raw).replace(' ', '-').replace('/', '-'))
    renames = (
        ('Head-and-Neck', 'Head-and-neck'),
        ('nan', 'Unknown'),
        ('CUP', 'Unknown'),
        ('Net', 'NET'),
    )
    for old_name, new_name in renames:
        pat['primaryTumorLocation_fixed'] = fixed
        fixed = pat['primaryTumorLocation_fixed'].replace(old_name, new_name)
    pat['primaryTumorLocation_fixed'] = fixed

    counts = pat['primaryTumorLocation_fixed'].value_counts().to_dict()
    by_size = sorted(counts, key=counts.get, reverse=True)

    config_params(font_size=5.5)
    fig, ax = plt.subplots(1, 1, figsize=(2, 2.35))

    labels = []
    # Smallest cohort first so the largest ends up at the top of the axis.
    for location in reversed(by_size):
        n_samples = counts[location]
        if n_samples <= 20:
            continue
        if location not in palette:
            continue
        row = len(labels) + 1
        ax.barh(row, n_samples, color=palette[location])
        ax.text(n_samples + 40, row - 0.1, n_samples,
                verticalalignment='center')
        labels.append(location)

    plt.yticks(list(range(1, len(labels) + 1)), labels, rotation=0)

    ax.spines['right'].set_visible(False)
    ax.spines['bottom'].set_visible(False)
    plt.xlabel('Number of samples')
    ax.xaxis.tick_top()
    ax.xaxis.set_label_position('top')
    plt.tight_layout()
    ax.xaxis.set_tick_params(width=0.45)
    ax.yaxis.set_tick_params(width=0.45)

    plt.savefig('figures/1B.svg')
    plt.savefig('figures/1B.png', dpi=600)
    plt.show()
def plot_scatter_bars(expos, sig):
    """Per-tumour-type scatter and bar figures for 5-FU vs capecitabine.

    For 'Colon-Rectum' and 'Breast' two figures are produced: a log-scale
    scatter of the sorted values returned by ``get_items_to_plot`` for each
    drug, and a bar plot of exposed / total sample ratios.

    Args:
        expos: exposures, forwarded untouched to ``get_items_to_plot``.
        sig: signature identifier, forwarded to ``get_items_to_plot``.
    """
    # The first returned element (non-treated samples) is not used here.
    _, samples_FU, samples_capecitabine = get_treated_not_treated()

    fu_vals, fu_colors, fu_exposed, fu_total = get_items_to_plot(
        samples_FU, expos, sig)
    cape_vals, cape_colors, cape_exposed, cape_total = get_items_to_plot(
        samples_capecitabine, expos, sig)

    bar_color = '#2c89a0ff'
    scatter_series = ((fu_vals, fu_colors), (cape_vals, cape_colors))
    bar_series = ((fu_exposed, fu_total), (cape_exposed, cape_total))

    for ttype in ('Colon-Rectum', 'Breast'):
        # --- scatter of sorted values, one column per drug ---
        config_params(font_size=7)
        fig, ax = plt.subplots(1, 1, figsize=(0.5, 1.25))
        plt.yscale('log')
        ax.set_ylabel('SBS Capecitabine/5-FU')

        for column, (values, point_colors) in enumerate(scatter_series):
            positions = get_range_nums(values[ttype], column)
            ax.scatter(positions, sorted(values[ttype]),
                       color=point_colors[ttype], s=1, alpha=0.75)

        print(len(fu_vals[ttype]), len(cape_vals[ttype]), ttype)

        plt.xticks([0.5, 1.5], ['5-FU', 'Capecitabine'], rotation=90)
        for side in ('top', 'right'):
            ax.spines[side].set_visible(False)
        plt.ylim(0, 10000)
        plt.savefig('figures/sigmoid_treat_{}.svg'.format(ttype))
        plt.show()

        # --- fraction of samples with activity, one bar per drug ---
        fig, ax = plt.subplots(1, 1, figsize=(0.5, 0.4))
        for column, (exposed, total) in enumerate(bar_series):
            ax.bar(column, exposed[ttype] / total[ttype], color=bar_color)
        plt.ylim(0, 1)
        for side in ('top', 'right'):
            ax.spines[side].set_visible(False)
        ax.set_ylabel('Proportion of samples\nwith activity')
        plt.savefig('figures/bar_treat_{}.svg'.format(ttype))
        plt.show()
def do_volcano(significant_vetted, nonsignificant, significant_not_vetted,
               method_extraction):
    """Volcano plot of fold change against -log10 p-value.

    Each argument frame must provide the columns ``effect_size``, ``logp``
    and ``format_type`` (used as marker); ``significant_not_vetted`` also
    needs ``color``.

    Args:
        significant_vetted: rows drawn as small '#99d8c9' points.
        nonsignificant: rows drawn as small grey points.
        significant_not_vetted: rows drawn large, black-edged, in their own
            ``color``.
        method_extraction: sub-directory of ``figures/`` receiving
            ``volcano.svg`` and ``volcano.png``.
    """
    config_params()
    fig, ax = plt.subplots(1, 1, figsize=(4.2, 4))
    small = 3

    # Dashed guides at p = 0.001 and at fold change 2.
    guide_style = dict(linestyles="dashed", color='red', alpha=0.3)
    plt.hlines(-np.log10(0.001), 0, 10, **guide_style)
    plt.vlines(2, 0, 7, **guide_style)

    # Background layers first, highlighted points last so they stay on top.
    background_layers = (
        (significant_vetted, '#99d8c9'),
        (nonsignificant, 'grey'),
    )
    for frame, flat_color in background_layers:
        for _, record in frame.iterrows():
            plt.scatter(record['effect_size'], record['logp'],
                        c=flat_color, s=small, marker=record['format_type'])

    for _, record in significant_not_vetted.iterrows():
        plt.scatter(record['effect_size'], record['logp'],
                    c=record['color'], s=25, edgecolor='black',
                    linewidths=0.7, marker=record['format_type'])

    plt.xlim(0, 16)
    plt.ylim(0, 3.1)
    for side in ('right', 'top'):
        ax.spines[side].set_visible(False)
    plt.ylabel('statistical significance\n(-log10 pvalue)')
    plt.xlabel('treated-untreated fold change')
    plt.tight_layout()

    plt.savefig('figures/{}/volcano.svg'.format(method_extraction))
    plt.savefig('figures/{}/volcano.png'.format(method_extraction), dpi=600)
    plt.show()
def do_plot(d_box, dttype, outfile, signature_2_treatment):
    """Box plot plus jittered points of late/early fold changes per signature.

    Args:
        d_box: mapping signature -> list of fold-change values (boxes).
        dttype: mapping signature -> {tumour type -> list of values}; these
            values are the scattered points, coloured by tumour type.
        outfile: path handed to ``plt.savefig``.
        signature_2_treatment: container of signatures allowed in the plot.

    Only signatures with more than five values that also appear in
    ``signature_2_treatment`` are drawn, ordered by ascending median.
    """
    np.random.seed(12345)
    config_params(6)

    by_median = sorted(d_box, key=lambda name: np.median(d_box[name]))
    labels = [
        name for name in by_median
        if len(d_box[name]) > 5 and name in signature_2_treatment
    ]
    toplot = [sorted(d_box[name]) for name in labels]

    fig, ax = plt.subplots(1, 1, figsize=(3, 3))
    plt.yscale('log')
    color_ttype = return_colors()

    number_of_samples = []
    for position, signature in enumerate(labels):
        per_ttype = dttype[signature]
        points = [value for values in per_ttype.values() for value in values]
        point_colors = [
            color_ttype[ttype]
            for ttype, values in per_ttype.items()
            for _ in values
        ]
        # One independent jitter draw per point, in point order.
        xs = [
            position + np.random.uniform(-0.2, 0.2, 1)[0]
            for _ in points
        ]
        ax.scatter(xs, points, color=point_colors, s=1, alpha=0.75)
        number_of_samples.append(len(points))

    box = sns.boxplot(data=toplot, color='black', linewidth=0.6, ax=ax,
                      showfliers=False)
    for patch in box.artists:
        patch.set_facecolor('#d1d1d1ff')

    plt.ylabel('foldchange late/early')
    tick_text = [
        '{}\n{}'.format(name, n_points)
        for name, n_points in zip(labels, number_of_samples)
    ]
    ax.set_xticklabels(tick_text, rotation=90)
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)
    ax.set_ylim(0.05, 25)
    # Reference line at fold change 1 (no change).
    ax.hlines(1, 0, len(toplot), alpha=0.5)

    plt.tight_layout()
    plt.savefig(outfile)
def do_plot(list_val, norm=False):
    """Sorted-value scatter of one cohort with a median marker.

    Args:
        list_val: numeric values to plot; they are sorted and spread evenly
            over the x range [0, 1).
        norm: when exactly ``False`` the y axis is logarithmic with limits
            (10, 200000) and the x label shows the cohort size; when exactly
            ``True`` the y limits become (0, 0.8). The flag also selects the
            output name: ``figures/stjude_full.svg`` or
            ``figures/stjude_norm.svg``.
    """
    fig, axs = plt.subplots(1, 1, figsize=(0.1, 1.5))
    config_params()

    # Identity checks are intentional: only the real booleans toggle a mode.
    if norm is False:
        axs.set_xlabel('{} ({})'.format('Pediatric', len(list_val)),
                       rotation=90)
        axs.set_ylim(10, 200000)
        axs.set_yscale('log')

    n_values = len(list_val)
    range_nums = [num / n_values for num in range(n_values)]

    # Short grey segment marking the median, centred on the point cloud.
    x_center = np.median(range_nums)
    axs.hlines(np.median(list_val), x_center - 0.25, x_center + 0.45,
               color='grey', alpha=0.4)
    axs.scatter(range_nums, sorted(list_val), s=2, linewidth=0.2,
                color='#006400ff')

    axs.set_xlim(-0.2, 1.1)
    axs.spines['bottom'].set_visible(False)
    axs.spines['right'].set_visible(False)
    axs.set_xticklabels([])

    # The former `toplot > 0` branch could never run (toplot was always 0),
    # so the y label is always set.
    axs.set_ylabel('Number of SBS\nassociated to treatments')

    lab = 'full'
    if norm is True:
        axs.set_ylim(0, 0.8)
        lab = 'norm'

    axs.xaxis.set_label_position('top')
    axs.xaxis.set_ticks_position('none')
    axs.yaxis.set_ticks_position('none')
    plt.savefig('figures/stjude_{}.svg'.format(lab))
def analysis_two_timepoints(drug, signature_dic, exposures_path):
    """Plot drug-related exposure between two biopsies of the same patient.

    For every pre/post pair whose two samples both have exposures, a dashed
    line joins the pre-treatment exposure (day 0) to the post-treatment one
    (day of second biopsy); the stretch covered by the treatment is redrawn
    as a thick dark-red segment on that same line.

    Args:
        drug: key into ``signature_dic``; also used in the output file name.
        signature_dic: mapping drug -> exposure columns to sum.
        exposures_path: path forwarded to ``read_exposures``.

    Saves ``figures/<drug>.pre_post.svg``.
    """
    config_params()
    lower_drug = drug.lower().capitalize()

    df_treat, dup = get_patients_two_points()
    pre_post = get_pre_post(lower_drug, df_treat, dup)
    # The first two returned items are not needed for the figure.
    (_, _, samples_not_treated, samples_treated, space_between_biopsies,
     time_treated, start_treatment) = pre_post

    df_exp, samples_exposed = read_exposures(exposures_path)

    fig, ax = plt.subplots(1, 1, figsize=(2, 2))
    plt.ylabel('{}-related mutations'.format(lower_drug.capitalize()))
    plt.xlabel('Days')

    for ix in range(len(samples_not_treated)):
        pre_sample = samples_not_treated[ix]
        post_sample = samples_treated[ix]
        if pre_sample not in samples_exposed:
            continue
        if post_sample not in samples_exposed:
            continue

        columns = signature_dic[drug]
        exposure_pre = df_exp.loc[pre_sample][columns].sum()
        exposure_post = df_exp.loc[post_sample][columns].sum()
        days_between = space_between_biopsies[ix]

        plt.plot([0, days_between], [exposure_pre, exposure_post],
                 color='#d95f0e', ls='--')

        # Linear interpolation between the two biopsies gives the y values
        # at the start and end of the treatment window.
        slope = (exposure_post - exposure_pre) / days_between
        window_start = start_treatment[ix]
        window_end = start_treatment[ix] + time_treated[ix]
        plt.plot(
            [window_start, window_end],
            [exposure_pre + slope * window_start,
             exposure_pre + slope * window_end],
            color='darkred', lw=2)

        plt.scatter(days_between, exposure_post)

    plt.savefig('figures/{}.pre_post.svg'.format(drug))
    plt.close()
def do_plot(type_extraction, axis_replication, axis_transcription,
            selected_sigs, wanted_sigs):
    """Scatter of replication vs transcription strand asymmetry.

    Args:
        type_extraction: sub-directory of ``figures/`` for the output file.
        axis_replication: x values of the grey background points.
        axis_transcription: y values of the grey background points.
        selected_sigs: mapping signature -> (x, y) pair.
        wanted_sigs: signatures to highlight in orange; each is also printed
            together with its coordinates.
    """
    config_params(6.5)
    fig, ax = plt.subplots(1, 1, figsize=(1.85, 2.1))

    plt.scatter(axis_replication, axis_transcription, s=1, c='grey')

    for name in wanted_sigs:
        coords = selected_sigs[name]
        print(name, coords)
        x_val, y_val = coords
        plt.scatter(x_val, y_val, s=10, c='#ff6600ff')

    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)
    plt.ylabel('Transcription strand asymmetry')
    plt.xlabel('Replication strand asymmetry')

    plt.savefig('figures/{}/repli_trans_assym.svg'.format(type_extraction))
    plt.show()
def plot_bias_indel(d_clustered, d_unclustered, outpath, ttype, label1,
                    label2):
    """Plot, per signature, the indel profile under two conditions.

    For every column ``s`` of ``d_clustered`` two figures are written below
    ``<outpath>/processes/<ttype>/``:

    * ``<ttype>.<s>.png/.svg`` with four stacked panels: interleaved raw
      values of both conditions, per-channel difference (condition 1 minus
      condition 2), per-channel normalised sum, and per-category totals
      marked with ``*`` when ``poisson_exact`` returns a value below 0.05.
    * ``<ttype>.<s>.diagonal.png/.svg``: condition 1 against condition 2.

    Args:
        d_clustered: table of condition-1 values, one column per signature.
        d_unclustered: table of condition-2 values with the same columns.
        outpath: base output directory.
        ttype: tumour-type label used in the output path and file names.
        label1: display name of condition 1.
        label2: display name of condition 2.
    """
    config_params(3)

    order_plot = order_to_plot_indel()
    final_order = []
    for channel in order_plot:
        final_order.append('{}_{}'.format(channel, label1))
        final_order.append('{}_{}'.format(channel, label2))

    # Loop-invariant colour tables: one colour per indel category and the
    # number of channels in each category (83 channels in total).
    colors_individual = [
        '#fdbe6f', '#ff8002', '#b0dd8b', '#36a12e', '#fdcab5', '#fc8a6a',
        '#f14432', '#bc191a', '#d0e1f2', '#94c4df', '#4a98c9', '#1764ab',
        '#e1e1ef', '#b6b6d8', '#8683bd', '#62409b'
    ]
    sizes = [6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 1, 2, 3, 5]
    colors_ind = [
        color
        for color, n_channels in zip(colors_individual, sizes)
        for _ in range(n_channels)
    ]
    # Every channel appears twice in the interleaved panel.
    colors = [color for color in colors_ind for _ in range(2)]

    for s in d_clustered.columns:
        fig, axs = plt.subplots(nrows=4, ncols=1, figsize=(10, 4))

        # Interleave the two conditions: c1[0], c2[0], c1[1], c2[1], ...
        sig = []
        for lag, lead in zip(d_clustered[s], d_unclustered[s]):
            sig.append(lag)
            sig.append(lead)

        # Panel 0: raw interleaved values.
        x = list(range(len(colors)))
        axs[0].bar(x, sig, color=colors, width=0.8, linewidth=0)
        axs[0].set_xticks(x)
        axs[0].set_xticklabels(final_order, rotation=90, fontsize=2)
        axs[0].spines['top'].set_visible(False)
        axs[0].set_xlim(-1, 172)

        # Panel 3: totals per category and condition with significance mark.
        toplot_full = []
        colors_joinbar = []
        marks = []
        start = 0
        for ix, len_size in enumerate(sizes):
            chunk = sig[start:start + len_size * 2]
            start = start + len_size * 2
            c_condition1 = np.sum(chunk[::2])
            c_condition2 = np.sum(chunk[1::2])
            colors_joinbar.extend([colors_individual[ix]] * 2)
            toplot_full.extend([c_condition1, c_condition2])
            pval = poisson_exact(c_condition1, c_condition2)
            mark = '*' if pval < 0.05 else ''
            marks.extend([mark, mark])

        xjoin = list(range(len(toplot_full)))
        axs[3].bar(xjoin, toplot_full, color=colors_joinbar, width=0.5,
                   linewidth=0)
        axs[3].set_xticks(xjoin)
        for xpos, mark, val in zip(xjoin, marks, toplot_full):
            axs[3].text(xpos, val + 10, mark)

        # Panel 1: per-channel difference between the two conditions.
        vector = [lag - lead for lag, lead in zip(sig[0::2], sig[1::2])]
        x = list(range(len(colors_ind)))
        axs[1].bar(x, vector, color=colors_ind, width=0.5, linewidth=0)
        axs[1].set_xticks(x)
        axs[1].set_xticklabels(order_plot, rotation=90, fontsize=2)
        axs[1].set_xlim(-1, 86)
        axs[1].spines['top'].set_visible(False)

        # Panel 2: per-channel sum after normalising the whole vector to 1.
        sig = np.asarray(sig) / np.sum(sig)
        vector = [lag + lead for lag, lead in zip(sig[0::2], sig[1::2])]
        axs[2].bar(x, vector, color=colors_ind, width=0.75, linewidth=0)
        axs[2].set_xticks(x)
        axs[2].set_xticklabels(order_plot, rotation=90, fontsize=2)
        axs[2].set_xlim(-1, 86)
        axs[2].spines['top'].set_visible(False)

        axs[0].set_ylabel('NMF counts')
        axs[1].set_ylabel('{} - {}'.format(label1, label2))
        axs[2].set_ylabel('Relative Probability')

        # Shared cosmetics. Each panel greys its *own* tick lines (panel 3
        # used to grey panel 2's y tick lines instead of its own).
        for panel in axs:
            plt.setp([panel.get_xticklines(), panel.get_yticklines()],
                     color='grey')
            panel.xaxis.set_ticks_position('none')
            for side in ['top', 'bottom', 'left', 'right']:
                panel.spines[side].set_linewidth(0.2)
            panel.xaxis.set_tick_params(pad=0.5)
            panel.yaxis.set_tick_params(pad=0.5, width=0.5)

        plt.tight_layout()
        plt.savefig('{}/processes/{}/{}.{}.png'.format(
            outpath, ttype, ttype, s), dpi=300, bbox_inches='tight')
        plt.savefig('{}/processes/{}/{}.{}.svg'.format(
            outpath, ttype, ttype, s))
        plt.close()

        # Second figure: condition 1 vs condition 2 with identity line.
        fig, ax = plt.subplots(1, 1, figsize=(1, 1))
        max_clustered = np.max(d_clustered[s])
        max_unclustered = np.max(d_unclustered[s])
        total_max = np.max([max_clustered, max_unclustered])
        plt.plot([0, total_max], [0, total_max], lw=1, alpha=0.4)
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        plt.scatter(d_clustered[s], d_unclustered[s], s=5, c=colors_ind)
        plt.xlim(0, max_clustered + 10)
        plt.ylim(0, max_unclustered + 10)
        plt.xlabel(label1)
        plt.ylabel(label2)
        plt.tight_layout()
        plt.savefig('{}/processes/{}/{}.{}.diagonal.png'.format(
            outpath, ttype, ttype, s), dpi=300, bbox_inches='tight')
        plt.savefig('{}/processes/{}/{}.{}.diagonal.svg'.format(
            outpath, ttype, ttype, s))
        plt.close()
def sankey_plot_main():
    """Sankey diagram linking treatments (source) to tumour types (target).

    Builds a (target, source, value) table from the matrix returned by
    ``create_matrix_treatments_plot``, drops the 'Unknown' tumour type, the
    forbidden treatment columns and every treatment whose total is 30 or
    less, then renders it with HoloViews' matplotlib backend into
    ``figures/2A.svg`` and ``figures/2A.png``.
    """
    config_params(font_size=4)
    hv.extension('matplotlib')
    hv.output(fig='svg')

    forbidden = ['RADIATION', 'Miscellanious', 'Unknown', 'TopoII', 'TOPOII']
    out, dic_t = create_matrix_treatments_plot()

    order_ttypes = [
        'Breast', 'Colon-Rectum', 'Prostate', 'Lung', 'Skin',
        'Bone-Soft-tissue', 'Ovary', 'Esophagus', 'Urinary-tract', 'NET',
        'Kidney', 'Nervous-system', 'Biliary', 'Pancreas', 'Unknown',
        'Uterus', 'Head-and-neck', 'Liver', 'Stomach', 'Mesothelioma',
    ]

    # One row per (tumour type, allowed treatment) with the column total.
    all_rows = []
    for ttype in order_ttypes:
        subs = out.loc[dic_t[ttype]]
        all_rows.extend(
            (ttype, col, int(subs[col].sum()))
            for col in subs
            if col not in forbidden
        )

    matrix_df = pd.DataFrame(all_rows)
    matrix_df.columns = ['target', 'source', 'value']
    matrix_df = matrix_df[matrix_df['target'] != 'Unknown']
    matrix_df = matrix_df.fillna(0)
    matrix_df['value'] = matrix_df['value'].astype(int)

    # Keep only treatments given to more than 30 samples overall.
    totals = matrix_df.groupby(by='source')['value'].sum()
    good_source = {
        source for source, tot in totals.items()
        if tot > 30 and source != 'Unknown'
    }
    matrix_df = matrix_df[matrix_df['source'].isin(good_source)]

    ordered = matrix_df.sort_values(by='source', ascending=True)
    out = hv.Sankey(ordered, label='').opts(
        label_position='left',
        edge_color='target',
        node_color='index',
        cmap='Set1')

    fig = hv.render(out)
    fig.set_figwidth(10)
    fig.savefig('figures/2A.svg')
    fig.savefig('figures/2A.png', dpi=600)
def plot_heatmap_treatment(file):
    """Heat map of which samples received which treatments, by tumour type.

    Args:
        file: path of a gzip-compressed pickle mapping
            treatment -> tumour type -> {'YES': samples, 'NO': samples}.
            The part of its base name before the first dot becomes the
            suffix of the output file ``figures/EDF1_<name>.png``.

    Samples are grouped by tumour type (largest cohort first) and ordered
    inside each group with the column order reported by
    ``classic_mutual_exclusivity_visualization``. A narrow stacked bar left
    of the heat map encodes the tumour type of each block of rows.
    """
    outf = os.path.basename(file).split('.')[0]
    dic_primary_full, _ = return_metadata()
    color_ttype = return_colors()

    # Cohort sizes decide the vertical order of the tumour types.
    total_count = defaultdict(int)
    for t in dic_primary_full.values():
        total_count[t] += 1
    sorted_ttyps = sorted(total_count, key=total_count.get, reverse=True)

    # Context manager closes the gzip handle once the pickle is read.
    # NOTE(review): pickle must only be used on trusted, locally produced files.
    with gzip.open(file) as handle:
        treated = pickle.load(handle)

    forbidden = ['RADIATION', 'TOPOII']

    # sample -> treatment -> 1 (treated) / 0 (explicitly not treated)
    matrix_treated = defaultdict(lambda: defaultdict(int))
    for sample, t in dic_primary_full.items():
        for k, d in treated.items():
            if k in forbidden:
                continue
            if sample in d[t]['YES']:
                matrix_treated[sample][k] = 1
            elif sample in d[t]['NO']:
                matrix_treated[sample][k] = 0

    # Treatments ranked by the number of treated samples over all types.
    d_treatments = defaultdict(int)
    for k, d in treated.items():
        if k in forbidden:
            continue
        for per_ttype in d.values():
            d_treatments[k] += len(per_ttype['YES'])
    sorted_treatments = sorted(d_treatments, key=d_treatments.get,
                               reverse=True)

    out = pd.DataFrame.from_dict(matrix_treated, orient='index')
    # Row totals are taken *before* the string TTYPE column is attached:
    # summing a mixed numeric/string frame raises on pandas >= 2.0.
    received_any = out.sum(axis=1) > 0
    out['TTYPE'] = [dic_primary_full[t] for t in out.index.tolist()]
    out = out[received_any]

    forbidden = ['Double-primary']
    order_sample_plot = []
    order = []
    dic_len = defaultdict(int)
    for ttype in tqdm(sorted_ttyps):
        if ttype in forbidden:
            continue
        subs = out[out['TTYPE'] == ttype]
        mat = subs.drop('TTYPE', axis=1).dropna()[sorted_treatments[:30]]
        if len(mat) > 1:
            n = classic_mutual_exclusivity_visualization(
                mat, sorted_treatments[:30])
            new_order = n.dendrogram_col.reordered_ind
            sample_list = mat.reset_index().loc[new_order]['index'].tolist()
            order_sample_plot.extend(sample_list)
            order.append(ttype)
            dic_len[ttype] = len(sample_list)

    new_cmap = LinearSegmentedColormap.from_list(
        "", ["lightgrey", "grey", "darkred"])

    concat = out.loc[order_sample_plot].drop('TTYPE', axis=1)
    concat = concat[sorted_treatments[:20]]
    if 'specific' in outf:
        concat.columns = [s.lower() for s in concat.columns]

    config_params(2)
    fig, ax = plt.subplots(1, 2, figsize=(1, 3),
                           gridspec_kw={'width_ratios': [1, 27]})
    sns.heatmap(concat, cmap=new_cmap, yticklabels=False, ax=ax[1],
                cbar=False)
    ax[1].xaxis.set_ticks_position('top')

    # Tumour-type side bar, stacked bottom-up in reverse plotting order so
    # it lines up with the heat-map rows.
    bot = 0
    for t in order[::-1]:
        ax[0].bar(0, dic_len[t], bottom=bot, color=color_ttype[t])
        bot += dic_len[t]
    ax[0].set_ylim(0, bot)
    for side in ('top', 'bottom', 'left', 'right'):
        ax[0].spines[side].set_visible(False)
    ax[0].get_yaxis().set_visible(False)
    ax[0].get_xaxis().set_visible(False)

    plt.xticks(rotation=90)
    plt.savefig('figures/EDF1_{}.png'.format(outf), dpi=600)
    plt.close()
def do_plot(samples_lowbraca, samples_braca, not_breaked, only_treated, exp):
    """Box/scatter plot of ID6-like indel exposure by radiation and BRCAness.

    Args:
        samples_lowbraca: samples without BRCAness.
        samples_braca: samples with BRCAness.
        not_breaked: samples considered not radiated.
        only_treated: samples considered radiated.
        exp: exposure table indexed by sample with one column per signature.

    Writes ``figures/radiation.svg`` (boxes plus jittered points coloured by
    tumour type) and ``figures/radiation_pvals.svg`` (Mann-Whitney p-values
    as text), prints the p-values, and then terminates the interpreter via
    ``sys.exit()``.
    """
    dic_primary_full, _ = return_metadata()
    color_ttype = return_colors()
    fig, ax = plt.subplots(1, 1, figsize=(2, 1.5))
    config_params(6.5)

    sigs = ['12_ID6_0.962941_1']
    # Computed once; it used to be recomputed for every plotted sample.
    total_exposure = exp[sigs].sum(axis=1)

    # Box positions 0..3, in plotting order.
    groups = [
        [i for i in not_breaked if i in samples_lowbraca],
        [i for i in only_treated if i in samples_lowbraca],
        [i for i in not_breaked if i in samples_braca],
        [i for i in only_treated if i in samples_braca],
    ]
    (exposures_not_breaked_1, exposures_breaked_1,
     exposures_not_breaked_2, exposures_breaked_2) = [
        total_exposure.loc[members].dropna() for members in groups
    ]

    sns.boxplot(
        data=[
            exposures_not_breaked_1, exposures_breaked_1,
            exposures_not_breaked_2, exposures_breaked_2
        ],
        linewidth=0.6,
        showfliers=False,
        color='#cbcacbff')
    plt.ylabel('Indels DSB repair by\nnon-homologous end-joining')
    plt.xticks(
        [0, 1, 2, 3],
        [
            'Not radiated no BRCAness ({})'.format(
                len(exposures_not_breaked_1)),
            'Radiated no BRCAness ({})'.format(len(exposures_breaked_1)),
            'Not radiated BRCAness ({})'.format(
                len(exposures_not_breaked_2)),
            'Radiated BRCAness ({})'.format(len(exposures_breaked_2)),
        ],
        rotation=90)

    # Jittered points on top of each box, coloured by primary tumour type.
    for position, members in enumerate(groups):
        plotdot = [total_exposure.loc[sample] for sample in members]
        colors = [color_ttype[dic_primary_full[sample]]
                  for sample in members]
        jittered = [
            position + np.random.uniform(-0.2, 0.2, 1)[0]
            for _ in plotdot
        ]
        ax.scatter(jittered, plotdot, color=colors, s=1, alpha=0.2)

    plt.ylim(0, 700)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    plt.savefig('figures/radiation.svg')
    plt.show()

    # Second figure: the three p-values rendered as text.
    fig, ax = plt.subplots(1, 1)

    stat, pval1 = mannwhitneyu(exposures_not_breaked_1, exposures_breaked_1)
    print("Not radiated no BRCAnes vs Radiated no BRCAness", pval1)
    stat, pval2 = mannwhitneyu(exposures_not_breaked_2, exposures_breaked_2)
    print("Not radiated BRCAnes vs Radiated BRCAness", pval2)
    stat, pval3 = mannwhitneyu(exposures_breaked_1, exposures_breaked_2)
    print("radiated no BRCAnes vs Radiated BRCAness", pval3)

    # Raw string: "\i" is not a valid escape sequence in a normal literal.
    p_symbol = r"$\it{P}$"
    for y_text, pval in ((1, pval1), (4, pval2), (8, pval3)):
        ax.text(1, y_text, p_symbol + " = {}".format(sci_notation(pval)),
                fontsize=7)
    plt.xlim(0, 5)
    plt.ylim(0, 10)
    plt.savefig('figures/radiation_pvals.svg')

    # NOTE(review): exiting the interpreter from a plotting helper is
    # unusual; kept because existing callers may rely on it -- confirm.
    sys.exit()
outpath, 'count') plot_single_correlation(dsigprof_simple, dsiganalyzer_simple, drug, outpath, 'count') plot_single_distribution(dsigprof_simple_exposure, dsiganalyzer_simple_exposure, drug, outpath, 'exposure') plot_single_correlation(dsigprof_simple_exposure, dsiganalyzer_simple_exposure, drug, outpath, 'exposure') bad_samples_drug = get_samples_similar_exposure( dsigprof_simple_exposure, dsiganalyzer_simple_exposure, drug, outpath, 'exposure') config_params(5) def run(): dic_primary_full, _ = return_metadata() signature_2_treatment_signature_analyzer = { 'CISPLATIN': ['21_SBS31_0.953955_1', '14_1'], 'CARBOPLATIN': ['21_SBS31_0.953955_1', '25_1'], '5-FU_CAPE': ['31_SBS17b_0.968799_1'], 'OXALIPLATIN': ['14_1', '37_1'] } signature_2_treatment_sigprofiler = { 'CISPLATIN': ['1_SBS31_0.968153_0.98'], 'CARBOPLATIN': ['1_SBS31_0.968153_0.98'], 'OXALIPLATIN': ['20_0.92'],
def plot_piecharts_signatures(exposures_path, type_mut, type_extraction,
                              figsize, min_val):
    """Dot matrix of signature activity per tumour type.

    One row per extracted signature, one column per tumour type. The dot
    colour bins the median exposure (divided by 3234, the genome size in Mb
    used by the original authors) of the samples above ``min_val``; the dot
    area is 20 times the fraction of samples above ``min_val``.

    Args:
        exposures_path: path forwarded to ``read_exposures``.
        type_mut: 'SBS', 'ID' or 'DBS'; selects the colour thresholds and is
            the substring that marks a signature as similar to a known one.
        type_extraction: sub-directory of ``figures/`` for the output files.
        figsize: matplotlib figure size.
        min_val: exposure above which a sample counts as active.

    Writes ``figures/<type_extraction>/supp1_<type_mut>.svg`` and ``.png``.
    """
    colors_full = {
        "SBS": {
            25: '#003d7c', 10: '#224ba5', 5: '#4459ce', 2.5: '#6767f7',
            1: '#9968c8', 0.5: '#cc6999', 0.25: '#ff6b6b', 0.1: '#ff8962',
            0.05: '#ffa759', 0: '#ffc651'
        },
        "ID": {
            1: '#003d7c', 0.5: '#224ba5', 0.1: '#4459ce', 0.05: '#6767f7',
            0.04: '#9968c8', 0.03: '#cc6999', 0.02: '#ff6b6b',
            0.01: '#ff8962', 0.001: '#ffa759', 0: '#ffc651'
        },
        "DBS": {
            0.5: '#003d7c', 0.1: '#224ba5', 0.05: '#4459ce', 0.03: '#6767f7',
            0.02: '#9968c8', 0.01: '#cc6999', 0.008: '#ff6b6b',
            0.005: '#ff8962', 0.001: '#ffa759', 0: '#ffc651'
        }
    }
    colors = colors_full[type_mut]
    # Highest threshold first; sorted once instead of once per dot.
    thresholds = sorted(colors, reverse=True)

    result = read_exposures(exposures_path)
    config_params()
    dic_primary_full, _ = return_metadata()
    result = result.fillna(0)

    signatures = result.columns.tolist()
    # Signatures whose name embeds the mutation type matched a known one.
    similar = [s for s in signatures if type_mut in s]
    notsimilar = [s for s in signatures if type_mut not in s]

    result['TTYPE'] = [dic_primary_full[t] for t in result.index.tolist()]

    dic_sig = defaultdict(lambda: defaultdict(float))
    dic_proportion = defaultdict(lambda: defaultdict(float))
    for ttype, data in result.groupby(by='TTYPE'):
        data2 = data.drop('TTYPE', axis=1)
        for col in data2:
            active = data2[data2[col] > min_val]
            # we normalize it by the number of MB in the human genome (3234)
            dic_sig[ttype][col] = active[col].median() / 3234
            # Novel signatures are keyed by their leading number only.
            key = col if type_mut in col else col.split('_')[0]
            dic_proportion[ttype][key] = len(active) / len(data2)

    medians = pd.DataFrame.from_dict(dic_sig)

    # Group the known-like signatures by the number of their reference
    # signature (a trailing letter such as the "b" of "17b" is dropped).
    keep_order_similar = defaultdict(list)
    for s in similar:
        number = s.split('_')[1].split(type_mut)[1]
        try:
            keep_n = int(number)
        except ValueError:
            keep_n = int(number[:-1])
        keep_order_similar[keep_n].append(str(s))

    order_prev_labels = []
    order_prev = []
    for ref_number in sorted(keep_order_similar, reverse=True):
        members = keep_order_similar[ref_number]
        # Sort members by "<reference>_<similarity>_<index>".
        d_equiv = defaultdict(str)
        for sig in members:
            parts = sig.split('_')
            d_equiv['{}_{}_{}'.format(parts[1], parts[2], parts[0])] = sig
        sorted_sigs_list = [d_equiv[ss] for ss in sorted(d_equiv)]

        only_one = len(members) == 1
        for sim, sig in reversed(list(enumerate(sorted_sigs_list, start=1))):
            parts = sig.split('_')
            similarity = round(float(parts[2]), 3)
            if only_one:
                label = 'E-{}{} ({}-like, {})'.format(
                    type_mut, parts[0], parts[1], similarity)
            else:
                label = 'E-{}{} ({}-like {}, {})'.format(
                    type_mut, parts[0], parts[1], sim, similarity)
            order_prev_labels.append(label)
            order_prev.append(sig)

    # Novel signatures are re-indexed by their integer id.
    no_similar_signatures = medians.loc[notsimilar]
    no_similar_signatures.index = [
        int(l.split('_')[0]) for l in no_similar_signatures.index.tolist()
    ]
    no_similar_signatures.sort_index(inplace=True, ascending=True)
    names_notsimilar = [
        'E-{} {}'.format(type_mut, c)
        for c in no_similar_signatures.index.tolist()[::-1]
    ]

    # merge new and old
    merged = pd.concat([
        no_similar_signatures.sort_index(ascending=False),
        medians.loc[order_prev],
    ])
    merged_labels = names_notsimilar + order_prev_labels

    config_params(5)
    fig, ax = plt.subplots(1, 1, figsize=figsize)

    ttypes = merged.columns.tolist()
    for yval, (row_label, row) in enumerate(merged.iterrows()):
        # Known-like rows are labelled by their full string name, novel rows
        # by an int; dic_proportion is keyed by the string form in both
        # cases.
        proportion_key = str(row_label)
        for xval, t in enumerate(ttypes):
            val = row[t]
            if val > 0:
                # First (largest) threshold strictly below the value; the
                # 0 threshold guarantees a match for any positive value.
                for number in thresholds:
                    if val > number:
                        color_scatter = colors[number]
                        break
                plt.scatter(xval, yval, c=color_scatter,
                            s=dic_proportion[t][proportion_key] * 20)

    ax.set_xticks(np.arange(len(merged.T)))
    ax.set_xticklabels(ttypes, rotation=90)
    ax.set_yticks(np.arange(len(merged)))
    ax.set_yticklabels(merged_labels)
    ax.xaxis.set_ticks_position('top')
    ax.set_axisbelow(True)
    ax.yaxis.grid(color='gray', linestyle='dashed', alpha=0.3)
    ax.xaxis.grid(color='gray', linestyle='dashed', alpha=0.3)
    for side in ('top', 'bottom', 'left', 'right'):
        ax.spines[side].set_visible(False)
    ax.xaxis.set_ticks_position('none')
    ax.yaxis.set_ticks_position('none')
    plt.ylim(-1, len(merged))
    plt.tight_layout()

    plt.savefig('figures/{}/supp1_{}.svg'.format(type_extraction, type_mut))
    plt.savefig('figures/{}/supp1_{}.png'.format(type_extraction, type_mut),
                dpi=600)
    plt.close()
def GBM_tzm_plot(total_MGMT, nottreated, total_treated, samples_mmr, fitted):
    """Box plot with jittered points of SBS11 in the GBM cohort.

    Treated samples are split into three disjoint groups: MMR-deficient,
    MGMT (and not MMR-deficient), and neither. Together with the untreated
    samples this gives four boxes. Only samples that are columns of
    ``fitted`` are plotted, while the tick labels report the full group
    sizes.

    Args:
        total_MGMT: samples flagged for MGMT.
        nottreated: untreated samples.
        total_treated: treated samples.
        samples_mmr: MMR-deficient samples.
        fitted: table with signatures as rows and samples as columns.

    Saves ``figures/GBM_tmz.svg``.
    """
    sig_temo = 'SBS11'
    config_params(6.5)

    # first we select MMR deficient
    treated_MMR = [s for s in total_treated if s in samples_mmr]
    # then we select MGMT but not MMR deficient
    treated_MGMT = [
        s for s in total_treated
        if s not in treated_MMR and s in total_MGMT
    ]
    # then treated without the others
    treated_no_alt = [
        s for s in total_treated
        if s not in treated_MMR and s not in treated_MGMT
    ]

    def signature_values(samples):
        # Restrict to samples that have a fitted column, then take SBS11.
        present = [s for s in samples if s in fitted.columns]
        return fitted[present].loc[sig_temo]

    # (samples, half-width of the horizontal jitter) per box position.
    layout = (
        (nottreated, 0.2),
        (treated_no_alt, 0.2),
        (treated_MGMT, 0.2),
        (treated_MMR, 0.1),
    )

    fig, ax = plt.subplots(1, 1, figsize=(1.25, 1.5))
    sns.boxplot(
        data=[signature_values(samples) for samples, _ in layout],
        color='#ecececff',
        linewidth=1,
        showfliers=False,
        ax=ax)

    for position, (samples, half_width) in enumerate(layout):
        values = signature_values(samples)
        xs = [
            position + np.random.uniform(-half_width, half_width, 1)[0]
            for _ in values
        ]
        ax.scatter(xs, values, s=15, color='#800080ff', edgecolor='black',
                   lw=0.5)

    for side in ('right', 'top'):
        ax.spines[side].set_visible(False)

    plt.xticks(
        [0, 1, 2, 3],
        [
            'TMZ untreated (n ={})'.format(len(nottreated)),
            'MGMT-notmet MMR-prof (n = {})'.format(len(treated_no_alt)),
            'MGMT-met MMR-prof (n = {})'.format(len(treated_MGMT)),
            'MMR-def(n = {})'.format(len(treated_MMR)),
        ],
        rotation=90)
    plt.title('GBM-cohort\n(Wang et al, 2016)')
    plt.ylabel('TMZ related SBS')

    plt.savefig('figures/GBM_tmz.svg')
    plt.close()
def plot_dbs(sig, title, outpath, ttype):
    """Render one DBS signature profile as a coloured bar chart.

    The figure has a thin colour strip on top (one segment per reference
    dinucleotide) and the bar chart below. It is written as PNG and SVG to
    ``{outpath}/processes/{ttype}/{ttype}.{title}.*``.

    Args:
        sig: bar heights, positionally matched to ``order_to_plot_dbs()``.
        title: signature name used in the output file name.
        outpath: base output directory.
        ttype: sub-directory and file-name prefix.
    """
    config_params(3)

    fig, axs = plt.subplots(nrows=2,
                            ncols=1,
                            figsize=(3.2, 1),
                            gridspec_kw={'height_ratios': [1, 9]})
    strip_ax, bars_ax = axs[0], axs[1]

    dbs_color = {
        'AC': '#a6cee3',
        'AT': '#1f78b4',
        'CC': '#b2df8a',
        'CG': '#33a02c',
        'CT': '#fb9a99',
        'GC': '#e3211d',
        'TA': '#fdbf6f',
        'TC': '#ff7f00',
        'TG': '#cab2d6',
        'TT': '#6a3d9a',
    }
    # The strip follows the alphabetical order of the reference dinucleotides.
    strip_order = [dbs_color[ref] for ref in sorted(dbs_color)]

    channels = order_to_plot_dbs()
    bar_colors = [dbs_color[channel.split('_')[0]] for channel in channels]
    channels_per_color = Counter(bar_colors)

    # Top strip: one horizontal segment per colour, as wide as its bar count.
    left_edge = -0.5
    for shade in strip_order:
        width = channels_per_color[shade]
        strip_ax.barh(1, width, left=left_edge, color=shade, align='center')
        left_edge += width

    for side in ('top', 'bottom', 'left', 'right'):
        strip_ax.spines[side].set_visible(False)
    strip_ax.get_yaxis().set_visible(False)
    strip_ax.get_xaxis().set_visible(False)
    strip_ax.set_xlim(-1, 78)

    positions = list(range(len(sig)))

    # Faint horizontal guide lines.
    for level in (0.05, 0.1, 0.15):
        bars_ax.axhline(y=level,
                        xmin=-1,
                        xmax=96,
                        lw=0.3,
                        color='grey',
                        alpha=0.2)

    bars_ax.bar(positions,
                sig,
                color=bar_colors,
                width=0.8,
                linewidth=0,
                align='center',
                alpha=1)
    bars_ax.set_xticks(positions)
    # Tick label = last two characters of each channel name.
    bars_ax.set_xticklabels(
        ['{}{}'.format(channel[-2], channel[-1]) for channel in channels],
        rotation=90,
        fontsize=2,
        verticalalignment="center",
        ha='center',
        color='grey')
    bars_ax.set_xlim(-1, 78)

    bars_ax.spines['top'].set_visible(False)
    bars_ax.set_ylabel('Relative Probability')
    bars_ax.spines['right'].set_visible(False)

    plt.setp([bars_ax.get_xticklines(), bars_ax.get_yticklines()],
             color='grey')
    bars_ax.xaxis.set_ticks_position('none')
    for side in ('top', 'bottom', 'left', 'right'):
        bars_ax.spines[side].set_linewidth(0.2)
    bars_ax.xaxis.set_tick_params(pad=0.5)
    bars_ax.yaxis.set_tick_params(pad=0.5, width=0.5)

    plt.tick_params(axis='both', which='both', bottom=False, left=False)
    plt.tight_layout()

    plt.savefig('{}/processes/{}/{}.{}.png'.format(outpath, ttype, ttype,
                                                   title),
                dpi=300,
                bbox_inches='tight')
    plt.savefig('{}/processes/{}/{}.{}.svg'.format(outpath, ttype, ttype,
                                                   title))
    plt.close()
def plot_indel(sig, title, outpath, ttype):
    """Render one indel signature profile as a coloured bar chart.

    The figure has a thin colour strip on top (one segment per indel class)
    and the bar chart below. It is written as PNG and SVG to
    ``{outpath}/processes/{ttype}/{ttype}.{title}.*``.

    Args:
        sig: bar heights, positionally matched to ``order_to_plot_indel()``.
        title: signature name used in the output file name.
        outpath: base output directory.
        ttype: sub-directory and file-name prefix.
    """
    config_params(3)

    fig, axs = plt.subplots(nrows=2,
                            ncols=1,
                            figsize=(3.2, 1),
                            gridspec_kw={'height_ratios': [1, 9]})
    strip_ax, bars_ax = axs[0], axs[1]

    # (colour, number of consecutive bars drawn in that colour); 83 in total.
    palette = [
        ('#fdbe6f', 6), ('#ff8002', 6), ('#b0dd8b', 6), ('#36a12e', 6),
        ('#fdcab5', 6), ('#fc8a6a', 6), ('#f14432', 6), ('#bc191a', 6),
        ('#d0e1f2', 6), ('#94c4df', 6), ('#4a98c9', 6), ('#1764ab', 6),
        ('#e1e1ef', 1), ('#b6b6d8', 2), ('#8683bd', 3), ('#62409b', 5),
    ]
    bar_colors = [shade for shade, repeat in palette for _ in range(repeat)]

    # Top strip: one horizontal segment per class, as wide as its bar count.
    left_edge = -0.5
    for shade, repeat in palette:
        strip_ax.barh(1, repeat, left=left_edge, color=shade)
        left_edge += repeat

    for side in ('top', 'bottom', 'left', 'right'):
        strip_ax.spines[side].set_visible(False)
    strip_ax.get_yaxis().set_visible(False)
    strip_ax.get_xaxis().set_visible(False)
    strip_ax.set_xlim(-1, 83)

    positions = list(range(len(sig)))

    # Faint horizontal guide lines.
    for level in (0.05, 0.1, 0.15):
        bars_ax.axhline(y=level,
                        xmin=-1,
                        xmax=83,
                        lw=0.3,
                        color='grey',
                        alpha=0.3)

    bars_ax.bar(positions,
                sig,
                color=bar_colors,
                width=0.7,
                linewidth=0,
                align='center',
                alpha=1)
    bars_ax.set_xticks(positions)
    # Tick label = the part of each channel name after the last underscore.
    bars_ax.set_xticklabels(
        [channel.split('_')[-1] for channel in order_to_plot_indel()],
        fontsize=2,
        verticalalignment="center",
        ha='center',
        color='grey')
    bars_ax.set_xlim(-1, 83)

    plt.tight_layout()

    bars_ax.spines['top'].set_visible(False)
    bars_ax.set_ylabel('Relative Probability')
    bars_ax.spines['right'].set_visible(False)

    plt.setp([bars_ax.get_xticklines(), bars_ax.get_yticklines()],
             color='grey')
    bars_ax.xaxis.set_ticks_position('none')
    for side in ('top', 'bottom', 'left', 'right'):
        bars_ax.spines[side].set_linewidth(0.2)
    bars_ax.xaxis.set_tick_params(pad=0.5)
    bars_ax.yaxis.set_tick_params(pad=0.5, width=0.5)

    plt.tick_params(axis='both', which='both', bottom=False, left=False)

    plt.savefig('{}/processes/{}/{}.{}.png'.format(outpath, ttype, ttype,
                                                   title),
                dpi=300,
                bbox_inches='tight')
    plt.savefig('{}/processes/{}/{}.{}.svg'.format(outpath, ttype, ttype,
                                                   title))
    plt.close()
def plot_bias_dbs(d_clustered, d_unclustered, outpath, ttype, label1, label2):
    """Plot, per signature, DBS profiles under two conditions side by side.

    For every column of ``d_clustered`` two figures are produced under
    ``{outpath}/processes/{ttype}/``:

    * ``{ttype}.{sig}.png/.svg`` with four stacked panels:
      0. raw values of both conditions, interleaved channel by channel;
      1. per-channel difference (condition 1 minus condition 2);
      2. per-channel sum after normalising the interleaved values to 1;
      3. totals per reference dinucleotide for both conditions, marked with
         ``*`` when ``poisson_exact`` returns a p-value below 0.05.
    * ``{ttype}.{sig}.diagonal.png/.svg``: scatter of condition 1 against
      condition 2 with the identity line.

    A one-line ``{ttype}.{sig}.max_components.tsv`` is also written, holding
    the reference dinucleotide with the largest combined total and its two
    per-condition totals. The file is empty when no total exceeds 0.

    Args:
        d_clustered: table with one column per signature (condition 1).
            Rows are assumed to follow ``order_to_plot_dbs()``, grouped by
            reference dinucleotide in the order of the colour table below
            (TODO confirm against the caller).
        d_unclustered: table of the same layout for condition 2.
        outpath: base output directory.
        ttype: sub-directory and file-name prefix.
        label1: display name of condition 1.
        label2: display name of condition 2.
    """
    config_params(3)

    order_plot = order_to_plot_dbs()
    final_order = []
    for channel in order_plot:
        final_order.append('{}_{}'.format(channel, label1))
        final_order.append('{}_{}'.format(channel, label2))

    dbs_color = {
        'AC': '#a6cee3',
        'AT': '#1f78b4',
        'CC': '#b2df8a',
        'CG': '#33a02c',
        'CT': '#fb9a99',
        'GC': '#e3211d',
        'TA': '#fdbf6f',
        'TC': '#ff7f00',
        'TG': '#cab2d6',
        'TT': '#6a3d9a',
    }
    labels = list(dbs_color.keys())

    # None of these depend on the signature, so build them once.
    single_colors = [dbs_color[db.split('_')[0]] for db in order_plot]
    paired_colors = [shade for shade in single_colors for _ in range(2)]
    # Number of interleaved values (two per channel) in each reference group.
    group_sizes = [
        (color_db,
         2 * sum(1 for db in order_plot if db.split('_')[0] == ref))
        for ref, color_db in dbs_color.items()
    ]

    for sig_name in d_clustered.columns:
        fig, axs = plt.subplots(nrows=4, ncols=1, figsize=(10, 4))

        # Interleave both conditions: cond1, cond2, cond1, cond2, ...
        sig = []
        for lag, lead in zip(d_clustered[sig_name], d_unclustered[sig_name]):
            sig.append(lag)
            sig.append(lead)

        # Panel 3: totals per reference dinucleotide, with significance marks.
        toplot_full = []
        colors_joinbar = []
        marks = []
        start = 0
        for color_db, amount in group_sizes:
            group = sig[start:start + amount]
            start += amount
            c_condition1 = np.sum(group[::2])
            c_condition2 = np.sum(group[1::2])
            colors_joinbar.extend((color_db, color_db))
            toplot_full.extend((c_condition1, c_condition2))
            pval = poisson_exact(c_condition1, c_condition2)
            mark = '*' if pval < 0.05 else ''
            marks.extend((mark, mark))

        xjoin = list(range(len(toplot_full)))
        axs[3].bar(xjoin,
                   toplot_full,
                   color=colors_joinbar,
                   width=0.5,
                   linewidth=0)
        axs[3].set_xticks(xjoin)
        for ix, (mark, val) in enumerate(zip(marks, toplot_full)):
            axs[3].text(xjoin[ix], val + 10, mark)

        # Record the reference group with the largest combined total.
        best_total = 0
        bias = ''
        for indx, pairs in enumerate(chunks(toplot_full, 2)):
            first, second = pairs[0], pairs[1]
            if (first + second) > best_total:
                best_total = first + second
                bias = '{}\t{}\t{}\n'.format(labels[indx], first, second)

        with open(
                '{}/processes/{}/{}.{}.max_components.tsv'.format(
                    outpath, ttype, ttype, sig_name), 'wt') as outfile:
            outfile.write(bias)

        # Panel 0: raw interleaved values.
        x = list(range(len(paired_colors)))
        axs[0].bar(x, sig, color=paired_colors, width=0.5, linewidth=0)
        axs[0].set_xticks(x)
        axs[0].set_xticklabels(final_order, rotation=90, fontsize=2)
        axs[0].spines['top'].set_visible(False)
        axs[0].set_xlim(-1, 156)

        # Panel 1: condition 1 minus condition 2, per channel.
        x = list(range(len(single_colors)))
        difference = [lag - lead for lag, lead in zip(sig[0::2], sig[1::2])]
        axs[1].bar(x, difference, color=single_colors, width=0.5, linewidth=0)
        axs[1].set_xticks(x)
        axs[1].set_xticklabels(order_plot, rotation=90, fontsize=2)
        axs[1].set_xlim(-1, 78)
        axs[1].spines['top'].set_visible(False)

        # Panel 2: normalise the interleaved values, then sum both conditions.
        relative = np.asarray(sig) / np.sum(sig)
        combined = [
            lag + lead for lag, lead in zip(relative[0::2], relative[1::2])
        ]
        axs[2].bar(x, combined, color=single_colors, width=0.75, linewidth=0)
        axs[2].set_xticks(x)
        axs[2].set_xticklabels(order_plot, rotation=90, fontsize=2)
        axs[2].set_xlim(-1, 78)
        axs[2].spines['top'].set_visible(False)

        axs[0].set_ylabel('NMF counts')
        axs[1].set_ylabel('{} - {}'.format(label1, label2))
        axs[2].set_ylabel('Relative Probability')

        # Same tick and spine styling on every panel. The fourth panel now
        # uses its own y tick lines instead of those of the third panel.
        for panel in axs:
            plt.setp([panel.get_xticklines(), panel.get_yticklines()],
                     color='grey')
            panel.xaxis.set_ticks_position('none')
            for side in ('top', 'bottom', 'left', 'right'):
                panel.spines[side].set_linewidth(0.2)
            panel.xaxis.set_tick_params(pad=0.5)
            panel.yaxis.set_tick_params(pad=0.5, width=0.5)

        plt.tight_layout()
        plt.savefig('{}/processes/{}/{}.{}.png'.format(outpath, ttype, ttype,
                                                       sig_name),
                    dpi=300,
                    bbox_inches='tight')
        plt.savefig('{}/processes/{}/{}.{}.svg'.format(outpath, ttype, ttype,
                                                       sig_name))
        plt.close()

        # Second figure: condition 1 against condition 2 with identity line.
        fig, ax = plt.subplots(1, 1, figsize=(1, 1))
        max_clustered = np.max(d_clustered[sig_name])
        max_unclustered = np.max(d_unclustered[sig_name])
        total_max = np.max([max_clustered, max_unclustered])
        plt.plot([0, total_max], [0, total_max], lw=1, alpha=0.4)
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        plt.scatter(d_clustered[sig_name],
                    d_unclustered[sig_name],
                    s=5,
                    c=single_colors)
        plt.xlim(0, max_clustered + 10)
        plt.ylim(0, max_unclustered + 10)
        plt.xlabel(label1)
        plt.ylabel(label2)
        plt.tight_layout()
        plt.savefig('{}/processes/{}/{}.{}.diagonal.png'.format(
            outpath, ttype, ttype, sig_name),
                    dpi=300,
                    bbox_inches='tight')
        plt.savefig('{}/processes/{}/{}.{}.diagonal.svg'.format(
            outpath, ttype, ttype, sig_name))
        plt.close()
def plot_snvs(sig, title, outpath, ttype):
    """Render one SNV signature profile as a coloured bar chart.

    Values are taken in consecutive chunks of 16; each chunk gets its own
    colour (six are defined) and a matching segment in the strip above the
    bars. The figure is written as PNG and SVG to
    ``{outpath}/processes/{ttype}/{ttype}.{title}.*``.

    Args:
        sig: bar heights, positionally matched to ``order_muts("snv")``.
        title: signature name used in the output file name.
        outpath: base output directory.
        ttype: sub-directory and file-name prefix.
    """
    config_params(3)

    fig, axs = plt.subplots(nrows=2,
                            ncols=1,
                            figsize=(3.2, 1),
                            gridspec_kw={'height_ratios': [1, 9]})
    strip_ax, bars_ax = axs[0], axs[1]

    channels = order_muts("snv")
    colors_mut = [
        '#1ebff0', '#050708', '#e62725', '#cbcacb', '#a1cf64', '#edc8c5'
    ]

    heights = []
    bar_colors = []
    for block_ix, block in enumerate(chunks(sig, 16)):
        shade = colors_mut[block_ix]
        bar_colors.extend([shade for _ in block])
        # Strip segments are 16 wide and start at -0.5.
        strip_ax.barh(1, 16, left=16 * block_ix - 0.5, color=shade)
        heights.extend(block)

    strip_ax.set_xlim(-1, 96)
    for side in ('top', 'bottom', 'left', 'right'):
        strip_ax.spines[side].set_visible(False)
    strip_ax.get_yaxis().set_visible(False)
    strip_ax.get_xaxis().set_visible(False)

    positions = list(range(len(heights)))

    # Faint horizontal guide lines.
    for level in (0.05, 0.1, 0.15):
        bars_ax.axhline(y=level,
                        xmin=-1,
                        xmax=96,
                        lw=0.6,
                        color='grey',
                        alpha=0.2)

    bars_ax.bar(positions,
                heights,
                color=bar_colors,
                width=0.8,
                linewidth=0,
                align='center')
    bars_ax.set_xticks(positions)
    # Tick label = characters 0, 2 and last of each channel name.
    bars_ax.set_xticklabels(
        ['{}{}{}'.format(name[0], name[2], name[-1]) for name in channels],
        verticalalignment="center",
        ha='center',
        rotation=90,
        fontsize=2,
        color='grey')

    plt.tight_layout()
    plt.xlim(-1, 96)

    bars_ax.spines['top'].set_visible(False)
    bars_ax.set_ylabel('Relative Probability')
    bars_ax.spines['right'].set_visible(False)

    plt.setp([bars_ax.get_xticklines(), bars_ax.get_yticklines()],
             color='grey')
    bars_ax.xaxis.set_ticks_position('none')
    for side in ('top', 'bottom', 'left', 'right'):
        bars_ax.spines[side].set_linewidth(0.2)
    bars_ax.xaxis.set_tick_params(pad=0.5)
    bars_ax.yaxis.set_tick_params(pad=0.5, width=0.5)

    plt.tick_params(axis='both', which='both', bottom=False, left=False)

    plt.savefig('{}/processes/{}/{}.{}.png'.format(outpath, ttype, ttype,
                                                   title),
                dpi=300,
                bbox_inches='tight')
    plt.savefig('{}/processes/{}/{}.{}.svg'.format(outpath, ttype, ttype,
                                                   title))
    plt.close()
def plot_bias_snv(d_clustered, d_unclustered, outpath, ttype, label1, label2):
    """Plot, per signature, SNV profiles under two conditions side by side.

    For every column of ``d_clustered`` two figures are produced under
    ``{outpath}/processes/{ttype}/``:

    * ``{ttype}.{sig}.png/.svg`` with four stacked panels:
      0. raw values of both conditions, interleaved channel by channel;
      1. per-channel difference (condition 1 minus condition 2);
      2. per-channel sum after normalising the interleaved values to 1;
      3. totals per substitution class (chunks of 16 channels) for both
         conditions, marked with ``*`` when ``poisson_exact`` returns a
         p-value below 0.05.
    * ``{ttype}.{sig}.diagonal.png/.svg``: scatter of condition 1 against
      condition 2 with the identity line.

    A one-line ``{ttype}.{sig}.max_components.tsv`` is also written, holding
    the substitution class with the largest combined total and its two
    per-condition totals. The file is empty when no total exceeds 0.

    Args:
        d_clustered: table with one column per signature (condition 1).
            Rows are assumed to follow ``order_to_plot_snvs()``, i.e. six
            consecutive classes of 16 channels in the order C>A, C>G, C>T,
            T>A, T>C, T>G (TODO confirm against the caller).
        d_unclustered: table of the same layout for condition 2.
        outpath: base output directory.
        ttype: sub-directory and file-name prefix.
        label1: display name of condition 1.
        label2: display name of condition 2.
    """
    config_params(3)

    order_plot = order_to_plot_snvs()
    final_order = []
    for channel in order_plot:
        final_order.append('{}_{}'.format(channel, label1))
        final_order.append('{}_{}'.format(channel, label2))

    colors_mut = [
        '#1ebff0', '#050708', '#e62725', '#cbcacb', '#a1cf64', '#edc8c5'
    ]
    labels = ['C>A', 'C>G', 'C>T', 'T>A', 'T>C', 'T>G']

    for sig_name in d_clustered.columns:
        fig, axs = plt.subplots(nrows=4, ncols=1, figsize=(10, 4))

        # Colours for the interleaved panel: each chunk of 16 channels
        # yields 32 consecutive bars of the same colour.
        paired_colors = []
        for ix, block in enumerate(chunks(d_clustered[sig_name], 16)):
            paired_colors.extend([colors_mut[ix] for _ in block])
            paired_colors.extend([colors_mut[ix] for _ in block])

        # Interleave both conditions: cond1, cond2, cond1, cond2, ...
        sig = []
        for lag, lead in zip(d_clustered[sig_name], d_unclustered[sig_name]):
            sig.append(lag)
            sig.append(lead)

        # Panel 3: totals per substitution class, with significance marks.
        toplot_full = []
        colors_joinbar = []
        marks = []
        for ix, block in enumerate(chunks(sig, 32)):
            c_condition1 = np.sum(block[::2])
            c_condition2 = np.sum(block[1::2])
            colors_joinbar.extend((colors_mut[ix], colors_mut[ix]))
            toplot_full.extend((c_condition1, c_condition2))
            pval = poisson_exact(c_condition1, c_condition2)
            mark = '*' if pval < 0.05 else ''
            marks.extend((mark, mark))

        xjoin = list(range(len(toplot_full)))
        axs[3].bar(xjoin,
                   toplot_full,
                   color=colors_joinbar,
                   width=0.5,
                   linewidth=0)

        # Record the substitution class with the largest combined total.
        best_total = 0
        bias = ''
        for indx, pairs in enumerate(chunks(toplot_full, 2)):
            first, second = pairs[0], pairs[1]
            if (first + second) > best_total:
                best_total = first + second
                bias = '{}\t{}\t{}\n'.format(labels[indx], first, second)

        with open(
                '{}/processes/{}/{}.{}.max_components.tsv'.format(
                    outpath, ttype, ttype, sig_name), 'wt') as outfile:
            outfile.write(bias)

        axs[3].set_xticks(xjoin)
        for ix, (mark, val) in enumerate(zip(marks, toplot_full)):
            axs[3].text(xjoin[ix], val + 10, mark)

        # Panel 0: raw interleaved values.
        x = list(range(len(paired_colors)))
        axs[0].bar(x, sig, color=paired_colors, width=0.5, linewidth=0)
        axs[0].set_xticks(x)
        axs[0].set_xticklabels(final_order, rotation=90, fontsize=2)
        axs[0].spines['top'].set_visible(False)
        axs[0].set_xlim(-1, 192)

        # Panel 1: condition 1 minus condition 2, per channel.
        difference = [lag - lead for lag, lead in zip(sig[0::2], sig[1::2])]
        single_colors = []
        for ix, block in enumerate(chunks(difference, 16)):
            single_colors.extend([colors_mut[ix] for _ in block])
        x = list(range(len(single_colors)))
        axs[1].bar(x, difference, color=single_colors, width=0.5, linewidth=0)
        axs[1].set_xticks(x)
        axs[1].set_xticklabels(order_plot, rotation=90, fontsize=2)
        axs[1].set_xlim(-1, 96)
        axs[1].spines['top'].set_visible(False)

        # Panel 2: normalise the interleaved values, then sum both conditions.
        relative = np.asarray(sig) / np.sum(sig)
        combined = [
            lag + lead for lag, lead in zip(relative[0::2], relative[1::2])
        ]
        axs[2].bar(x, combined, color=single_colors, width=0.75, linewidth=0)
        axs[2].set_xticks(x)
        axs[2].set_xticklabels(order_plot, rotation=90, fontsize=2)
        axs[2].set_xlim(-1, 96)
        axs[2].spines['top'].set_visible(False)

        # Same tick and spine styling on every panel. The fourth panel now
        # uses its own y tick lines instead of those of the third panel.
        for panel in axs:
            plt.setp([panel.get_xticklines(), panel.get_yticklines()],
                     color='grey')
            panel.xaxis.set_ticks_position('none')
            for side in ('top', 'bottom', 'left', 'right'):
                panel.spines[side].set_linewidth(0.2)
            panel.xaxis.set_tick_params(pad=0.5)
            panel.yaxis.set_tick_params(pad=0.5, width=0.5)

        plt.tight_layout()
        plt.savefig('{}/processes/{}/{}.{}.png'.format(outpath, ttype, ttype,
                                                       sig_name),
                    dpi=300,
                    bbox_inches='tight')
        plt.savefig('{}/processes/{}/{}.{}.svg'.format(outpath, ttype, ttype,
                                                       sig_name))
        plt.close()

        # Second figure: condition 1 against condition 2 with identity line.
        fig, ax = plt.subplots(1, 1, figsize=(1, 1))
        max_clustered = np.max(d_clustered[sig_name])
        max_unclustered = np.max(d_unclustered[sig_name])
        total_max = np.max([max_clustered, max_unclustered])
        plt.plot([0, total_max], [0, total_max], lw=1, alpha=0.4)
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        plt.scatter(d_clustered[sig_name],
                    d_unclustered[sig_name],
                    s=5,
                    c=single_colors)
        plt.xlim(0, max_clustered + 1000)
        plt.ylim(0, max_unclustered + 1000)
        plt.xlabel(label1)
        plt.ylabel(label2)
        plt.tight_layout()
        plt.savefig('{}/processes/{}/{}.{}.diagonal.png'.format(
            outpath, ttype, ttype, sig_name),
                    dpi=300,
                    bbox_inches='tight')
        plt.savefig('{}/processes/{}/{}.{}.diagonal.svg'.format(
            outpath, ttype, ttype, sig_name))
        plt.close()