def radar_plot(ax, chrom, FSAMPLE=True):
    """Draw a polar bar chart of per-sample CNV cell counts for one chromosome.

    Reads the module-level tables ``cnv_chr_counts``, ``ref`` and ``xlsx_tab``
    and the style globals ``smallsize``, ``midsize`` and ``two_colors``.

    Parameters
    ----------
    ax : polar matplotlib Axes
        Target axes; polar-only methods (``set_rlim``, ``set_rorigin``,
        ``set_thetagrids`` ...) are called on it.
    chrom : str
        Chromosome name, matched against ``cnv_chr_counts['Space']`` and the
        ``ref['space']`` groups.
    FSAMPLE : bool, default True
        When False the sample named ``'$FSAMPLE'`` is dropped and the code
        ``'M11'`` is replaced by ``'M10'``.

    Returns
    -------
    None
    """
    chrom_counts = cnv_chr_counts.query('Space==@chrom')

    # Chromosome size expressed as a percentage of the summed per-chromosome
    # maximum 'end' coordinate; used to normalise the raw cell counts.
    chrom_end = ref.groupby('space')['end'].max()
    chrlen = (100*chrom_end/chrom_end.sum()).loc[chrom]

    # One value per sample, in xlsx_tab order; samples without a row get 0.
    y_all = (
        chrom_counts
        .set_index('Sample')
        .reindex(index=xlsx_tab['SAMPLE'])
        .fillna(0)['Cell']
    )
    y_all = y_all/chrlen

    if not FSAMPLE:
        y_all = y_all.drop('$FSAMPLE')
        codes = xlsx_tab.query('SAMPLE!="$FSAMPLE"').replace('M11','M10')['CODE'].values
    else:
        codes = xlsx_tab['CODE'].values

    n_sample = y_all.shape[0]
    # n_sample + 1 points so that dropping the closing 2*pi leaves n_sample
    # evenly spaced angles around the circle.
    x = np.linspace(0, 2*np.pi, n_sample+1)
    fig_df = pd.DataFrame({'x': x[:-1], 'y': y_all})

    # Green bars: values on a linear scale, capped at 1.
    ax.bar(
        fig_df['x'],
        fig_df['y'].clip(upper=1),
        width=0.3,
        facecolor='limegreen', edgecolor='k', lw=1.5,
        alpha=0.9,
        zorder=5
    )
    # Gold bars: the excess above 1, compressed by a factor of 10 and stacked
    # on top of the green bar (so radius 1.9 reads as 10 and 2.9 as 20).
    overflow = fig_df.query('y>1')
    ax.bar(
        overflow['x'],
        (overflow['y']-1)/10,
        width=0.3,
        bottom=1,
        facecolor='gold', edgecolor='k', lw=1.5,
        alpha=0.9,
        zorder=5
    )
    ax.set_rlim(0, 3.7)
    ax.set_rorigin(-0.7)

    _, r_label = ax.set_rgrids(
        [1, 1.9, 2.9], ['1', '10', '20'],
        angle=12, fontsize=smallsize, va='bottom', ha='center'
    )
    r_label[0].set_color('limegreen')
    r_label[1].set_color('darkgoldenrod')
    r_label[2].set_color('darkgoldenrod')

    # Put the theta grid lines at exactly the bar angles.  The previous
    # np.linspace(0, 360, n_sample) included the 360 degree end point, so the
    # grids drifted away from the bars and the first/last grid coincided.
    _, sample_label = ax.set_thetagrids(np.degrees(x[:-1]), codes)
    for tobj in sample_label:
        # Codes containing 'M' get the first colour, all others the second.
        if 'M' in tobj.get_text():
            tobj.set_color(two_colors[0])
        else:
            tobj.set_color(two_colors[1])
        tobj.set_fontsize(smallsize)

    # Hand-placed sample labels outside the plot, rotated to follow the
    # circle; the letter prefix is stripped and only the colour encodes it.
    # zip() stops at the shorter sequence, so the closing angle is ignored.
    for tpos, l in zip(x, sample_label):
        if 'M' in l.get_text():
            r = 90
        else:
            r = 270
        ax.text(
            tpos, 4.5,
            l.get_text().replace('F','').replace('M',''),
            va='center', ha='center',
            fontsize=smallsize,
            color=l.get_color(),
            rotation=r-tpos*360/np.pi/2
        )

    ax.set_title(chrom, fontsize=midsize, pad=midsize)
    ax.tick_params(grid_color='k', grid_linewidth=1, grid_alpha=1, zorder=0, grid_linestyle='--')
    ax.set_theta_zero_location('N')
    ax.set_theta_direction(-1)
    # The built-in tick labels are hidden; the ax.text labels above replace them.
    ax.set_xticklabels([])
    return
def female_genome_plot(ax1, ax2):
    """Plot genome-wide gain/loss frequency for female samples on a broken y-axis.

    Both axes receive the same curves; ``ax1`` shows the y-range (-0.4, 0.4)
    and ``ax2`` the range (-4.5, -1.5), with the facing spines hidden so the
    pair reads as one axis with a break.  Losses are drawn as negative values
    and the tick labels show magnitudes.

    Reads the module-level objects ``cells_tab``, ``xlsx_tab``, ``sum_df``,
    ``big_cnv_idx``, ``ref``, ``good_df``, ``female_fig_df``, ``chr_pos``,
    ``BedTools``, ``tmpCleanUp``, ``smallsize`` and ``midsize``.

    Parameters
    ----------
    ax1, ax2 : matplotlib Axes
        Upper and lower panels of the broken-axis pair.

    Returns
    -------
    None
    """
    # Denominator: summed 'n_pf' over the rows whose GENDER is "female".
    n = pd.concat(
        [
            cells_tab,
            xlsx_tab.set_index('SAMPLE')
        ], axis=1, sort=False
    ).query('GENDER=="female"')['n_pf'].sum()

    big_cnv = sum_df.loc[big_cnv_idx]
    big_cnv_gain = big_cnv.query('cnvTag=="Gain" & Gender=="female"')
    big_cnv_loss = big_cnv.query('cnvTag=="Loss" & Gender=="female"')

    bin_bed = BedTools.from_dataframe(ref.loc[good_df.index])
    gain_bed = BedTools.from_dataframe(big_cnv_gain)
    loss_bed = BedTools.from_dataframe(big_cnv_loss)

    # Per-bin overlap counts, re-indexed to the bin index of good_df.
    # NOTE(review): assumes the 'score' column of the intersect output holds
    # the count requested with c=True -- confirm against the BedTools wrapper.
    gain_s = BedTools.intersect(self=bin_bed,b=gain_bed,wa=True,c=True).to_dataframe().set_index(good_df.index)['score']
    loss_s = BedTools.intersect(self=bin_bed,b=loss_bed,wa=True,c=True).to_dataframe().set_index(good_df.index)['score']
    tmpCleanUp()

    # Compute the curves once and reuse them on both panels.
    bin_order = female_fig_df.columns
    bin_x = np.arange(bin_order.shape[0])
    gain_pct = 100*gain_s[bin_order].fillna(0)/n
    loss_pct = -100*loss_s[bin_order].fillna(0)/n

    for panel in (ax1, ax2):
        panel.plot(bin_x, gain_pct, '-', lw=2, color='darkorange', label='Gain')
        panel.plot(bin_x, loss_pct, '-', lw=2, color='deepskyblue', label='Loss')

    # Chromosome boundaries as faint vertical lines behind the curves.
    boundary_y = np.linspace(-5, 9, 10)
    for pos in chr_pos:
        boundary_x = np.tile(pos, 10)
        for panel in (ax1, ax2):
            panel.plot(boundary_x, boundary_y, '-', color='black', alpha=0.5, lw=1, zorder=0)

    for panel in (ax1, ax2):
        panel.set_xticks([])
        panel.tick_params(axis='x', bottom=False, length=0)
        panel.tick_params(axis='y', length=5)

    ax1.set_ylim(-0.4, 0.4)
    ax2.set_ylim(-4.5, -1.5)

    # Hide the spines that face each other to suggest the axis break.
    ax1.spines['bottom'].set_visible(False)
    ax2.spines['top'].set_visible(False)

    ax1.set_ylabel('Freq.\n(%)', fontsize=midsize)

    # Tick labels are magnitudes: the negative half of the axis is the loss.
    ax1.set_yticks([0.3,0,-0.3])
    ax1.set_yticklabels(['0.3','0.0','0.3'], fontsize=smallsize)
    ax2.set_yticks([-3])
    ax2.set_yticklabels(['3'], fontsize=smallsize)
    return
def chr_size_plot(axs, FSAMPLE=True):
    """Scatter the number of CNA cells per chromosome against chromosome size.

    ``axs[1]`` is the main panel (y 0-180) and ``axs[0]`` the upper panel of a
    broken y-axis (y 280-400) that holds the chrX point.  chr21 and chrX are
    highlighted in red and excluded from the regression; chrY is dropped.

    Reads the module-level objects ``sum_df``, ``big_cnv_idx``, ``ref``,
    ``sns``, ``LinearRegression``, ``smallsize`` and ``midsize``.

    Parameters
    ----------
    axs : sequence of two matplotlib Axes
        ``axs[0]`` is the upper (chrX) panel, ``axs[1]`` the main panel.
    FSAMPLE : bool, default True
        When False, rows whose ``Sample`` equals ``'$FSAMPLE'`` are excluded
        before counting.

    Returns
    -------
    None
    """
    axx = axs[0]
    ax = axs[1]

    big_cnv = sum_df.loc[big_cnv_idx]
    if not FSAMPLE:
        big_cnv = big_cnv.query('Sample!="$FSAMPLE"')
    # Collapse to one row per (chromosome, cell, tag) and count rows per
    # chromosome.
    chr_counts = (
        big_cnv
        .groupby(['Space','Cell','cnvTag'])['Pos'].first()
        .reset_index()
        .groupby('Space')['Cell'].count()
    )

    # Largest 'end' per chromosome divided by 1e6 (Mb if 'end' is in bp).
    # Computed once instead of on every label-loop iteration.
    chr_end_mb = ref.groupby('space').max()['end']/1000/1000
    x = chr_end_mb.drop(['chrY'])
    y = chr_counts.drop(['chrY'], errors='ignore')
    # NOTE(review): x and y are paired by position, which assumes chr_counts
    # contains every chromosome present in ref (except chrY) -- confirm.

    ax.plot(
        x.drop('chr21'), y.drop('chr21'),
        '.', color='black', markersize=8,
    )
    ax.plot(
        x['chr21'], y['chr21'],
        '.', color='red', markersize=10,
    )
    axx.plot(
        x['chrX'], y['chrX'],
        '.', color='red', markersize=10,
    )

    # Hand-tuned (dx, dy) label offsets that keep chromosome names from
    # overlapping their neighbours; anything not listed uses the default.
    default_offset = (-5, 4)
    label_offsets = {
        'chr21': (0, -13),
        'chr17': (-5, -10),
        'chr11': (8, 0),
        'chr10': (-8, -3),
        'chr2': (0, -10),
        'chr18': (8, -2),
        'chr8': (5, -3),
        'chr9': (3, 4),
    }
    # The last two entries of ref['space'] are skipped (the X label is drawn
    # separately on the upper panel below).
    for tchr in ref['space'].unique()[:-2]:
        a, b = label_offsets.get(tchr, default_offset)
        ax.text(
            a + chr_end_mb.loc[tchr],
            b + chr_counts[tchr],
            tchr.replace('chr',''),
            fontsize=10,
            ha='center', va='center',
        )

    ax.set_xlim(0,300)
    ax.set_xticks([0,100,200,300])
    ax.set_ylim(0,180)
    ax.set_yticks([0,80,160])

    axx.set_xlim(0,300)
    axx.set_ylim(280,400)
    axx.set_yticks([300,400])
    axx.set_xticks([])
    axx.set_xticklabels('')

    # Hide the facing spines so the two panels read as one broken axis.
    axx.spines['bottom'].set_visible(False)
    ax.spines['top'].set_visible(False)

    axx.text(
        chr_end_mb.loc['chrX'],
        chr_counts['chrX']-30,
        'X',
        fontsize=10,
        ha='center', va='center',
    )

    fit_x = x.drop(['chr21','chrX'])
    fit_y = y.drop(['chr21','chrX'])

    # x/y are passed by keyword: seaborn >= 0.12 makes them keyword-only, and
    # older releases accept keywords as well.  The scatter is fully
    # transparent and the line has zero width, so only the confidence band
    # requested with ci=95 remains visible.
    sns.regplot(
        x=fit_x, y=fit_y,
        ci=95, ax=ax,
        scatter_kws={'color':'w','zorder':0, 'alpha':0},
        line_kws={'lw':0, 'linestyle':'--', 'color':'k','zorder':0, 'alpha':0.5},
    )

    # The line that is actually drawn is a fit through the origin.
    design = fit_x.values.reshape(-1,1)
    model = LinearRegression(fit_intercept=False).fit(design, fit_y)
    r_sq = model.score(design, fit_y)

    line_x = np.linspace(50,250)
    ax.plot(line_x, model.predict(line_x.reshape((-1, 1))), '--', lw=1.5, color='k', zorder=0)

    texts1 = r'$R^2$=' + str(np.round(r_sq,2))
    ax.text(200, 20, texts1, fontsize=smallsize, va='center')

    ax.tick_params(axis='both', labelsize=smallsize, length=5)
    axx.tick_params(axis='both', labelsize=smallsize, length=5)

    # Leading spaces shift the label so it looks centred across both panels.
    ax.set_ylabel(' '*8+'# of cells with CNA', fontsize=midsize, labelpad=-5)
    ax.set_xlabel('Chromosome size (Mb)', fontsize=midsize)
    return
def male_genome_plot(ax, FSAMPLE=True):
    """Plot genome-wide gain/loss frequency for male samples on one axes.

    Gains are drawn as positive and losses as negative percentages of the
    summed ``n_pf``; y tick labels show magnitudes.  Chromosome names are
    placed along the top at the midpoints between consecutive ``chr_pos``.

    Reads the module-level objects ``cells_tab``, ``xlsx_tab``, ``sum_df``,
    ``big_cnv_idx``, ``ref``, ``good_df``, ``male_fig_df``, ``chr_pos``,
    ``BedTools``, ``tmpCleanUp``, ``smallsize`` and ``midsize``.

    Parameters
    ----------
    ax : matplotlib Axes
        Target axes.
    FSAMPLE : bool, default True
        When False the sample named ``'$FSAMPLE'`` is removed from both the
        CNV tables and the denominator.

    Returns
    -------
    None
    """
    male_samples = pd.concat(
        [
            cells_tab,
            xlsx_tab.set_index('SAMPLE')
        ], axis=1, sort=False
    ).query('GENDER=="male"')

    big_cnv = sum_df.loc[big_cnv_idx]
    big_cnv_gain = big_cnv.query('cnvTag=="Gain" & Gender=="male"')
    big_cnv_loss = big_cnv.query('cnvTag=="Loss" & Gender=="male"')

    if not FSAMPLE:
        big_cnv_gain = big_cnv_gain.query('Sample!="$FSAMPLE"')
        big_cnv_loss = big_cnv_loss.query('Sample!="$FSAMPLE"')
        n = male_samples.drop('$FSAMPLE')['n_pf'].sum()
    else:
        n = male_samples['n_pf'].sum()

    bin_bed = BedTools.from_dataframe(ref.loc[good_df.index])
    gain_bed = BedTools.from_dataframe(big_cnv_gain)
    loss_bed = BedTools.from_dataframe(big_cnv_loss)

    # Per-bin overlap counts, re-indexed to the bin index of good_df.
    # NOTE(review): assumes the 'score' column of the intersect output holds
    # the count requested with c=True -- confirm against the BedTools wrapper.
    gain_s = BedTools.intersect(self=bin_bed,b=gain_bed,wa=True,c=True).to_dataframe().set_index(good_df.index)['score']
    loss_s = BedTools.intersect(self=bin_bed,b=loss_bed,wa=True,c=True).to_dataframe().set_index(good_df.index)['score']
    tmpCleanUp()

    bin_order = male_fig_df.columns
    bin_x = np.arange(bin_order.shape[0])
    ax.plot(bin_x, 100*gain_s[bin_order].fillna(0)/n, '-', lw=2, color='darkorange', label='Gain', zorder=1)
    ax.plot(bin_x, -100*loss_s[bin_order].fillna(0)/n, '-', lw=2, color='deepskyblue', label='Loss', zorder=1)

    # Chromosome boundaries as faint vertical lines behind the curves.
    boundary_y = np.linspace(-3, 9, 10)
    for pos in chr_pos:
        ax.plot(np.tile(pos,10), boundary_y, '-', color='black', alpha=0.5, lw=1, zorder=0)

    ax.set_ylabel('Freq.\n(%)', fontsize=midsize)

    # Ticks at the midpoint of each pair of consecutive boundaries; the first
    # rolling mean is NaN and is sliced off.
    ax.set_xticks( pd.Series(chr_pos).rolling(2).mean()[1:] )
    # Selected labels are padded with a leading space to nudge them apart.
    ax.set_xticklabels(
        ref['space'].drop_duplicates()[:-1].str.replace('chr','').replace('19',' 19').replace('21',' 21').replace('X',' X'),
        fontsize=smallsize-4,
        ha='center',
        rotation=90,
    )
    ax.xaxis.set_ticks_position('top')

    ax.set_ylim(-0.4,0.4)
    # Tick labels are magnitudes: the negative half of the axis is the loss.
    ax.set_yticks( [-0.3, 0, 0.3] )
    ax.set_yticklabels( [0.3, 0 ,0.3], fontsize=smallsize )

    ax.tick_params(axis='x', bottom=False, length=0, pad=5)
    ax.tick_params(axis='y', length=5)
    return
gridspec_kw={'hspace':0.2, 'wspace':0.5},
)
# (The call closed above starts outside this excerpt; it produces `fig` and `axs`.)
axs = axs.flatten()

# One radar panel per chromosome, skipping the last entry of ref['space'].
for n,chrom in enumerate(ref['space'].unique()[:-1]):
    radar_plot(axs[n],chrom, FSAMPLE=FSAMPLE)
# Blank the two trailing axes of the grid.
axs[-2].axis('off')
axs[-1].axis('off')
# NOTE(review): verify that `frameon` is still accepted by savefig in the
# installed Matplotlib version.
fig.savefig('A4_SI_Radar.pdf', frameon=False, transparent=True, bbox_inches='tight')
plt.show()

# +
# Chromosome size as a percentage of the summed per-chromosome maximum 'end'.
chrlen = (100*ref.groupby('space')['end'].max()/ref.groupby('space')['end'].max().sum())
# Cell counts divided by that percentage; the division aligns on the
# chromosome label and `.values` writes the result back positionally.
# NOTE(review): assumes dropna() removes no rows, otherwise the lengths differ.
cnv_chr_counts['norm_Cell'] = (cnv_chr_counts.set_index('Space')['Cell']/chrlen).dropna().values
# Chromosome x sample matrix in ref / xlsx_tab order, without the
# '$FSAMPLE' column and the chrY row.
df = cnv_chr_counts.pivot(index='Space',columns='Sample', values='norm_Cell').fillna(0).loc[
    ref['space'].unique(),
    xlsx_tab['SAMPLE'],
].drop('$FSAMPLE',axis=1).drop('chrY')

# Figure size: scale factor times a fraction of 8.27 x 11.69 inches
# (presumably A4 paper, going by the variable names).
a4f = 2.5
width_a4f = 0.4
hight_a4f = 0.35
fig, ax = plt.subplots(
    figsize=(a4f*width_a4f*8.27,a4f*hight_a4f*11.69),
)
'Space=="chr6" & Sample=="XXX" & chrom_fraction>15 & chrom_fraction<50'
)['Cell'].unique()
# (The statement closed above starts outside this excerpt; the loop below
# reads its result through `clone_cells`, presumably the name it assigns.)

# `lax` remembers the first (top-most) per-cell axes so that the shared
# chromosome labels can be attached to it once.
lax = None
# Ten panels, each spanning three grid rows (rows 1-3, 4-6, ...).
for i, n in enumerate(np.arange(1, 30, 3)):
    #     for i,n in enumerate(np.arange(1,33,3)):
    ax_cell = fig.add_subplot(gs[n:n + 3, 20:])
    showCell(clone_cells[i], give_ax=[ax_cell], alpha=0.5, ms=1)
    ax_cell.set_ylabel('')
    ax_cell.yaxis.set_ticks_position('right')
    ax_cell.tick_params(axis='y', labelsize=smallsize, length=5)
    if not lax:
        lax = ax_cell

# Chromosome names along the top of the first panel, centred between
# consecutive chromosome start positions (the first rolling mean is NaN
# and is sliced off).
lax.set_xticks(
    ref.groupby('space', sort=False)['abs_pos'].min().rolling(2).mean()[1:])
# Selected labels are padded with a leading space to nudge them apart.
lax.set_xticklabels(
    ref['space'].drop_duplicates()[:-1].str.replace('chr', '').replace(
        '17', ' 17').replace('19', ' 19').replace('21', ' 21').replace('X', ' X'),
    fontsize=smallsize,
    ha='center',
    rotation=90,
)
lax.xaxis.set_ticks_position('top')
lax.tick_params(axis='x', bottom=False, length=0, pad=5)
lax.set_title('F01 chr6 clonal CNA cells', fontsize=midsize, pad=-5)
fig.subplots_adjust(hspace=10, wspace=0.4)
def ideoPlot(chrom,ax):
    '''
    Draw the ideogram (cytoband strip) of one chromosome onto ``ax``.

    Each row of ``ideo`` belonging to ``chrom`` becomes one patch coloured by
    ``color_dict[gieStain]``: the first and last bands are closed paths that
    extend ``rdis`` beyond the chromosome ends, 'acen' bands become the two
    centromere triangles, and every other band is a rectangle between the
    heights ``bf`` and ``tf``.  Afterwards the spines are hidden and six-ish
    evenly spaced x ticks are labelled in units of 1e6 positions.

    Relies on the module-level names ``ref``, ``ideo``, ``rf``, ``tf``,
    ``bf``, ``mf``, ``codes``, ``color_dict`` and ``chrom_dict``.

    Returns the axes that was passed in.
    '''
    # Absolute x position at which this chromosome starts.
    origin = ref.groupby('space').min().loc[chrom,'abs_pos']
    # Horizontal extent of the end caps, a fraction `rf` of the chromosome end.
    rdis = ref.groupby('space').max().loc[chrom,'end']*rf

    bands = ideo.set_index('#chrom').loc[chrom]
    last_idx = bands.shape[0]-1
    outline = dict(edgecolor='black', lw=2, zorder=1)

    # 1 -> next 'acen' band is the left triangle, 0 -> the right one,
    # -1 -> both have been drawn already.
    tag_acen = 1

    for idx, band in bands.reset_index().iterrows():
        start = band['chromStart'] + origin
        end = band['chromEnd'] + origin
        gieStain = band['gieStain']

        if idx == 0:
            # Left end cap; also widen the view to include it.
            ax.set_xlim(start-rdis, ax.get_xlim()[1])
            left_cap = [
                ( end, tf ),
                ( end, bf ),
                ( start, bf ),
                ( start-rdis, bf ),
                ( start-rdis, tf ),
                ( start, tf ),
                ( end, tf ),
            ]
            patch = PathPatch(
                Path(left_cap, codes),
                facecolor=color_dict[gieStain], **outline
            )
        elif idx == last_idx:
            # Right end cap, mirrored.
            ax.set_xlim(ax.get_xlim()[0], end+rdis)
            right_cap = [
                ( start, bf ),
                ( start, tf ),
                ( end, tf ),
                ( end+rdis, tf ),
                ( end+rdis, bf ),
                ( end, bf ),
                ( start, bf ),
            ]
            patch = PathPatch(
                Path(right_cap, codes),
                facecolor=color_dict[gieStain], **outline
            )
        elif gieStain == 'acen':
            if tag_acen == 1:
                # First centromere band: triangle pointing right.
                triangle = [ (start, bf), (start, tf), (end, mf) ]
                tag_acen = 0
            elif tag_acen == 0:
                # Second centromere band: triangle pointing left.
                triangle = [ (end, bf), (end, tf), (start, mf) ]
                tag_acen = -1
            else:
                # A third 'acen' band is unexpected; report it and skip.
                print ('Bug at acen!')
                continue
            patch = Polygon(
                triangle,
                facecolor=color_dict[gieStain], **outline
            )
        else:
            patch = Rectangle(
                (start,bf), end-start, height=(tf-bf),
                facecolor=color_dict[gieStain], **outline
            )
        ax.add_patch(patch)

    for side in ('top', 'bottom', 'left', 'right'):
        ax.spines[side].set_visible(False)

    ax.tick_params(bottom=False, length=2, pad=-20)
    ax.set_ylim(-4,1)
    ax.set_yticks([])

    # Tick layout: pad the chromosome by `boundary` units on both sides and
    # split the padded length into five steps.
    chrom_bins = chrom_dict[chrom]
    boundary = max(2, chrom_bins//50)
    padded = chrom_bins+boundary
    step = padded//5
    tick_units = np.arange(0, padded+step, step)

    ax.set_xticks(origin + tick_units*1e6)
    ax.set_xticklabels(tick_units, fontsize=midsize)
    ax.set_xlim(origin-boundary*1e6, origin+padded*1e6)

    return ax
] np.random.seed(0) # - # # Get data max_alpha = 4 max_pie = 40 # + good_cells = good_df.xs(key='copy',axis=1,level=1).columns sample_code_dict = xlsx_tab.set_index('SAMPLE')['CODE'].to_dict() chrom_dict = (ref.groupby('space').max()['end']//1e6).astype(int).to_dict() two_colors = sns.xkcd_palette(['red', 'bright blue']) def name2code(name): code = [] for i in name.split('_'): if i in sample_code_dict: code.append(sample_code_dict[i]) else: code.append(i) return '_'.join(code) # -