def run_embedding_proc(outdir, plot_configs, desc, tsne_embeds_dict, prefix=None):
    '''Write one scatter plot per tSNE embedding in tsne_embeds_dict.

    outdir: directory the .png files are written into
    plot_configs: (plotting_df, legend_kwargs_list, default_kwargs) triple;
        nothing is plotted when plotting_df is None
    desc: label that is title-cased into every plot title
    tsne_embeds_dict: maps (perplexity, learning_rate) to the embedding
        handed to plot_utils.plot_embeddings
    prefix: optional string prepended to each output file name
    '''
    plotting_df, legend_kwargs_list, default_kwargs = plot_configs
    if plotting_df is None:
        # No plotting configuration available: skip silently.
        return None

    for hyperparams, embedding in tsne_embeds_dict.items():
        perplexity, learning_rate = hyperparams

        # File name encodes the tSNE hyper-parameters (and the prefix, if any).
        if prefix is not None:
            filename = '%s_tsne_embeds_pp_%d_lr_%d.png' % (
                prefix, perplexity, learning_rate)
        else:
            filename = 'tsne_embeds_pp_%d_lr_%d.png' % (
                perplexity, learning_rate)
        destination = os.path.join(outdir, filename)

        heading = '%s tSNE(perplexity=%d, learning_rate=%d)' % (
            desc.title(), perplexity, learning_rate)

        _, axes = plt.subplots()
        plot_utils.plot_embeddings(
            embedding,
            plotting_df,
            ax=axes,
            title=heading,
            legend_kwargs_list=legend_kwargs_list,
            default_kwargs=default_kwargs,
        )
        plot_utils.save(destination, do_pdf=DO_PDF)
        # Close the current figure so figures do not pile up across the loop.
        plt.close()
    return None
def plot_heatmap(outpath, df, sample_linkage, sample_colors, event_linkage,
                 desc, sample_color_lut):
    '''Draw a clustered heatmap of df.T with precomputed linkages and save it.

    outpath: destination handed to plot_utils.save
    df: data to plot; it is transposed first, so df's rows become the
        heatmap columns
    sample_linkage: linkage used for the heatmap columns
    sample_colors: color annotation for the heatmap columns
    event_linkage: linkage used for the heatmap rows
    desc: description whose lower-cased form must start with 'altsplice' or
        'expression'; underscores become spaces in the title and the prefix
        selects the colorbar title ('psi' vs 'log(counts)')
    sample_color_lut: passed through to add_legend

    Raises AssertionError (before any plotting) when desc has neither prefix.
    '''
    kind = desc.lower()
    is_altsplice = kind.startswith('altsplice')
    assert is_altsplice or kind.startswith('expression'), desc

    # Raises the interpreter-wide recursion limit and does not restore it.
    # NOTE(review): presumably required for deep dendrograms -- confirm.
    sys.setrecursionlimit(100000)

    # Single-argument parenthesised form prints identically on Python 2 and 3.
    print("Plotting data ... ")
    graph = sns.clustermap(
        df.T,
        col_colors=sample_colors,
        col_linkage=sample_linkage,
        row_linkage=event_linkage,
        cmap=sns.cubehelix_palette(as_cmap=True),
    )

    heatmap_ax = graph.ax_heatmap
    heatmap_ax.axis('off')
    graph.ax_col_dendrogram.set_title(
        "%s Clustering" % desc.replace('_', ' ').title())
    # NOTE(review): with df.T the columns carry the sample annotations, so
    # these two labels look swapped; the axis is switched off above, so they
    # are kept unchanged here -- confirm intent before altering.
    heatmap_ax.set_xlabel("Events")
    heatmap_ax.set_ylabel("Samples")

    graph.cax.set_title("psi" if is_altsplice else "log(counts)")
    add_legend(graph, sample_color_lut)
    plot_utils.save(outpath)
# Final labelling, tick formatting and output of the delta-test-accuracy
# figure. `it`, `density` and `suffix_of_number` come from the surrounding
# code, which is not visible in this fragment.
# Raw strings keep the LaTeX backslashes without relying on Python passing
# the unrecognised escape `\D` through unchanged.
plt.ylabel(r'$\Delta$ test accuracy')
plt.legend()
plt.tight_layout()
title_template = (
    r'ResNet-50 $\Delta$ test accuracy after {idx}{suffix} pruning iteration '
    r'({sparsity:.2%} sparsity)'
)
plt.title(title_template.format(
    idx=it,
    suffix=suffix_of_number(it),
    sparsity=1 - density,
))

ax = plt.gca()

# Relabel the existing y ticks as percentages.
vals = ax.get_yticks()
ax.set_yticklabels(['{:,.2%}'.format(x) for x in vals])

# Eleven evenly spaced x ticks on [0, 90], each labelled "<tick> x <it>".
ticks1 = np.linspace(0, 90, 11)
int_ticks1 = [round(i) for i in ticks1]
ax.set_xticks(int_ticks1)
vals = ax.get_xticks()
ax.set_xticklabels(
    [r'${} \times {}$'.format(int(x), it) if x > 0 else '0' for x in vals])

# Second layout pass, after the tick labels have been replaced.
plt.tight_layout()

# plot_utils.save() / plot_utils.show() are used as on/off switches here.
if plot_utils.save():
    plt.savefig(
        os.path.join('results_iterative', 'resnet50_{}'.format(it) + '.png'))
if plot_utils.show():
    plt.show()
if not os.path.exists(outdir): os.makedirs(outdir) desc = 'Expression' if NORM: desc = 'Normalized ' + desc try: df = utils.load_large_df(path.replace('.tsv', '')) except IOError: df = pd.read_csv(path, sep='\t', index_col=0) df.iloc[:] = np.minimum(df.values, np.percentile(df.values, 99, axis=0)) keep_cols = filter_to_high_var(df.values, df.columns, MAX_EVENTS) df = df.iloc[:, keep_cols] metadata_df = utils.load_metadata_df(config.metadata_path, df.index) medians = collapse_to_median(df, metadata_df['cnc']) heatmap_dists_with_dendro(medians, norm=NORM) outpath = os.path.join(outdir, desc.lower().replace(' ', '_') +'_rep_dists_heatmap.png') plot_utils.save(outpath, do_pdf=True) # Add AltSplice if False: altsplice_event_list= ['alt_3prime', 'alt_5prime', 'intron_retention', 'exon_skip'] for event in altsplice_event_list: path = os.path.join(config.embed_dir, 'altsplice', event, 'data.tsv') outdir = os.path.join(config.plot_dir, 'altsplice', event, 'heatmap') if not os.path.exists(outdir): os.makedirs(outdir) desc = 'AltSplice %s'%event.title() if NORM: desc = 'Normalized ' + desc print desc print "Loading %s" %path try: df = utils.load_large_df(path.replace('.tsv', '')) except IOError:
def _plot_qc_effects(outdir, values_df, effect_name, desc, tsne_embeds_dict):
    '''Color every tSNE embedding by the first column of values_df.

    outdir: directory to write plots (created if missing)
    values_df: DataFrame whose first column supplies the colors and whose
        first column name is used as the colorbar title
    effect_name: text inserted into the axis title ("<desc> <effect_name> Effects")
    desc: identifies embedding (title-cased into the axis title)
    tsne_embeds_dict: maps (perplexity, learning_rate) to an embedding
    '''
    if not os.path.exists(outdir):
        os.makedirs(outdir)
    cbar_title = values_df.columns[0]
    for (pp, lr), embed_df in tsne_embeds_dict.items():
        outpath = os.path.join(outdir,
                               'tsne_embeds_pp_%d_lr_%d.png' % (pp, lr))
        axis_title = '%s %s Effects\ntSNE(perplexity=%d, learning_rate=%d)' % (
            desc.title(), effect_name, pp, lr)
        fig, ax = plt.subplots()
        plot_utils.plot_continuous_color_embeddings(embed_df,
                                                    values_df.iloc[:, 0],
                                                    ax=ax,
                                                    axis_title=axis_title,
                                                    cbar_title=cbar_title)
        plot_utils.save(outpath, do_pdf=DO_PDF)
        # Release the figure, as run_embedding_proc does, so open figures do
        # not accumulate across the loop.
        plt.close(fig)


def main(embed_dir, plot_dir, desc):
    '''Runs all tasks on a single embedding.

    embed_dir: location of pca & tsne embeddings
    plot_dir: directory to write plots
    desc: identifies embedding (used for e.g. plot titles)

    Which tasks run is controlled by the module-level switches ALL_TASKS,
    COLOR_CNC, HL_CNC, TUMOR_NORMAL, LIBSIZE and DEGSCORE.

    Raises AssertionError if embed_dir does not exist.
    '''
    assert os.path.exists(embed_dir), embed_dir
    if not os.path.exists(plot_dir):
        os.makedirs(plot_dir)

    # Load each QC table under the same condition that later uses it, so
    # ALL_TASKS alone no longer dereferences a None table.
    run_degscore = ALL_TASKS or DEGSCORE
    run_libsize = ALL_TASKS or LIBSIZE
    rnadeg_df = utils.load_rnadeg(config.rnadeg_path) if run_degscore else None
    libsize_df = utils.load_libsize(config.libsize_path) if run_libsize else None

    pca_model, pca_embeds, tsne_embeds_dict = utils.load_embeds(
        embed_dir,
        whitelist=WHITELIST,
        pp_set=TSNE_PP_PLOT_SET,
        lr_set=TSNE_LR_PLOT_SET)
    metadata_df = utils.load_metadata_df(config.metadata_path, pca_embeds.index)
    metadata_df, subtype_names = utils.append_subtype(config.subtype_path,
                                                      metadata_df)

    if ALL_TASKS or COLOR_CNC:
        outdir = os.path.join(plot_dir, 'complete')
        if not os.path.exists(outdir):
            os.makedirs(outdir)
        plot_configs = plot_args.cnc_plotting_args(metadata_df)
        run_embedding_proc(outdir, plot_configs, desc, tsne_embeds_dict)

    if ALL_TASKS or HL_CNC is not None:
        if ALL_TASKS or HL_CNC == 'all':
            cnc_list = np.unique(metadata_df['cnc'].values)
        else:
            cnc_list = HL_CNC
        cnc_groups = metadata_df.groupby('cnc')
        for cnc in cnc_list:
            # Kept only for its failure mode: get_group raises KeyError for a
            # value that is not present in metadata_df['cnc'].
            cnc_groups.get_group(cnc)
            for subtype in subtype_names:
                if DEBUG:
                    print('%s %s' % (cnc, subtype))
                subtype_configs = plot_args.load_subtype_color_tumor_marker_kwargs(
                    metadata_df, subtype, cnc)
                outdir = os.path.join(plot_dir, 'highlights_subtype', cnc)
                if not os.path.exists(outdir):
                    os.makedirs(outdir)
                subtype_desc = ' '.join([subtype, desc])
                run_embedding_proc(outdir, subtype_configs, subtype_desc,
                                   tsne_embeds_dict, prefix=subtype)

    if ALL_TASKS or TUMOR_NORMAL:
        if DEBUG:
            print("start tumor normal")
        tn_configs = plot_args.tumor_normal_plotting_args(metadata_df)
        outdir = os.path.join(plot_dir, 'complete_tumor_normal')
        if not os.path.exists(outdir):
            os.makedirs(outdir)
        tn_desc = '%s Tumor/Normal' % desc
        run_embedding_proc(outdir, tn_configs, tn_desc, tsne_embeds_dict)

    if run_libsize:
        if DEBUG:
            print(" start libsize")
        _plot_qc_effects(os.path.join(plot_dir, 'qc', 'libsize'), libsize_df,
                         'Library Size', desc, tsne_embeds_dict)

    if run_degscore:
        if DEBUG:
            print(" start degscore")
        _plot_qc_effects(os.path.join(plot_dir, 'qc', 'degscore'), rnadeg_df,
                         'RNADeg', desc, tsne_embeds_dict)