Beispiel #1
0
def run_embedding_proc(outdir,
                       plot_configs,
                       desc,
                       tsne_embeds_dict,
                       prefix=None):
    '''Save one scatter plot per entry of tsne_embeds_dict into outdir.

    outdir: directory the image files are written to
    plot_configs: 3-tuple (plotting_df, legend_kwargs_list, default_kwargs)
        forwarded to plot_utils.plot_embeddings; when plotting_df is None
        nothing is plotted
    desc: label of the embedding; its title-cased form opens every plot title
    tsne_embeds_dict: maps (perplexity, learning_rate) keys to the embedding
        handed to plot_utils.plot_embeddings
    prefix: optional string put in front of each file name, joined by '_'
    '''
    plotting_df, legend_kwargs_list, default_kwargs = plot_configs
    if plotting_df is None:
        return

    for tsne_params, embedding in tsne_embeds_dict.items():
        perplexity, learning_rate = tsne_params

        # Choose the file-name pattern together with its matching arguments.
        if prefix is not None:
            name_pattern = '%s_tsne_embeds_pp_%d_lr_%d.png'
            name_args = (prefix, perplexity, learning_rate)
        else:
            name_pattern = 'tsne_embeds_pp_%d_lr_%d.png'
            name_args = (perplexity, learning_rate)
        target = os.path.join(outdir, name_pattern % name_args)

        heading = '%s tSNE(perplexity=%d, learning_rate=%d)' % (
            desc.title(), perplexity, learning_rate)

        _, axes = plt.subplots()
        plot_utils.plot_embeddings(embedding,
                                   plotting_df,
                                   default_kwargs=default_kwargs,
                                   legend_kwargs_list=legend_kwargs_list,
                                   ax=axes,
                                   title=heading)
        plot_utils.save(target, do_pdf=DO_PDF)
        # Release the current figure before the next embedding is drawn.
        plt.close()
Beispiel #2
0
def plot_heatmap(outpath, df, sample_linkage, sample_colors, event_linkage,
                 desc, sample_color_lut):
    '''Draw a clustered heatmap of df and save it through plot_utils.save.

    outpath: destination passed to plot_utils.save
    df: data to plot; it is transposed first, so the rows of df end up as
        heatmap columns and the columns of df as heatmap rows
    sample_linkage: precomputed linkage used for the heatmap columns
    sample_colors: color annotation for the heatmap columns
    event_linkage: precomputed linkage used for the heatmap rows
    desc: has to start with 'altsplice' or 'expression' (case-insensitive);
        picks the colorbar title and, with '_' replaced by spaces and
        title-cased, forms the plot title
    sample_color_lut: handed to add_legend together with the cluster grid

    Raises AssertionError when desc has neither accepted prefix.
    '''
    desc_lower = desc.lower()
    assert desc_lower.startswith(('altsplice', 'expression')), desc
    is_altsplice = desc_lower.startswith('altsplice')

    # NOTE(review): presumably needed because drawing deep dendrograms
    # recurses heavily -- confirm. max() keeps an already higher limit intact.
    sys.setrecursionlimit(max(sys.getrecursionlimit(), 100000))
    print("Plotting data ... ")
    graph = sns.clustermap(df.T,
                           col_colors=sample_colors,
                           col_linkage=sample_linkage,
                           row_linkage=event_linkage,
                           cmap=sns.cubehelix_palette(as_cmap=True))
    graph.ax_heatmap.axis('off')
    graph.ax_col_dendrogram.set_title("%s Clustering" %
                                      ' '.join(desc.split('_')).title())
    # Columns carry the sample linkage/colors and rows the event linkage, so
    # samples run along x and events along y.
    graph.ax_heatmap.set_xlabel("Samples")
    graph.ax_heatmap.set_ylabel("Events")
    graph.cax.set_title("psi" if is_altsplice else "log(counts)")
    add_legend(graph, sample_color_lut)
    plot_utils.save(outpath)
    return
    plt.ylabel('$\Delta$ test accuracy')
    plt.legend()
    plt.tight_layout()
    plt.title(
        'ResNet-50 $\Delta$ test accuracy after {idx}{suffix} pruning iteration ({sparsity:.2%} sparsity)'
        .format(
            idx=it,
            suffix=suffix_of_number(it),
            sparsity=1 - density,
        ))
    vals = plt.gca().get_yticks()
    plt.gca().set_yticklabels(['{:,.2%}'.format(x) for x in vals])

    ticks1 = np.linspace(0, 90, 11)
    int_ticks1 = [round(i) for i in ticks1]
    plt.gca().set_xticks(int_ticks1)

    vals = plt.gca().get_xticks()
    plt.gca().set_xticklabels(
        [r'${} \times {}$'.format(int(x), it) if x > 0 else '0' for x in vals])

    plt.tight_layout()

    if plot_utils.save():
        plt.savefig(
            os.path.join('results_iterative',
                         'resnet50_{}'.format(it) + '.png'))

if plot_utils.show():
    plt.show()
Beispiel #4
0
        if not os.path.exists(outdir): os.makedirs(outdir)
        desc = 'Expression'
        if NORM: desc = 'Normalized ' + desc
        try:
            df = utils.load_large_df(path.replace('.tsv', ''))
        except IOError:
            df = pd.read_csv(path, sep='\t', index_col=0)

        df.iloc[:] = np.minimum(df.values, np.percentile(df.values, 99, axis=0))
        keep_cols = filter_to_high_var(df.values, df.columns, MAX_EVENTS)
        df = df.iloc[:, keep_cols]
        metadata_df = utils.load_metadata_df(config.metadata_path, df.index)
        medians = collapse_to_median(df, metadata_df['cnc'])
        heatmap_dists_with_dendro(medians, norm=NORM)
        outpath = os.path.join(outdir, desc.lower().replace(' ', '_') +'_rep_dists_heatmap.png')
        plot_utils.save(outpath, do_pdf=True)

    # Add AltSplice
    if False:
        altsplice_event_list= ['alt_3prime', 'alt_5prime', 'intron_retention', 'exon_skip']
        for event in altsplice_event_list:
            path = os.path.join(config.embed_dir, 'altsplice', event, 'data.tsv')
            outdir = os.path.join(config.plot_dir, 'altsplice', event, 'heatmap')
            if not os.path.exists(outdir): os.makedirs(outdir)
            desc = 'AltSplice %s'%event.title()
            if NORM: desc = 'Normalized ' + desc
            print desc
            print "Loading %s" %path
            try:
                df = utils.load_large_df(path.replace('.tsv', ''))
            except IOError:
Beispiel #5
0
def _plot_qc_embeddings(outdir, desc, effect_name, values_df,
                        tsne_embeds_dict):
    '''Plot every tSNE embedding colored by the first column of values_df.

    outdir: directory for the images; created when missing
    desc: embedding label; its title-cased form opens the axis title
    effect_name: text placed before 'Effects' in the axis title
    values_df: frame whose first column supplies the colors and whose first
        column name is used as the colorbar title
    tsne_embeds_dict: maps (perplexity, learning_rate) to an embedding
    '''
    if not os.path.exists(outdir): os.makedirs(outdir)
    cbar_title = values_df.columns[0]
    for (pp, lr), embed_df in tsne_embeds_dict.items():
        outpath = os.path.join(outdir,
                               'tsne_embeds_pp_%d_lr_%d.png' % (pp, lr))
        axis_title = '%s %s Effects\ntSNE(perplexity=%d, learning_rate=%d)' % (
            desc.title(), effect_name, pp, lr)
        fig, ax = plt.subplots()
        plot_utils.plot_continuous_color_embeddings(embed_df,
                                                    values_df.iloc[:, 0],
                                                    ax=ax,
                                                    axis_title=axis_title,
                                                    cbar_title=cbar_title)
        plot_utils.save(outpath, do_pdf=DO_PDF)
        # Close the figure so that open figures do not pile up across the loop.
        plt.close(fig)


def main(embed_dir, plot_dir, desc):
    '''Runs all tasks on a single embedding.

    embed_dir: location of pca & tsne embeddings
    plot_dir: directory to write plots
    desc: identifies embedding (used for e.g. plot titles)

    Which tasks run is controlled by the module-level switches ALL_TASKS,
    COLOR_CNC, HL_CNC, TUMOR_NORMAL, LIBSIZE and DEGSCORE. Raises
    AssertionError when embed_dir does not exist.
    '''
    assert os.path.exists(embed_dir), embed_dir
    if not os.path.exists(plot_dir): os.makedirs(plot_dir)

    # The QC sections also run under ALL_TASKS, so load their inputs under
    # exactly the condition that later uses them.
    run_degscore = ALL_TASKS or DEGSCORE
    run_libsize = ALL_TASKS or LIBSIZE
    rnadeg_df = utils.load_rnadeg(config.rnadeg_path) if run_degscore else None
    libsize_df = utils.load_libsize(config.libsize_path) if run_libsize else None

    pca_model, pca_embeds, tsne_embeds_dict = utils.load_embeds(
        embed_dir,
        whitelist=WHITELIST,
        pp_set=TSNE_PP_PLOT_SET,
        lr_set=TSNE_LR_PLOT_SET)

    metadata_df = utils.load_metadata_df(config.metadata_path,
                                         pca_embeds.index)
    metadata_df, subtype_names = utils.append_subtype(config.subtype_path,
                                                      metadata_df)

    if ALL_TASKS or COLOR_CNC:
        outdir = os.path.join(plot_dir, 'complete')
        if not os.path.exists(outdir): os.makedirs(outdir)
        plot_configs = plot_args.cnc_plotting_args(metadata_df)
        run_embedding_proc(outdir, plot_configs, desc, tsne_embeds_dict)

    if ALL_TASKS or HL_CNC is not None:
        # Either every 'cnc' value found in the metadata or the configured
        # subset.
        if ALL_TASKS or HL_CNC == 'all':
            cnc_list = np.unique(metadata_df['cnc'].values)
        else:
            cnc_list = HL_CNC
        for cnc in cnc_list:
            for subtype in subtype_names:
                if DEBUG: print("%s %s" % (cnc, subtype))
                subtype_configs = plot_args.load_subtype_color_tumor_marker_kwargs(
                    metadata_df, subtype, cnc)
                outdir = os.path.join(plot_dir, 'highlights_subtype', cnc)
                if not os.path.exists(outdir): os.makedirs(outdir)
                subtype_desc = ' '.join([subtype, desc])
                run_embedding_proc(outdir,
                                   subtype_configs,
                                   subtype_desc,
                                   tsne_embeds_dict,
                                   prefix=subtype)

    if ALL_TASKS or TUMOR_NORMAL:
        if DEBUG: print("start tumor normal")
        tn_configs = plot_args.tumor_normal_plotting_args(metadata_df)
        outdir = os.path.join(plot_dir, 'complete_tumor_normal')
        if not os.path.exists(outdir): os.makedirs(outdir)
        tn_desc = '%s Tumor/Normal' % desc
        run_embedding_proc(outdir, tn_configs, tn_desc, tsne_embeds_dict)

    if run_libsize:
        if DEBUG: print(" start libsize")
        _plot_qc_embeddings(os.path.join(plot_dir, 'qc', 'libsize'), desc,
                            'Library Size', libsize_df, tsne_embeds_dict)

    if run_degscore:
        if DEBUG: print(" start degscore")
        _plot_qc_embeddings(os.path.join(plot_dir, 'qc', 'degscore'), desc,
                            'RNADeg', rnadeg_df, tsne_embeds_dict)