Example #1
0
def dmr_cluster_count_by_class(blocks,
                               show_labels=True,
                               set_labels=None,
                               outdir=None,
                               figname="dmr_cluster_count_by_class",
                               ax=None):
    """
    Draw a Venn diagram of how clusters are distributed amongst the classes for a single result.
    :param blocks: Dictionary keyed by class; each value is an iterable of IDs.
    :param show_labels: If False, no set labels are passed to the Venn diagram.
    :param set_labels: Labels for the sets. If None, the keys of `blocks` are used.
    :param outdir: If supplied, write the plot to this directory (PNG and PDF).
    :param figname: Filename stem used when saving.
    :param ax: Axes to draw on. If None, a new figure with a single axes is created.
    :return: The axes the diagram was drawn on.
    """
    owns_figure = ax is None
    if owns_figure:
        fig = plt.figure()
        ax = fig.add_subplot(111)

    # split the dictionary into parallel tuples: class names and their ID collections
    class_names, id_collections = zip(*blocks.items())

    # show_labels=False overrides any labels, otherwise fall back on the class names
    if not show_labels:
        labels = None
    elif set_labels is None:
        labels = class_names
    else:
        labels = set_labels

    venn.venn_diagram(*id_collections, ax=ax, set_labels=labels)

    # only touch the layout of a figure that was created here
    if owns_figure:
        fig.tight_layout()

    if outdir is not None:
        target = ax.figure
        target.savefig(os.path.join(outdir, "%s.png" % figname), dpi=200)
        target.savefig(os.path.join(outdir, "%s.pdf" % figname))

    return ax
Example #2
0
def dmr_overlap(dmr_results,
                pids=None,
                comparisons=('matched', 'gibco'),
                comparison_titles=('Isogenic', 'Reference'),
                outdir=None,
                figname='dmr_individual_overlap'):
    """
    Plot Venn diagrams showing the overlap of DMRs between individuals.
    One subplot is drawn per comparison; the default is two comparisons (isogenic and reference).
    :param dmr_results: Nested mapping indexed as dmr_results[pid][comparison]; each entry is passed to
    venn.venn_diagram as one set.
    :param pids: Iterable of individuals to include. If None, all keys of dmr_results are used.
    :param comparisons: Iterable of comparison keys, one subplot each.
    :param comparison_titles: Subplot titles, same length as comparisons.
    :param outdir: If supplied, write the plot to this directory (PNG and PDF).
    :param figname: Figure identifier (passed as `num`) and filename stem used when saving.
    :return: None
    :raises AttributeError: If comparisons and comparison_titles differ in length.
    """
    if len(comparisons) != len(comparison_titles):
        raise AttributeError(
            "Length of comparisons and comparison_titles must be equal.")
    if pids is None:
        pids = dmr_results.keys()
    # materialise once: the IDs are iterated for every comparison and reused as set labels, so they must be
    # a re-iterable sequence rather than a dict view or generator
    pids = list(pids)

    # squeeze=False always yields a 2D array of axes, so a single comparison no longer breaks the indexing
    fig, axs = plt.subplots(1, len(comparisons), num=figname, squeeze=False)

    for i, (comparison, title) in enumerate(zip(comparisons, comparison_titles)):
        inputs = [dmr_results[pid][comparison] for pid in pids]
        this_ax = axs[0, i]
        venn.venn_diagram(*inputs, set_labels=pids, ax=this_ax)
        this_ax.set_title(title)

    fig.tight_layout()
    if outdir is not None:
        fig.savefig(os.path.join(outdir, "%s.png" % figname), dpi=200)
        fig.savefig(os.path.join(outdir, "%s.pdf" % figname))
Example #3
0
def venn_dmr_counts(dmr_results,
                    pids=None,
                    outdir=None,
                    figname='dmr_venn',
                    comparisons=('matched', 'gibco'),
                    set_labels=('Isogenic', 'Reference')):
    """
    For each supplied patient (top level of keys in the supplied results), generate 3 Venn diagrams
    - all DMR count
    - hypermethylated DMR counts (median_change > 0)
    - hypomethylated DMR counts (median_change < 0)
    In each case, splitting according to `comparisons`. Patients are laid out in columns, the three
    diagram types in rows.
    :param dmr_results: Dictionary of results, e.g. from the results (results_significant) attribute of DmrResults.
    Indexed as dmr_results[pid][comparison], each entry being a dictionary whose values have a 'median_change' item.
    :param pids: Iterable of patients to include. If None, all keys of dmr_results are used.
    :param outdir: If supplied, write the plot to this directory (PNG and PDF).
    :param figname: Figure identifier (passed as `num`) and filename stem used when saving.
    :param comparisons: Iterable giving the comparisons to use.
    :param set_labels: Iterable giving the set labels to use for the plot. Corresponds to comparisons.
    Labels are only drawn in the first column.
    :return: None
    :raises AttributeError: If set_labels and comparisons differ in length.
    """
    if len(set_labels) != len(comparisons):
        raise AttributeError(
            "set_labels must be the same length as comparisons")
    if pids is None:
        pids = dmr_results.keys()
    # materialise so that len() and repeated iteration work for any iterable
    pids = list(pids)

    # isogenic vs reference in each patient, all, hyper and hypomethylated
    # squeeze=False keeps axs two-dimensional even when only one patient is plotted
    fig, axs = plt.subplots(3, len(pids), figsize=(11, 7), num=figname, squeeze=False)
    for i, pid in enumerate(pids):
        inputs = [dmr_results[pid][k] for k in comparisons]

        inputs_all = [set(x.keys()) for x in inputs]
        inputs_up = [
            {k for k, v in x.items() if v['median_change'] > 0}
            for x in inputs
        ]
        inputs_down = [
            {k for k, v in x.items() if v['median_change'] < 0}
            for x in inputs
        ]

        # only label the sets in the first column to reduce clutter
        sl = set_labels if i == 0 else None

        venn.venn_diagram(*inputs_all, set_labels=sl, ax=axs[0, i])
        axs[0, i].set_title("GBM%s" % pid)

        venn.venn_diagram(*inputs_up, set_labels=sl, ax=axs[1, i])
        venn.venn_diagram(*inputs_down, set_labels=sl, ax=axs[2, i])

    fig.tight_layout()
    if outdir is not None:
        # both formats share the same stem (previously the PDF gained an extra '_venn' suffix)
        fig.savefig(os.path.join(outdir, "%s.png" % figname), dpi=200)
        fig.savefig(os.path.join(outdir, "%s.pdf" % figname))
def plot_venn_de_directions(
    logfc,
    set_colours_dict,
    ax=None,
    set_labels=('Hypo', 'Hyper'),
    fontsize=16
):
    """
    Venn diagram of the rows flagged as consistent in each set, with the set-specific labels annotated by
    the number of set-specific members going up and down.
    :param logfc: Mapping keyed by set label. Each value is a DataFrame with a 'consistent' column; all other
    columns must be castable to float and are averaged row-wise to decide the direction.
    :param set_colours_dict: Mapping from set label to the colour used for that set.
    :param ax: Axes to draw on. If None, a new figure with a single axes is created.
    :param set_labels: Keys of logfc (and set_colours_dict) to plot, also used as the set labels.
    :param fontsize: Font size applied to all text found in the axes.
    :return: The axes the diagram was drawn on.
    """
    if ax is None:
        fig = plt.figure(figsize=(5., 3.3))
        ax = fig.add_subplot(111)

    # restrict each table to the rows flagged as consistent (computed once, reused below)
    consistent_rows = collections.OrderedDict()
    for k in set_labels:
        this_res = logfc[k]
        consistent_rows[k] = this_res.loc[this_res['consistent'].astype(bool)]

    vv, vs, _ = venn.venn_diagram(
        *[consistent_rows[k].index for k in set_labels],
        set_labels=set_labels,
        set_colors=[set_colours_dict[t] for t in set_labels],
        ax=ax
    )
    ax.figure.tight_layout()

    # identifiers of the set-specific regions; the same for every iteration below
    specific_ids = setops.specific_sets(set_labels)

    # modify labels based on direction
    for k in set_labels:
        # exclude the 'consistent' flag from the average: left in, it contributes a constant 1.0 to every
        # row and biases the sign of the mean towards 'up'
        values = consistent_rows[k].drop('consistent', axis=1).astype(float)
        direction = np.sign(values.mean(axis=1))

        # members shared by both sets (region '11') are not counted towards either set
        n_up = len(direction.index[direction > 0].difference(vs['11']))
        n_down = len(direction.index[direction < 0].difference(vs['11']))

        # get the corresponding label
        lbl = vv.get_label_by_id(specific_ids[k])
        ## FIXME: font family seems to change from old text to new - related to LaTeX rendering?
        lbl.set_text(
            lbl.get_text()
            + '\n'
            + r'$%d\uparrow$' % n_up
            + '\n'
            + r'$%d\downarrow$' % n_down,
        )
    plt.setp(common.get_children_recursive(ax, type_filt=plt.Text), fontsize=fontsize)

    return ax
Example #5
0
    all_gs_dict[('mTOR', )] = mtor_geneset

    for_export = pd.DataFrame(index=range(
        max((len(t) for t in all_gs_dict.values()))),
                              columns=[])
    for k, v in all_gs_dict.items():
        the_key = '_'.join(k)
        for_export.loc[range(len(v)), the_key] = sorted(v)
    for_export.fillna('', inplace=True)
    for_export = for_export.sort_index(axis=1)
    for_export.to_excel(os.path.join(outdir, "gene_sets.xlsx"), index=False)

    # Venn diagram showing various mTOR signature options and overlap between them
    fig, ax = plt.subplots()
    venn.venn_diagram(*mtor_gs_dict.values(),
                      set_labels=mtor_gs_dict.keys(),
                      ax=ax)
    fig.tight_layout()
    ax.set_facecolor('w')
    fig.savefig(os.path.join(outdir, "venn_mtor_genesets.png"), dpi=200)

    basedir = os.path.join(HGIC_LOCAL_DIR, 'current/input_data/tcga')

    brennan_s7_fn = os.path.join(basedir, "brennan_s7.csv")
    brennan_s7 = pd.read_csv(brennan_s7_fn, header=0, index_col=0)

    if rnaseq_type == 'counts':
        rnaseq_dat_fn = os.path.join(basedir, 'rnaseq.xlsx')
        rnaseq_meta_fn = os.path.join(basedir, 'rnaseq.meta.xlsx')
        sheet_name = 'htseq'
        wang_fn = os.path.join(basedir, 'wang_classification',
Example #6
0
                     'methylation.450k.meta.csv'),
        'meth_27k':
        os.path.join(DATA_DIR, 'methylation', 'tcga_gbm', 'primary_tumour',
                     'methylation.27k.meta.csv'),
    }

    meta = {}
    for k, fn in meta_fn.items():
        meta[k] = pd.read_csv(fn, header=0, index_col=0)

    # microarray only
    fig = plt.figure()
    ax = fig.add_subplot(111)
    venn.venn_diagram(meta['marr_u133'].case_id,
                      meta['marr_agilent1'].case_id,
                      meta['marr_agilent2'].case_id,
                      set_labels=('U133', 'Agilent 1', 'Agilent 2'),
                      ax=ax)
    plt.tight_layout(rect=(0, 0, 1., 1.05))
    plt.savefig(os.path.join(outdir, "venn_by_caseid_microarray.png"), dpi=200)

    # rnaseq / methylation
    fig = plt.figure()
    ax = fig.add_subplot(111)
    venn.venn_diagram(meta['rnaseq'].index,
                      meta['meth_450k'].index,
                      meta['meth_27k'].index,
                      set_labels=('RNA-Seq', 'Methylation 450K',
                                  'Methylation 27K'),
                      ax=ax)
    plt.tight_layout(rect=(0, 0, 1.05, 1.05))
    de_in_all = reference_genomes.ensembl_to_gene_symbol(
        setops.reduce_intersection(
            *[t.index for t in de_res_separate.values()]))
    # sort this by the avg logFC
    logfc_in_all = pd.DataFrame.from_dict(
        dict([(p, v.loc[de_in_all.index, 'logFC'])
              for p, v in de_res_separate.items()]))
    logfc_in_all = logfc_in_all.loc[logfc_in_all.mean(
        axis=1).abs().sort_values(ascending=False).index]
    general.add_gene_symbols_to_ensembl_data(logfc_in_all)

    fig = plt.figure(figsize=(5, 5))
    ax = fig.add_subplot(111, facecolor='w')
    venn.venn_diagram(set_labels=de_res_separate.keys(),
                      *[t.index for t in de_res_separate.values()],
                      ax=ax)
    fig.tight_layout()
    fig.savefig(os.path.join(outdir,
                             "de_nsc_ss2-polya_combined_dispersion.png"),
                dpi=200)

    fig, axs = plt.subplots(nrows=2,
                            ncols=2,
                            sharex=True,
                            sharey=True,
                            figsize=(10, 10))
    fig.tight_layout()
    for i, p in enumerate(pids):
        ax = axs.flat[i]
        sp = sample_pairs[p]
    gl261_bmdm = pd.read_csv(os.path.join(
        indir, 'gl261_bmdm_vs_healthy_monocyte.csv'),
                             header=0,
                             index_col=0)
    gemm_mg = pd.read_csv(os.path.join(indir, 'gemm_mg_vs_healthy_mg.csv'),
                          header=0,
                          index_col=0)
    gemm_bmdm = pd.read_csv(os.path.join(indir,
                                         'gemm_bmdm_vs_healthy_monocyte.csv'),
                            header=0,
                            index_col=0)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    v, sets, counts = venn.venn_diagram(gl261_mg.index,
                                        gemm_mg.index,
                                        set_labels=("GL261 MG", "GEMM MG"),
                                        ax=ax)
    fig.tight_layout()
    fig.savefig(os.path.join(outdir, "gl261_mg-gemm_mg.png"), dpi=200)
    fig.savefig(os.path.join(outdir, "gl261_mg-gemm_mg.pdf"))

    fig = plt.figure()
    ax = fig.add_subplot(111)
    v, sets, counts = venn.venn_diagram(gl261_bmdm.index,
                                        gemm_bmdm.index,
                                        set_labels=("GL261 BMDM", "GEMM BMDM"),
                                        ax=ax)
    fig.tight_layout()
    fig.savefig(os.path.join(outdir, "gl261_bmdm-gemm_bmdm.png"), dpi=200)
    fig.savefig(os.path.join(outdir, "gl261_bmdm-gemm_bmdm.pdf"))
                               vert=True,
                               patch_artist=True,
                               medianprops=medianprops,
                               widths=0.7)
            ax.set_xticks([])
            for p, c in zip(bplot['boxes'], colours):
                p.set_facecolor(c)

        # add third column with Venn diagrams
        ax = fig.add_subplot(gs[ax_i + 1, 2],
                             facecolor='none',
                             frame_on=False,
                             xticks=[],
                             yticks=[])
        v = venn.venn_diagram(*u_hypo.values(),
                              ax=ax,
                              set_labels=u_hypo.keys(),
                              set_colors=[colours[1]] * 2)
        ax = fig.add_subplot(gs[ax_i, 2],
                             facecolor='none',
                             frame_on=False,
                             xticks=[],
                             yticks=[])
        v = venn.venn_diagram(*u_hyper.values(),
                              ax=ax,
                              set_labels=u_hyper.keys(),
                              set_colors=[colours[0]] * 2,
                              alpha=0.7)

    for i, (k1, obj) in enumerate(to_plot.items()):
        axs[i, 0].set_ylabel(k1)
Example #10
0
    cols = []
    for k, nm in zip(['res_1', 'res_2', 'res_4'], ['original', 'original_filter', 'group_dispersion']):
        for m in methods:
            cols.append("%s_%s" % (nm, m))
    n_pair_only_intersect = pd.DataFrame(index=pids, columns=cols)

    for k, nm in zip(['res_1', 'res_2', 'res_4'], ['original', 'original_filter', 'group_dispersion']):
        for m in methods:
            this_res = to_save[k][m]

            # number of DE genes in Venn diagrams
            fig, axs = plt.subplots(nrows=3, ncols=4, figsize=(10, 8))
            for i, pid in enumerate(pids):
                # number DE in the refs
                ax = axs.flat[i]
                venn.venn_diagram(*[this_res[pid][t].index for t in ['iNSC'] + refs], set_labels=['iNSC'] + refs, ax=ax)
                ax.set_title(pid, fontsize=16)
            for i in range(len(pids), 12):
                ax = axs.flat[i]
                ax.set_visible(False)
            fig.subplots_adjust(left=0.02, right=0.98, bottom=0.02, top=0.95)
            fig.savefig(os.path.join(outdir, "number_de_genes_ref_comparison_%s_%s.png" % (nm, m)), dpi=200)

            # number of PO genes in Venn diagrams
            fig, axs = plt.subplots(nrows=3, ncols=4, figsize=(10, 6))
            for i, pid in enumerate(pids):
                a = this_res[pid]['iNSC'].index
                po = []
                for ref in refs:
                    b = this_res[pid][ref].index
                    vs, vc = setops.venn_from_arrays(a, b)
Example #11
0
    # Venn diagrams (two ESC studies)
    s1 = ['_'.join(t) for t in zip(pids, ['ours'] * len(pids))]
    s2 = ['_'.join(t) for t in zip(['N2', '50'], ['kogut'] * 2)]
    fig, axs = plt.subplots(ncols=3, nrows=3)
    i = 0
    for s in s1 + s2:
        this_arr = []
        for r in ref_labels:
            k = ('iPSC_%s' % s, 'ESC_%s' % r)
            if k in de_res_sign:
                this_arr.append(de_res_sign[k])
        if len(this_arr):
            print "Found comparison %s" % s
            venn.venn_diagram(*[t.index for t in this_arr],
                              ax=axs.flat[i],
                              set_labels=ref_labels)
            axs.flat[i].set_title(s)
            i += 1
        else:
            print "No comparison %s" % s

    # switch off every axes that was not used for a Venn diagram (index with the loop variable j, not i)
    for j in range(i, axs.size):
        axs.flat[j].axis('off')

    fig.subplots_adjust(left=0.05, right=0.98, bottom=0.05, top=0.95)
    fig.savefig(os.path.join(outdir, "ipsc_esc_venn.png"), dpi=200)

    # core DE genes
    ipsc_vs_esc = dict([(k, v) for k, v in de_res_sign.items()
                        if re.search('|'.join(ref_labels), k[1])
                              legend=False,
                              ax=ax)
        if i == 0:
            ax.set_ylabel('% DMRs')
        else:
            ax.yaxis.set_visible(False)

    fig.tight_layout()
    fig.savefig(os.path.join(outdir, "dmr_direction_all_groups.png"), dpi=200)
    fig.savefig(os.path.join(outdir, "dmr_direction_all_groups.tiff"), dpi=200)

    # is there much overlap in the gene sets between the two groups?
    fig = plt.figure(figsize=(5, 3))
    ax = fig.add_subplot(111)
    venn.venn_diagram(genes_from_dmr_groups['Hyper'],
                      genes_from_dmr_groups['Hypo'],
                      set_labels=['Hyper', 'Hypo'],
                      ax=ax)
    fig.tight_layout()
    fig.savefig(os.path.join(outdir, "genes_from_dmr_groups_venn.png"),
                dpi=200)

    # no, but if we look at the intersection genes, are they in different directions (DE) between the two groups?
    groups_inv = dictionary.complement_dictionary_of_iterables(groups,
                                                               squeeze=True)
    in_both = setops.reduce_intersection(*genes_from_dmr_groups.values())
    in_both_ens = reference_genomes.gene_symbol_to_ensembl(in_both)

    # some of these will have no DE results
    tmp = {}
    for pid in pids:
        tmp[pid] = de_res_s1[pid].reindex(in_both_ens).logFC.dropna()