Esempio n. 1
0
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher

if __name__ == '__main__':
    # Script entry point: builds the label/count inputs for the hg19 MCF-7
    # pie charts and hands them to SeqGrapher.piechart.
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/Glass Atlas/NAR_review_data/'
    dirpath = yzer.get_path(dirpath)
    img_dirpath = yzer.get_and_create_path(dirpath, 'hg19_mcf7_pie_charts')

    yzer.legend_location = 'lower left'
    # Each row is "<label><four spaces><count with thousands separators>".
    pie1 = '''Annotated by RefSeq and/or ncRNA.org    14,022
Unannotated    67,046'''
    pie1 = [row.split('    ') for row in pie1.split('\n')]
    # Transpose rows into (labels, counts). zip() returns a one-shot iterator
    # on Python 3 that cannot be indexed, so materialize it as a list; on
    # Python 2 this is a no-op copy of the list zip() already returned.
    pie1 = list(zip(*pie1))
    yzer.piechart([int(s.replace(',', '')) for s in pie1[1]],
                  pie1[0],
                  title='Hah et al MCF-7 Transcripts\nwith Score >= 1',
                  save_dir=img_dirpath,
                  show_plot=True)

    pie2 = '''Promoter-associated RNA    7,055
Antisense of RefSeq    7,539
Other RefSeq Proximal    13,664
Distal with H3K4me2    2,352
Distal w/in 2kbp of H3K4me2    5,524
Distal remainder with LINE    16,292
Remainder    14,620'''
    pie2 = [row.split('    ') for row in pie2.split('\n')]
    # Same transposition as pie1; kept indexable for the piechart call below.
    pie2 = list(zip(*pie2))
    yzer.legend_columns = 2
    yzer.piechart(
        map(lambda s: int(s.replace(',', '')), pie2[1]),
Esempio n. 2
0
                 ]
 
 # Rows whose summed p65 tag counts (KLA and KLA+Dex) do not exceed min_tags.
 no_p65 = (dataset['p65_kla_tag_count'] + dataset['p65_kla_dex_tag_count'] <= min_tags)

 # GR tag count above threshold in Dex, without p65.
 gr_in_dex = dataset['gr_dex_tag_count'] > min_tags
 in_dex_no_p65 = dataset[gr_in_dex & no_p65]

 # GR tag count above threshold only in KLA+Dex, without p65.
 gr_absent_in_dex = dataset['gr_dex_tag_count'] <= min_tags
 gr_in_kla_dex = dataset['gr_kla_dex_tag_count'] > min_tags
 kla_only_no_p65 = dataset[gr_absent_in_dex & gr_in_kla_dex & no_p65]

 sets = [
     tethered,
     direct_comp_gr,
     indirect_comp_gr,
     direct_comp_p65,
     cobound,
     direct_novel,
     indirect_novel,
     in_dex_no_p65,
     kla_only_no_p65,
 ]
 id_sets = [d['nearest_refseq_transcript_id'].unique() for d in sets]
 # Whatever transcript ids remain in total_gr after removing every subset
 # become the final 'Other with GR' slice.
 for id_set in id_sets:
     total_gr = total_gr - set(id_set)
 counts = [len(id_set) for id_set in id_sets]
 counts.append(len(total_gr))

 labels = [
     'Tethered',
     'Direct competition, favor to GR',
     'Indirect competition, favor to GR',
     'Direct competition, favor to p65',
     'Directly co-bound without loss',
     'Directly bound novel p65 site',
     'Indirectly bound novel p65 site',
     'Has GR in Dex, no p65',
     'Has GR in KLA+Dex only, no p65',
     'Other with GR',
 ]
 if draw_pies:
     pie_title = 'Genes near Enhancer-like Subsets {0} with GR\nby Putative Enhancer Mechanism'.format(name.title())
     yzer.piechart(counts, labels,
                   title=pie_title,
                   small_legend=True,
                   save_dir=img_dirpath, show_plot=True)
 
 
Esempio n. 3
0
        # One boxplot per Foxo1 tag-count column, with one box per group.
        for foxo1_col, plot_title in (
                ('naive_foxo1_tag_count',
                 'Foxo1 tags in ATAC-seq regions by group'),
                ('lcmv_d12_foxo1_tag_count',
                 'LCMV d12 Foxo1 tags in ATAC-seq regions by group')):
            yzer.boxplot([gp[foxo1_col] for gp in groups],
                         labels,
                         title=plot_title,
                         ylabel='Foxo1 peak tag count',
                         save_dir=save_path,
                         show_plot=False)

    if True:
        # For every group: a pie of peaks at/above versus below min_thresh for
        # each Foxo1 column, then a histogram of the naive tag counts.
        for i, gp in enumerate(groups):
            naive_tags = gp['naive_foxo1_tag_count']
            group_label = labels[i]
            yzer.piechart(
                [sum(naive_tags >= min_thresh), sum(naive_tags < min_thresh)],
                ['With Foxo1', 'Without Foxo1'],
                title='Co-occurrence with Foxo1- ' + group_label,
                save_dir=save_path,
                show_plot=False)

            lcmv_tags = gp['lcmv_d12_foxo1_tag_count']
            yzer.piechart(
                [sum(lcmv_tags >= min_thresh), sum(lcmv_tags < min_thresh)],
                ['With Foxo1', 'Without Foxo1'],
                title='Co-occurrence with LCMV d12 Foxo1- ' + group_label,
                save_dir=save_path,
                show_plot=False)

            yzer.histogram(
                gp['naive_foxo1_tag_count'].tolist(),
                bins=20,
                title='Foxo1 peak tag count distribution- ' + group_label,
                xlabel='Tag count in Foxo1 peak',
                ylabel='Number of peaks',
                save_dir=save_path,
                show_plot=False)
Esempio n. 4
0
if __name__ == '__main__':
    # Script entry point: load the CTCF/STAT1 overlap table, split it on
    # p2_tag_count, and prepare the columns used by the plots.
    yzer = SeqGrapher()

    dirpath = yzer.get_path(
        'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/CD4TCells/Oshea_enhancers/ctcf_stat1_overlap')
    img_dirpath = yzer.get_and_create_path(dirpath, 'figures')

    source_file = yzer.get_filename(dirpath, 'ctcf_with_stat1_binding.txt')
    # Missing values are replaced by 0 so the == 0 test below catches them.
    data = yzer.import_file(source_file).fillna(0)

    # p2_tag_count is treated as the STAT1 tag count for each CTCF site.
    stat1_tags = data['p2_tag_count']
    with_stat1 = data[stat1_tags > 0]
    without_stat1 = data[stat1_tags == 0]

    if True:
        site_counts = [len(with_stat1), len(without_stat1)]
        ax = yzer.piechart(
            site_counts,
            ['CTCF sites with STAT1', 'CTCF sites without STAT1'],
            title='DP Thymocyte CTCF Sites with STAT1 in Th1 Cells',
            save_dir=img_dirpath,
            show_plot=True)

    # Derived columns consumed by the log-scale scatterplot that follows.
    for target_col, source_col in (
            ('tag_count_nonzero', 'tag_count'),
            ('p2_tag_count_nonzero', 'p2_tag_count')):
        data[target_col] = nonzero(data[source_col])
    ax = yzer.scatterplot(
        data,
        'tag_count_nonzero',
        'p2_tag_count_nonzero',
        xlabel='CTCF Tag Count',
        ylabel='Stat1 Tag Count',
        log=True,
        color='blue',
        title='Tags in CTCF Peaks versus Overlapping Stat1 Peaks',
        show_2x_range=False,
        show_legend=False,
Esempio n. 5
0
            & (refseq_no_runoff['refseq'] == 't')
            & (refseq_no_runoff['percent_covered'] < 1.5)]
        if False:
            # Disabled plots: coverage histograms and relationship pies for
            # the mRNA subsets with and without runoff.
            for mrna_subset in (mrna_with_runoff, mrna_no_runoff):
                yzer.histogram(mrna_subset['percent_covered'])

            relationships = ['is contained by', 'contains', 'overlaps with']
            with_runoff_counts = []
            no_runoff_counts = []
            for rel in relationships:
                with_runoff_counts.append(
                    sum(mrna_with_runoff['relationship'] == rel))
                no_runoff_counts.append(
                    sum(mrna_no_runoff['relationship'] == rel))
            yzer.piechart(with_runoff_counts, labels=relationships)
            yzer.piechart(no_runoff_counts, labels=relationships)

    if True:
        # Filter down to high-expression genes
        def distance_to_reg_end(row):
            """Return the signed distance between RefSeq and transcript ends.

            Args:
                row: Mapping-like record (e.g. a DataFrame row) providing
                    'strand' plus, for strand 0, 'transcription_end' and
                    'transcription_end(2)', or, for strand 1,
                    'transcription_start' and 'transcription_start(2)'.

            Returns:
                The difference described below; positive if the RefSeq
                annotation is longer.

            Raises:
                ValueError: If 'strand' is neither 0 nor 1. Previously this
                    case fell through and failed with UnboundLocalError.
            """
            strand = row['strand']
            if strand == 0:
                # RefSeq annotated end - transcript end; pos if Refseq is longer
                return row['transcription_end(2)'] - row['transcription_end']
            if strand == 1:
                # transcript start - RefSeq annotated start; pos if Refseq is longer
                return row['transcription_start'] - row[
                    'transcription_start(2)']
            raise ValueError(
                'Unexpected strand value {0!r}; expected 0 or 1'.format(strand))
        # Partition on the two fold-change columns: kla_1_lfc_trans at/above
        # 1, and dex_over_kla_1_lfc_trans at/below -.58.
        kla_lfc = data['kla_1_lfc_trans']
        dex_over_kla_lfc = data['dex_over_kla_1_lfc_trans']
        transrepressed = data[(kla_lfc >= 1) & (dex_over_kla_lfc <= -.58)]
        not_trans = data[(kla_lfc < 1) | (dex_over_kla_lfc > -.58)]
        up_in_kla = data[(kla_lfc >= 1) & (dex_over_kla_lfc > -.58)]

    # (display name, DataFrame) pairs; 'All' comes first so the pie below can
    # skip it with a slice.
    supersets = (('All', data), ('Not near transrepressed genes', not_trans),
                 ('Up in KLA', up_in_kla), ('Near transrepressed genes',
                                            transrepressed))

    # Plot trans versus not
    if draw_pies:
        # Unpack the transposed pairs once. Indexing zip(...) directly fails
        # on Python 3, where zip() returns an iterator rather than a list.
        subset_names, subset_frames = zip(*supersets[1:])
        yzer.piechart([len(d) for d in subset_frames],
                      subset_names,
                      title='Enhancer-like Subsets by state in KLA+Dex',
                      save_dir=img_dirpath,
                      show_plot=False)

    # (display name, column-name fragment) pairs.
    tfs = [('PU.1', 'pu_1'), ('p65', 'p65'), ('GR', 'gr')]
    contexts = [('DMSO', ''), ('Dex', 'dex'), ('KLA', 'kla'),
                ('KLA+Dex', 'kla_dex')]

    for name, dataset in supersets:
        total_for_set = len(dataset)

        # Have GR: summed Dex and KLA+Dex GR tag counts must exceed min_tags.
        gr_tag_total = (dataset['gr_dex_tag_count'] +
                        dataset['gr_kla_dex_tag_count'])
        dataset = dataset[gr_tag_total > min_tags]
        total_gr = len(dataset)
        if draw_pies:
Esempio n. 7
0
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher

if __name__ == '__main__':
    # Script entry point: builds the label/count inputs for the figure 4 pie
    # charts and hands them to SeqGrapher.piechart.
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/Glass Atlas/Demo-data'
    dirpath = yzer.get_path(dirpath)
    img_dirpath = yzer.get_and_create_path(dirpath, 'figure_4_pie_charts')

    yzer.legend_location = 'lower left'
    # Each row is "<label><four spaces><count with thousands separators>".
    pie1 = '''Annotated by RefSeq and/or ncRNA.org    16,945
Unannotated    36,578'''
    pie1 = [row.split('    ') for row in pie1.split('\n')]
    # Transpose rows into (labels, counts). zip() returns a one-shot iterator
    # on Python 3 that cannot be indexed, so materialize it as a list; on
    # Python 2 this is a no-op copy of the list zip() already returned.
    pie1 = list(zip(*pie1))
    yzer.piechart([int(s.replace(',', '')) for s in pie1[1]],
                  pie1[0],
                  title='Transcripts with Score >= 2',
                  save_dir=img_dirpath,
                  show_plot=True)

    pie2 = '''Promoter-associated RNA    6,314
Antisense  of RefSeq    5,604
Post-TTS, same-strand    6,940
Other RefSeq Proximal    3,119
Distal with H3K4me1    7,458
Distal within 2kbp of H3K4me1    1,639
Remainder    5,504'''
    pie2 = [row.split('    ') for row in pie2.split('\n')]
    # Same transposition as pie1; kept indexable for the piechart call below.
    pie2 = list(zip(*pie2))
    yzer.legend_columns = 2
    yzer.piechart(map(lambda s: int(s.replace(',', '')), pie2[1]),
                  pie2[0],
Esempio n. 8
0
            '''.format(total,
                       acetylated, (acetylated / total) * 100,
                       foxp3, (foxp3 / total) * 100,
                       both, (both / total) * 100,
                       k=k)
            print summary

            # One pie per group with four slices: H3K27Ac only, both marks,
            # FoxP3 only, and neither.
            cell_names = [s.title() for s in k.split('_')]
            relevant_cells = ', '.join(cell_names)
            pie_title = 'FoxP3 and H3K27Ac at Enhancers\nwith H3K4me2 in {}'.format(
                relevant_cells)
            # `both` is subtracted so the overlap is counted in one slice only.
            counts = [acetylated - both, both, foxp3 - both, none]
            slice_labels = [
                'Has H3K27Ac in Tregs',
                'Has Both',
                'Has FoxP3 in Tregs',
                'Has Neither',
            ]
            grapher.piechart(counts=counts,
                             labels=slice_labels,
                             title=pie_title,
                             small_legend=False,
                             colors=['#FFFB97', '#D5F0CB', '#ABE4FF', 'white'],
                             save_dir=graph_dirpath,
                             show_plot=False)

        if True:
            # Split the 'treg' table on the FoxP3 tag-count cutoff (min_score)
            # and store both halves back in `data` under new keys.
            data['with_foxp3'] = data['treg'][
                data['treg']['foxp3_tag_count'] >= min_score]
            data['without_foxp3'] = data['treg'][
                data['treg']['foxp3_tag_count'] < min_score]

            # Only the 'with_foxp3' subset is iterated here; 'without_foxp3'
            # is stored above but not included in this tuple.
            for k in ('with_foxp3', ):
                first_peak = 'treg'
                subset = data[k]
Esempio n. 9
0
    for celltype in ('hi', 'lo'):
        d7 = datasets['klrg{}_d7'.format(celltype)]
        # De novo: rows whose d0 tag count is below the threshold.
        de_novo = d7[d7['d0_tag_count'] < min_thresh]

        # Boolean masks on the Foxo1 KO tag-count column for this cell type.
        ko_col = 'foxo1_ko_klrg{}_d7_tag_count'.format(celltype)
        all_shared = d7[ko_col] >= min_thresh
        all_not_shared = d7[ko_col] < min_thresh
        shared = de_novo[ko_col] >= min_thresh
        not_shared = de_novo[ko_col] < min_thresh

        labels = ['Also in Foxo1 KO', 'Not in Foxo1 KO']
        pie_specs = (
            ('WT KLRG{} d7 Enhancers', all_shared, all_not_shared),
            ('WT KLRG{} d7 De Novo Enhancers', shared, not_shared),
        )
        for title_template, in_ko, not_in_ko in pie_specs:
            # sum() over a boolean mask counts the True entries.
            yzer.piechart([sum(in_ko), sum(not_in_ko)],
                          labels,
                          title=title_template.format(celltype),
                          save_dir=save_path, show_plot=False)

        all_tag_counts = [d7[mask]['tag_count'].tolist()
                          for mask in (all_shared, all_not_shared)]
        yzer.boxplot(all_tag_counts,
                     labels,
                     title='ATAC-seq tags in WT KLRG{} d7 Enhancers'.format(
                         celltype),
                     ylabel='ATAC peak tag count', save_dir=save_path,
                     show_plot=False)
        yzer.boxplot([de_novo[shared]['tag_count'].tolist(),
                      de_novo[not_shared]['tag_count'].tolist()],
Esempio n. 10
0
 # Left-join transcript columns onto the data; overlapping transcript column
 # names receive the '_trans' suffix.
 data = data.merge(transcripts, how='left', on='nearest_refseq_transcript_id', suffixes=['','_trans'])

 # Missing values become 0 before the threshold comparisons below.
 data = data.fillna(0)

 transrepressed = data[(data['kla_1_lfc_trans'] >= 1) & (data['dex_over_kla_1_lfc_trans'] <= -.58)]
 not_trans = data[(data['kla_1_lfc_trans'] < 1) | (data['dex_over_kla_1_lfc_trans'] > -.58)]
 up_in_kla = data[(data['kla_1_lfc_trans'] >= 1) & (data['dex_over_kla_1_lfc_trans'] > -.58)]

 # (display name, DataFrame) pairs; 'All' comes first so the pie below can
 # skip it with a slice.
 supersets = (('All', data),
              ('Not near transrepressed genes', not_trans),
              ('Up in KLA', up_in_kla),
              ('Near transrepressed genes',transrepressed))

 # Plot trans versus not
 # Unpack the transposed pairs once. Indexing zip(...) directly fails on
 # Python 3, where zip() returns an iterator rather than a list.
 subset_names, subset_frames = zip(*supersets[1:])
 yzer.piechart([len(d) for d in subset_frames], subset_names,
              title='Enhancer-like Subsets by state in KLA+Dex',
              save_dir=img_dirpath, show_plot=False)

 # (display name, column-name fragment) pairs.
 tfs = [('PU.1','pu_1'),('p65','p65'),('GR','gr')]
 contexts = [('DMSO',''),('Dex','dex'),('KLA','kla'),('KLA+Dex','kla_dex')]
 
 for name, dataset in supersets:
     total_for_set = len(dataset)
     for tf_name, tf in tfs:
         # Get count for enhancer elements with this TF at all.
         # Column names are '<tf>_<context>_tag_count'; the empty DMSO
         # context contributes no extra underscore.
         cols = ['{0}_{1}tag_count'.format(tf, (c + '_') if c else '')
                 for _, c in contexts]
         # Highest tag count per row across all contexts for this TF.
         max_tags = dataset.filter(items=cols).max(axis=1)
         with_tf = dataset[max_tags > min_tags]
         without_tf = dataset[max_tags <= min_tags]
         
         # Plot with TF versus not
         yzer.piechart([ len(without_tf), len(with_tf)], 
Esempio n. 11
0
    # Partition on the two fold-change columns: kla_1_lfc_trans at/above 1,
    # and dex_over_kla_1_lfc_trans at/below -.58.
    transrepressed = data[(data['kla_1_lfc_trans'] >= 1)
                          & (data['dex_over_kla_1_lfc_trans'] <= -.58)]
    not_trans = data[(data['kla_1_lfc_trans'] < 1) |
                     (data['dex_over_kla_1_lfc_trans'] > -.58)]
    up_in_kla = data[(data['kla_1_lfc_trans'] >= 1)
                     & (data['dex_over_kla_1_lfc_trans'] > -.58)]

    # (display name, DataFrame) pairs; 'All' comes first so the pie below can
    # skip it with a slice.
    supersets = (('All', data), ('Not near transrepressed genes', not_trans),
                 ('Up in KLA', up_in_kla), ('Near transrepressed genes',
                                            transrepressed))

    # Plot trans versus not
    # Unpack the transposed pairs once. Indexing zip(...) directly fails on
    # Python 3, where zip() returns an iterator rather than a list.
    subset_names, subset_frames = zip(*supersets[1:])
    yzer.piechart([len(d) for d in subset_frames],
                  subset_names,
                  title='Enhancer-like Subsets by state in KLA+Dex',
                  save_dir=img_dirpath,
                  show_plot=False)

    for name, dataset in supersets:
        total_for_set = len(dataset)
        # Keep only rows whose KLA+Dex GR tag count exceeds min_tags.
        has_gr = dataset['gr_kla_dex_tag_count'] > min_tags
        dataset = dataset[has_gr]
        # Get count for enhancer elements with/out CpG
        cpg_flag = dataset['has_cpg_enh']
        with_cpg = dataset[cpg_flag == 1]
        without_cpg = dataset[cpg_flag == 0]

        # Plot with TF versus not
        yzer.piechart(
            [len(without_cpg), len(with_cpg)],
            ['No CpG Island', 'Has CpG Island'],
            title='Enhancer-like Subsets {0}\nby Overlap with CpG Island'.
Esempio n. 12
0
        # can be subsumed by a single H3K4me2 peak
        me2_tags_in_atac = atac['naive_h3k4me2_tag_count']
        atac_only = atac[me2_tags_in_atac < me2_thresh]
        atac_me2 = atac[me2_tags_in_atac >= me2_thresh]
        atac_tags_in_me2 = me2['naive_atac_tag_count']
        me2_only = me2[atac_tags_in_me2 < atac_thresh]
        me2_atac = me2[atac_tags_in_me2 >= atac_thresh]

        # Report the size of each subset.
        for caption, frame in (('ATAC only: ', atac_only),
                               ('ATAC with H3K4me2: ', atac_me2),
                               ('H3K4me2 only: ', me2_only),
                               ('H3K4me2 with ATAC: ', me2_atac)):
            print(caption, len(frame))

        save_path = yzer.get_and_create_path(
            dirpath, 'Figures', 'me2_atac_overlaps')

        # Pie charts of the subset sizes for each peak type.
        atac_sizes = [len(atac_only), len(atac_me2)]
        yzer.piechart(atac_sizes,
                      ['ATAC only', 'ATAC with H3K4me2'],
                      title='ATAC-seq region overlaps',
                      save_dir=save_path)

        me2_sizes = [len(me2_only), len(me2_atac)]
        yzer.piechart(me2_sizes,
                      ['H3K4me2 only', 'H3K4me2 with ATAC'],
                      title='H3K4me2 overlaps',
                      save_dir=save_path)

        # Tag-count distribution of the ATAC subsets.
        atac_tag_counts = [atac_only['tag_count'], atac_me2['tag_count']]
        yzer.boxplot(atac_tag_counts,
                     ['ATAC only', 'ATAC with H3K4me2'],
                     title='ATAC-seq tag counts by H3K4me2 overlap',
                     xlabel='Group', ylabel='Peak tag count',
                     save_dir=save_path)
        yzer.boxplot([me2_only['tag_count'], me2_atac['tag_count']],
                     ['H3K4me2 only', 'H3K4me2 with ATAC'],
                     title='H3K4me2 tag counts by ATAC-seq overlap',