kla_only_no_p65 = sum((gr_group['gr_dex_tag_count'] <= min_tags) &
                                  (gr_group['gr_kla_dex_tag_count'] > min_tags) &
                                  (gr_group['p65_kla_tag_count'] + gr_group['p65_kla_dex_tag_count'] <= min_tags)
                          )
         counts = [tethered, direct_comp_gr, indirect_comp_gr, 
                        direct_comp_p65, cobound,
                        direct_novel, indirect_novel,
                        in_dex_no_p65, kla_only_no_p65]
     else: counts = [0]*9
     
     stats = stats + counts
     all_stats.append(dict(zip(labels, stats))), index.append(group.name)
 
 # Run the callback once per group purely for its side effects; the visible
 # tail of the callback appends one stats dict to all_stats and the group
 # name to index. The collected rows are then turned into a DataFrame by hand.
 grouped.apply(count_enhancers)
 enhancer_counts = DataFrame(all_stats, index=index)

 # Re-wrap every label so that at most two words share a line, which keeps
 # the tick labels narrow.
 spaced_labels = []
 for label_text in labels:
     label_words = label_text.split()
     word_pairs = [' '.join(label_words[start:start + 2])
                   for start in xrange(0, len(label_words), 2)]
     spaced_labels.append('\n'.join(word_pairs))

 erna_title = 'Enhancers per Gene by enhancer subtype {0}'.format(name)

 # One box per label, in label order.
 count_columns = [enhancer_counts[col] for col in labels]
 ax = yzer.boxplot(count_columns, spaced_labels,
                   title=erna_title,
                   xlabel='Subset',
                   ylabel='Count',
                   show_outliers=False, show_plot=False, wide=True)
 yzer.ylim(ax, -1, 2)
 pyplot.setp(ax.get_xticklabels(), fontsize=10)

 # The plot title doubles as the image file name.
 plot_path = yzer.get_filename(img_dirpath, erna_title + '.png')
 yzer.save_plot(plot_path)
 yzer.show_plot()
Esempio n. 2
0
                                            '{}_{}'.format(peptide, ab))

            # Path of the enhancer table for this peptide/antibody pair.
            filename = yzer.get_filename(
                pep_dirpath, '{}_{}_enhancers_batf.txt'.format(peptide, ab))

            # Load the table and treat missing values as zero.
            data = yzer.import_file(filename)
            data = data.fillna(0)
            # Keep only rows with no tags in the 'no_pep_tag_count' column;
            # presumably these are the "de novo" enhancers plotted below --
            # TODO confirm against the column definition.
            subset = data[data['no_pep_tag_count'] == 0]
            #subset = data[data['tag_count(2)'] == 0]
            #subset = subset[subset['tag_count(3)'] == 0]
            # Collect one tag-count series per iteration for each plot.
            all_tag_counts.append(data['tag_count'])
            de_novo_tag_counts.append(subset['tag_count'])

        # Plot as boxplot
        # NOTE(review): `ab` is read here at the outer indentation level, after
        # the statements above that also use it; which value it holds depends on
        # the enclosing loops, which are not visible in this excerpt.
        # First plot: tag counts of every row, one box per condition.
        ax = yzer.boxplot(all_tag_counts,
                          conditions,
                          title='Tag Counts in {} Enhancers'.format(
                              ab.title()),
                          xlabel='Condition',
                          ylabel='Normalized tag count',
                          save_dir=savepath,
                          show_plot=True)
        # Second plot: the same layout restricted to the zero
        # 'no_pep_tag_count' rows. `ax` is rebound, so the first axes object
        # is no longer referenced afterwards.
        ax = yzer.boxplot(de_novo_tag_counts,
                          conditions,
                          title='Tag Counts in de novo {} Enhancers'.format(
                              ab.title()),
                          xlabel='Condition',
                          ylabel='Normalized tag count',
                          save_dir=savepath,
                          show_plot=True)
Esempio n. 3
0
                    fold >= wt_data['tag_count'])
                   & (wt_data['tag_count'] *
                      fold >= wt_data['foxo1_ko_naive_atac_tag_count'])]

    # Rows of ko_data whose 'naive_atac_tag_count' is below min_thresh.
    ko_only = ko_data[ko_data['naive_atac_tag_count'] < min_thresh]

    # Directory that receives every figure produced below.
    save_path = yzer.get_and_create_path(dirpath, 'Figures',
                                         'Foxo1_group_overlaps')

    # The three groups and their labels are kept in the same order so that
    # box i is labelled with labels[i].
    groups = [wt_only, both, ko_only]
    labels = ['WT only', 'WT and KO', 'Foxo1 KO only']

    # Always-on toggle; presumably flipped to False by hand to skip plotting.
    if True:
        # Each call draws one box per group for a single tag-count column and
        # writes the figure to save_path without displaying it.
        yzer.boxplot([gp['naive_foxo1_tag_count'] for gp in groups],
                     labels,
                     title='Foxo1 tags in ATAC-seq regions by group',
                     ylabel='Foxo1 peak tag count',
                     save_dir=save_path,
                     show_plot=False)
        yzer.boxplot([gp['lcmv_d12_foxo1_tag_count'] for gp in groups],
                     labels,
                     title='LCMV d12 Foxo1 tags in ATAC-seq regions by group',
                     ylabel='Foxo1 peak tag count',
                     save_dir=save_path,
                     show_plot=False)
        yzer.boxplot([gp['naive_h3k4me2_tag_count'] for gp in groups],
                     labels,
                     title='H3K4me2 tags in ATAC-seq regions by group',
                     ylabel='H3K4me2 region count',
                     save_dir=save_path,
                     show_plot=False)
        yzer.boxplot(
Esempio n. 4
0
            groups = [data[none], data[kla_gt], data[nc], data[kla_dex_gt]]

            # Down-sample every subset to len(nearby) rows so that each box is
            # drawn from the same number of transcripts. random.sample raises
            # ValueError when a subset has fewer rows than requested.
            desired = len(nearby)
            groups = [subset.ix[random.sample(subset.index, desired)]
                      for subset in groups]

            # The un-sampled `nearby` frame is plotted as the last box.
            to_plot = [frame[colname] for frame in groups + [nearby]]

            # Title and y-axis label both depend on whether pausing ratios or
            # log fold changes are being plotted.
            sampled_note = '\nRefSeq, randomly sampled to {0} transcripts'.format(desired)
            if pausing:
                title = 'Pausing Ratio Ratio by change in p65:' + sampled_note
                y_axis_label = 'PausingRatio(KLA+Dex)/PausingRatio(KLA)'
            else:
                title = 'LFC in KLA + Dex over KLA by change in p65:' + sampled_note
                y_axis_label = 'log2(KLA+Dex GRO-seq/KLA GRO-seq)'

            ax = yzer.boxplot(to_plot,
                              names,
                              title=title,
                              xlabel='Transcript Status',
                              ylabel=y_axis_label,
                              show_outliers=False, show_plot=False)

            # A random four-digit suffix keeps repeated samplings from
            # overwriting each other's image files.
            figure_name = '{2}_with_nearby_unique_{0}x_{3}_sampled_{1}.png'.format(
                ratio, random.randint(0, 9999), colname, change_type)
            yzer.save_plot(yzer.get_filename(img_dirpath, figure_name))
            yzer.show_plot()
Esempio n. 5
0
    # Observed interaction counts for the KLA 4h "with me2" set, padded with
    # one zero per entry counted in zero_intxns_in_kla_4h_with_me2.
    observed_kla_4h_with_me2 = data['interacting_in_kla_4h_with_me2'][
        'count'].values.tolist()
    kla_4h_with_me2_counts = (observed_kla_4h_with_me2
                              + [0] * zero_intxns_in_kla_4h_with_me2)

    # Each box caption is paired with the list of counts it describes, so the
    # two cannot drift out of order.
    captioned_counts = [
        ('Less than 1/4\navg H3K4me2 in notx,\ninteractions in notx',
         notx_with_less_me2_counts),
        ('At least avg H3K4me2 in notx\ninteractions in notx',
         notx_with_me2_counts),
        ('Less than 1/4\navg H3K4me2 in notx,\ninteractions in KLA 30m',
         kla_30m_with_less_me2_counts),
        ('At least avg H3K4me2 in notx\ninteractions in KLA 30m',
         kla_30m_with_me2_counts),
        ('Less than 1/4\navg H3K4me2 in notx,\ninteractions in KLA 4h',
         kla_4h_with_less_me2_counts),
        ('At least avg H3K4me2 in notx\ninteractions in KLA 4h',
         kla_4h_with_me2_counts),
    ]
    vals = [counts for caption, counts in captioned_counts]
    # Append the number of entries in each list to its caption.
    labels = [caption + '\n(count: {0})'.format(len(counts))
              for caption, counts in captioned_counts]

    title = 'Number of interactions with "enhancers" by H3K4me2 state in notx'
    ax = yzer.boxplot(
        vals, labels,
        title=title,
        xlabel='Enhancer subset',
        ylabel='Number of interactions with other transcripts',
        show_outliers=False, show_plot=True, wide=True,
        save_dir=img_dirpath)
Esempio n. 6
0
    
    
    for group in (me2_only, ctcf_only, k27_only, ctcf_me2, k27_me2, nothing):
        print len(group), len(group)/len(data)
        
        
    if True:
        # Each box label is paired with the frame whose 'rpkm' column it
        # shows; list order is the left-to-right order of the boxes.
        rpkm_subsets = [
            ('All Potential\nEnhancers', data),
            ('me2 + TF', me2_tf),
            ('CTCF + TF', ctcf_tf),
            ('K27 + TF', k27_tf),
            ('CTCF + me2\n+ TF', ctcf_me2_tf),
            ('K27 + me2\n+ TF', k27_me2_tf),
            ('TF', tf),
            ('me2 only', me2_only),
            ('CTCF only', ctcf_only),
            ('K27 only', k27_only),
            ('CTCF + me2', ctcf_me2),
            ('K27 + me2', k27_me2),
            ('No peaks', nothing),
        ]
        rpkm_series = [subset_frame['rpkm']
                       for box_label, subset_frame in rpkm_subsets]
        box_labels = [box_label for box_label, subset_frame in rpkm_subsets]

        ax = yzer.boxplot(
            rpkm_series,
            bar_names=box_labels,
            title='GRO-seq RPKM at non-genic H3K4me2 regions',
            xlabel='',
            ylabel='Tags per 1000bp in GRO-seq transcript overlapping H3K4me2 peak',
            show_outliers=False, show_plot=False)

        # Save first, then display the figure.
        figure_path = os.path.join(dirpath, 'groseq_rpkm_at_h3k4me2_peaks.png')
        yzer.save_plot(figure_path)
        yzer.show_plot()
        
        
'''
-- With H3K27me3
select distinct on (e.id) e.*, e.id as me2, reg.id as refseq, p1.id as pu_1, p2.id as cebpa,
Esempio n. 7
0
        grapher.show_plot()

    if True:
        # Five boxes of PU.1 tag counts: the whole set for C57Bl6, BALBc and
        # NOD, followed by the NOD counts within nod_with_bl6 and within
        # nod_with_balb. Each label is paired with the series it describes.
        pu_1_boxes = [
            ('C57Bl6 Peaks', data['wt_pu_1_tag_count']),
            ('BALBc Peaks', data['balb_pu_1_tag_count_norm']),
            ('NOD Peaks', data['nod_pu_1_tag_count_norm']),
            ('NOD Peaks\nwhere\nNOD == C57Bl6',
             nod_with_bl6['nod_pu_1_tag_count_norm']),
            ('NOD Peaks\nwhere\nNOD == BALBc',
             nod_with_balb['nod_pu_1_tag_count_norm']),
        ]

        ax = grapher.boxplot(
            [box_series for box_label, box_series in pu_1_boxes],
            bar_names=[box_label for box_label, box_series in pu_1_boxes],
            title='PU.1 peak tags where BALBc has a SNP that ruins its PU.1 Motif',
            xlabel='',
            ylabel='Tags per PU.1 peak',
            show_outliers=False,
            show_plot=False)

        # Save first, then display the figure.
        figure_path = os.path.join(
            dirpath, 'peak_boxplots_no_balb_motif_filter_low_peaks.png')
        grapher.save_plot(figure_path)
        grapher.show_plot()

    print 'p-val that BALBc is different than C57Bl6: %g' % ttest_ind(
Esempio n. 8
0
        # Every figure in this section is written to the same directory.
        save_path = yzer.get_and_create_path(
            dirpath, 'Figures', 'me2_atac_overlaps')

        # Labels shared by the pie chart and the boxplot of each assay; a
        # fresh list is handed to every call.
        atac_labels = ('ATAC only', 'ATAC with H3K4me2')
        me2_labels = ('H3K4me2 only', 'H3K4me2 with ATAC')

        # Keyword arguments common to both boxplots and to both histograms.
        boxplot_options = dict(xlabel='Group', ylabel='Peak tag count',
                               save_dir=save_path)
        histogram_options = dict(bins=20,
                                 xlabel='Tag count in peak',
                                 ylabel='Number of peaks',
                                 save_dir=save_path)

        # Pie charts: how many regions of each assay overlap the other.
        yzer.piechart([len(atac_only), len(atac_me2)],
                      list(atac_labels),
                      title='ATAC-seq region overlaps',
                      save_dir=save_path)
        yzer.piechart([len(me2_only), len(me2_atac)],
                      list(me2_labels),
                      title='H3K4me2 overlaps',
                      save_dir=save_path)

        # Boxplots: tag counts split by overlap status.
        yzer.boxplot([atac_only['tag_count'], atac_me2['tag_count']],
                     list(atac_labels),
                     title='ATAC-seq tag counts by H3K4me2 overlap',
                     **boxplot_options)
        yzer.boxplot([me2_only['tag_count'], me2_atac['tag_count']],
                     list(me2_labels),
                     title='H3K4me2 tag counts by ATAC-seq overlap',
                     **boxplot_options)

        # Histograms: tag-count distribution of the non-overlapping regions.
        yzer.histogram(atac_only['tag_count'].tolist(),
                       title='ATAC-seq-only peak tag count distribution',
                       **histogram_options)
        yzer.histogram(me2_only['tag_count'].tolist(),
                       title='H3K4me2-only peak tag count distribution',
                       **histogram_options)
                dirpath,
                'balbc_vs_nod_pu_1_peak_tag_counts_bl6_gt_balb_unique.png'))
        grapher.show_plot()

    if True:
        # Five boxes of tag counts: the whole set for C57Bl6, BALBc and NOD,
        # followed by the NOD counts within nod_with_bl6 and within
        # nod_with_balb. Each label is paired with the series it describes.
        tag_boxes = [
            ('C57Bl6 Tags', data['wt_tag_count']),
            ('BALBc Tags', data['balb_tag_count_norm']),
            ('NOD Tags', data['nod_tag_count_norm']),
            ('NOD Tags\nwhere\nNOD == C57Bl6',
             nod_with_bl6['nod_tag_count_norm']),
            ('NOD Tags\nwhere\nNOD == BALBc',
             nod_with_balb['nod_tag_count_norm']),
        ]

        ax = grapher.boxplot(
            [box_series for box_label, box_series in tag_boxes],
            bar_names=[box_label for box_label, box_series in tag_boxes],
            title='GRO-seq tags where BALBc has a SNP and half H3K4me2',
            xlabel='',
            ylabel='Tags in transcript at H3K4me2 peak',
            show_outliers=False,
            show_plot=False)

        # Save first, then display the figure.
        figure_path = os.path.join(
            dirpath, 'peak_boxplots_all_h3k4me2_collapsed.png')
        grapher.save_plot(figure_path)
        grapher.show_plot()

    # Always-on toggle; presumably flipped to False by hand to skip the test.
    if True:
        # Compare the C57Bl6 and normalized BALBc tag counts and print
        # element [1] of the ttest_ind result, which the message reports as
        # the p-value (presumably scipy.stats.ttest_ind -- confirm the import).
        print 'p-val that BALBc is different than C57Bl6: %g' % ttest_ind(
            data['wt_tag_count'], data['balb_tag_count_norm'])[1]