Ejemplo n.º 1
0
     # Split rows by their Dex log fold change: `up` has dex_1_lfc >= 1 and
     # `nc` has |dex_1_lfc| < 1. `down` is used below but is not defined in
     # this section; presumably it is built just above -- confirm.
     up = data[data['dex_1_lfc'] >= 1]
     nc = data[abs(data['dex_1_lfc']) < 1]
     
     key = 'p65_kla_tag_count'
     # NOTE(review): this first assignment is overwritten by the next line, so
     # only the per-row difference below is what actually gets plotted.
     datasets = [down[key],nc[key],up[key]]
     # Per-row difference: 'p65_kla_dex_tag_count' minus 'p65_kla_tag_count'.
     datasets = [d['p65_kla_dex_tag_count'] - d[key] for d in [down, nc, up]]
     
     #title = 'Tags in p65 peaks in KLA 1h + DMSO 2h: Distal'
     title = 'Diff in tags in p65 peaks in KLA 1h + Dex 2h vs KLA 1h + DMSO 2h: RefSeq'
     # One box per group, in the same order as `datasets` (down, nc, up).
     ax = grapher.boxplot(datasets, 
                     ['Down in Dex 2h','No change in Dex 2h','Up in Dex 2h',],
                      title=title, 
                      xlabel='Condition', 
                      ylabel='Total tags in all peaks overlapping transcript', 
                      show_outliers=False, show_plot=False)
     # The output name is the title with spaces replaced by underscores.
     grapher.save_plot(grapher.get_filename(base_dirpath, 'boxplots', 'dex',
                            title.replace(' ','_')))
     grapher.show_plot()
     # Print the mean of each plotted group.
     for sub in datasets: print sub.mean()
 
 # Boxplots for gr_kla_dex peaks by lfc in Dex
 # The `if False:` guard disables this section; flip it to re-enable.
 if False:
     #data = data[data['distal'] == 't']
     #data = data[data['has_refseq'] == 1]
     
     # `trans`: rows with kla_1_lfc >= 1 AND dex_over_kla_1_lfc <= -0.58.
     # `rest`: the logical complement of that condition (ignoring NaNs).
     # NOTE(review): -0.58 is presumably log2(1/1.5), i.e. a 1.5-fold
     # decrease -- confirm.
     trans = data[(data['kla_1_lfc'] >= 1) & (data['dex_over_kla_1_lfc'] <= -.58)]
     rest = data[(data['kla_1_lfc'] < 1) | (data['dex_over_kla_1_lfc'] > -.58)]
     
     key = 'gr_dex_tag_count'
     # NOTE(review): this first assignment is overwritten by the next line.
     datasets = [rest[key],trans[key]]
     # Per-row difference: 'gr_kla_dex_tag_count' minus 'gr_dex_tag_count'.
     datasets = [d['gr_kla_dex_tag_count'] - d[key] for d in [rest, trans]]
     
Ejemplo n.º 2
0
                        or ''),
                    add_noise=show_points,
                    show_points=show_points,
                    show_2x_range=(not show_points),
                    show_legend=(not show_points),
                    plot_regression=(not show_points),
                    show_count=(not show_points),
                    show_correlation=False,
                    show_plot=False,
                    ax=ax)

                #yzer.xlim(ax, min(data[xcolname]), max(data[xcolname]))
                #yzer.ylim(ax, min(data[ycolname]), max(data[ycolname]))

            yzer.save_plot(
                yzer.get_filename(
                    img_dirpath, '{0}_in_{1}_vs_KLA-Dex_by_{2}.png'.format(
                        main, basal_cond, compare)))
            yzer.show_plot()

    # Disabled section (`if False:`): for each (main, compare, basal_cond)
    # triple, load the '<main>_kla_dex_vectors.txt' table and derive two
    # tag-count ratios.
    if False:
        for main, compare, basal_cond in (('p65', 'GR', 'KLA'), ('GR', 'p65',
                                                                 'Dex')):
            data = yzer.import_file(
                yzer.get_filename(dirpath, 'motifs', 'from_peaks',
                                  '{0}_kla_dex_vectors.txt'.format(main)))

            # Missing values become 1 before the ratios are taken
            # (presumably so empty counts do not yield NaN or division by
            # zero -- confirm).
            data = data.fillna(1)
            # Collapse to one row per (id, chr_name) using the group mean.
            data = data.groupby(['id', 'chr_name'], as_index=False).mean()

            # NOTE(review): what tag_count .. tag_count_4 each represent is
            # not visible here; check the query that produced the file.
            data['ratio_main'] = data['tag_count'] / data['tag_count_2']
            data['ratio_compare'] = data['tag_count_3'] / data['tag_count_4']
Ejemplo n.º 3
0
    # The raw text is organised in three-line records: a date line followed
    # by two lines of comma-separated integer readings, one line per series.
    raw_data = raw_data.split('\n')
    date_lines = raw_data[0::3]
    first_series_lines = raw_data[1::3]
    second_series_lines = raw_data[2::3]

    dates = [datetime.strptime(line, '%Y_%m_%d') for line in date_lines]

    # Each parsed line is one row; .T transposes so that every row holds the
    # same position within the comma-separated list across all dates.
    set1 = numpy.array(
        [[int(value) for value in line.split(',')]
         for line in first_series_lines]).T
    set2 = numpy.array(
        [[int(value) for value in line.split(',')]
         for line in second_series_lines]).T

    grapher = SeqGrapher()
    # set1 is drawn in blue as 'Control', set2 in red as 'TDB treated'.
    ax = grapher.timeseries(
        dates, [set1, set2],
        title='Blood glucose values in NOD mice after TDB treatment',
        xlabel='Date',
        ylabel='Blood glucose (mg/dL, via Aviva AccuChek meter)',
        labels=['Control', 'TDB treated'],
        colors=['blue', 'red'],
        show_median=True,
        show_legend=True,
        show_plot=False)
    dirpath = '/Users/karmel/Desktop/Projects/GlassLab/Notes_and_Reports/NOD_BALBc/TDB in vitro/'

    # Save before showing the figure.
    plot_path = os.path.join(
        dirpath, 'blood_glucose_values_after_tdb_with_delta_06_04_2012.png')
    grapher.save_plot(plot_path)
    grapher.show_plot()
        # Keyword options shared by both boxplots in this section.
        shared_boxplot_options = dict(xlabel=xlabel,
                                      show_outliers=False,
                                      show_plot=False)

        # KLA tag count: the count column divided by the matching length
        # column, one series per entry of `supersets`.
        count_col = 'kla_1_tag_count{0}'.format(suffix)
        length_col = 'length{0}'.format(suffix)
        vals = [d[count_col] / d[length_col] for _, d in supersets]
        title_template = 'GroSeq tag counts in {0}: DMSO 2h + KLA 1h by gene category'
        title = title_template.format(subgroup)
        ax = yzer.boxplot(vals, labels,
                          title=title,
                          ylabel='KLA 1h Tag Count per basepair',
                          **shared_boxplot_options)
        pyplot.setp(ax.get_xticklabels(), fontsize=10)
        # The plot title doubles as the output file name.
        yzer.save_plot(yzer.get_filename(img_dirpath, title + '.png'))

        # KLA LFC: the log fold change column, one series per superset.
        lfc_col = 'kla_1_lfc{0}'.format(suffix)
        vals = [d[lfc_col] for _, d in supersets]
        title_template = 'GroSeq Log Fold Change in {0} in DMSO 2h + KLA 1h by gene category'
        title = title_template.format(subgroup)
        ax = yzer.boxplot(vals, labels,
                          title=title,
                          ylabel='log2(KLA GRO-seq/DMSO GRO-seq)',
                          **shared_boxplot_options)
        pyplot.setp(ax.get_xticklabels(), fontsize=10)
        yzer.save_plot(yzer.get_filename(img_dirpath, title + '.png'))
Ejemplo n.º 5
0
    if True:
        # Non-diabetic samples: BALBc vs. normalized NOD tag counts for the
        # `refseq_up_nond` subset, with `refseq` passed as the master dataset.
        scatter_options = dict(
            log=True,
            color='blue',
            master_dataset=refseq,
            title='BALBc vs. NOD BMDC Refseq Transcripts',
            show_2x_range=True,
            show_legend=False,
            show_count=True,
            show_correlation=True,
            show_plot=False)
        ax = grapher.scatterplot(refseq_up_nond,
                                 'balb_notx_0h_tag_count',
                                 'nod_notx_0h_tag_count_norm',
                                 **scatter_options)
        plot_path = os.path.join(
            dirpath, 'nondiabetic_balbc_v_nod_up_scatterplot.png')
        grapher.save_plot(plot_path)
        grapher.show_plot()

    if False:
        # diabetic
        ax = grapher.scatterplot(
            refseq,
            'diabetic_balb_notx_0h_tag_count',
            'diabetic_nod_notx_0h_tag_count_norm',
            log=True,
            color='blue',
            master_dataset=refseq,
            title='Diabetic BALBc vs. NOD BMDC Refseq Transcripts',
            show_2x_range=True,
            show_legend=False,
Ejemplo n.º 6
0
            # "All runs" scatterplot: DMSO tag counts against normalized KLA
            # tag counts for the current dataset.
            all_runs_title = 'DMSO vs. KLA tag counts: All runs, {0}'.format(
                label)
            ax = grapher.scatterplot(dataset,
                                     'dmso_tag_count',
                                     'kla_tag_count_norm',
                                     title=all_runs_title,
                                     xlabel='DMSO 2h tags',
                                     ylabel='KLA 1h + DMSO 2h tags',
                                     log=True,
                                     color='blue',
                                     show_2x_range=True,
                                     show_legend=True,
                                     show_count=True,
                                     show_correlation=True,
                                     show_plot=False)
            # The file name is keyed on the slug form of the label.
            all_runs_filename = 'dmso_vs_kla_all_runs_{0}.png'.format(
                slug_label)
            grapher.save_plot(
                grapher.get_filename(scatter_dirpath, all_runs_filename))
            grapher.show_plot()

            for x in xrange(1, 5):
                # By group
                ax = grapher.scatterplot(
                    dataset,
                    'dmso_{0}_tag_count'.format(x),
                    'kla_{0}_tag_count_norm'.format(x),
                    log=True,
                    color='blue',
                    title='DMSO vs. KLA tag counts: Group {0} runs, {1}'.
                    format(x, label),
                    xlabel='DMSO 2h tags',
                    ylabel='KLA 1h + DMSO 2h tags',
Ejemplo n.º 7
0
                             data['tag_count_2']) / data['tag_count']

            # Three row masks built from tag_count_3 and tag_count_4:
            #   cond_1: tag_count_3 is zero
            #   cond_2: tag_count_3 is positive but below tag_count_4
            #   cond_3: tag_count_3 is positive and at least tag_count_4
            count_3 = data['tag_count_3']
            count_4 = data['tag_count_4']
            is_positive = count_3 > 0
            cond_1 = (count_3 == 0)
            cond_2 = is_positive & (count_3 < count_4)
            cond_3 = is_positive & (count_3 >= count_4)

            title = 'Difference in {0} peak tag counts by {1}'.format(
                main, compare)

            # Box labels, in the same order as the masks above.
            name_templates = ('No {0} in KLA+Dex',
                              'Loses {0} in KLA+Dex',
                              'Gains/maintains {0} in KLA+Dex')
            names = [template.format(compare) for template in name_templates]

            boxes = [data[mask][colname] for mask in (cond_1, cond_2, cond_3)]
            y_axis_text = '{0} KLA+Dex tags in peak - {0} {1} tags in peak'.format(
                main, basal_cond)
            ax = yzer.boxplot(boxes,
                              names,
                              title=title,
                              xlabel='Condition',
                              ylabel=y_axis_text,
                              show_outliers=False,
                              show_plot=False)

            # The output name is the title with spaces replaced by underscores.
            output_name = title.replace(' ', '_')
            yzer.save_plot(yzer.get_filename(img_dirpath, output_name))
            yzer.show_plot()
Ejemplo n.º 8
0
 
 # Ratio of the '<peak_type>_kla_tag_count' column to the
 # '<peak_type>_kla_dex_nearby_tag_count' column, stored as a new column.
 data['{0}_kla_to_kla_dex_ratio'.format(peak_type)] = (
     data['{0}_kla_tag_count'.format(peak_type)]
     / data['{0}_kla_dex_nearby_tag_count'.format(peak_type)])
 
 # Report the size of each selection (presumably kla_gt / kla_dex_gt are
 # boolean row masks, so the sums are row counts -- confirm).
 print(sum(kla_gt))
 print(sum(kla_dex_gt))
 
 # Export the rows of each selection as tab-separated text.
 csv_options = dict(sep='\t', header=True, index=False)
 data[kla_gt].to_csv(
     yzer.get_filename(
         img_dirpath,
         'enhancer_like_lose_{0}_{1}x_change_dsg_only.txt'.format(peak_type, ratio)),
     **csv_options)
 data[kla_dex_gt].to_csv(
     yzer.get_filename(
         img_dirpath,
         'enhancer_like_gain_{0}_{1}x_change_dsg_only.txt'.format(peak_type, ratio)),
     **csv_options)
 
 # Title, y-axis label and output file name all depend on whether PU.1 is
 # being plotted (`pu_1` truthy) or the GRO-seq log fold change.
 title = 'LFC in KLA + Dex over KLA by change in {0}:\nEnhancer-Like, Has GR in KLA+Dex (DSG only)'.format(peak_type)
 if pu_1:
     title = 'PU.1 in KLA + Dex over KLA by change in {0}:\nEnhancer-Like, has GR in KLA+Dex, has PU.1'.format(peak_type)
 
 names = [s.format(peak_type) for s in ['No {0}','Loses {0} in KLA+Dex','No change in {0}', 'Gains {0} in KLA+Dex']]
 # Conditional expression instead of the fragile `cond and a or b` idiom.
 y_axis_text = ('log2(KLA+Dex PU.1/KLA PU.1)' if pu_1
                else 'log2(KLA+Dex GRO-seq/KLA GRO-seq)')
 ax = yzer.boxplot([data[none][colname], data[kla_gt][colname], data[nc][colname], data[kla_dex_gt][colname]],
                   names,
                   title=title,
                   xlabel='{0} Status'.format(peak_type),
                   ylabel=y_axis_text,
                   show_outliers=False, show_plot=False)
 if pu_1:
     plot_name = 'dex_over_kla_pu_1_both_by_{0}_sums_{1}x_change.png'
 else:
     plot_name = 'dex_over_kla_1_lfc_by_{0}_sums_{1}x_change_dsg_only.png'
 yzer.save_plot(yzer.get_filename(img_dirpath, plot_name.format(peak_type, ratio)))
 
 yzer.show_plot()
Ejemplo n.º 9
0
                             label='Predicted Coding',
                             add_noise=False,
                             show_2x_range=False,
                             plot_regression=False,
                             show_count=False,
                             show_correlation=False,
                             show_legend=False,
                             show_plot=False)
    # Overlay the non-coding predictions (green) on the existing axes `ax`
    # and turn the legend on for the combined figure.
    noncoding_options = dict(
        log=False,
        color='green',
        title='CPC-derived Coding Potential Predictions for RefSeq mRNA',
        xlabel='ORF score',
        ylabel='Coding score',
        label='Predicted Non-coding',
        add_noise=False,
        show_2x_range=False,
        plot_regression=False,
        show_count=False,
        show_correlation=False,
        show_legend=True,
        show_plot=False,
        ax=ax)
    ax = grapher.scatterplot(data_noncoding, 'score_orf', 'score',
                             **noncoding_options)

    # Horizontal black line at y = 0, from x = 0 to the largest ORF score.
    max_orf_score = max(data['score_orf'])
    pyplot.plot([0, max_orf_score], [0, 0], '-', color='black')

    plot_path = os.path.join(
        dirpath, 'refseq_coding_potential_predictions_zoom.png')
    grapher.save_plot(plot_path)
    grapher.show_plot()
        # Keep rows of `regrouped` with kla_1_lfc >= 1, then those flagged
        # refseq == 'f' with length < 6000, and finally take the group mean
        # per (id, chr_name).
        regrouped = regrouped[regrouped['kla_1_lfc'] >= 1]
        regrouped_keep = ((regrouped['refseq'] == 'f')
                          & (regrouped['length'] < 6000))
        regrouped = regrouped[regrouped_keep]
        regrouped = regrouped.groupby(['id', 'chr_name'],
                                      as_index=False).mean()

        # The same two filters for `transcripts`, applied in the other order.
        transcripts_keep = ((transcripts['refseq'] == 'f')
                            & (transcripts['length'] < 6000))
        transcripts = transcripts[transcripts_keep]
        transcripts = transcripts[transcripts['kla_1_lfc'] >= 1]

        # The three series that are compared in the boxplot.
        lfc_col = 'dex_over_kla_1_lfc'
        all_trans = transcripts[lfc_col]
        has_p65 = transcripts['p65_kla_tag_count'] > 0
        with_p65 = transcripts[has_p65][lfc_col]
        with_pair = regrouped[lfc_col]

        title = 'Transcript log fold change at redistribution pairs:\nEnhancer-like, up in KLA alone'
        names = [
            'All transcripts',
            'Transcripts with p65',
            'Transcripts with\na redistribution pair',
        ]
        ax = yzer.boxplot([all_trans, with_p65, with_pair],
                          names,
                          title=title,
                          xlabel='Transcript subset',
                          ylabel='log2(KLA+Dex / KLA)',
                          show_outliers=False,
                          show_plot=False)
        plot_path = yzer.get_filename(
            dirpath, 'redistribution', 'boxplots',
            'dex_over_kla_lfc_boxplot_1_mean_enhancer_like_up_in_kla.png')
        yzer.save_plot(plot_path)
        yzer.show_plot()
Ejemplo n.º 11
0
            groups = [data[none], data[kla_gt], data[nc], data[kla_dex_gt]]

            # We want to randomly sample to get equi-sized groups: every
            # group is cut down to as many rows as `nearby` has.
            # NOTE(review): random.sample raises ValueError when a group has
            # fewer than `desired` rows; confirm every group is large enough.
            desired = len(nearby)
            groups = [g.ix[random.sample(g.index, desired)] for g in groups]

            to_plot = [g[colname] for g in (groups + [nearby])]

            # Title and y-axis label depend on whether the pausing ratio or
            # the log fold change is plotted; the second title line is shared.
            sampling_note = '\nRefSeq, randomly sampled to {0} transcripts'.format(desired)
            if pausing:
                title = 'Pausing Ratio Ratio by change in p65:' + sampling_note
                y_axis_text = 'PausingRatio(KLA+Dex)/PausingRatio(KLA)'
            else:
                title = 'LFC in KLA + Dex over KLA by change in p65:' + sampling_note
                y_axis_text = 'log2(KLA+Dex GRO-seq/KLA GRO-seq)'

            ax = yzer.boxplot(to_plot,
                              names,
                              title=title,
                              xlabel='Transcript Status',
                              ylabel=y_axis_text,
                              show_outliers=False, show_plot=False)
            # A random 0-9999 tag in the file name keeps the outputs of
            # repeated sampling runs apart (collisions are still possible).
            yzer.save_plot(
                yzer.get_filename(
                    img_dirpath,
                    '{2}_with_nearby_unique_{0}x_{3}_sampled_{1}.png'.format(
                        ratio, random.randint(0, 9999), colname, change_type)))
            yzer.show_plot()
Ejemplo n.º 12
0
             #'srf_targets': ['Srf','Cnn2','Lima1','Coro1a','Vcl','Acta2','Actb','Dhcr24','Actg2','Actc1','Lcp1','Jup','Tpm4','Tnni2','Zyx','Tubb3','Pfn1','Gas7','Arpc4','Pstpip1','Bsn','Flna','Actn1'],
             #'inflammatory_genes': ['Cxcl1','Cxcl2','Il6','Ptgs2','Tnfsf9','Vegfa','Tnf', 'Siglec1','Mmp9', 'Il10','Il1b','Cxcl10','Tlr4','Il12b',]
                }         
 '''
                
         'clec4e_tlr2': ['Clec4e','Tlr2',],
                ,
 '''
 # Gene names present in `refseq`, collected once so each membership test
 # below is a set lookup instead of a scan of the whole column.
 known_gene_names = set(refseq['gene_names'].values)
 
 # For every gene group: drop genes that are not in `refseq`, then draw one
 # fold-change bar graph per treatment ('notx' and 'kla').
 for i, genes in gene_groups.items():
     # Iterate over a copy because missing genes are removed from the group
     # list in place; each missing gene is printed.
     for gene in genes[:]:
         if gene not in known_gene_names:
             print(gene)
             genes.remove(gene)
     # Index label of the first `refseq` row matching each remaining gene.
     indices = [refseq[refseq['gene_names'] == gene].index[0] for gene in genes]
     
     for txt in ('notx','kla'):
         # Display form of the treatment: 'KLA' for kla, otherwise capitalized.
         condition_label = txt.upper() if txt == 'kla' else txt.capitalize()
         
         # Rank rows by reads per base, ascending: pair each index label with
         # its position in the sorted order, then reorder the pairs by label
         # so the positions line up with `refseq` rows.
         # NOTE(review): this assumes the index of `refseq` is already in
         # ascending label order. `sort_index(by=...)` is the legacy pandas
         # spelling of `sort_values`; it is left unchanged to match the
         # pandas version this script targets.
         sorted_by_count = refseq.fillna(0).sort_index(axis=0, by='balb_{0}_1h_reads_per_base'.format(txt)).index.copy()
         sort_indexes = list(enumerate(sorted_by_count))
         sort_indexes.sort(key=lambda x: x[1])
         refseq['rank'] = [position for position, _ in sort_indexes]
         
         yzer.bargraph_for_transcripts(refseq, indices, ['balb_nod_{0}_1h_fc'.format(txt)],
                         bar_names=genes,
                         title='NOD vs. BALBc {0} 1h GRO-seq'.format(condition_label),
                         ylabel='Fold Change in NOD vs. BALBc',
                         rank_label='Rank of read per base pair value\nin BALBc {0} 1h, ascending'.format(
                                                             condition_label),
                         show_plot=False)
         yzer.save_plot(yzer.get_filename(img_dirpath, 'balbc_nod_{0}_{1}_fold_change_bargraph.png'.format(txt,i)))
         #yzer.show_plot()
         
Ejemplo n.º 13
0
                           k27_tf['rpkm'], ctcf_me2_tf['rpkm'], k27_me2_tf['rpkm'], tf['rpkm'],
                           me2_only['rpkm'], ctcf_only['rpkm'],
                           k27_only['rpkm'], ctcf_me2['rpkm'], k27_me2['rpkm'], nothing['rpkm']], 
                                  bar_names=['All Potential\nEnhancers', 
                                             'me2 + TF', 'CTCF + TF',  
                                             'K27 + TF', 
                                             'CTCF + me2\n+ TF', 'K27 + me2\n+ TF', 'TF',
                                             'me2 only', 'CTCF only',  
                                             'K27 only', 
                                             'CTCF + me2', 'K27 + me2',
                                             'No peaks',],
                                  title='GRO-seq RPKM at non-genic H3K4me2 regions', 
                                  xlabel='', ylabel='Tags per 1000bp in GRO-seq transcript overlapping H3K4me2 peak', 
                                  show_outliers=False, show_plot=False)
        
        yzer.save_plot(os.path.join(dirpath, 'groseq_rpkm_at_h3k4me2_peaks.png'))
        yzer.show_plot()
        
        
'''
-- With H3K27me3
select distinct on (e.id) e.*, e.id as me2, reg.id as refseq, p1.id as pu_1, p2.id as cebpa,
p3.id as p65, p5.id as k27, p6.id as ctcf, t.id as trans, t.tag_count 
from thiomac_chipseq_2011.peak_wt_notx_1h_h3k4me2_05_11 e

left outer join genome_reference_mm9.sequence_transcription_region reg
on e.chromosome_id = reg.chromosome_id
and e.start_end && reg.start_end 
left outer join thiomac_chipseq_2011.peak_wt_notx_1h_pu_1_05_11 p1
on e.chromosome_id = p1.chromosome_id
and e.start_end && p1.start_end
Ejemplo n.º 14
0
            xlabel='C57Bl6 PU.1 tag counts',
            ylabel='NOD PU.1 tag counts',
            title=
            'C57Bl6 vs. NOD PU.1 peaks\nwhere C57Bl6 has a PU.1 motif and BALBc does not',
            label='NOD SNP == BALBc SNP',
            add_noise=False,
            show_2x_range=False,
            show_legend=True,
            text_color=True,
            show_count=True,
            show_correlation=True,
            show_plot=False,
            ax=ax)
        #ax.set_ylim(4,128)
        grapher.save_plot(
            os.path.join(dirpath,
                         'bl6_vs_nod_pu_1_peak_tag_counts_bl6_gt_balb.png'))
        grapher.show_plot()

    if True:
        # Boxplots: avg PU.1 in Bl6 for whole set; avg PU.1 in BALB for whole set;
        # avg PU.1 for NOD in whole set; avg PU.1 in NOD set with Bl6; avg PU.1 in NOD set with BALB

        ax = grapher.boxplot(
            [
                data['wt_pu_1_tag_count'], data['balb_pu_1_tag_count_norm'],
                data['nod_pu_1_tag_count_norm'],
                nod_with_bl6['nod_pu_1_tag_count_norm'],
                nod_with_balb['nod_pu_1_tag_count_norm']
            ],
            bar_names=[
            xlabel='BALBc tag counts',
            ylabel='NOD tag counts',
            title=
            'BALBc vs. NOD GRO-seq tag counts\nwhere C57Bl6 has a PU.1 motif and BALBc does not',
            label='NOD SNP == BALBc SNP',
            add_noise=False,
            show_2x_range=False,
            show_legend=True,
            text_color=True,
            show_count=True,
            show_correlation=True,
            show_plot=False,
            ax=ax)
        #ax.set_ylim(4,128)
        grapher.save_plot(
            os.path.join(
                dirpath,
                'balbc_vs_nod_pu_1_peak_tag_counts_bl6_gt_balb_unique.png'))
        grapher.show_plot()

    if True:
        # Boxplots

        ax = grapher.boxplot(
            [
                data['wt_tag_count'], data['balb_tag_count_norm'],
                data['nod_tag_count_norm'], nod_with_bl6['nod_tag_count_norm'],
                nod_with_balb['nod_tag_count_norm']
            ],
            bar_names=[
                'C57Bl6 Tags',
                'BALBc Tags',