Esempio n. 1
0
    refseq_down_slowd = refseq[
        refseq['slow_diabetic_balb_nod_notx_0h_fc'] <= -1]

    if False:
        #print set(grapher.get_gene_list(refseq_up_nond)) & set(grapher.get_gene_list(refseq_up_d))
        #print set(grapher.get_gene_list(refseq_down_nond)) & set(grapher.get_gene_list(refseq_down_d))
        print grapher.get_gene_names(refseq_up_nond)

    if True:
        # non-d
        ax = grapher.scatterplot(refseq_up_nond,
                                 'balb_notx_0h_tag_count',
                                 'nod_notx_0h_tag_count_norm',
                                 log=True,
                                 color='blue',
                                 master_dataset=refseq,
                                 title='BALBc vs. NOD BMDC Refseq Transcripts',
                                 show_2x_range=True,
                                 show_legend=False,
                                 show_count=True,
                                 show_correlation=True,
                                 show_plot=False)
        grapher.save_plot(
            os.path.join(dirpath,
                         'nondiabetic_balbc_v_nod_up_scatterplot.png'))
        grapher.show_plot()

    if False:
        # diabetic
        ax = grapher.scatterplot(
            refseq,
            'diabetic_balb_notx_0h_tag_count',
Esempio n. 2
0
        yzer.get_filename(dirpath, 'all_expressed_refseq.txt'))

    refseq_with_runoff = refseq[refseq['id'].isin(data['gene_id'])]
    refseq_no_runoff = refseq[~refseq['id'].isin(data['gene_id'])]
    if False:
        print len(refseq_no_runoff)
        print refseq_no_runoff.tail(100).to_string()

    # Calculate length of runoff
    data[
        'length'] = data['transcription_end'] - data['transcription_start'] + 1
    data['gene_length'] = data['gene_end'] - data['gene_start'] + 1

    # What might be correlated with length of runoff?
    if False:
        yzer.scatterplot(data, 'gene_length', 'length', log=True)
        yzer.scatterplot(data, 'gene_score', 'length', log=True)
        yzer.scatterplot(data, 'score', 'length', log=True)
        yzer.scatterplot(data, 'gene_score', 'score', log=True)
        yzer.scatterplot(data, 'gene_rpkm', 'rpkm', log=True)
        yzer.scatterplot(data, 'gene_rpkm', 'length', log=True)
        yzer.scatterplot(data, 'rpkm', 'length', log=True)
        yzer.boxplot([data['gene_score'], refseq_no_runoff['score']])
        yzer.boxplot([data['gene_rpkm'], refseq_no_runoff['rpkm']])

    if True:
        for subset in (refseq_no_runoff, refseq_with_runoff):
            subset['percent_covered'] = (subset['transcription_end'] - subset['transcription_start'] + 1)\
                        /(subset['transcription_end(2)'] - subset['transcription_start(2)'] + 1)
            print 'Total: ', len(subset)
            print 'Percentage: ', len(subset) / len(refseq)
Esempio n. 3
0
                # We want 'id    weight    enhancer_lfc    gene_lfc'
                val_set['weight'] = 1.0
                f = open(
                    yzer.get_filename(
                        change_subdir,
                        '{0}_for_{1}_pairs.cdt'.format(me2_col, key)), 'w')
                val_set.to_csv(f,
                               sep='\t',
                               header=False,
                               index=True,
                               cols=(['weight'] + col_set))

                if False:
                    ax = yzer.scatterplot(
                        val_set,
                        xcolname=me2_col + '_2',
                        ycolname=me2_col,
                        log=False,
                        color='blue',
                        title=
                        'Log fold change of genes and interacting enhancers in {0}: {1}, enhancer 1.5x fold changed'
                        .format(key.replace('_', ' '), kla_col),
                        xlabel='Enhancer LFC',
                        ylabel='Gene LFC',
                        show_2x_range=False,
                        plot_regression=True,
                        show_count=True,
                        show_correlation=True,
                        show_legend=False,
                        save_dir=img_dirpath,
                        show_plot=False)
Esempio n. 4
0
    shared['th2_tag_count'] = nonzero(shared['p2_tag_count'])
    only_th2['th1_tag_count'] = nonzero(only_th2['p2_tag_count'])
    only_th2['th2_tag_count'] = nonzero(only_th2['tag_count'])

    data = shared.append(only_th1, ignore_index=True)
    data = data.append(only_th2, ignore_index=True)

    if False:
        # Scatterplots of tag counts
        ax = yzer.scatterplot(
            data,
            'th1_tag_count',
            'th2_tag_count',
            log=True,
            color='blue',
            title='Th1 versus Th2 {0} Tag Counts at Peaks'.format(peak_pretty),
            show_2x_range=True,
            show_legend=False,
            plot_regression=False,
            show_count=True,
            show_correlation=True,
            save_dir=img_dirpath,
            show_plot=True)

    if True:
        # Motif finding.
        yzer = MotifAnalyzer()
        motifs_dirpath = yzer.get_and_create_path(dirpath, 'motifs')

        data['id'] = data.index

        if False:
 all_data = yzer.import_file(yzer.get_filename(dirpath,'refseq_all.txt'))
 all_data = all_data[~all_data['id'].isin(data['id'])]
 data = pandas.concat([data, all_data])
 data = data.reset_index().fillna(0)
 
 notx = data[data['sequencing_run_id'] == 765]
 kla_30m = data[data['sequencing_run_id'] == 766]
 kla_4h = data[data['sequencing_run_id'] == 773]
 no_intxns = data[data['sequencing_run_id'] == 0]
 
 # Zero won't show up in a log plot, so add one.
 no_intxns['count'] = 1
 
 
 ax = yzer.scatterplot(no_intxns, 
                  xcolname='length', ycolname='count', log=True, color='#CCCCCC', 
                  label='No {0}s'.format(counted), show_2x_range=False, plot_regression=False, 
                  show_count=False, show_correlation=False, show_legend=False, show_plot=False)
 ax = yzer.scatterplot(notx, 
                  xcolname='length', ycolname='count', log=True, color='#B5D8EB', 
                  label='Notx {0}s'.format(counted), show_2x_range=False, plot_regression=False, 
                  show_count=False, show_correlation=False, show_legend=False, show_plot=False, ax=ax)
 ax = yzer.scatterplot(kla_30m, 
                  xcolname='length', ycolname='count', log=True, color='#FFBDD8', 
                  label='KLA 30m {0}s'.format(counted), show_2x_range=False, plot_regression=False, 
                  show_count=False, show_correlation=False, show_legend=False, show_plot=False, ax=ax)
 ax = yzer.scatterplot(kla_4h, 
                  xcolname='length', ycolname='count', log=True, color='#E3AAD6', 
                  title='{0} counts as a function of gene length'.format(counted.title()), 
                  xlabel='Transcript length', ylabel='Distal {0} count'.format(counted), 
                  label='KLA 4h {0}s'.format(counted), show_2x_range=False, plot_regression=False, 
                  show_count=False, show_correlation=True, show_legend=True, 
     trans = data[(data['kla_1_lfc'] >= 1) & (data['dex_over_kla_1_lfc'] <= -.58)]
     rest = data[(data['kla_1_lfc'] < 1) | (data['dex_over_kla_1_lfc'] > -.58)]
     
     key = 'gr_dex_tag_count'
     datasets = [rest[key],trans[key]]
     datasets = [d['gr_kla_dex_tag_count'] - d[key] for d in [rest, trans]]
     
     title = 'Tags in p65 peaks in KLA 1h + Dex 2h: Distal'
     title = 'Diff in tags in GR peaks in KLA 1h + Dex 2h vs Dex 1h'
     ax = grapher.boxplot(datasets, 
                     ['Not transrepressed in KLA 1h + Dex 2h','Transrepressed in KLA 1h + Dex 2h',],
                      title=title, 
                      xlabel='Condition', 
                      ylabel='Total tags in all peaks overlapping transcript', 
                      show_outliers=False, show_plot=False)
     grapher.save_plot(grapher.get_filename(base_dirpath, 'boxplots', 'kla_dex',
                            title.replace(' ','_')))
     grapher.show_plot()
     for sub in datasets: print sub.mean()
     
 if True:
     #data = data[data['has_refseq'] == 1]
     data = data[data['distal'] == 't']
     data['gr_diff'] = data['gr_kla_dex_tag_count'] - data['gr_dex_tag_count']
     data['p65_diff'] = data['p65_kla_dex_tag_count'] - data['p65_kla_tag_count']
     data['gr_by_length'] = data['gr_kla_dex_tag_count']/data['length']*10000
     data['p65_by_length'] = data['p65_kla_dex_tag_count']/data['length']*10000
     grapher.scatterplot(data, 'gr_kla_dex_tag_count', 'p65_diff',log=True)
     
     
     
    enhancers = enhancers[enhancers['total_interactions'] > 0]
    enhancers[
        'dmso_tags_per_bp'] = enhancers['dmso_tag_count'] / enhancers['length']
    enhancers[
        'kla_tags_per_bp'] = enhancers['kla_tag_count'] / enhancers['length']

    # Plot tag counts versus interactions.
    ax = yzer.scatterplot(
        enhancers,
        xcolname='dmso_tags_per_bp',
        ycolname='notx_interactions',
        log=True,
        title=
        'Interactions in Notx as a function of GRO-seq tag counts in DMSO',
        xlabel='GRO-seq tags per bp in DMSO',
        ylabel='Number of interactions {0}in Notx'.format(
            tss_only and 'with gene TSSs ' or ''),
        show_2x_range=True,
        plot_regression=False,
        show_count=True,
        show_correlation=True,
        show_legend=False,
        save_dir=img_dirpath,
        show_plot=True)
    ax = yzer.scatterplot(
        enhancers,
        xcolname='kla_tags_per_bp',
        ycolname='kla_4h_interactions',
        log=True,
        title=
        'Interactions in KLA 4h as a function of GRO-seq tag counts in KLA 1h',
Esempio n. 8
0
    evidence_f = os.path.join(dirpath, 'refseq_evidence.orf')
    data = grapher.import_file(filename)
    evidence = grapher.import_file(evidence_f)

    data['score_orf'] = evidence['score']
    data = data[data['score_orf'] < 200]

    data_coding = data[data['score'] >= 0]
    data_noncoding = data[data['score'] < 0]
    ax = grapher.scatterplot(data_coding,
                             'score_orf',
                             'score',
                             log=False,
                             color='blue',
                             label='Predicted Coding',
                             add_noise=False,
                             show_2x_range=False,
                             plot_regression=False,
                             show_count=False,
                             show_correlation=False,
                             show_legend=False,
                             show_plot=False)
    ax = grapher.scatterplot(
        data_noncoding,
        'score_orf',
        'score',
        log=False,
        color='green',
        title='CPC-derived Coding Potential Predictions for RefSeq mRNA',
        xlabel='ORF score',
        ylabel='Coding score',
Esempio n. 9
0
        yzer.get_filename(dirpath, 'dp_with_thiomac_ctcf.txt')).fillna(0)
    thio = yzer.import_file(
        yzer.get_filename(dirpath, 'thiomac_with_dp_ctcf.txt')).fillna(0)

    # Get venn-diagram sets
    only_dp = dp[dp['thiomac_ctcf_tag_count'] == 0]
    only_thio = thio[thio['dp_ctcf_tag_count'] == 0]
    shared = dp[dp['thiomac_ctcf_tag_count'] != 0]
    shared_check = thio[thio['dp_ctcf_tag_count'] != 0]
    print len(only_dp), len(only_thio), len(shared), len(shared_check)

    data = shared.append(only_dp, ignore_index=True)
    data = data.append(only_thio, ignore_index=True)

    data['dp_nonzero'] = nonzero(data['dp_ctcf_tag_count'])
    data['thio_nonzero'] = nonzero(data['thiomac_ctcf_tag_count'])
    ax = yzer.scatterplot(
        data,
        'dp_nonzero',
        'thio_nonzero',
        xlabel='DP Thymocyte CTCF Tag Count',
        ylabel='ThioMac CTCF Tag Count',
        log=True,
        color='blue',
        title='Tags in CTCF Peaks in DP Thymocytes versus ThioMacs',
        show_2x_range=False,
        show_legend=False,
        show_count=True,
        show_correlation=True,
        save_dir=img_dirpath,
        show_plot=True)
Esempio n. 10
0
    print sum(stat1['foxp3_id'] > 0)
    print sum(stat1['foxp3_id'] > 0) / len(stat1)

    foxp3_enh = foxp3[(foxp3['tss_me2_id'] == 0) & (foxp3['tss_id'] == 0)]
    foxp3_tss = foxp3[(foxp3['tss_me2_id'] > 0) | (foxp3['tss_id'] > 0)]
    print len(foxp3_enh)
    print sum(foxp3_enh['stat1_id'] > 0) / len(foxp3_enh)
    print len(foxp3_tss)
    print sum(foxp3_tss['stat1_id'] > 0) / len(foxp3_tss)

    foxp3_with_stat = foxp3[foxp3['stat1_id'] > 0]
    if False:
        grapher = SeqGrapher()
        grapher.scatterplot(foxp3_with_stat,
                            xcolname='foxp3_tag_count',
                            ycolname='stat1_tag_count',
                            log=True,
                            show_plot=True)

    if False:
        subsets = [
            ('all', foxp3_with_stat),
            ('enh', foxp3_enh[foxp3_enh['stat1_id'] > 0]),
            ('tss', foxp3_tss[foxp3_tss['stat1_id'] > 0]),
        ]
        for k, subset in subsets:
            first_peak = 'foxp3'
            subset['id'] = subset[first_peak + '_id']
            subset['start'] = subset[first_peak + '_start']
            subset['end'] = subset[first_peak + '_end']
Esempio n. 11
0
    dirpath = yzer.get_path(dirpath)
    img_dirpath = yzer.get_and_create_path(dirpath, 'with_me3',
                                           'basic_scatterplots')

    data = yzer.import_file(
        yzer.get_filename(dirpath, 'transcript_vectors.txt'))
    data = data.fillna(0)
    data = data[data['naive_me3_tag_count'] + data['act_me3_tag_count'] > 0]

    for key1, key2, norm_factor in comparison_sets:
        name1 = pretty_names[key1[:-1]] + key1[-1:]
        name2 = pretty_names[key2[:-1]] + key2[-1:]

        data_normed = yzer.normalize(data, key2 + '_tag_count', norm_factor)
        ax = yzer.scatterplot(
            data_normed,
            key1 + '_tag_count',
            key2 + '_tag_count_norm',
            log=True,
            color='blue',
            title='{0} versus {1} Normalized Tag Counts'.format(name1, name2),
            xlabel='{0} tags in RefSeq transcripts'.format(name1),
            ylabel='{0} tags in RefSeq transcripts, normalized'.format(name2),
            add_noise=False,
            show_2x_range=True,
            show_legend=False,
            plot_regression=False,
            show_count=True,
            show_correlation=True,
            save_dir=img_dirpath,
            show_plot=False)
Esempio n. 12
0
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher

if __name__ == '__main__':
    # Demo script: scatter H3K4me2 tag counts against transcript scores and
    # save the figure under a 'scatterplots' subdirectory of the data dir.
    yzer = SeqGrapher()
    dirpath = yzer.get_path(
        'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/Glass Atlas/Demo-data'
    )
    img_dirpath = yzer.get_and_create_path(dirpath, 'scatterplots')

    # Load the peak/transcript table, replace missing values with 0, then
    # average all rows that share an 'id' (the 'id' becomes the index).
    # NOTE(review): import_file presumably returns a pandas DataFrame -- confirm.
    peaks_filename = yzer.get_filename(dirpath,
                                       'me2_peaks_with_transcripts.txt')
    data = (yzer.import_file(peaks_filename)
            .fillna(0)
            .groupby(by='id', as_index=True)
            .mean())

    # Expose the 'score(2)' column under a friendlier name for plotting.
    data['transcript_score'] = data['score(2)']

    # Keyword options for the scatterplot, grouped by what they control.
    plot_options = dict(
        # Axes and data columns.
        xcolname='transcript_score',
        ycolname='tag_count',
        log=True,
        # Labels.
        title='H3K4me2 Tag Count as a Function of Transcript Score',
        xlabel='Glass Atlas Transcript Score',
        ylabel='Normalized H3K4me2 tag count',
        # Overlays and annotations.
        show_2x_range=True,
        plot_regression=True,
        show_count=True,
        show_correlation=True,
        show_legend=False,
        # Output.
        save_dir=img_dirpath,
        show_plot=True,
    )
    ax = yzer.scatterplot(data, **plot_options)
Esempio n. 13
0
    data = data[data['transcript_score'] >= 4]
    data = data[data[['balb_notx_1h_tag_count','nod_notx_1h_tag_count_norm',
                      'balb_kla_1h_tag_count','nod_kla_1h_tag_count_norm']].max(axis=1) >= 10]
    
    refseq = yzer.get_refseq(data)
    
    # Remove low tag counts
    refseq = refseq[refseq['transcript_score'] >= 4]
    

    if False:
        # Non-diabetic balbc vs. nod
        ax = yzer.scatterplot(refseq, 'balb_notx_1h_tag_count', 'nod_notx_1h_tag_count_norm',
                            log=True, color='blue', 
                            xlabel='BALBc notx 1h tag count',ylabel='NOD notx 1h tag count',
                            title='Non-Diabetic BALBc vs. NOD Notx 1h Refseq Transcripts',
                            show_2x_range=True, show_legend=False,
                            show_count=True, show_correlation=True, 
                            save_dir=img_dirpath, show_plot=True)
        
        # Non-diabetic balbc vs. nod
        ax = yzer.scatterplot(refseq, 'balb_kla_1h_tag_count', 'nod_kla_1h_tag_count_norm',
                            log=True, color='blue', 
                            xlabel='BALBc KLA 1h tag count',ylabel='NOD KLA 1h tag count',
                            title='Non-Diabetic BALBc vs. NOD KLA 1h Refseq Transcripts',
                            show_2x_range=True, show_legend=False,
                            show_count=True, show_correlation=True, 
                            save_dir=img_dirpath, show_plot=True)
    if False:
        # Non-diabetic balbc vs. nod
        ax = yzer.scatterplot(data, 'balb_notx_1h_tag_count', 'nod_notx_1h_tag_count_norm',
Esempio n. 14
0
        'balb2_pu_1_tag_count']

    data['nod_with_bl6'] = data['nod_sv_id'] <= .1
    nod_with_bl6 = data[data['nod_with_bl6'] == True]
    nod_with_balb = data[data['nod_with_bl6'] == False]
    if False:
        ax = grapher.scatterplot(
            nod_with_bl6,
            'wt_pu_1_tag_count',
            'nod_pu_1_tag_count',
            subplot=121,
            log=True,
            color='blue',
            xlabel='C57Bl6 PU.1 tag counts',
            ylabel='NOD PU.1 tag counts',
            title=
            'C57Bl6 vs. NOD PU.1 peaks\nwhere C57Bl6 has a PU.1 motif and BALBc does not',
            label='NOD SNP == C57Bl6 SNP',
            add_noise=False,
            show_2x_range=False,
            show_legend=True,
            show_count=True,
            show_correlation=True,
            text_shift=False,
            text_color=True,
            show_plot=False)
        #grapher.save_plot(os.path.join(dirpath, 'bl6_vs_nod_pu_1_peak_tag_counts_bl6_gt_balb_no_balb_motif_nod_eq_bl6.png'))
        #grapher.show_plot()
        ax = grapher.scatterplot(
            nod_with_balb,
            'wt_pu_1_tag_count',
            'nod_pu_1_tag_count',
Esempio n. 15
0
    scatter_dirpath = grapher.get_filename(dirpath, 'scatterplots')

    #############################################
    # One color tag counts
    #############################################
    if False:
        for dataset, label in ((data, 'all transcripts'), (refseq, 'RefSeq')):
            slug_label = label.lower().replace(' ', '_')
            # All DMSO vs. all KLA
            ax = grapher.scatterplot(
                dataset,
                'dmso_tag_count',
                'kla_tag_count_norm',
                log=True,
                color='blue',
                title='DMSO vs. KLA tag counts: All runs, {0}'.format(label),
                xlabel='DMSO 2h tags',
                ylabel='KLA 1h + DMSO 2h tags',
                show_2x_range=True,
                show_legend=True,
                show_count=True,
                show_correlation=True,
                show_plot=False)
            grapher.save_plot(
                grapher.get_filename(
                    scatter_dirpath,
                    'dmso_vs_kla_all_runs_{0}.png'.format(slug_label)))
            grapher.show_plot()

            for x in xrange(1, 5):
                # By group
                ax = grapher.scatterplot(
Esempio n. 16
0
        grouped['kla_ratio'] = grouped['up_in_kla'] / grouped['count']
        grouped = grouped.sort(['kla_ratio']).reset_index(drop=True)
        grouped['idx'] = grouped.index

        shuffled_grouped['kla_ratio'] = shuffled_grouped[
            'up_in_kla'] / shuffled_grouped['count']
        shuffled_grouped = shuffled_grouped.sort(['kla_ratio'
                                                  ]).reset_index(drop=True)
        shuffled_grouped['idx'] = shuffled_grouped.index

        ax = yzer.scatterplot(shuffled_grouped,
                              'idx',
                              'kla_ratio',
                              color='green',
                              label='Shuffled Data'.format(rep),
                              show_2x_range=False,
                              plot_regression=False,
                              show_count=False,
                              show_correlation=False,
                              show_legend=False,
                              show_plot=False)

        ax = yzer.scatterplot(
            grouped,
            'idx',
            'kla_ratio',
            title='Up in KLA {0} Percentage by HiC Domain'.format(rep),
            xlabel='Ordered Index',
            ylabel='Percent of transcripts up in KLA',
            color='blue',
            label='Replicate {0} Data'.format(rep),
Esempio n. 17
0
        yzer.get_filename(dirpath, 'RNA_GroSeq_CountsGenes.txt'))
    homer_data['sequence_identifier'] = homer_data['Gene ID']
    homer_data['homer_tag_count'] = nonzero(homer_data[
        'ThioMac-GroSeq-notx-110513/ genes (Total: 12166480.0) normFactor 0.82']
                                            .fillna(0))
    homer_data = homer_data[['sequence_identifier', 'homer_tag_count']]

    merged = data.merge(homer_data, how='inner', on='sequence_identifier')
    merged = merged.fillna(1)

    if True:
        ax = yzer.scatterplot(merged,
                              xcolname='homer_tag_count',
                              ycolname='sum',
                              log=True,
                              title='RefSeq Tag Count via Homer and Vespucci',
                              xlabel='Tag Count in Homer',
                              ylabel='Tag Count in Vespucci',
                              show_2x_range=True,
                              plot_regression=False,
                              set_limits=True,
                              show_count=True,
                              show_correlation=True,
                              show_legend=False,
                              save_dir=img_dirpath,
                              show_plot=True)

    merged['ratio'] = merged['sum'] / merged['homer_tag_count']
    merged = merged.sort('ratio')
    print merged.head(10)
    print merged.tail(20)
Esempio n. 18
0
    img_dirpath = yzer.get_and_create_path(dirpath, 'figures')

    data = yzer.import_file(
        yzer.get_filename(dirpath, 'ctcf_with_stat1_binding.txt')).fillna(0)
    with_stat1 = data[data['p2_tag_count'] > 0]
    without_stat1 = data[data['p2_tag_count'] == 0]

    if True:
        ax = yzer.piechart(
            [len(with_stat1), len(without_stat1)],
            ['CTCF sites with STAT1', 'CTCF sites without STAT1'],
            title='DP Thymocyte CTCF Sites with STAT1 in Th1 Cells',
            save_dir=img_dirpath,
            show_plot=True)
    data['tag_count_nonzero'] = nonzero(data['tag_count'])
    data['p2_tag_count_nonzero'] = nonzero(data['p2_tag_count'])
    ax = yzer.scatterplot(
        data,
        'tag_count_nonzero',
        'p2_tag_count_nonzero',
        xlabel='CTCF Tag Count',
        ylabel='Stat1 Tag Count',
        log=True,
        color='blue',
        title='Tags in CTCF Peaks versus Overlapping Stat1 Peaks',
        show_2x_range=False,
        show_legend=False,
        show_count=True,
        show_correlation=True,
        save_dir=img_dirpath,
        show_plot=True)
Esempio n. 19
0
 cond_3 = (data['tag_count_3'] > 0) & (data['tag_count_3'] >=
                                       data['tag_count_4'])
 ax = None
 for show_points in (True, False):
     ax = yzer.scatterplot(
         data[cond_1],
         xcolname,
         ycolname,
         log=True,
         color=show_points and '#333333' or 'grey',
         master_dataset=data,
         xlabel='{0} {1} tag count'.format(main, basal_cond),
         ylabel='{0} KLA+Dex tag count'.format(main),
         label='No {0} in KLA+Dex {1}'.format(
             compare,
             show_points and ' ({0})'.format(len(data[cond_1]))
             or ''),
         add_noise=show_points,
         show_points=show_points,
         show_2x_range=False,
         show_legend=False,
         plot_regression=(not show_points),
         show_count=False,
         show_correlation=False,
         set_limits=True,
         show_plot=False,
         ax=ax)
     ax = yzer.scatterplot(
         data[cond_2],
         xcolname,
         ycolname,
        data['h4k8ac_kla_dex_tag_count']) / nonzero(
            data['h4k8ac_kla_tag_count'])

    for subgroup, suffix, dataset in (('RefSeq Transcripts', '_trans',
                                       data.groupby(
                                           by='nearest_refseq_transcript_id',
                                           as_index=False).mean()), ):

        ax = yzer.scatterplot(
            dataset[(dataset['kla_1_lfc_trans'] >= 1)],
            'dmso_1_rpkm',
            'dex_over_kla_1_lfc_trans',
            log=True,
            title=
            'GR transrepression by DMSO expression for Up-regulated genes',
            xlabel='DMSO 2h RPKM',
            ylabel='log2(KLA+Dex GRO-seq/DMSO GRO-seq)',
            show_2x_range=False,
            plot_regression=True,
            show_count=True,
            show_correlation=True,
            save_dir=img_dirpath,
            show_plot=True)
        ax = yzer.scatterplot(
            dataset[(dataset['kla_1_lfc_trans'] >= 1)],
            'h4k8ac_kla_ratio',
            'dex_over_kla_1_lfc_trans',
            log=True,
            title=
            'GR transrepression by KLA to DMSO H4K8ac tag ratio for Up-regulated genes',
            xlabel='KLA Tags/DMSO Tags',