예제 #1
0
'''
Created on Nov 7, 2012

@author: karmel
'''
from __future__ import division
from glasslab.dataanalysis.misc.cd4tcell_finland_2012.resources import replicate_sets
from glasslab.dataanalysis.motifs.motif_analyzer import MotifAnalyzer

if __name__ == '__main__':
    yzer = MotifAnalyzer()

    # Root folder of this analysis, plus the folder used for GO output.
    dirpath = yzer.get_path(
        'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/CD4TCells_Finland_2012/Analysis_2013_02')
    go_path = yzer.get_and_create_path(
        dirpath, 'with_me3', 'go_analysis', '0_8_min_lfc')

    # Load the transcript table and replace missing values with 0.
    vectors_file = yzer.get_filename(dirpath, 'transcript_vectors.txt')
    data = yzer.import_file(vectors_file).fillna(0)

    # Keep only rows whose naive + activated me3 tag counts sum above zero.
    me3_tags = data['naive_me3_tag_count'] + data['act_me3_tag_count']
    data = data[me3_tags > 0]

    if False:
        curr_path = yzer.get_and_create_path(dirpath, 'with_me3',
                                             'motif_analysis')

        yzer.run_homer(data,
                       'all_refseq_preceding',
                       curr_path,
                       center=False,
                       reverse=False,
                       preceding=True,
예제 #2
0
        for ratio in (3, 2, 1.5):
            # Read the enhancer table for this ratio and copy its 'id' column
            # to 'glass_transcript_id' so it can be used as the merge key.
            enhancer_file = yzer.get_filename(
                dirpath, 'boxplots_non_refseq_by_p65',
                'enhancer_like_lose_p65_{0}x_change_dsg_only.txt'.format(
                    ratio))
            enhancers = yzer.import_file(enhancer_file)
            enhancers['glass_transcript_id'] = enhancers['id']

            # Restrict to rows flagged as touching ('t'), then right-merge so
            # that every row of the enhancer set is retained.
            touching = all_data[all_data['touches'] == 't']
            data = touching.merge(enhancers,
                                  on='glass_transcript_id',
                                  how='right',
                                  suffixes=['', 'trans'])

            curr_path = yzer.get_and_create_path(
                motif_dirpath, 'enhancer_like_lose_p65',
                'ratio_{0}'.format(ratio))

            # Collapse to one averaged row per (id, chr_name) once the
            # selection above has been made.
            data = data.groupby(['id', 'chr_name'], as_index=False).mean()

            #bg = yzer.get_filename(motif_dirpath,
            #        'peak_motifs_by_transcript_lfc', 'p65_kla',
            #        'all','all','all','all_regions_for_homer.txt')

            yzer.run_homer(data,
                           'ratio_{0}'.format(ratio),
                           curr_path,
                           center=False,
                           reverse=False,
                           preceding=False,
                           size=size,
예제 #3
0
        peak_type = 'p65_kla_dex'

        # Load the promoter vectors for this peak type; 'peak_id' is mirrored
        # into an 'id' column.
        promoter_file = yzer.get_filename(
            dirpath, 'from_peaks/{0}_promoter_vectors.txt'.format(peak_type))
        data = yzer.import_file(promoter_file)
        data['id'] = data['peak_id']

        # Transcript ids whose 'relevant_sets_primary' value reaches the
        # threshold, and the peaks that belong to those transcripts.
        thresh = 2
        passing = grouped['relevant_sets_primary'] >= thresh
        ids = grouped[passing]['glass_transcript_id']
        dataset = data[data['glass_transcript_id'].isin(ids)]

        # Keyword options shared by both exports below.
        homer_opts = dict(center=True, reverse=False, preceding=False,
                          size=200)

        # Toggle for exporting the complete, unfiltered peak set.
        export_all_peaks = True
        if export_all_peaks:
            yzer.prep_files_for_homer(
                data,
                'all_{0}_200'.format(peak_type),
                yzer.get_and_create_path(dirpath, 'from_peaks', peak_type),
                **homer_opts)

        # Export the thresholded subset; the thresholds are encoded in the
        # output name.
        subset_name = (
            'paused_in_{0}_at_least_{1}_min_{2}_down_in_dex_{3}_200'.format(
                thresh, min_ratio, secondary_min_ratio, peak_type))
        yzer.prep_files_for_homer(
            dataset,
            subset_name,
            yzer.get_and_create_path(dirpath, 'from_peaks', peak_type),
            **homer_opts)
예제 #4
0
        left outer join
        chipseq.peak_{} p{counter}
        on p1.chromosome_id = p{counter}.chromosome_id
        and p1.start_end && p{counter}.start_end
        '''.format(oth_breed[0][i], counter=counter))

            # Put it all together: append the comma-separated select
            # expressions, then the FROM clause on this sample's peak table
            # joined to the chromosome table.
            sql += ',\n'.join(selects)
            sql += '''
        from chipseq.peak_{} p1
        join genome_reference_mm10.chromosome chr 
        on p1.chromosome_id = chr.id
        '''.format(sample)
            # Append the per-sample join fragments collected in `joins`.
            sql += ''.join(joins)
            # Anti-join: the left outer join combined with `reg.id is NULL`
            # keeps only peaks with no same-chromosome row in
            # sequence_transcription_region matching on start_site_1000.
            # NOTE(review): `&&` is presumably the database's range-overlap
            # operator -- confirm against the schema.
            sql += '''
        left outer join genome_reference_mm10.sequence_transcription_region reg
        on p1.chromosome_id = reg.chromosome_id
        and p1.start_end && reg.start_site_1000
        where reg.id is NULL;
        '''
            # Echo the assembled query to stdout.
            print(sql)
            # Set up output dir
            sample_path = yzer.get_and_create_path(dirpath, curr_name)

            # Get data
            data = dataframe_from_query(sql, engine)

            # Write the result as tab-separated text, with a header row and
            # without the index column.
            output_file = yzer.get_filename(sample_path,
                                            curr_name + '_enhancers.txt')
            data.to_csv(output_file, sep='\t', header=True, index=False)
예제 #5
0
'''
Created on Feb 8, 2013

@author: karmel
'''
from glasslab.dataanalysis.motifs.motif_analyzer import MotifAnalyzer

if __name__ == '__main__':
    yzer = MotifAnalyzer()

    # Analysis root, and the 'motifs/' folder beneath it.
    base_dirpath = yzer.get_path(
        'karmel/GlassLab/Notes_and_Reports/NOD_BALBc/ThioMacs/Analysis_2013_02/')
    dirpath = yzer.get_and_create_path(base_dirpath, 'motifs/')

    # Load the transcript table and replace missing values with 0.
    filename = yzer.get_filename(base_dirpath, 'transcript_vectors.txt')
    data = yzer.import_file(filename).fillna(0)

    # Promoters
    if False:
        refseq = data[data['has_refseq'] == 1]
        refseq = refseq[refseq['transcript_score'] >= 4]
        if True:
            yzer.run_homer(refseq,
                           'refseq_promoter',
                           dirpath,
                           cpus=6,
                           center=False,
                           reverse=False,
                           preceding=True,
                           size=400,
                       #('with_pu_1_kla_dex', data[data['tag_count_5'] >= 10]),
                       ('no_pu_1_kla_dex', data[data['tag_count_5'] < 10]),
                       ('gt_partner', data[data['tag_count'] > 1.2*data['tag_count_2']]),
                       #('lt_partner', data[data['tag_count']*1.2 < data['tag_count_2']]),
                       ('with_partner', data[data['tag_count_2'] >= 10]),
                       ('no_partner', data[data['tag_count_2'] < 10]),
                       #('down_in_dex', data[data['dex_1_lfc'] <= -1]),
                       #('down_in_kla_dex', data[data['kla_dex_1_lfc'] <= -1]),
                       #('down_in_kla', data[data['kla_1_lfc'] <= -1]),
                       #('up_in_dex', data[data['dex_1_lfc'] >= 1]),
                       #('up_in_kla_dex', data[data['kla_dex_1_lfc'] >= 1]),
                       #('up_in_kla', data[data['kla_1_lfc'] >= 1]),
                       #('transrepressed', data[(data['kla_1_lfc'] >= 1) & (data['dex_over_kla_1_lfc'] <= -.58)]),
                       #('up_in_dex_down_in_kla_dex', data[(data['dex_1_lfc'] >= 1) & (data['kla_dex_1_lfc'] - data['dex_1_lfc'] <= -.58)]),
                       ):
     # Folder for this super-set, and the folder for this subset inside it.
     parent_path = yzer.get_and_create_path(
         motif_dirpath, 'peak_motifs_by_transcript_lfc', peak_type,
         super_name)
     curr_path = yzer.get_and_create_path(parent_path, name)

     # We have multiple copies of peaks if they align to different
     # transcripts, so average them per (id, chr_name) after selection.
     dataset = dataset.groupby(['id', 'chr_name'], as_index=False).mean()

     # Every subset other than 'all' is given the 'all' regions file as bg;
     # the 'all' subset itself runs with bg=None.
     bg = None
     if name != 'all':
         bg = yzer.get_filename(parent_path, 'all', 'all',
                                'all_regions_for_homer.txt')

     yzer.run_homer(dataset,
                    name,
                    curr_path,
                    center=True,
                    reverse=False,
                    preceding=False,
                    size=size,
                    cpus=6,
                    bg=bg)
 
예제 #7
0
'''
Created on Feb 20, 2013

@author: karmel
'''
from __future__ import division
from glasslab.dataanalysis.motifs.motif_analyzer import MotifAnalyzer

if __name__ == '__main__':
    yzer = MotifAnalyzer()

    # Analysis root and the folder for motif output.
    dirpath = yzer.get_path(
        'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/CD4TCells/Rudensky_enhancers')
    motifs_dirpath = yzer.get_and_create_path(dirpath, 'motifs')

    peak_pretty = 'Foxp3'
    peak = peak_pretty.lower()

    # Load the two tables, replacing missing values with 0 so that a zero id
    # can be tested for below.
    foxp3_file = yzer.get_filename(
        dirpath, '{0}_1_with_naive_me2.txt'.format(peak))
    foxp3 = yzer.import_file(foxp3_file).fillna(0)
    naive_file = yzer.get_filename(
        dirpath, 'naive_me2_with_{0}.txt'.format(peak))
    naive = yzer.import_file(naive_file).fillna(0)

    # Filter out promoters: keep rows whose tss_id is 0 (zero or originally
    # missing, given the fillna above).
    foxp3_non_promoter = foxp3['tss_id'] == 0
    foxp3 = foxp3[foxp3_non_promoter]
    naive_non_promoter = naive['tss_id'] == 0
    naive = naive[naive_non_promoter]

    # Venn-diagram sets for foxp3/me2: rows of each table whose id column
    # for the other table is 0.
    foxp3_without_naive = foxp3['naive_id'] == 0
    only_foxp3 = foxp3[foxp3_without_naive]
    naive_without_foxp3 = naive['foxp3_1_id'] == 0
    only_naive = naive[naive_without_foxp3]
예제 #8
0
        size = 200
        if True:
            # Load the vectors for this peak type, replacing missing values
            # with 0.
            vectors_file = yzer.get_filename(
                dirpath, '{0}_vectors.txt'.format(peak_type))
            all_data = yzer.import_file(vectors_file).fillna(0)

            # Both levels currently hold a single ('all', frame) entry.
            super_sets = (('all', all_data),)
            for super_name, data in super_sets:
                subsets = (('all', data),)
                for name, dataset in subsets:
                    curr_path = yzer.get_and_create_path(
                        dirpath, peak_type, super_name, name)

                    # We have multiple copies of peaks if they align to
                    # different transcripts; average them per (id, chr_name)
                    # after selecting those that we want.
                    group_keys = ['id', 'chr_name']
                    dataset = dataset.groupby(group_keys,
                                              as_index=False).mean()

                    yzer.run_homer(
                        dataset, name, curr_path,
                        center=True, reverse=False, preceding=False,
                        size=size, cpus=6)
                                      & (data['tag_count_4'] >= 10)]),
                ('with_gr_either', data[(data['tag_count_3'] >= 10) |
                                        (data['tag_count_4'] >= 10)]),
                ('with_gr_kla_dex', data[data['tag_count_3'] >= 10]),
                ('with_gr_dex', data[data['tag_count_4'] >= 10]),
                    #('down_in_dex', data[data['dex_1_lfc'] <= -1]),
                    #('down_in_kla_dex', data[data['kla_dex_1_lfc'] <= -1]),
                    #('down_in_kla', data[data['kla_1_lfc'] <= -1]),
                    #('up_in_dex', data[data['dex_1_lfc'] >= 1]),
                    #('up_in_kla_dex', data[data['kla_dex_1_lfc'] >= 1]),
                    #('up_in_kla', data[data['kla_1_lfc'] >= 1]),
                    #('transrepressed', data[(data['kla_1_lfc'] >= 1) & (data['dex_over_kla_1_lfc'] <= -.58)]),
                    #('up_in_dex_down_in_kla_dex', data[(data['dex_1_lfc'] >= 1) & (data['kla_dex_1_lfc'] - data['dex_1_lfc'] <= -.58)]),
            ):
                # Folder for this (super_name, name) combination under
                # 'redistributed_pairs/high_quality_pairs_vs_kla_bg'.
                curr_path = yzer.get_and_create_path(
                    motif_dirpath, 'redistributed_pairs',
                    'high_quality_pairs_vs_kla_bg', super_name, name)
                # Group them after selecting those that we want: one averaged
                # row per (id, chr_name).
                dataset = dataset.groupby(['id', 'chr_name'],
                                          as_index=False).mean()

                # Fixed regions file from the p65_kla 'all' run.
                # NOTE(review): presumably passed to run_homer as the
                # background set -- the call is cut off in this view, confirm.
                bg = yzer.get_filename(motif_dirpath,
                                       'peak_motifs_by_transcript_lfc',
                                       'p65_kla', 'all', 'all', 'all',
                                       'all_regions_for_homer.txt')

                yzer.run_homer(dataset,
                               name,
                               curr_path,
                               center=True,
                               reverse=False,