Exemple #1
0
'''
Created on Jan 11, 2013

@author: karmel

What do enhancers that are gaining methyl with KLA look like?

'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from glasslab.utils.functions import nonzero
from collections import OrderedDict

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/HiC/'
    dirpath = yzer.get_path(dirpath)
    data_dirpath = yzer.get_filename(dirpath, 'enhancer_sets')

    kla_col = 'kla_lfc'

    # tss_only picks the image subdirectory: 'genic' when set,
    # 'all_interactions' otherwise. A conditional expression replaces the
    # older `cond and a or b` idiom, which silently misbehaves if `a` is
    # ever falsy.
    tss_only = False
    img_dirpath = yzer.get_and_create_path(
        dirpath, 'novel_me2_sites',
        'genic' if tss_only else 'all_interactions', 'ratio_10')

    # Disabled branch (never runs as written): imports
    # 'all_distal_enhancers_inc_me2.txt' from the enhancer_sets directory.
    if False:
        enhancers = yzer.import_file(
            yzer.get_filename(data_dirpath,
                              'all_distal_enhancers_inc_me2.txt'))
Exemple #2
0
'''
Created on Mar 4, 2013

@author: karmel
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher

if __name__ == '__main__':
    yzer = SeqGrapher()

    # Resolve the two analysis directories (ThioMac first, then BMDC).
    dirpath = yzer.get_path(
        'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/NOD_BALBc/ThioMacs/Analysis_2013_02/')
    dirpath_bmdc = yzer.get_path(
        'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/NOD_BALBc/BMDCs/Analysis_2013_03/')
    img_dirpath = yzer.get_and_create_path(dirpath, 'bmdc_vs_thiomac')

    # Both data sets are read from a file named 'transcript_vectors.txt',
    # one per analysis directory.
    thio_filename = yzer.get_filename(dirpath, 'transcript_vectors.txt')
    thio = yzer.import_file(thio_filename)
    bmdc_filename = yzer.get_filename(dirpath_bmdc, 'transcript_vectors.txt')
    bmdc = yzer.import_file(bmdc_filename)

    sets = []

    for raw_vectors in (thio, bmdc):
        # Missing values become 0 before the RefSeq subset is taken.
        data = raw_vectors.fillna(0)
        refseq = yzer.get_refseq(data)

        # Remove low tag counts
        #refseq = refseq[refseq['transcript_score'] >= 4]
Exemple #3
0
'''
Created on Jul 11, 2012

@author: karmel
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/Classes/Rotations/Finland_2012/GR_Project/'
    dirpath = yzer.get_path(dirpath)
    img_dirpath = yzer.get_and_create_path(dirpath, 'peak_scatterplots')

    if True:
        for main, compare, basal_cond in (('p65', 'GR', 'KLA'), ('GR', 'p65',
                                                                 'Dex')):
            data = yzer.import_file(
                yzer.get_filename(dirpath, 'motifs', 'from_peaks',
                                  '{0}_kla_dex_vectors.txt'.format(main)))

            data = data.fillna(0)
            data = data.groupby(['id', 'chr_name'], as_index=False).mean()

            xcolname, ycolname = 'tag_count_2', 'tag_count'  #'p65_kla_tag_count', 'p65_kla_dex_tag_count',
            data = data[data[ycolname] >= 10]

            cond_1 = (data['tag_count_3'] == 0)
            cond_2 = (data['tag_count_3'] > 0) & (data['tag_count_3'] <
                                                  data['tag_count_4'])
            cond_3 = (data['tag_count_3'] > 0) & (data['tag_count_3'] >=
Exemple #4
0
'''
Created on Aug 25, 2013

@author: karmel

Plot supplementary figure showing Hah et al error rates 
against MAX EDGE values when Vespucci is built without knowledge
of RefSeq boundaries.
 
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = yzer.get_path(
        'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/Glass Atlas/NAR_review_data/no_refseq')
    img_dirpath = yzer.get_and_create_path(dirpath, 'plots')

    # Set up the axes, title and axis labels before any data is drawn.
    ax = yzer.set_up_plot()
    title = 'Benchmarking without Foreknowledge of RefSeq'
    yzer.add_title(title, ax)
    yzer.add_axis_labels(
        'MAX_EDGE value', 'Error rate defined by Hah et al. (%)')

    # (MAX_EDGE value, error rate as a fraction), one pair per benchmark run.
    error_by_max_edge = (
        (100, 0.388551822833),
        (500, 0.372390444765),
        (1000, 0.263807982126),
        (4000, 0.124663089396),
        (5000, 0.121784970634),
        (5500, 0.121807917409),
        (6000, 0.123263849815),
        (10000, 0.142530838464),
    )
    max_edges = [pair[0] for pair in error_by_max_edge]
    error_rates = [pair[1] for pair in error_by_max_edge]
    # Fractions -> percentages, matching the '(%)' axis label.
    error_pcts = [e * 100 for e in error_rates]
'''
Created on Oct 26, 2012

@author: karmel
'''
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from matplotlib import pyplot
from glasslab.utils.functions import nonzero
from glasslab.dataanalysis.misc.gr_project_2012.v1.elongation import total_tags_per_run

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/GR_Analysis/enhancer_classification'
    dirpath = yzer.get_path(dirpath)

    # `consistent` picks the output subdirectory. A conditional expression
    # replaces the older `cond and a or b` idiom, which silently misbehaves
    # if the first alternative is ever falsy.
    consistent = False
    img_dirpath = yzer.get_and_create_path(
        dirpath, 'boxplots_by_expression',
        'consistent' if consistent else 'rep1')

    data = yzer.import_file(
        yzer.get_filename(dirpath, 'enhancers_with_nearest_gene.txt'))
    # Rewrite the schema name embedded in each UCSC link string.
    data['ucsc_link_nod'] = data['ucsc_link_nod'].apply(
        lambda s: s.replace('nod_balbc', 'gr_project_2012'))

    draw_pies = True
    min_tags = 30
    ratio = 1.5
    # Make sure we have dimethyl: the largest value across all columns whose
    # name contains 'h3k4me2' must exceed min_tags.
    data = data[data.filter(like='h3k4me2').max(axis=1) > min_tags]
    data = data[data['minimal_distance'] >= 1000]
Exemple #6
0
'''
Created on Mar 11, 2013

@author: karmel

Note: Made font.weight = bold and axes.titlesize = 24, font.size = 16 in matplotlibrc
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from glasslab.utils.functions import nonzero

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = yzer.get_path(
        'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/Glass Atlas/Demo-data')

    img_dirpath = yzer.get_and_create_path(
        dirpath, 'refseq_to_homer/large_gap_500bp')

    # Tag counts from 'refseq_tag_counts_500bp.txt'; missing sums become 0
    # before nonzero() is applied.
    refseq_counts_file = yzer.get_filename(
        dirpath, 'refseq_tag_counts_500bp.txt')
    data = yzer.import_file(refseq_counts_file)
    data['sum'] = nonzero(data['sum'].fillna(0))

    # HOMER counts: copy the two columns of interest to shorter names, then
    # keep only those two columns.
    homer_counts_file = yzer.get_filename(
        dirpath, 'RNA_GroSeq_CountsGenes.txt')
    homer_data = yzer.import_file(homer_counts_file)
    homer_count_column = 'ThioMac-GroSeq-notx-110513/ genes (Total: 12166480.0) normFactor 0.82'
    homer_data['sequence_identifier'] = homer_data['Gene ID']
    homer_data['homer_tag_count'] = nonzero(
        homer_data[homer_count_column].fillna(0))
    homer_data = homer_data[['sequence_identifier', 'homer_tag_count']]
Exemple #7
0
'''
Created on Nov 26, 2012

@author: karmel

Do the gene groups outlined in Ramirez-Carrozzi 2006 and 2009 correlate
with expression changes in Dex+KLA?
'''
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = yzer.get_path(
        'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/GR_Analysis/cpg_island_promoters')

    for rep in (4, 3, 1):
        # One output directory per replicate.
        rep_dirname = 'rep{0}'.format(rep)
        img_dirpath = yzer.get_and_create_path(
            dirpath, 'boxplots_by_expression', 'genes_with_gr', rep_dirname,
            'transrepressed')

        # The table is re-imported on every pass, so each replicate is
        # filtered from the full data set.
        vectors_file = yzer.get_filename(dirpath, 'transcript_vectors.txt')
        data = yzer.import_file(vectors_file)
        data['ucsc_link_nod'] = data['ucsc_link_nod'].apply(
            lambda s: s.replace('nod_balbc', 'gr_project_2012'))
        data = data.fillna(0)

        # Keep rows with KLA LFC >= 1 and Dex-over-KLA LFC <= -.58 for this
        # replicate.
        kla_mask = data['kla_{0}_lfc'.format(rep)] >= 1
        dex_mask = data['dex_over_kla_{0}_lfc'.format(rep)] <= -.58
        data = data[kla_mask & dex_mask]
Exemple #8
0
'''
Created on Feb 12, 2013

@author: karmel
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from glasslab.dataanalysis.motifs.motif_analyzer import MotifAnalyzer

if __name__ == '__main__':
    yzer = SeqGrapher()

    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/NOD_BALBc/ThioMacs/Analysis_2013_02/'
    dirpath = yzer.get_path(dirpath)
    img_dirpath = yzer.get_and_create_path(dirpath, 'srf_binding')
    data = yzer.import_file(
        yzer.get_filename(dirpath, 'transcript_vectors.txt'))

    data = data.fillna(0)
    data = data[data[['nod_notx_1h_tag_count', 'balb_notx_1h_tag_count']].max(
        axis=1) >= 10]

    subsets = [
        data,
        data[(data['has_refseq'] == 1) & (data['transcript_score'] >= 4)],
        data[(data['distal'] == 't') & (data['h3k4me2_tag_count'] > 10)]
    ]

    # Add in nearest genes for enhancers
    enh = subsets[2].copy()
    nearest_genes = yzer.import_file(
'''
Created on Jan 11, 2013

@author: karmel

What do enhancers that are gaining methyl with KLA look like?

'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from glasslab.utils.functions import nonzero
from collections import OrderedDict

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/HiC/'
    dirpath = yzer.get_path(dirpath)
    data_dirpath = yzer.get_filename(dirpath, 'enhancer_sets')

    kla_col = 'kla_lfc'

    tss_only = False
    img_dirpath = yzer.get_and_create_path(
        dirpath, 'interactions_by_kla_lfc', tss_only and 'genic'
        or 'all_interactions', 'lfc_2')

    # File generated in novel_me2_sites
    enhancers = yzer.import_file(
        yzer.get_filename(
            data_dirpath,
            'all_enhancers_with_me2_and_{0}interaction_stats.txt'.format(
Exemple #10
0
'''
Created on May 2, 2013

@author: karmel

Note: Made font.weight = normal and axes.titlesize = 24 in matplotlibrc
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from glasslab.dataanalysis.misc.demoatlas.rpkm_to_score import PrettyAxisGrapher

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/Glass Atlas/NAR_review_data/Post-gene'
    dirpath = yzer.get_path(dirpath)
    img_dirpath = yzer.get_and_create_path(dirpath, 'scatterplots')

    data = yzer.import_file(
        yzer.get_filename(dirpath, 'post_gene_transcripts.txt'))
    refseq = yzer.import_file(
        yzer.get_filename(dirpath, 'all_expressed_refseq.txt'))

    # Split the RefSeq rows by whether their id appears in data['gene_id'].
    # The membership mask is computed once and reused for both halves.
    has_runoff = refseq['id'].isin(data['gene_id'])
    refseq_with_runoff = refseq[has_runoff]
    refseq_no_runoff = refseq[~has_runoff]
    # Disabled debugging output. The single-argument parenthesised print is
    # valid under both Python 2 and Python 3.
    if False:
        print(len(refseq_no_runoff))
        print(refseq_no_runoff.tail(100).to_string())

    # Calculate length of runoff (inclusive of both end coordinates)
    data['length'] = (
        data['transcription_end'] - data['transcription_start'] + 1)
'''
Created on Jan 9, 2013

@author: karmel

Do novel interactions gain or lose me2? 
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from glasslab.utils.functions import nonzero

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = yzer.get_path(
        'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/HiC/')
    data_dirpath = yzer.get_filename(dirpath, 'enhancer_sets')
    img_dirpath = yzer.get_and_create_path(
        dirpath, 'novel_interactions_kla_lfc', 'all_interactions')

    interactions_file = yzer.get_filename(
        data_dirpath,
        'transcript_pairs_enhancer_with_anything_with_me2_inc_me2_counts.txt')
    interactions = yzer.import_file(interactions_file)
    # Keep only the pairs whose 'count' is greater than 1.
    interactions = interactions[interactions['count'] > 1]

    transcripts_file = yzer.get_filename(
        data_dirpath, 'transcript_vectors.txt')
    all_transcripts = yzer.import_file(transcripts_file)

    kla_col = 'kla_lfc'
'''
Created on Jan 9, 2013

@author: karmel

Do novel interactions gain or lose me2? 
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from glasslab.utils.functions import nonzero

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = yzer.get_path(
        'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/HiC/')
    data_dirpath = yzer.get_filename(dirpath, 'enhancer_sets')
    img_dirpath = yzer.get_and_create_path(
        dirpath, 'novel_enhancer_me2_change', 'all_interactions')

    interactions_file = yzer.get_filename(
        data_dirpath,
        'transcript_pairs_enhancer_with_anything_with_me2_inc_me2_counts.txt')
    interactions = yzer.import_file(interactions_file)
    # Keep only the pairs whose 'count' is greater than 1, then replace any
    # remaining missing values with 0.
    interactions = interactions[interactions['count'] > 1]

    interactions = interactions.fillna(0)

    # Key on peak id, not enhancer id, which could be bidirectional
    #interactions['id_2'] = interactions['h3k4me2_id']
'''
Created on Jan 2, 2013

@author: karmel
'''

from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
import pandas

if __name__ == '__main__':
    # Are we looking at enhancer interactions (False) or counts (True)?
    enhancer_counts = True

    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/HiC/enhancers_by_gene_length'
    dirpath = yzer.get_path(dirpath)

    img_dirpath = yzer.get_and_create_path(dirpath, 'scatterplots')

    # Conditional expression instead of the fragile `cond and a or b` idiom.
    counted = 'enhancer' if enhancer_counts else 'interaction'

    # The first set has length with interaction counts;
    # the second has length for all transcripts, even those without
    # interactions. We want to merge such that we add the interaction-less
    # genes with a count of 0.
    data = yzer.import_file(
        yzer.get_filename(dirpath,
                          '{0}_counts_by_refseq.txt'.format(counted)))
    all_data = yzer.import_file(yzer.get_filename(dirpath, 'refseq_all.txt'))
    # Only append the transcripts that are not already present in `data`.
    all_data = all_data[~all_data['id'].isin(data['id'])]
    data = pandas.concat([data, all_data])
    # fillna(0) is what gives the appended rows their count of 0.
    data = data.reset_index().fillna(0)

    # NOTE(review): the appended rows only pass the filters below if
    # refseq_all.txt itself carries sequencing_run_id; otherwise fillna(0)
    # sets it to 0 and they drop out of both subsets -- confirm.
    notx = data[data['sequencing_run_id'] == 765]
    kla_30m = data[data['sequencing_run_id'] == 766]
Exemple #14
0
'''
Created on Feb 12, 2013

@author: karmel
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from glasslab.utils.functions import nonzero
from glasslab.dataanalysis.motifs.motif_analyzer import MotifAnalyzer

if __name__ == '__main__':
    yzer = SeqGrapher()

    dirpath = yzer.get_path(
        'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/CD4TCells/Oshea_enhancers/peak_overlaps')
    img_dirpath = yzer.get_and_create_path(dirpath, 'figures')

    # Display form of the peak name, and the lower-cased form used in the
    # input file names.
    peak_pretty = 'p300'
    peak = peak_pretty.lower()

    th1_file = yzer.get_filename(
        dirpath, 'th1_with_th2_{0}.txt'.format(peak))
    th1 = yzer.import_file(th1_file).fillna(0)
    th2_file = yzer.get_filename(
        dirpath, 'th2_with_th1_{0}.txt'.format(peak))
    th2 = yzer.import_file(th2_file).fillna(0)

    # Filter out promoters: after fillna(0), a tss_id of 0 includes every
    # row that had no tss_id at all.
    th1 = th1[th1['tss_id'] == 0]
    th2 = th2[th2['tss_id'] == 0]

    # Get venn-diagram sets
Exemple #15
0
'''
Created on Oct 26, 2012

@author: karmel
'''
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from matplotlib import pyplot

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = yzer.get_path(
        'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/GR_Analysis/enhancer_classification')
    img_dirpath = yzer.get_and_create_path(
        dirpath, 'piecharts_for_genes_by_mechanism')

    enhancers_file = yzer.get_filename(
        dirpath, 'enhancers_with_nearest_gene.txt')
    data = yzer.import_file(enhancers_file)
    # Rewrite the schema name embedded in each UCSC link string.
    data['ucsc_link_nod'] = data['ucsc_link_nod'].apply(
        lambda s: s.replace('nod_balbc', 'gr_project_2012'))

    draw_pies = True
    min_tags = 30
    ratio = 1.5
    # Make sure we have dimethyl: the largest value across all columns whose
    # name contains 'h3k4me2' must exceed min_tags.
    data = data[data.filter(like='h3k4me2').max(axis=1) > min_tags]
    data = data[data['minimal_distance'] >= 1000]

    #data = yzer.collapse_strands(data)

    transcripts_file = yzer.get_filename(dirpath, 'transcript_vectors.txt')
    transcripts = yzer.import_file(transcripts_file)
    # Expose the transcript id under the name of the join key used by `data`.
    transcripts['nearest_refseq_transcript_id'] = transcripts['id']
    # Left join: every row of `data` is kept; clashing transcript columns
    # receive the '_trans' suffix.
    data = data.merge(
        transcripts,
        how='left',
        on='nearest_refseq_transcript_id',
        suffixes=['', '_trans'])
    
Exemple #16
0
'''
Created on Jan 3, 2013

@author: karmel

Plot gene-enhancer me2 LFC; do we see correlation?
'''
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from glasslab.utils.functions import nonzero
import numpy

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = yzer.get_path(
        'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/HiC/')
    data_dirpath = yzer.get_filename(dirpath, 'enhancer_sets')
    img_dirpath = yzer.get_and_create_path(
        dirpath, 'gene_enhancer_me2_lfc', 'scatterplots')

    interactions_file = yzer.get_filename(
        data_dirpath,
        'transcript_pairs_enhancer_with_anything_with_me2_inc_me2_counts.txt')
    interactions = yzer.import_file(interactions_file)
    # Keep only the pairs whose 'count' is greater than 1.
    interactions = interactions[interactions['count'] > 1]
    transcripts_file = yzer.get_filename(
        data_dirpath, 'transcript_vectors.txt')
    all_transcripts = yzer.import_file(transcripts_file)

    # One pass per me2 timepoint; the me2 column name is derived from the
    # timepoint, while the KLA column is the same on every pass.
    for me2_timepoint in ('6h', '24h'):
        me2_col = 'me2_{0}_ratio'.format(me2_timepoint)
        kla_col = 'kla_lfc'
Exemple #17
0
'''
Created on Oct 1, 2012

@author: karmel
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from matplotlib import pyplot
import numpy

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/Classes/Rotations/Finland_2012/GR_Project/'
    dirpath = yzer.get_path(dirpath)
    img_dirpath = yzer.get_and_create_path(dirpath, 'piecharts_from_p65_gr')

    if True:
        # Each pass loads the vectors file named after `main`.
        for main, compare, basal_cond in (
            ('GR', 'p65', 'Dex'),
            ('p65', 'GR', 'KLA'),
        ):
            data = yzer.import_file(
                yzer.get_filename(dirpath, 'motifs', 'from_peaks',
                                  '{0}_kla_dex_vectors.txt'.format(main)))

            # Get nearby peaks first: this must run before fillna(0) below,
            # because the null test cannot tell missing values from real
            # zeros once they are filled. notnull() replaces the unidiomatic
            # `isnull() == False`.
            ids_with_nearby = data[
                data['distance_to_tss_2'].notnull()
                & (data['distance_to_peak_2'] <= 1000)]['id']

            data = data.fillna(0)
Exemple #18
0
'''
Created on Nov 26, 2012

@author: karmel


'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from random import shuffle

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/GR_Analysis/hic_domains'
    dirpath = yzer.get_path(dirpath)

    data = yzer.import_file(
        yzer.get_filename(dirpath, 'transcript_vectors.txt'))
    # Rewrite the schema name embedded in each UCSC link string.
    data['ucsc_link_nod'] = data['ucsc_link_nod'].apply(
        lambda s: s.replace('nod_balbc', 'gr_project_2012'))
    data = data.fillna(0)

    # Keep a handle on the unfiltered table. Filtering `data` in place inside
    # the loop made the filters cumulative, so replicates 3 and 1 only saw
    # rows that had already passed the replicate-4 threshold.
    unfiltered_data = data

    for rep in (4, 3, 1):
        img_dirpath = yzer.get_and_create_path(dirpath, 'lfc_histograms',
                                               'rep{0}'.format(rep))

        kla_key = 'kla_{0}_lfc'.format(rep)
        dex_kla_key = 'dex_over_kla_{0}_lfc'.format(rep)

        # Rows with KLA LFC >= 1 in this replicate only.
        data = unfiltered_data[unfiltered_data[kla_key] >= 1]
563,449,491,118
359,108,148,169'''

    raw_data = raw_data.split('\n')
    dates = [
        datetime.strptime(raw_data[x], '%Y_%m_%d')
        for x in xrange(0, len(raw_data), 3)
    ]
    set1 = numpy.array([
        map(int, raw_data[x].split(',')) for x in xrange(1, len(raw_data), 3)
    ]).T
    set2 = numpy.array([
        map(int, raw_data[x].split(',')) for x in xrange(2, len(raw_data), 3)
    ]).T

    grapher = SeqGrapher()
    ax = grapher.timeseries(
        dates, [set1, set2],
        show_median=True,
        colors=['blue', 'red'],
        labels=['Control', 'TDB treated'],
        title='Blood glucose values in NOD mice after TDB treatment',
        xlabel='Date',
        ylabel='Blood glucose (mg/dL, via Aviva AccuChek meter)',
        show_plot=False,
        show_legend=True)
    dirpath = '/Users/karmel/Desktop/Projects/GlassLab/Notes_and_Reports/NOD_BALBc/TDB in vitro/'

    grapher.save_plot(
        os.path.join(
            dirpath,
Exemple #20
0
'''
Created on Feb 15, 2013

@author: karmel
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher

if __name__ == '__main__':
    yzer = SeqGrapher()

    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/NOD_BALBc/ThioMacs/Analysis_2013_02/'
    dirpath = yzer.get_path(dirpath)
    img_dirpath = yzer.get_and_create_path(dirpath, 'srf_ko_targets')
    data = yzer.import_file(
        yzer.get_filename(dirpath, 'transcript_vectors.txt'))

    data = data.fillna(0)
    data = data[data[['nod_notx_1h_tag_count', 'balb_notx_1h_tag_count']].max(
        axis=1) >= 10]
    data = data[(data['has_refseq'] == 1) & (data['transcript_score'] >= 4)]

    # From Amy Sullivan SRF paper
    down_in_srf_ko = [
        'Cnn2', 'Srf', 'Lima1', 'Rhoj', 'Coro1a', 'Il1rn', 'Lsp1', 'LOC277203',
        'Vcl', 'Card11', 'Cbr2', 'Cd83', 'Acta2', 'Actb', 'Tspan7', 'Ebi2',
        'Gpr162', 'Ckb', 'Dhcr24', 'LOC638632', 'Actg2', 'Trim29', 'Ppap2b',
        'Klk1b11', 'Actc1', 'Pcp4l1', 'LOC621324', 'Cdkn1c', 'Slco2b1',
        'Cd24a', 'Pdgfa', 'Lrrc58', 'Dnmt3a', 'Slamf9', '1100001H23Rik',
        'Aldoc', 'Cd28', '1500003O03Rik', 'Rab15', 'Pld4', 'Pilra', 'Xlr',
        'Tgm1', 'Lcp1', 'Fstl1', 'Slc40a1', 'Usp24', 'Jup', 'Cd74', 'Tpm4',
Exemple #21
0
        # Disabled branch (never runs as written): two prep_files_for_homer
        # calls on repr_data, writing under from_genes/derepressed. They
        # differ only in the output name ('promoter' vs 'preceding') and in
        # the `preceding` flag.
        if False:
            yzer.prep_files_for_homer(
                repr_data,
                'repressed_in_{0}_kla_{1}_promoter_200'.format(
                    thresh, min_ratio),
                yzer.get_filename(dirpath, 'from_genes', 'derepressed'),
                center=False,
                reverse=False,
                preceding=False,
                size=200)
            yzer.prep_files_for_homer(
                repr_data,
                'repressed_in_{0}_kla_{1}_preceding_200'.format(
                    thresh, min_ratio),
                yzer.get_filename(dirpath, 'from_genes', 'derepressed'),
                center=False,
                reverse=False,
                preceding=True,
                size=200)

        # Boxplot of the 'length_5_utr' column for rest_data versus
        # repr_data; the second label embeds the min_ratio threshold.
        # NOTE(review): rest_data is labelled 'All Genes' -- confirm it is
        # not the complement of repr_data.
        if True:
            grapher = SeqGrapher()
            grapher.boxplot(
                [rest_data['length_5_utr'], repr_data['length_5_utr']],
                ['All Genes', 'LFC in KLA <= {0}'.format(min_ratio)],
                title="Length of 5'UTR according to KLA response",
                xlabel='Gene Set',
                ylabel='Length in bp',
                show_outliers=False,
                show_plot=True)
'''
Created on Oct 1, 2012

@author: karmel
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from glasslab.utils.functions import nonzero
from glasslab.dataanalysis.misc.gr_project_2012.v1.enhancer_subsets_for_supershift import ucsc_link_cleanup
import numpy

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = yzer.get_path(
        'karmel/Desktop/Projects/Classes/Rotations/Finland_2012/GR_Project/')
    peak_type = 'p65'

    img_dirname = 'boxplots_non_refseq_by_{0}'.format(peak_type)
    img_dirpath = yzer.get_and_create_path(dirpath, img_dirname)

    transcripts_file = yzer.get_filename(
        dirpath, 'motifs', 'transcript_vectors_with_nearby_peaks.txt')
    transcripts = yzer.import_file(transcripts_file)

    if True:
        pu_1 = False
        for ratio in (1.5, 2, 3):
            # Start each pass from the full table and narrow it one
            # criterion at a time.
            data = transcripts[transcripts['refseq'] == 'f']
            data = data[data['has_infrastructure'] == 0]
            data = data[data['length'] < 6000]
            # LFC below 1 in Dex but at least 1 in KLA.
            data = data[data['dex_1_lfc'] < 1]
            data = data[data['kla_1_lfc'] >= 1]
            data = data[data['gr_kla_dex_tag_count'] > 0]
Exemple #23
0
'''
Created on Sep 28, 2014

@author: karmel
'''
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from glasslab.dataanalysis.misc.rodrigo.samples import sample_name,\
    get_threshold
if __name__ == '__main__':
    yzer = SeqGrapher()

    dirpath = yzer.get_path(
        'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/'
        'Miscellaneous_Collaborations/Rodrigo_CD8s_2014_09/Promoters')

    # The two sample prefixes share condition and sequencing type and differ
    # only in the breed argument.
    cond, seq, breed = 'naive', 'atac', ''
    wt_prefix = sample_name(cond, seq, breed)
    ko_prefix = sample_name(cond, seq, 'foxo1_ko_')

    # Each sample has its own directory, named after its prefix.
    wt_dirpath = yzer.get_filename(dirpath, wt_prefix)
    ko_dirpath = yzer.get_filename(dirpath, ko_prefix)

    wt_filename = yzer.get_filename(
        wt_dirpath, wt_prefix + '_promoters.txt')
    ko_filename = yzer.get_filename(
        ko_dirpath, ko_prefix + '_promoters.txt')

    # Import each promoter table and replace missing values with 0.
    wt_data = yzer.import_file(wt_filename).fillna(0)
    ko_data = yzer.import_file(ko_filename).fillna(0)
'''
Created on Oct 8, 2012

@author: karmel
'''
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from matplotlib import pyplot
from glasslab.dataanalysis.misc.gr_project_2012.boxplots_redistribution_pairs import get_high_quality_pairs

if __name__ == '__main__':
    yzer = SeqGrapher()

    dirpath = yzer.get_path(
        'karmel/Desktop/Projects/Classes/Rotations/Finland_2012/GR_Project/')
    motif_dirpath = yzer.get_filename(dirpath, 'motifs', 'from_peaks')

    transcripts_file = yzer.get_filename(
        dirpath, 'motifs', 'transcript_vectors.txt')
    transcripts = yzer.import_file(transcripts_file)
    # Expose the transcript id under a second column name.
    transcripts['glass_transcript_id'] = transcripts['id']

    if True:
        redistribution_file = yzer.get_filename(
            dirpath, 'redistribution',
            'p65_peaks_bigger_in_kla_dex_with_nearby_bigger_kla_peaks.txt')
        all_data = yzer.import_file(redistribution_file)

        data = get_high_quality_pairs(all_data, transcripts)

        # Collapse to one row per (id, chr_name), averaging the remaining
        # columns.
        data = data.groupby(['id', 'chr_name'], as_index=False).mean()
'''
Created on Apr 19, 2013

@author: karmel

Note: Made font.weight = bold and axes.titlesize = 24 in matplotlibrc
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/Glass Atlas/NAR_review_data/'
    dirpath = yzer.get_path(dirpath)
    img_dirpath = yzer.get_and_create_path(dirpath, 'hg19_mcf7_pie_charts')

    yzer.legend_location = 'lower left'
    pie1 = '''Annotated by RefSeq and/or ncRNA.org    14,022
Unannotated    67,046'''
    pie1 = [row.split('    ') for row in pie1.split('\n')]
    pie1 = zip(*pie1)
    yzer.piechart(map(lambda s: int(s.replace(',', '')), pie1[1]),
                  pie1[0],
                  title='Hah et al MCF-7 Transcripts\nwith Score >= 1',
                  save_dir=img_dirpath,
                  show_plot=True)

    pie2 = '''Promoter-associated RNA    7,055
Antisense of RefSeq    7,539
Other RefSeq Proximal    13,664
Distal with H3K4me2    2,352
Exemple #26
0
'''
Created on Mar 23, 2012

@author: karmel
'''
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
import os

if __name__ == '__main__':
    grapher = SeqGrapher()

    dirpath = '/Users/karmel/Desktop/Projects/GlassLab/Notes_and_Reports/NOD_BALBc/BMDCs/Analysis/'
    filename = os.path.join(dirpath, 'balbc_nod_vectors.txt')
    data = grapher.import_file(filename)

    # vs balbc counterpart: (column, scalar) pairs applied in order without
    # a suffix argument.
    for tag_column, scale_factor in (
            ('nod_notx_0h_tag_count', 2.790489),
            ('diabetic_nod_notx_0h_tag_count', 1.083990),
            ('slow_diabetic_nod_notx_0h_tag_count', 0.349747),
    ):
        data = grapher.normalize(data, tag_column, scale_factor)

    # Vs nod notx: the two diabetic columns again, with different scalars
    # and an explicit '_norm_2' suffix.
    for tag_column, scale_factor in (
            ('diabetic_nod_notx_0h_tag_count', 0.483232),
            ('slow_diabetic_nod_notx_0h_tag_count', 0.276080),
    ):
        data = grapher.normalize(
            data, tag_column, scale_factor, suffix='_norm_2')
'''
Created on Sep 7, 2012

@author: karmel
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher


if __name__ == '__main__':
    grapher = SeqGrapher()
    base_dirpath = 'karmel/Desktop/Projects/Classes/Rotations/Finland_2012/GR_Project/'
    base_dirpath = grapher.get_path(base_dirpath)
    dirpath = grapher.get_filename(base_dirpath, 'motifs')
    filename = grapher.get_filename(dirpath, 'transcript_vectors.txt')

    data = grapher.import_file(filename)

    # Boxplots for gr_dex peaks by lfc in Dex (disabled: never runs as
    # written).
    if False:
        #data = data[data['distal'] == 't']
        data = data[data['has_refseq'] == 1]

        # Three groups by Dex LFC: <= -1, >= 1, and strictly in between.
        down = data[data['dex_1_lfc'] <= -1]
        up = data[data['dex_1_lfc'] >= 1]
        nc = data[abs(data['dex_1_lfc']) < 1]

        key = 'p65_kla_tag_count'
        # Per-group difference between the KLA+Dex and KLA tag counts. An
        # earlier assignment of the raw `key` columns was dropped because
        # this line overwrote it immediately.
        datasets = [d['p65_kla_dex_tag_count'] - d[key] for d in [down, nc, up]]
Exemple #28
0
'''
Created on Jun 26, 2012

@author: karmel
'''
from glasslab.dataanalysis.misc.gr_project_2012.elongation import set_up_sequencing_run_ids, \
    get_sequencing_run_id_sets, get_rep_string, total_tags_per_run
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from glasslab.dataanalysis.motifs.motif_analyzer import MotifAnalyzer

if __name__ == '__main__':
    grapher = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/Classes/Rotations/Finland_2012/GR_Project/'
    dirpath = grapher.get_path(dirpath)
    filename = grapher.get_filename(dirpath, 'transcript_vectors.txt')

    data = grapher.import_file(filename)

    run_ids = set_up_sequencing_run_ids()
    dmso, kla, kla_dex, all_dmso, all_kla, all_kla_dex = get_sequencing_run_id_sets(
    )
    total_tags = total_tags_per_run()

    # Norm sum scalars listed for all, group 1, group 2, group 3, group 4
    kla_scalars = [1.223906, 1.281572, 1.118363, 1.104860, 1.503260]
    kla_dex_scalars = [1.182574, 1.147695, 1.248636, 1.069588, 1.388871]
    dex_over_kla_scalars = [1.069073, 0.967659, 1.122628, 1.008758, 0.927466]

    for i, scalar in enumerate(kla_scalars):
        data = grapher.normalize(data,
                                 'kla_{0}tag_count'.format(get_rep_string(i)),
Exemple #29
0
'''
Created on Jan 30, 2013

@author: karmel
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from collections import OrderedDict

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/HiC/'
    dirpath = yzer.get_path(dirpath)
    data_dirpath = yzer.get_filename(dirpath, 'enhancer_rewiring_lfc')
    transcripts = yzer.import_file(
        yzer.get_filename(dirpath, 'enhancer_sets', 'transcript_vectors.txt'))

    sets = OrderedDict((
        ('all',
         yzer.import_file(yzer.get_filename(data_dirpath, 'all_vectors.cdt'))),
        #('all_6h', yzer.import_file(yzer.get_filename(data_dirpath,'kla_6h','all_vectors.cdt'))),
        ('rewired',
         yzer.import_file(
             yzer.get_filename(data_dirpath, 'rewired_vectors.cdt'))),
        #('rewired_6h', yzer.import_file(yzer.get_filename(data_dirpath,'kla_6h','rewired_vectors.cdt'))),
        ('shared',
         yzer.import_file(yzer.get_filename(data_dirpath,
                                            'shared_vectors.cdt'))),
    ))

    for key, val in sets.items():
Exemple #30
0
'''
Created on Feb 14, 2013

@author: karmel
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from glasslab.utils.functions import nonzero

if __name__ == '__main__':
    yzer = SeqGrapher()

    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/CD4TCells/Oshea_enhancers/ctcf_stat1_overlap'
    dirpath = yzer.get_path(dirpath)
    img_dirpath = yzer.get_and_create_path(dirpath, 'figures')

    data = yzer.import_file(
        yzer.get_filename(dirpath, 'ctcf_with_stat1_binding.txt')).fillna(0)
    with_stat1 = data[data['p2_tag_count'] > 0]
    without_stat1 = data[data['p2_tag_count'] == 0]

    if True:
        ax = yzer.piechart(
            [len(with_stat1), len(without_stat1)],
            ['CTCF sites with STAT1', 'CTCF sites without STAT1'],
            title='DP Thymocyte CTCF Sites with STAT1 in Th1 Cells',
            save_dir=img_dirpath,
            show_plot=True)
    data['tag_count_nonzero'] = nonzero(data['tag_count'])
    data['p2_tag_count_nonzero'] = nonzero(data['p2_tag_count'])
    ax = yzer.scatterplot(