''' Created on Nov 7, 2012 @author: karmel ''' from __future__ import division from glasslab.dataanalysis.misc.cd4tcell_finland_2012.resources import replicate_sets from glasslab.dataanalysis.motifs.motif_analyzer import MotifAnalyzer if __name__ == '__main__': yzer = MotifAnalyzer() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/CD4TCells_Finland_2012/Analysis_2013_02' dirpath = yzer.get_path(dirpath) go_path = yzer.get_and_create_path(dirpath, 'with_me3', 'go_analysis', '0_8_min_lfc') data = yzer.import_file( yzer.get_filename(dirpath, 'transcript_vectors.txt')) data = data.fillna(0) data = data[data['naive_me3_tag_count'] + data['act_me3_tag_count'] > 0] if False: curr_path = yzer.get_and_create_path(dirpath, 'with_me3', 'motif_analysis') yzer.run_homer(data, 'all_refseq_preceding', curr_path, center=False, reverse=False, preceding=True,
for ratio in (3, 2, 1.5): enhancers = yzer.import_file( yzer.get_filename( dirpath, 'boxplots_non_refseq_by_p65', 'enhancer_like_lose_p65_{0}x_change_dsg_only.txt'.format( ratio))) enhancers['glass_transcript_id'] = enhancers['id'] # Limit to peaks and touching transcripts, then pull out peaks that intersect our enhancer set data = all_data[all_data['touches'] == 't'] data = data.merge(enhancers, how='right', on='glass_transcript_id', suffixes=['', 'trans']) curr_path = yzer.get_and_create_path(motif_dirpath, 'enhancer_like_lose_p65', 'ratio_{0}'.format(ratio)) # Group them after selecting those that we want data = data.groupby(['id', 'chr_name'], as_index=False).mean() #bg = yzer.get_filename(motif_dirpath, # 'peak_motifs_by_transcript_lfc', 'p65_kla', # 'all','all','all','all_regions_for_homer.txt') yzer.run_homer(data, 'ratio_{0}'.format(ratio), curr_path, center=False, reverse=False, preceding=False, size=size,
# Load the promoter vectors for one peak type and copy 'peak_id' into 'id'.
peak_type = 'p65_kla_dex'
vectors_file = yzer.get_filename(
    dirpath, 'from_peaks/{0}_promoter_vectors.txt'.format(peak_type))
data = yzer.import_file(vectors_file)
data['id'] = data['peak_id']

# Keep only peaks whose transcript reaches the threshold in `grouped`
# (`grouped` is built earlier in the script, outside this section).
thresh = 2
meets_thresh = grouped['relevant_sets_primary'] >= thresh
ids = grouped[meets_thresh]['glass_transcript_id']
dataset = data[data['glass_transcript_id'].isin(ids)]

if True:
    # Both Homer preps share the same region options.
    homer_kwargs = dict(center=True, reverse=False, preceding=False, size=200)

    # Every peak of this type.
    all_name = 'all_{0}_200'.format(peak_type)
    yzer.prep_files_for_homer(
        data,
        all_name,
        yzer.get_and_create_path(dirpath, 'from_peaks', peak_type),
        **homer_kwargs)

    # Only the thresholded subset selected above.
    subset_name = (
        'paused_in_{0}_at_least_{1}_min_{2}_down_in_dex_{3}_200'.format(
            thresh, min_ratio, secondary_min_ratio, peak_type))
    yzer.prep_files_for_homer(
        dataset,
        subset_name,
        yzer.get_and_create_path(dirpath, 'from_peaks', peak_type),
        **homer_kwargs)
left outer join chipseq.peak_{} p{counter} on p1.chromosome_id = p{counter}.chromosome_id and p1.start_end && p{counter}.start_end '''.format(oth_breed[0][i], counter=counter)) # Put it all together sql += ',\n'.join(selects) sql += ''' from chipseq.peak_{} p1 join genome_reference_mm10.chromosome chr on p1.chromosome_id = chr.id '''.format(sample) sql += ''.join(joins) sql += ''' left outer join genome_reference_mm10.sequence_transcription_region reg on p1.chromosome_id = reg.chromosome_id and p1.start_end && reg.start_site_1000 where reg.id is NULL; ''' print(sql) # Set up output dir sample_path = yzer.get_and_create_path(dirpath, curr_name) # Get data data = dataframe_from_query(sql, engine) output_file = yzer.get_filename(sample_path, curr_name + '_enhancers.txt') data.to_csv(output_file, sep='\t', header=True, index=False)
''' Created on Feb 8, 2013 @author: karmel ''' from glasslab.dataanalysis.motifs.motif_analyzer import MotifAnalyzer if __name__ == '__main__': yzer = MotifAnalyzer() base_dirpath = yzer.get_path( 'karmel/GlassLab/Notes_and_Reports/NOD_BALBc/ThioMacs/Analysis_2013_02/' ) dirpath = yzer.get_and_create_path(base_dirpath, 'motifs/') filename = yzer.get_filename(base_dirpath, 'transcript_vectors.txt') data = yzer.import_file(filename) data = data.fillna(0) # Promoters if False: refseq = data[data['has_refseq'] == 1] refseq = refseq[refseq['transcript_score'] >= 4] if True: yzer.run_homer(refseq, 'refseq_promoter', dirpath, cpus=6, center=False, reverse=False, preceding=True, size=400,
#('with_pu_1_kla_dex', data[data['tag_count_5'] >= 10]), ('no_pu_1_kla_dex', data[data['tag_count_5'] < 10]), ('gt_partner', data[data['tag_count'] > 1.2*data['tag_count_2']]), #('lt_partner', data[data['tag_count']*1.2 < data['tag_count_2']]), ('with_partner', data[data['tag_count_2'] >= 10]), ('no_partner', data[data['tag_count_2'] < 10]), #('down_in_dex', data[data['dex_1_lfc'] <= -1]), #('down_in_kla_dex', data[data['kla_dex_1_lfc'] <= -1]), #('down_in_kla', data[data['kla_1_lfc'] <= -1]), #('up_in_dex', data[data['dex_1_lfc'] >= 1]), #('up_in_kla_dex', data[data['kla_dex_1_lfc'] >= 1]), #('up_in_kla', data[data['kla_1_lfc'] >= 1]), #('transrepressed', data[(data['kla_1_lfc'] >= 1) & (data['dex_over_kla_1_lfc'] <= -.58)]), #('up_in_dex_down_in_kla_dex', data[(data['dex_1_lfc'] >= 1) & (data['kla_dex_1_lfc'] - data['dex_1_lfc'] <= -.58)]), ): # We have multiple copies of peaks if they align to different transcripts parent_path = yzer.get_and_create_path(motif_dirpath, 'peak_motifs_by_transcript_lfc', peak_type, super_name) curr_path = yzer.get_and_create_path(parent_path, name) # Group them after selecting those that we want dataset = dataset.groupby(['id','chr_name'],as_index=False).mean() if name != 'all': bg = yzer.get_filename(parent_path, 'all','all','all_regions_for_homer.txt') else: bg = None yzer.run_homer(dataset, name, curr_path, center=True, reverse=False, preceding=False, size=size, cpus=6, bg=bg)
'''
Created on Feb 20, 2013

@author: karmel

Load the Foxp3 peak table and the naive me2 table, drop promoter rows,
and derive the rows that appear in only one of the two sets.
'''
from __future__ import division

from glasslab.dataanalysis.motifs.motif_analyzer import MotifAnalyzer

if __name__ == '__main__':
    yzer = MotifAnalyzer()
    dirpath = yzer.get_path(
        'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/CD4TCells/Rudensky_enhancers')
    motifs_dirpath = yzer.get_and_create_path(dirpath, 'motifs')

    peak_pretty = 'Foxp3'
    peak = peak_pretty.lower()

    # Read both tables; missing values become 0 so the == 0 tests below
    # also match rows where the id was absent.
    foxp3_file = yzer.get_filename(
        dirpath, '{0}_1_with_naive_me2.txt'.format(peak))
    foxp3 = yzer.import_file(foxp3_file).fillna(0)

    naive_file = yzer.get_filename(
        dirpath, 'naive_me2_with_{0}.txt'.format(peak))
    naive = yzer.import_file(naive_file).fillna(0)

    # Filter out promoters: keep rows whose tss_id is 0.
    not_promoter_foxp3 = foxp3['tss_id'] == 0
    foxp3 = foxp3[not_promoter_foxp3]
    not_promoter_naive = naive['tss_id'] == 0
    naive = naive[not_promoter_naive]

    # Venn-diagram sets for foxp3/me2: rows with no counterpart id in the
    # other table.
    only_foxp3 = foxp3[foxp3['naive_id'] == 0]
    only_naive = naive[naive['foxp3_1_id'] == 0]
# Region size handed to Homer for every run in this section.
size = 200

if True:
    # Load the vectors for the current peak type, treating missing values as 0.
    vectors_file = yzer.get_filename(
        dirpath, '{0}_vectors.txt'.format(peak_type))
    all_data = yzer.import_file(vectors_file).fillna(0)

    # Outer and inner selections are written as one-entry tables of
    # (label, frame) pairs.
    super_sets = [('all', all_data)]
    for super_name, data in super_sets:
        sub_sets = [('all', data)]
        for name, dataset in sub_sets:
            # We have multiple copies of peaks if they align to
            # different transcripts.
            curr_path = yzer.get_and_create_path(
                dirpath, peak_type, super_name, name)

            # Group only after the selection, averaging the duplicate rows
            # per ('id', 'chr_name').
            per_peak = dataset.groupby(['id', 'chr_name'], as_index=False)
            dataset = per_peak.mean()

            yzer.run_homer(
                dataset,
                name,
                curr_path,
                center=True,
                reverse=False,
                preceding=False,
                size=size,
                cpus=6)
& (data['tag_count_4'] >= 10)]), ('with_gr_either', data[(data['tag_count_3'] >= 10) | (data['tag_count_4'] >= 10)]), ('with_gr_kla_dex', data[data['tag_count_3'] >= 10]), ('with_gr_dex', data[data['tag_count_4'] >= 10]), #('down_in_dex', data[data['dex_1_lfc'] <= -1]), #('down_in_kla_dex', data[data['kla_dex_1_lfc'] <= -1]), #('down_in_kla', data[data['kla_1_lfc'] <= -1]), #('up_in_dex', data[data['dex_1_lfc'] >= 1]), #('up_in_kla_dex', data[data['kla_dex_1_lfc'] >= 1]), #('up_in_kla', data[data['kla_1_lfc'] >= 1]), #('transrepressed', data[(data['kla_1_lfc'] >= 1) & (data['dex_over_kla_1_lfc'] <= -.58)]), #('up_in_dex_down_in_kla_dex', data[(data['dex_1_lfc'] >= 1) & (data['kla_dex_1_lfc'] - data['dex_1_lfc'] <= -.58)]), ): curr_path = yzer.get_and_create_path( motif_dirpath, 'redistributed_pairs', 'high_quality_pairs_vs_kla_bg', super_name, name) # Group them after selecting those that we want dataset = dataset.groupby(['id', 'chr_name'], as_index=False).mean() bg = yzer.get_filename(motif_dirpath, 'peak_motifs_by_transcript_lfc', 'p65_kla', 'all', 'all', 'all', 'all_regions_for_homer.txt') yzer.run_homer(dataset, name, curr_path, center=True, reverse=False,