for k, count in stats.iteritems(): print '{}\t{}\t{:.2f}\t{:.2f}'.format( k, count, count / stats['all'] * 100, count / stats['enhancers'] * 100, ) subsets = {} if True: subsets['with_me2'] = data[(data['me2_id'] > 0)] subsets['with_ac'] = data[(data['ac_id'] > 0)] subsets['inactive'] = data[(data['ac_id'] == 0) & (data['me2_id'] == 0)] for k, subset in subsets.iteritems(): first_peak = 'foxp3' subset['id'] = subset[first_peak + '_id'] subset['start'] = subset[first_peak + '_start'] subset['end'] = subset[first_peak + '_end'] yzer.run_homer(subset, first_peak + '_enh_' + k, motif_dirpath, cpus=6, center=True, reverse=False, preceding=False, size=200, length=[8, 10, 12, 15])
# For every (cond, seq, breed) in SAMPLES: load that sample's enhancer file,
# keep rows whose tag_count reaches the threshold returned by
# get_threshold(seq), and call run_homer on them with mock=True.
yzer = MotifAnalyzer()
dirpath = yzer.get_path(
    'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/'
    'Miscellaneous_Collaborations/Rodrigo_CD8s_2014_09/Enhancers')

# Options shared by every run; `length` is passed at the call site so each
# call still receives its own fresh list.
homer_options = dict(cpus=10, center=True, reverse=False, preceding=False,
                     size=200, mock=True)

for cond, seq, breed in SAMPLES:
    sample_prefix = sample_name(cond, seq, breed)
    sample_dirpath = yzer.get_filename(dirpath, sample_prefix)
    filename = yzer.get_filename(sample_dirpath,
                                 sample_prefix + '_enhancers.txt')
    # Missing values are replaced with 0 before thresholding.
    data = yzer.import_file(filename).fillna(0)

    if True:
        min_thresh = get_threshold(seq)
        above_thresh = data['tag_count'] >= min_thresh
        subdata = data[above_thresh]
        yzer.run_homer(subdata, 'all', sample_dirpath,
                       length=[8, 10, 12], **homer_options)
# Resolve the enhancer directory of each peptide condition. The HOMER runs
# (all rows, and rows with tag_count >= 10) sit behind an `if False` toggle
# and are therefore currently disabled.
for peptide in ('K99A', 'NoPep', 'PCC'):
    pep_dirpath = yzer.get_filename(dirpath, '{}_{}'.format(peptide, ab))
    if False:
        filename = yzer.get_filename(
            pep_dirpath, '{}_{}_enhancers.txt'.format(peptide, ab))
        # Missing values are replaced with 0 before any filtering.
        data = yzer.import_file(filename).fillna(0)

        # Options common to both runs; `length` stays at the call sites so
        # each call gets its own list.
        homer_options = dict(cpus=6, center=True, reverse=False,
                             preceding=False, size=200, mock=True)

        yzer.run_homer(data, 'all', pep_dirpath,
                       length=[8, 10, 12, 15], **homer_options)

        tag_thresh_rows = data[data['tag_count'] >= 10]
        yzer.run_homer(tag_thresh_rows, 'tag_thresh_10', pep_dirpath,
                       length=[8, 10, 12, 15], **homer_options)
'{}_{}'.format(condition, ab)) if False: filename = yzer.get_filename( cond_dirpath, '{}_{}_enhancers.txt'.format(condition, ab)) data = yzer.import_file(filename) data = data.fillna(0) min_thresh = 20 data = data[data['tag_count'] > min_thresh] yzer.run_homer(data, 'filtered_{}'.format(min_thresh), cond_dirpath, cpus=6, center=True, reverse=False, preceding=False, size=200, length=[8, 10, 12, 15], mock=True) if True: filename = yzer.get_filename( cond_dirpath, '{}_{}_enhancers.txt'.format(condition, ab)) data = yzer.import_file(filename) data = data.fillna(0) min_thresh = 20 subdata = data[data['tag_count'] > min_thresh] subdata = subdata[subdata['tag_count(2)'] <= min_thresh]
filename = yzer.get_filename(sample_dirpath, sample_prefix + '_enhancers.txt') data = yzer.import_file(filename) data = data.fillna(0) min_thresh = get_threshold(seq) data = data[data['tag_count'] >= min_thresh] fold = 2 if True: # ATAC peaks that are absent in the FOXO1 competent ko_new = data[ data['naive_atac_tag_count'] < min_thresh] yzer.run_homer(ko_new, 'ko_new', sample_dirpath, cpus=10, center=True, reverse=False, preceding=False, size=200, length=[8, 10, 12], mock=True) print('ko_new', len(ko_new)) # ATAC peaks exist, but are less than half as big ko_grows = data[ (data['naive_atac_tag_count'] >= min_thresh) & (data['naive_atac_tag_count'] * fold < data['tag_count'])] yzer.run_homer(ko_grows, 'ko_grows', sample_dirpath, cpus=10, center=True, reverse=False, preceding=False, size=200, length=[8, 10, 12], mock=True) print('ko_grows', len(ko_grows)) # The sum of new and bigger ko_dependent = data[
) dirpath = yzer.get_and_create_path(base_dirpath, 'motifs/') filename = yzer.get_filename(base_dirpath, 'transcript_vectors.txt') data = yzer.import_file(filename) data = data.fillna(0) # Promoters if False: refseq = data[data['has_refseq'] == 1] refseq = refseq[refseq['transcript_score'] >= 4] if True: yzer.run_homer(refseq, 'refseq_promoter', dirpath, cpus=6, center=False, reverse=False, preceding=True, size=400, length=[8, 10, 12, 15]) bg = yzer.get_filename( dirpath, 'refseq_promoter/refseq_promoter_regions_for_homer.txt') subset = refseq[refseq['balb_nod_notx_1h_fc'] <= -1] yzer.run_homer(subset, 'promoter_overlap_notx_1h_nod_down', dirpath, cpus=6, center=False, reverse=False,
pep_dirpath = yzer.get_filename(dirpath, '{}_{}'.format(peptide, ab)) if True: filename = yzer.get_filename( pep_dirpath, '{}_{}_enhancers.txt'.format(peptide, ab)) data = yzer.import_file(filename) data = data.fillna(0) if True: yzer.run_homer(data, 'all', pep_dirpath, cpus=8, center=True, reverse=False, preceding=False, size=200, length=[8, 10, 12], mock=True) if True: subset = data[(data['id(2)'] == 0) & (data['id(3)'] == 0)] yzer.run_homer(subset, 'only', pep_dirpath, cpus=8, center=True, reverse=False, preceding=False, size=200,
# Get venn-diagram sets for foxp3/me2 only_treg = treg[treg['naive_id'] == 0] only_naive = naive[naive['treg_id'] == 0] shared = treg[treg['naive_id'] > 0] print len(only_treg), len(only_naive), len(shared) datasets = [treg, naive, only_treg, only_naive, shared] main_peak = ['treg', 'naive', 'treg', 'naive', 'treg'] names = [ x.format(antibody) for x in ('all_treg_{0}_enhancers', 'all_naive_{0}_enhancers', 'only_treg_{0}_enhancers', 'only_naive_{0}_enhancers', 'shared_{0}_enhancers') ] for i, subset in enumerate(datasets): if i > 1: continue subset['id'] = subset['{0}_id'.format(main_peak[i])] subset['start'] = subset['{0}_start'.format(main_peak[i])] subset['end'] = subset['{0}_end'.format(main_peak[i])] yzer.run_homer(subset, names[i], motif_dirpath, cpus=6, center=True, reverse=False, preceding=False, size=200, length=[8, 10, 12, 15])
data = yzer.import_file(filename) data = data.fillna(0) min_thresh = 20 if False: subdata = data[data['tag_count'] > min_thresh] subdata = subdata[subdata['tag_count(2)'] > min_thresh] subdata = subdata[subdata['tag_count(3)'] <= 0] subdata = subdata[subdata['tag_count(4)'] <= 0] subdir = 'treg_shared_' + ab yzer.run_homer(subdata, subdir, motif_dirpath, cpus=6, center=True, reverse=False, preceding=False, size=200, length=[8, 10, 12, 15], mock=True) output_file = yzer.get_filename(motif_dirpath, subdir, subdir + '_enhancers.txt') subdata.to_csv(output_file, header=True, index=False, sep='\t') subdata = data[data['tag_count'] > min_thresh] subdata = subdata[subdata['tag_count(2)'] <= 0] subdata = subdata[subdata['tag_count(3)'] <= 0] subdata = subdata[subdata['tag_count(4)'] <= 0] subdir = 'ntreg_only_' + ab yzer.run_homer(subdata, subdir,
'{}_{}'.format(condition, ab)) if False: filename = yzer.get_filename(cond_dirpath, '{}_{}_enhancers.txt'.format(condition, ab)) data = yzer.import_file(filename) data = data.fillna(0) min_thresh = 0 cutoff = 10000 data = data[data['tag_count'] > min_thresh] data = data.sort('tag_count', ascending=False)[:cutoff] yzer.run_homer(data, 'top_{}'.format(cutoff), cond_dirpath, cpus=6, center=True, reverse=False, preceding=False, size=200, length=[8, 10, 12, 15], mock=True) for condition in ('itreg','treg'): cond_dirpath = yzer.get_filename(dirpath, '{}_{}'.format(condition, ab)) if False: filename = yzer.get_filename(cond_dirpath, '{}_{}_enhancers.txt'.format(condition, ab)) data = yzer.import_file(filename) data = data.fillna(0) min_thresh = 0 cutoff = 10000 data = data[data['tag_count'] > min_thresh]
data['all'] = data['treg'][data['treg']\ [[c + '_id' for c in celltypes]].min(axis=1) > 0] # Special cases # Treg and Th1 shared and not shared, regardless of others data['treg_th1_shared'] = data['treg'][data['treg']['th1_id'] > 0] data['treg_not_th1'] = data['treg'][data['treg']['th1_id'] == 0] data['th1_not_treg'] = data['th1'][data['th1']['treg_id'] == 0] for k in sorted(data.keys()): subset = data[k] print k, len(subset) if k in celltypes or len(subset) < 1000: continue first_peak = k == 'all' and 'naive' or k.split('_')[0] subset['id'] = subset[first_peak + '_id'] subset['start'] = subset[first_peak + '_start'] subset['end'] = subset[first_peak + '_end'] if k in ('treg_th1_shared', 'treg_not_th1', 'th1_not_treg'): yzer.run_homer(subset, 'four_way_venn_' + k, motif_dirpath, cpus=6, center=True, reverse=False, preceding=False, size=200, length=[8, 10, 12, 15])
data = yzer.import_file(filename) data = data.fillna(0) min_thresh = get_threshold(seq) data = data[data['tag_count'] >= min_thresh] fold = 2 if True: # ATAC peaks that are absent in the FOXO1 KO foxo1_critical = data[ data['foxo1_ko_naive_atac_tag_count'] < min_thresh] yzer.run_homer(foxo1_critical, 'foxo1_critical', sample_dirpath, cpus=10, center=True, reverse=False, preceding=False, size=200, length=[8, 10, 12], mock=True) print('foxo1_critical', len(foxo1_critical)) # ATAC peaks that don't change with KO of Foxo1 foxo1_independent = data[ (data['tag_count'] * fold >= data['foxo1_ko_naive_atac_tag_count']) & (data['foxo1_ko_naive_atac_tag_count'] * fold >= data['tag_count'])] yzer.run_homer(foxo1_independent, 'foxo1_independent', sample_dirpath, cpus=10,
sample_prefix + '_enhancers.txt') data = yzer.import_file(filename) data = data.fillna(0) min_thresh = get_threshold(seq) data = data[data['tag_count'] >= min_thresh] fold = 2 if True: naive_only = data[data['lcmv_d12_foxo1_tag_count'] < min_thresh] yzer.run_homer(naive_only, 'naive_only', sample_dirpath, cpus=10, center=True, reverse=False, preceding=False, size=200, length=[8, 10, 12], mock=True) print('naive_only', len(naive_only)) shared = data[ (data['tag_count'] * fold >= data['lcmv_d12_foxo1_tag_count']) & (data['lcmv_d12_foxo1_tag_count'] * fold >= data['tag_count'])] yzer.run_homer(shared, 'shared', sample_dirpath, cpus=10, center=True, reverse=False,
go_path = yzer.get_and_create_path(dirpath, 'with_me3', 'go_analysis', '0_8_min_lfc') data = yzer.import_file( yzer.get_filename(dirpath, 'transcript_vectors.txt')) data = data.fillna(0) data = data[data['naive_me3_tag_count'] + data['act_me3_tag_count'] > 0] if False: curr_path = yzer.get_and_create_path(dirpath, 'with_me3', 'motif_analysis') yzer.run_homer(data, 'all_refseq_preceding', curr_path, center=False, reverse=False, preceding=True, size=200, cpus=6) yzer.run_homer(data, 'all_refseq', curr_path, center=False, reverse=False, preceding=False, size=200, cpus=6) yzer.run_homer(data, 'all_refseq', curr_path, center=True,
dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/CD4TCells/H3K4me2/Analysis' dirpath = yzer.get_path(dirpath) motif_dirpath = yzer.get_filename(dirpath, 'motifs') filename = yzer.get_filename(dirpath, 'thio_peak_vectors.txt') data = yzer.import_file(filename) data = data.fillna(0) # me2 if True: if False: yzer.run_homer(data, 'thio_all', motif_dirpath, cpus=6, center=True, reverse=False, preceding=False, size=200, length=[8, 10, 12, 15]) data = data[data['tss_id'] == 0] if True: yzer.run_homer(data, 'thio_h3k4me2_distal', motif_dirpath, cpus=6, center=True, reverse=False, preceding=False, size=200,
# First pass: for each condition, load its enhancer file, replace missing
# values with 0, and call run_homer on every row with mock=True.
for condition in ('treg', 'itreg', 'activated'):
    cond_dirpath = yzer.get_filename(dirpath,
                                     '{}_{}'.format(condition, ab))
    if True:
        filename = yzer.get_filename(
            cond_dirpath, '{}_{}_enhancers.txt'.format(condition, ab))
        data = yzer.import_file(filename).fillna(0)
        homer_options = dict(cpus=6, center=True, reverse=False,
                             preceding=False, size=200, mock=True)
        yzer.run_homer(data, 'all', cond_dirpath,
                       length=[8, 10, 12, 15], **homer_options)

# Second pass: only the directory lookup is live; the data loading is
# switched off by the `if False` toggle.
for condition in ('itreg', 'treg'):
    cond_dirpath = yzer.get_filename(dirpath,
                                     '{}_{}'.format(condition, ab))
    if False:
        filename = yzer.get_filename(
            cond_dirpath, '{}_{}_enhancers.txt'.format(condition, ab))
        data = yzer.import_file(filename).fillna(0)
#('with_pu_1_kla_dex', data[data['tag_count_5'] >= 10]), ('no_pu_1_kla_dex', data[data['tag_count_5'] < 10]), ('gt_partner', data[data['tag_count'] > 1.2*data['tag_count_2']]), #('lt_partner', data[data['tag_count']*1.2 < data['tag_count_2']]), ('with_partner', data[data['tag_count_2'] >= 10]), ('no_partner', data[data['tag_count_2'] < 10]), #('down_in_dex', data[data['dex_1_lfc'] <= -1]), #('down_in_kla_dex', data[data['kla_dex_1_lfc'] <= -1]), #('down_in_kla', data[data['kla_1_lfc'] <= -1]), #('up_in_dex', data[data['dex_1_lfc'] >= 1]), #('up_in_kla_dex', data[data['kla_dex_1_lfc'] >= 1]), #('up_in_kla', data[data['kla_1_lfc'] >= 1]), #('transrepressed', data[(data['kla_1_lfc'] >= 1) & (data['dex_over_kla_1_lfc'] <= -.58)]), #('up_in_dex_down_in_kla_dex', data[(data['dex_1_lfc'] >= 1) & (data['kla_dex_1_lfc'] - data['dex_1_lfc'] <= -.58)]), ): # We have multiple copies of peaks if they align to different transcripts parent_path = yzer.get_and_create_path(motif_dirpath, 'peak_motifs_by_transcript_lfc', peak_type, super_name) curr_path = yzer.get_and_create_path(parent_path, name) # Group them after selecting those that we want dataset = dataset.groupby(['id','chr_name'],as_index=False).mean() if name != 'all': bg = yzer.get_filename(parent_path, 'all','all','all_regions_for_homer.txt') else: bg = None yzer.run_homer(dataset, name, curr_path, center=True, reverse=False, preceding=False, size=size, cpus=6, bg=bg)
dirpath, 'boxplots_non_refseq_by_p65', 'enhancer_like_lose_p65_{0}x_change_dsg_only.txt'.format( ratio))) enhancers['glass_transcript_id'] = enhancers['id'] # Limit to peaks and touching transcripts, then pull out peaks that intersect our enhancer set data = all_data[all_data['touches'] == 't'] data = data.merge(enhancers, how='right', on='glass_transcript_id', suffixes=['', 'trans']) curr_path = yzer.get_and_create_path(motif_dirpath, 'enhancer_like_lose_p65', 'ratio_{0}'.format(ratio)) # Group them after selecting those that we want data = data.groupby(['id', 'chr_name'], as_index=False).mean() #bg = yzer.get_filename(motif_dirpath, # 'peak_motifs_by_transcript_lfc', 'p65_kla', # 'all','all','all','all_regions_for_homer.txt') yzer.run_homer(data, 'ratio_{0}'.format(ratio), curr_path, center=False, reverse=False, preceding=False, size=size, bg=None, cpus=7)
filename = yzer.get_filename(sample_dirpath, sample_prefix + '_enhancers.txt') data = yzer.import_file(filename) data = data.fillna(0) min_thresh = get_threshold('atac') data = data[data['tag_count'] >= min_thresh] datasets[sample_prefix] = data if False: yzer.run_homer(data, 'all', sample_dirpath, cpus=10, center=True, reverse=False, preceding=False, size=200, length=[8, 10, 12], mock=True) if True: # Versus comparable sample in other breed subdata = data[ data['{}_tag_count'.format(oth_breed[1][j])] < min_thresh] yzer.run_homer(subdata, 'not_in_' + oth_breed[1][j], sample_dirpath, cpus=10, center=True, reverse=False, preceding=False, size=200, length=[8, 10, 12], mock=True) # Versus hi/lo sample and prior sample if not naive. if j > 0: if 'klrghi' in sample_prefix: other = sample_prefix.replace('hi', 'lo')