from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/Glass Atlas/NAR_review_data/' dirpath = yzer.get_path(dirpath) img_dirpath = yzer.get_and_create_path(dirpath, 'hg19_mcf7_pie_charts') yzer.legend_location = 'lower left' pie1 = '''Annotated by RefSeq and/or ncRNA.org 14,022 Unannotated 67,046''' pie1 = [row.split(' ') for row in pie1.split('\n')] pie1 = zip(*pie1) yzer.piechart(map(lambda s: int(s.replace(',', '')), pie1[1]), pie1[0], title='Hah et al MCF-7 Transcripts\nwith Score >= 1', save_dir=img_dirpath, show_plot=True) pie2 = '''Promoter-associated RNA 7,055 Antisense of RefSeq 7,539 Other RefSeq Proximal 13,664 Distal with H3K4me2 2,352 Distal w/in 2kbp of H3K4me2 5,524 Distal remainder with LINE 16,292 Remainder 14,620''' pie2 = [row.split(' ') for row in pie2.split('\n')] pie2 = zip(*pie2) yzer.legend_columns = 2 yzer.piechart( map(lambda s: int(s.replace(',', '')), pie2[1]),
] in_dex_no_p65 = dataset[(dataset['gr_dex_tag_count'] > min_tags) & (dataset['p65_kla_tag_count'] + dataset['p65_kla_dex_tag_count'] <= min_tags) ] kla_only_no_p65 = dataset[(dataset['gr_dex_tag_count'] <= min_tags) & (dataset['gr_kla_dex_tag_count'] > min_tags) & (dataset['p65_kla_tag_count'] + dataset['p65_kla_dex_tag_count'] <= min_tags) ] sets = [tethered, direct_comp_gr, indirect_comp_gr, direct_comp_p65, cobound, direct_novel, indirect_novel, in_dex_no_p65, kla_only_no_p65] id_sets = [d['nearest_refseq_transcript_id'].unique() for d in sets] for id_set in id_sets: total_gr = total_gr - set(id_set) counts = [len(id_set) for id_set in id_sets] + [len(total_gr)] labels = ['Tethered', 'Direct competition, favor to GR', 'Indirect competition, favor to GR', 'Direct competition, favor to p65', 'Directly co-bound without loss', 'Directly bound novel p65 site', 'Indirectly bound novel p65 site', 'Has GR in Dex, no p65', 'Has GR in KLA+Dex only, no p65', 'Other with GR'] if draw_pies: yzer.piechart(counts, labels, title='Genes near Enhancer-like Subsets {0} with GR\nby Putative Enhancer Mechanism'.format(name.title()), small_legend=True, save_dir=img_dirpath, show_plot=True)
# One boxplot per Foxo1 tag-count column, comparing that column across groups.
foxo1_boxplots = (
    ('naive_foxo1_tag_count',
     'Foxo1 tags in ATAC-seq regions by group'),
    ('lcmv_d12_foxo1_tag_count',
     'LCMV d12 Foxo1 tags in ATAC-seq regions by group'),
)
for tag_column, plot_title in foxo1_boxplots:
    yzer.boxplot([gp[tag_column] for gp in groups],
                 labels,
                 title=plot_title,
                 ylabel='Foxo1 peak tag count',
                 save_dir=save_path,
                 show_plot=False)

if True:
    # Per group: two with/without pies (thresholded at min_thresh) and a
    # histogram of the naive Foxo1 tag counts.
    for i, gp in enumerate(groups):
        naive_tags = gp['naive_foxo1_tag_count']
        group_label = labels[i]
        yzer.piechart(
            [sum(naive_tags >= min_thresh), sum(naive_tags < min_thresh)],
            ['With Foxo1', 'Without Foxo1'],
            title='Co-occurrence with Foxo1- ' + group_label,
            save_dir=save_path,
            show_plot=False)

        lcmv_tags = gp['lcmv_d12_foxo1_tag_count']
        yzer.piechart(
            [sum(lcmv_tags >= min_thresh), sum(lcmv_tags < min_thresh)],
            ['With Foxo1', 'Without Foxo1'],
            title='Co-occurrence with LCMV d12 Foxo1- ' + group_label,
            save_dir=save_path,
            show_plot=False)

        yzer.histogram(
            naive_tags.tolist(),
            bins=20,
            title='Foxo1 peak tag count distribution- ' + group_label,
            xlabel='Tag count in Foxo1 peak',
            ylabel='Number of peaks',
            save_dir=save_path,
            show_plot=False)
if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/CD4TCells/Oshea_enhancers/ctcf_stat1_overlap' dirpath = yzer.get_path(dirpath) img_dirpath = yzer.get_and_create_path(dirpath, 'figures') data = yzer.import_file( yzer.get_filename(dirpath, 'ctcf_with_stat1_binding.txt')).fillna(0) with_stat1 = data[data['p2_tag_count'] > 0] without_stat1 = data[data['p2_tag_count'] == 0] if True: ax = yzer.piechart( [len(with_stat1), len(without_stat1)], ['CTCF sites with STAT1', 'CTCF sites without STAT1'], title='DP Thymocyte CTCF Sites with STAT1 in Th1 Cells', save_dir=img_dirpath, show_plot=True) data['tag_count_nonzero'] = nonzero(data['tag_count']) data['p2_tag_count_nonzero'] = nonzero(data['p2_tag_count']) ax = yzer.scatterplot( data, 'tag_count_nonzero', 'p2_tag_count_nonzero', xlabel='CTCF Tag Count', ylabel='Stat1 Tag Count', log=True, color='blue', title='Tags in CTCF Peaks versus Overlapping Stat1 Peaks', show_2x_range=False, show_legend=False,
& (refseq_no_runoff['refseq'] == 't') & (refseq_no_runoff['percent_covered'] < 1.5)] if False: yzer.histogram(mrna_with_runoff['percent_covered']) yzer.histogram(mrna_no_runoff['percent_covered']) relationships = ['is contained by', 'contains', 'overlaps with'] with_runoff_counts = [ sum(mrna_with_runoff['relationship'] == rel) for rel in relationships ] no_runoff_counts = [ sum(mrna_no_runoff['relationship'] == rel) for rel in relationships ] yzer.piechart(with_runoff_counts, labels=relationships) yzer.piechart(no_runoff_counts, labels=relationships) if True: # Filter down to high-expression genes def distance_to_reg_end(row): if row['strand'] == 0: # RefSeq annotated end - transcript end; pos if Refseq is longer distance = row['transcription_end(2)'] - row[ 'transcription_end'] elif row['strand'] == 1: # transcript start - RefSeq annotated start; pos if Refseq is longer distance = row['transcription_start'] - row[ 'transcription_start(2)'] return distance
transrepressed = data[(data['kla_1_lfc_trans'] >= 1) & (data['dex_over_kla_1_lfc_trans'] <= -.58)] not_trans = data[(data['kla_1_lfc_trans'] < 1) | (data['dex_over_kla_1_lfc_trans'] > -.58)] up_in_kla = data[(data['kla_1_lfc_trans'] >= 1) & (data['dex_over_kla_1_lfc_trans'] > -.58)] supersets = (('All', data), ('Not near transrepressed genes', not_trans), ('Up in KLA', up_in_kla), ('Near transrepressed genes', transrepressed)) # Plot trans versus not if draw_pies: yzer.piechart([len(d) for d in zip(*supersets[1:])[1]], zip(*supersets[1:])[0], title='Enhancer-like Subsets by state in KLA+Dex', save_dir=img_dirpath, show_plot=False) tfs = [('PU.1', 'pu_1'), ('p65', 'p65'), ('GR', 'gr')] contexts = [('DMSO', ''), ('Dex', 'dex'), ('KLA', 'kla'), ('KLA+Dex', 'kla_dex')] for name, dataset in supersets: total_for_set = len(dataset) # Have GR dataset = dataset[dataset['gr_dex_tag_count'] + dataset['gr_kla_dex_tag_count'] > min_tags] total_gr = len(dataset) if draw_pies:
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/Glass Atlas/Demo-data' dirpath = yzer.get_path(dirpath) img_dirpath = yzer.get_and_create_path(dirpath, 'figure_4_pie_charts') yzer.legend_location = 'lower left' pie1 = '''Annotated by RefSeq and/or ncRNA.org 16,945 Unannotated 36,578''' pie1 = [row.split(' ') for row in pie1.split('\n')] pie1 = zip(*pie1) yzer.piechart(map(lambda s: int(s.replace(',', '')), pie1[1]), pie1[0], title='Transcripts with Score >= 2', save_dir=img_dirpath, show_plot=True) pie2 = '''Promoter-associated RNA 6,314 Antisense of RefSeq 5,604 Post-TTS, same-strand 6,940 Other RefSeq Proximal 3,119 Distal with H3K4me1 7,458 Distal within 2kbp of H3K4me1 1,639 Remainder 5,504''' pie2 = [row.split(' ') for row in pie2.split('\n')] pie2 = zip(*pie2) yzer.legend_columns = 2 yzer.piechart(map(lambda s: int(s.replace(',', '')), pie2[1]), pie2[0],
'''.format(total, acetylated, (acetylated / total) * 100, foxp3, (foxp3 / total) * 100, both, (both / total) * 100, k=k) print summary # Draw pie for each group, showing % with foxp3, % with ac, and % with both relevant_cells = ', '.join([s.title() for s in k.split('_')]) counts = [acetylated - both, both, foxp3 - both, none] grapher.piechart( counts=counts, labels=[ 'Has H3K27Ac in Tregs', 'Has Both', 'Has FoxP3 in Tregs', 'Has Neither' ], title='FoxP3 and H3K27Ac at Enhancers\nwith H3K4me2 in {}'. format(relevant_cells), small_legend=False, colors=['#FFFB97', '#D5F0CB', '#ABE4FF', 'white'], save_dir=graph_dirpath, show_plot=False) if True: data['with_foxp3'] = data['treg'][ data['treg']['foxp3_tag_count'] >= min_score] data['without_foxp3'] = data['treg'][ data['treg']['foxp3_tag_count'] < min_score] for k in ('with_foxp3', ): first_peak = 'treg' subset = data[k]
for celltype in ('hi', 'lo'): d7 = datasets['klrg{}_d7'.format(celltype)] de_novo = d7[d7['d0_tag_count'] < min_thresh] all_shared = d7[ 'foxo1_ko_klrg{}_d7_tag_count'.format(celltype)] >= min_thresh all_not_shared = d7[ 'foxo1_ko_klrg{}_d7_tag_count'.format(celltype)] < min_thresh shared = de_novo[ 'foxo1_ko_klrg{}_d7_tag_count'.format(celltype)] >= min_thresh not_shared = de_novo[ 'foxo1_ko_klrg{}_d7_tag_count'.format(celltype)] < min_thresh labels = ['Also in Foxo1 KO', 'Not in Foxo1 KO'] yzer.piechart([sum(all_shared), sum(all_not_shared)], labels, title='WT KLRG{} d7 Enhancers'.format(celltype), save_dir=save_path, show_plot=False) yzer.piechart([sum(shared), sum(not_shared)], labels, title='WT KLRG{} d7 De Novo Enhancers'.format(celltype), save_dir=save_path, show_plot=False) yzer.boxplot([d7[all_shared]['tag_count'].tolist(), d7[all_not_shared]['tag_count'].tolist()], labels, title='ATAC-seq tags in WT KLRG{} d7 Enhancers'.format( celltype), ylabel='ATAC peak tag count', save_dir=save_path, show_plot=False) yzer.boxplot([de_novo[shared]['tag_count'].tolist(), de_novo[not_shared]['tag_count'].tolist()],
data = data.merge(transcripts, how='left', on='nearest_refseq_transcript_id', suffixes=['','_trans']) data = data.fillna(0) transrepressed = data[(data['kla_1_lfc_trans'] >= 1) & (data['dex_over_kla_1_lfc_trans'] <= -.58)] not_trans = data[(data['kla_1_lfc_trans'] < 1) | (data['dex_over_kla_1_lfc_trans'] > -.58)] up_in_kla = data[(data['kla_1_lfc_trans'] >= 1) & (data['dex_over_kla_1_lfc_trans'] > -.58)] supersets = (('All', data), ('Not near transrepressed genes', not_trans), ('Up in KLA', up_in_kla), ('Near transrepressed genes',transrepressed)) # Plot trans versus not yzer.piechart([len(d) for d in zip(*supersets[1:])[1]], zip(*supersets[1:])[0], title='Enhancer-like Subsets by state in KLA+Dex', save_dir=img_dirpath, show_plot=False) tfs = [('PU.1','pu_1'),('p65','p65'),('GR','gr')] contexts = [('DMSO',''),('Dex','dex'),('KLA','kla'),('KLA+Dex','kla_dex')] for name, dataset in supersets: total_for_set = len(dataset) for tf_name, tf in tfs: # Get count for enhancer elements with this TF at all cols = ['{0}_{1}tag_count'.format(tf, c and (c+'_') or '') for _, c in contexts] with_tf = dataset[dataset.filter(items=cols).max(axis=1) > min_tags] without_tf = dataset[dataset.filter(items=cols).max(axis=1) <= min_tags] # Plot with TF versus not yzer.piechart([ len(without_tf), len(with_tf)],
transrepressed = data[(data['kla_1_lfc_trans'] >= 1) & (data['dex_over_kla_1_lfc_trans'] <= -.58)] not_trans = data[(data['kla_1_lfc_trans'] < 1) | (data['dex_over_kla_1_lfc_trans'] > -.58)] up_in_kla = data[(data['kla_1_lfc_trans'] >= 1) & (data['dex_over_kla_1_lfc_trans'] > -.58)] supersets = (('All', data), ('Not near transrepressed genes', not_trans), ('Up in KLA', up_in_kla), ('Near transrepressed genes', transrepressed)) # Plot trans versus not yzer.piechart([len(d) for d in zip(*supersets[1:])[1]], zip(*supersets[1:])[0], title='Enhancer-like Subsets by state in KLA+Dex', save_dir=img_dirpath, show_plot=False) for name, dataset in supersets: total_for_set = len(dataset) dataset = dataset[dataset['gr_kla_dex_tag_count'] > min_tags] # Get count for enhancer elements with/out CpG with_cpg = dataset[dataset['has_cpg_enh'] == 1] without_cpg = dataset[dataset['has_cpg_enh'] == 0] # Plot with TF versus not yzer.piechart( [len(without_cpg), len(with_cpg)], ['No CpG Island', 'Has CpG Island'], title='Enhancer-like Subsets {0}\nby Overlap with CpG Island'.
# can be subsumed by a single H3K4me2 peak atac_only = atac[(atac['naive_h3k4me2_tag_count'] < me2_thresh)] atac_me2 = atac[(atac['naive_h3k4me2_tag_count'] >= me2_thresh)] me2_only = me2[(me2['naive_atac_tag_count'] < atac_thresh)] me2_atac = me2[(me2['naive_atac_tag_count'] >= atac_thresh)] print('ATAC only: ', len(atac_only)) print('ATAC with H3K4me2: ', len(atac_me2)) print('H3K4me2 only: ', len(me2_only)) print('H3K4me2 with ATAC: ', len(me2_atac)) save_path = yzer.get_and_create_path( dirpath, 'Figures', 'me2_atac_overlaps') yzer.piechart([len(atac_only), len(atac_me2)], ['ATAC only', 'ATAC with H3K4me2'], title='ATAC-seq region overlaps', save_dir=save_path) yzer.piechart([len(me2_only), len(me2_atac)], ['H3K4me2 only', 'H3K4me2 with ATAC'], title='H3K4me2 overlaps', save_dir=save_path) yzer.boxplot([atac_only['tag_count'], atac_me2['tag_count']], ['ATAC only', 'ATAC with H3K4me2'], title='ATAC-seq tag counts by H3K4me2 overlap', xlabel='Group', ylabel='Peak tag count', save_dir=save_path) yzer.boxplot([me2_only['tag_count'], me2_atac['tag_count']], ['H3K4me2 only', 'H3K4me2 with ATAC'], title='H3K4me2 tag counts by ATAC-seq overlap',