if True: for i, gp in enumerate(groups): yzer.piechart([sum(gp['naive_foxo1_tag_count'] >= min_thresh), sum(gp['naive_foxo1_tag_count'] < min_thresh)], ['With Foxo1', 'Without Foxo1'], title='Co-occurrence with Foxo1- ' + labels[i], save_dir=save_path, show_plot=False) yzer.piechart([sum(gp['lcmv_d12_foxo1_tag_count'] >= min_thresh), sum(gp['lcmv_d12_foxo1_tag_count'] < min_thresh)], ['With Foxo1', 'Without Foxo1'], title='Co-occurrence with LCMV d12 Foxo1- ' + labels[i], save_dir=save_path, show_plot=False) yzer.histogram(gp['naive_foxo1_tag_count'].tolist(), bins=20, title='Foxo1 peak tag count distribution- ' + labels[i], xlabel='Tag count in Foxo1 peak', ylabel='Number of peaks', save_dir=save_path, show_plot=False) if True: yzer.boxplot([gp[gp['naive_foxo1_tag_count'] > 0]['naive_foxo1_tag_count'] for gp in groups], labels, title='ATAC-seq regions with Foxo1 peaks by group', ylabel='Foxo1 peak tag count', save_dir=save_path, show_plot=False) # TCF1 if True: for i, gp in enumerate(groups): yzer.piechart([sum(gp['tcf1_tag_count'] >= min_thresh),
shuffled = data['domain_id'].values.copy() shuffle(shuffled) data['shuffled_domain_id'] = shuffled grouped = data.groupby(by='domain_id', as_index=False).mean() shuffled_grouped = data.groupby(by='shuffled_domain_id', as_index=False).mean() grouped = grouped[grouped['domain_id'] != 0] shuffled_grouped = shuffled_grouped[ shuffled_grouped['shuffled_domain_id'] != 0] ax = yzer.histogram(grouped[dex_kla_key], bins=50, label='Replicate {0} Data'.format(rep), show_legend=False, show_plot=False) ax = yzer.histogram( shuffled_grouped[dex_kla_key], bins=50, title='Dex+KLA over KLA LFC {0} by HiC Domain'.format(rep), xlabel='Mean Dex+KLA log fold change for transcripts up in KLA', ylabel='Count of Domains', color='black', fill=False, label='Shuffled Data'.format(rep), show_legend=True, save_dir=img_dirpath, show_plot=False,
grouped = grouped[grouped['up_in_kla'] >= 1] shuffled_grouped = shuffled_grouped[shuffled_grouped['up_in_kla'] >= 1] grouped['kla_ratio'] = grouped['up_in_kla'] / grouped['count'] grouped = grouped.sort(['kla_ratio']).reset_index(drop=True) grouped['idx'] = grouped.index shuffled_grouped['kla_ratio'] = shuffled_grouped[ 'up_in_kla'] / shuffled_grouped['count'] shuffled_grouped = shuffled_grouped.sort(['kla_ratio' ]).reset_index(drop=True) shuffled_grouped['idx'] = shuffled_grouped.index ax = yzer.histogram(grouped['kla_ratio'], bins=50, label='Replicate {0} Data'.format(rep), show_legend=False, show_plot=False) ax = yzer.histogram( shuffled_grouped['kla_ratio'], bins=50, title='Percent of transcripts up in KLA {0} by HiC Domain'.format( rep), ylabel='Count of domains', xlabel='Percent of transcripts up in KLA', color='black', fill=False, label='Shuffled Data'.format(rep), show_legend=True, save_dir=img_dirpath,
# Summary of `subset` against `refseq`: prints the ratio of their lengths,
# then the count and fraction of `subset` rows whose 'type' is 'mRNA', and the
# same for rows whose 'type' is anything other than 'mRNA' (printed under the
# 'rRNA' label). The statements after that select rows with type == 'mRNA',
# refseq == 't' and percent_covered < 1.5 from the with-runoff and no-runoff
# tables, count them per 'relationship' value ('is contained by', 'contains',
# 'overlaps with'), and pass those counts to yzer.piechart.
# NOTE(review): print-statement syntax implies Python 2; unless true division
# is enabled elsewhere in the file (from __future__ import division), `/`
# between these integer counts presumably floors -- confirm.
# NOTE(review): the statements below have lost their line breaks, so the
# inline '#' comment swallows everything after it, and the extent of the
# `if False:` body cannot be recovered from this line alone. Restore the
# original newlines and indentation instead of guessing which calls it guards.
print 'Percentage: ', len(subset) / len(refseq) print 'mRNA: ', sum(subset['type'] == 'mRNA'), sum( subset['type'] == 'mRNA') / len(subset) print 'rRNA: ', sum(subset['type'] != 'mRNA'), sum( subset['type'] != 'mRNA') / len(subset) # Note that some transcripts encompass two very close refseq genes; we're filtering those out for now. mrna_with_runoff = refseq_with_runoff[ (refseq_with_runoff['type'] == 'mRNA') & (refseq_with_runoff['refseq'] == 't') & (refseq_with_runoff['percent_covered'] < 1.5)] mrna_no_runoff = refseq_no_runoff[ (refseq_no_runoff['type'] == 'mRNA') & (refseq_no_runoff['refseq'] == 't') & (refseq_no_runoff['percent_covered'] < 1.5)] if False: yzer.histogram(mrna_with_runoff['percent_covered']) yzer.histogram(mrna_no_runoff['percent_covered']) relationships = ['is contained by', 'contains', 'overlaps with'] with_runoff_counts = [ sum(mrna_with_runoff['relationship'] == rel) for rel in relationships ] no_runoff_counts = [ sum(mrna_no_runoff['relationship'] == rel) for rel in relationships ] yzer.piechart(with_runoff_counts, labels=relationships) yzer.piechart(no_runoff_counts, labels=relationships) if True:
dirpath, 'Figures', 'me2_atac_overlaps') yzer.piechart([len(atac_only), len(atac_me2)], ['ATAC only', 'ATAC with H3K4me2'], title='ATAC-seq region overlaps', save_dir=save_path) yzer.piechart([len(me2_only), len(me2_atac)], ['H3K4me2 only', 'H3K4me2 with ATAC'], title='H3K4me2 overlaps', save_dir=save_path) yzer.boxplot([atac_only['tag_count'], atac_me2['tag_count']], ['ATAC only', 'ATAC with H3K4me2'], title='ATAC-seq tag counts by H3K4me2 overlap', xlabel='Group', ylabel='Peak tag count', save_dir=save_path) yzer.boxplot([me2_only['tag_count'], me2_atac['tag_count']], ['H3K4me2 only', 'H3K4me2 with ATAC'], title='H3K4me2 tag counts by ATAC-seq overlap', xlabel='Group', ylabel='Peak tag count', save_dir=save_path) yzer.histogram(atac_only['tag_count'].tolist(), bins=20, title='ATAC-seq-only peak tag count distribution', xlabel='Tag count in peak', ylabel='Number of peaks', save_dir=save_path) yzer.histogram(me2_only['tag_count'].tolist(), bins=20, title='H3K4me2-only peak tag count distribution', xlabel='Tag count in peak', ylabel='Number of peaks', save_dir=save_path)