# Load the input vectors and gather the sequencing-run bookkeeping
# (all helpers used here are defined elsewhere in the file).
data = grapher.import_file(filename)
run_ids = set_up_sequencing_run_ids()
(dmso, kla, kla_dex,
 all_dmso, all_kla, all_kla_dex) = get_sequencing_run_id_sets()
total_tags = total_tags_per_run()

# Norm sum scalars listed for all, group 1, group 2, group 3, group 4
kla_scalars = [1.223906, 1.281572, 1.118363, 1.104860, 1.503260]
kla_dex_scalars = [1.182574, 1.147695, 1.248636, 1.069588, 1.388871]
dex_over_kla_scalars = [1.069073, 0.967659, 1.122628, 1.008758, 0.927466]

# Each pass is (column-name template, scalars, extra keyword arguments).
# The passes run in this order; position i in a scalar list is turned into
# the column-name infix by get_rep_string(i).
normalization_passes = (
    ('kla_{0}tag_count', kla_scalars, {}),
    ('kla_dex_{0}tag_count', kla_dex_scalars, {}),
    # Second normalization of the kla_dex columns, stored under '_norm_2'.
    ('kla_dex_{0}tag_count', dex_over_kla_scalars, {'suffix': '_norm_2'}),
)
for column_template, scalars, extra_kwargs in normalization_passes:
    for i, scalar in enumerate(scalars):
        column = column_template.format(get_rep_string(i))
        data = grapher.normalize(data, column, scalar, **extra_kwargs)

# Keep rows flagged as having a RefSeq entry, then require a transcript
# score of at least 10.
with_refseq = data[data['has_refseq'] != 0]
refseq = with_refseq[with_refseq['transcript_score'] >= 10]

scatter_dirpath = grapher.get_filename(dirpath, 'scatterplots')
Created on Mar 23, 2012 @author: karmel ''' from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher import os if __name__ == '__main__': grapher = SeqGrapher() dirpath = '/Users/karmel/Desktop/Projects/GlassLab/Notes_and_Reports/NOD_BALBc/BMDCs/Analysis/' filename = os.path.join(dirpath, 'balbc_nod_vectors.txt') data = grapher.import_file(filename) # vs balbc counterpart data = grapher.normalize(data, 'nod_notx_0h_tag_count', 2.790489) data = grapher.normalize(data, 'diabetic_nod_notx_0h_tag_count', 1.083990) data = grapher.normalize(data, 'slow_diabetic_nod_notx_0h_tag_count', 0.349747) # Vs nod notx data = grapher.normalize(data, 'diabetic_nod_notx_0h_tag_count', 0.483232, suffix='_norm_2') data = grapher.normalize(data, 'slow_diabetic_nod_notx_0h_tag_count', 0.276080, suffix='_norm_2') refseq = grapher.get_refseq(data)
# Draw one normalized tag-count scatterplot per entry of comparison_sets
# (comparison_sets and pretty_names are defined outside this fragment).
yzer = SeqGrapher()
dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/CD4TCells_Finland_2012/Analysis_2013_02'
dirpath = yzer.get_path(dirpath)
img_dirpath = yzer.get_and_create_path(dirpath, 'with_me3', 'basic_scatterplots')
data = yzer.import_file(
    yzer.get_filename(dirpath, 'transcript_vectors.txt'))
data = data.fillna(0)
# Keep only rows whose naive + act me3 tag counts sum to more than zero.
data = data[data['naive_me3_tag_count'] + data['act_me3_tag_count'] > 0]
for key1, key2, norm_factor in comparison_sets:
    # Display name: pretty_names is looked up with the key minus its last
    # character, and that last character is appended back afterwards.
    name1 = pretty_names[key1[:-1]] + key1[-1:]
    name2 = pretty_names[key2[:-1]] + key2[-1:]
    # The normalized frame is bound to a separate name, so `data` is not
    # rebound between comparisons. The plot below reads the
    # key2 + '_tag_count_norm' column from the normalized frame.
    data_normed = yzer.normalize(data, key2 + '_tag_count', norm_factor)
    ax = yzer.scatterplot(
        data_normed, key1 + '_tag_count', key2 + '_tag_count_norm',
        log=True, color='blue',
        title='{0} versus {1} Normalized Tag Counts'.format(name1, name2),
        xlabel='{0} tags in RefSeq transcripts'.format(name1),
        ylabel='{0} tags in RefSeq transcripts, normalized'.format(name2),
        add_noise=False, show_2x_range=True, show_legend=False,
        plot_regression=False, show_count=True, show_correlation=True,
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from glasslab.utils.functions import nonzero

if __name__ == '__main__':
    yzer = SeqGrapher()
    yzer.fig_size = 15
    yzer.disable_show_plot = True

    dirpath = yzer.get_path(
        'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/NOD_BALBc/ThioMacs/Analysis_2013_02/')
    img_dirpath = yzer.get_and_create_path(dirpath, 'refseq_expression')

    vectors_path = yzer.get_filename(dirpath, 'transcript_vectors.txt')
    data = yzer.import_file(vectors_path).fillna(0)

    # Scale the NOD columns; the '<column>_norm' results are used below.
    data = yzer.normalize(data, 'nod_notx_1h_tag_count', 1.095436)
    data = yzer.normalize(data, 'nod_kla_1h_tag_count', 0.652898)
    #data = yzer.normalize(data, 'nonplated_diabetic_nod_notx_tag_count', 0.885427)
    #data = yzer.normalize(data, 'nonplated_diabetic_balb_notx_tag_count', 0.645579)

    # Per-base read density for the balb samples, computed before nonzero()
    # touches the tag counts. `/` is true division because of the
    # __future__ import above.
    transcript_length = data['length']
    data['balb_notx_1h_reads_per_base'] = data['balb_notx_1h_tag_count'] / transcript_length
    data['balb_kla_1h_reads_per_base'] = data['balb_kla_1h_tag_count'] / transcript_length

    # The four count columns that get passed through nonzero() and are then
    # used for the expression filter.
    count_columns = [
        'balb_notx_1h_tag_count',
        'nod_notx_1h_tag_count_norm',
        'balb_kla_1h_tag_count',
        'nod_kla_1h_tag_count_norm',
    ]
    for column in count_columns:
        data[column] = nonzero(data[column])

    # Require a transcript score of at least 4, then keep rows where the
    # largest of the four count columns is at least 10.
    data = data[data['transcript_score'] >= 4]
    data = data[data[count_columns].max(axis=1) >= 10]
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher import os from scipy.stats.stats import ttest_ind if __name__ == '__main__': grapher = SeqGrapher() dirpath = '/Volumes/karmel/Desktop/Projects/GlassLab/Notes_and_Reports/Inbred strains/Peak comparisons/Compared with NOD/' filename = os.path.join(dirpath, 'bl6_gt_balb_with_nod_pu_1_unique.txt') data = grapher.import_file(filename) data = data.fillna(0) # For easy log graphing wt_peaks, balb_peaks, nod_peaks, balb2_peaks = 67074, 79353, 107716, 94199 data = grapher.normalize(data, 'balb_pu_1_tag_count', 1) #balb_peaks/wt_peaks) data = grapher.normalize(data, 'nod_pu_1_tag_count', 1) #nod_peaks/wt_peaks) data = grapher.normalize(data, 'balb2_pu_1_tag_count', 1) #balb2_peaks/wt_peaks) for i, row in data[data['wt_pu_1_tag_count'] < 20].iterrows(): data['wt_pu_1_tag_count'][i] = 0 for i, row in data[data['balb_pu_1_tag_count_norm'] < 20].iterrows(): data['balb_pu_1_tag_count_norm'][i] = 0 for i, row in data[data['nod_pu_1_tag_count_norm'] < 20].iterrows(): data['nod_pu_1_tag_count_norm'][i] = 0 data[ 'wt_to_balb'] = data['wt_pu_1_tag_count'] / data['balb_pu_1_tag_count'] data['nod_to_balb'] = data['nod_pu_1_tag_count'] / data[
if __name__ == '__main__':
    grapher = SeqGrapher()
    dirpath = grapher.get_path(
        '/karmel/Desktop/Projects/GlassLab/Notes_and_Reports/Inbred strains/Groseq comparisons/'
    )
    filename = os.path.join(dirpath, 'groseq_with_h3k4me2.txt')
    data = grapher.import_file(filename)
    data = data.fillna(0)

    nod_norm, balb_norm = 0.393529, 0.359488
    nod_to_balb_norm = 1.102844
    # NOTE(review): 'balb_tag_count' is scaled by nod_norm and
    # 'nod_tag_count' by balb_norm. The pairing is kept exactly as written;
    # confirm whether the factor names or the calls are swapped.
    data = grapher.normalize(data, 'balb_tag_count', nod_norm)
    data = grapher.normalize(data, 'nod_tag_count', balb_norm)
    # Parenthesized so the line is valid on Python 2 and Python 3 alike.
    print(len(data))

    # Split the rows on nod_sv_id <= .1. The flag is stored as a column, so
    # it is still present in both subsets.
    data['nod_with_bl6'] = data['nod_sv_id'] <= .1
    nod_with_bl6 = data[data['nod_with_bl6']]
    nod_with_balb = data[~data['nod_with_bl6']]

    nod_with_bl6 = grapher.collapse_strands(nod_with_bl6)
    nod_with_balb = grapher.collapse_strands(nod_with_balb)

    # In each subset keep rows where the wt peak tag count is more than
    # twice the balb peak tag count.
    nod_with_bl6 = nod_with_bl6[
        nod_with_bl6['wt_peak_tag_count']
        > 2 * nod_with_bl6['balb_peak_tag_count']]
    nod_with_balb = nod_with_balb[
        nod_with_balb['wt_peak_tag_count']
        > 2 * nod_with_balb['balb_peak_tag_count']]