def get_log(self):
    """Return the path of this sample's log file.

    The ``logs`` directory under the experiment results directory is
    created (via ``mod_utils.make_dir``) before the path is built. The file
    itself is not opened or created here.

    :return: ``<rdir>/logs/<sample_name>.log`` as a string
    """
    log_dir = os.path.join(self.experiment_settings.get_rdir(), 'logs')
    mod_utils.make_dir(log_dir)
    log_file_name = '%(sample_name)s.log' % {'sample_name': self.sample_name}
    return os.path.join(
        self.experiment_settings.get_rdir(), 'logs', log_file_name)
def write_wigs(self, suffix, subtract_background=False,
               subtract_control=False):
    """Write mutation-rate and RT-stop wig tracks for every library.

    Also writes ``mochi_batch_<suffix>.txt`` into the ``mutation_wigs``
    directory: a tab-separated listing (one row per library) intended to
    make batch import into MochiView easier.

    :param suffix: string appended to each sample name to form track names
    :param subtract_background: passed through to
        ``write_mutation_rates_to_wig``
    :param subtract_control: passed through to
        ``write_mutation_rates_to_wig``

    When either subtraction flag is set only the normalizable libraries are
    written; otherwise all of ``self.libs`` are. The RT-stop writer is not
    given the subtraction flags.
    """
    mutation_dir = self.rdir_path('mutation_wigs')
    rt_stop_dir = self.rdir_path('rt_stop_wigs')
    mod_utils.make_dir(mutation_dir)
    mod_utils.make_dir(rt_stop_dir)

    if subtract_background or subtract_control:
        libs_to_write = self.get_normalizable_libs()
    else:
        libs_to_write = self.libs

    batch_path = os.path.join(mutation_dir, 'mochi_batch_' + suffix + '.txt')
    # The context manager closes the batch file even when a wig writer raises,
    # so a partially written listing is still flushed and the handle is freed.
    with open(batch_path, 'w') as batch_file:
        batch_file.write('SEQUENCE_SET\tFILE_NAME\tDATA_TYPE\tNAME\n')
        for lib in libs_to_write:
            track_name = lib.lib_settings.sample_name + '_' + suffix
            batch_file.write('<replace>\t%s\t<replace>\t%s\n'
                             % (track_name + '.wig.gz', track_name))
            lib.write_mutation_rates_to_wig(
                os.path.join(mutation_dir, track_name),
                subtract_background=subtract_background,
                subtract_control=subtract_control)
            lib.write_rt_stops_to_wig(os.path.join(rt_stop_dir, track_name))
def get_log(self):
    """Build the per-sample log path, making sure its directory exists.

    :return: path string ``<rdir>/logs/<sample_name>.log`` where ``<rdir>``
        comes from ``self.experiment_settings.get_rdir()``
    """
    # Create the logs directory first so callers can open the path directly.
    mod_utils.make_dir(
        os.path.join(self.experiment_settings.get_rdir(), 'logs'))
    results_dir = self.experiment_settings.get_rdir()
    file_name = '%(sample_name)s.log' % {'sample_name': self.sample_name}
    log = os.path.join(results_dir, 'logs', file_name)
    return log
def make_plots(self, exclude_constitutive=False):
    """Generate the summary plots for the experiment.

    :param exclude_constitutive: forwarded to the plotting functions; when
        true, output goes to ``plots/exclude_constitutive`` with the file
        tag ``_exclude_constitutive``, and ROC curves plus functional-group
        plots are produced as well. Otherwise output goes to ``plots`` with
        no file tag.

    Interactive MA plots are only made when the ``make_interactive_plots``
    setting is true.
    """
    affected_nucleotides = self.settings.get_property('affected_nucleotides')

    if exclude_constitutive:
        mod_utils.make_dir(self.rdir_path('plots', 'exclude_constitutive'))
        mod_utils.make_dir(self.rdir_path('plots', 'exclude_constitutive',
                                          'functional_groups'))
        mod_utils.make_dir(self.rdir_path('plots', 'exclude_constitutive',
                                          'interactive'))
        rdir = self.rdir_path('plots', 'exclude_constitutive')
        file_tag = '_exclude_constitutive'
        # NOTE(review): the 25S chromosome name has a double underscore while
        # the 18S one has a single underscore - confirm both match the
        # sequence names in the rRNA FASTA.
        mod_plotting.generate_roc_curves(
            self.settings.get_property('tptn_file_25s'),
            self.settings.rRNA_seqs,
            os.path.join(rdir, '25S_ROC_curves'),
            self.get_modified_libs(), 'S.c.25S__rRNA',
            affected_nucleotides)
        mod_plotting.generate_roc_curves(
            self.settings.get_property('tptn_file_18s'),
            self.settings.rRNA_seqs,
            os.path.join(rdir, '18S_ROC_curves'),
            self.get_modified_libs(), 'S.c.18S_rRNA',
            affected_nucleotides)
        mod_plotting.plot_functional_group_changes(
            self.get_normalizable_libs(),
            os.path.join(rdir, 'functional_groups', 'group_changes'),
            self.settings.get_property('functional_groupings'),
            nucleotides_to_count=affected_nucleotides,
            exclude_constitutive=exclude_constitutive,
            max_fold_reduction=0.001, max_fold_increase=100)
    else:
        mod_utils.make_dir(self.rdir_path('plots'))
        mod_utils.make_dir(self.rdir_path('plots', 'interactive'))
        rdir = self.rdir_path('plots')
        file_tag = ''

    mod_plotting.plot_mutated_nts_pie(
        self.libs, os.path.join(rdir, 'raw_mutation_fractions' + file_tag),
        exclude_constitutive=exclude_constitutive)
    mod_plotting.plot_mutation_breakdown_pie(
        self.libs, os.path.join(rdir, 'raw_mutation_types' + file_tag),
        exclude_constitutive=exclude_constitutive)
    mod_plotting.plot_mutated_nts_pie(
        self.libs,
        os.path.join(rdir, 'background_sub_mutation_fractions' + file_tag),
        subtract_background=True,
        exclude_constitutive=exclude_constitutive)
    mod_plotting.plot_mutation_rate_cdfs(
        self.libs, os.path.join(rdir, 'mutation_rate_cdf' + file_tag),
        nucleotides_to_count=affected_nucleotides,
        exclude_constitutive=exclude_constitutive)
    # The violin plot previously reused the 'mutation_rate_cdf' prefix, so it
    # shared an output name with the CDF plot above; give it its own prefix.
    mod_plotting.plot_mutation_rate_violins(
        self.libs, os.path.join(rdir, 'mutation_rate_violin' + file_tag),
        nucleotides_to_count=affected_nucleotides,
        exclude_constitutive=exclude_constitutive)
    mod_plotting.plot_changes_vs_control(
        self.get_normalizable_libs(),
        os.path.join(rdir, 'changes' + file_tag),
        nucleotides_to_count=affected_nucleotides,
        exclude_constitutive=exclude_constitutive)
    mod_plotting.ma_plots(
        self.get_normalizable_libs(), os.path.join(rdir, 'MA' + file_tag),
        nucleotides_to_count=affected_nucleotides,
        exclude_constitutive=exclude_constitutive)

    if self.settings.get_property('make_interactive_plots'):
        # Disabled: mod_plotting.plot_changes_vs_control_interactive(
        #     self.get_normalizable_libs(),
        #     os.path.join(rdir, 'interactive', 'changes'+file_tag),
        #     nucleotides_to_count=..., exclude_constitutive=False)
        # NOTE(review): exclude_constitutive is hard-coded to False here even
        # when the output lands in the exclude_constitutive folder - confirm
        # this is intended.
        mod_plotting.ma_plots_interactive(
            self.get_normalizable_libs(),
            os.path.join(rdir, 'interactive', 'MA' + file_tag),
            nucleotides_to_count=affected_nucleotides,
            exclude_constitutive=False)
def generate_mapping_index(self):
    """
    builds a STAR index from the input fasta file

    Nothing is built when ``self.settings.star_index_exists()`` is true.
    STAR's stdout and stderr are appended to the experiment log file.
    A failure to launch STAR or a non-zero exit status is written to the log
    rather than raised, so the pipeline keeps its previous non-fatal
    behaviour.
    """
    self.settings.write_to_log('building STAR index')
    if not self.settings.star_index_exists():
        mod_utils.make_dir(self.settings.get_star_index())
        # Argument list instead of a shell string: paths containing spaces or
        # shell metacharacters are passed to STAR unmodified.
        command = [
            'STAR',
            '--runThreadN', '%d' % self.threads,
            '--runMode', 'genomeGenerate',
            '--genomeDir', self.settings.get_star_index(),
            '--genomeFastaFiles', self.settings.get_rRNA_fasta(),
            '--genomeSAindexNbases', '4',
        ]
        failure = None
        # Append mode mirrors the original '1>>log 2>>log' redirection.
        with open(self.settings.get_log(), 'a') as log_file:
            try:
                return_code = subprocess.Popen(
                    command, stdout=log_file, stderr=log_file).wait()
            except OSError as error:
                # e.g. STAR is not on PATH; the shell version only logged it
                failure = 'could not run STAR: %s' % error
            else:
                if return_code != 0:
                    failure = ('STAR genomeGenerate exited with status %d'
                               % return_code)
        # Report after our own handle on the log file has been closed.
        if failure is not None:
            self.settings.write_to_log(failure)
    self.settings.write_to_log('building STAR index complete')
def write_wigs(self, suffix, subtract_background=False,
               subtract_control=False):
    """Write a mutation-rate wig track for every library.

    Also writes ``mochi_batch_<suffix>.txt`` into the ``wigs`` directory: a
    tab-separated listing (one row per library) intended to make batch
    import into MochiView easier.

    :param suffix: string appended to each sample name to form track names
    :param subtract_background: passed through to
        ``write_mutation_rates_to_wig``
    :param subtract_control: passed through to
        ``write_mutation_rates_to_wig``

    When either subtraction flag is set only the normalizable libraries are
    written; otherwise all of ``self.libs`` are.
    """
    wig_dir = self.rdir_path('wigs')
    mod_utils.make_dir(wig_dir)

    if subtract_background or subtract_control:
        libs_to_write = self.get_normalizable_libs()
    else:
        libs_to_write = self.libs

    batch_path = os.path.join(wig_dir, 'mochi_batch_' + suffix + '.txt')
    # 'with' guarantees the listing is closed even if a wig writer raises.
    with open(batch_path, 'w') as batch_file:
        batch_file.write('SEQUENCE_SET\tFILE_NAME\tDATA_TYPE\tNAME\n')
        for lib in libs_to_write:
            track_name = lib.lib_settings.sample_name + '_' + suffix
            batch_file.write('<replace>\t%s\t<replace>\t%s\n'
                             % (track_name + '.wig.gz', track_name))
            lib.write_mutation_rates_to_wig(
                os.path.join(wig_dir, track_name),
                subtract_background=subtract_background,
                subtract_control=subtract_control)
def annotate_structures(self, exclude_constitutive=False):
    """Highlight protected nucleotides on the structures.

    :param exclude_constitutive: forwarded to
        ``mod_plotting.highlight_structure``; when true, output goes into an
        ``exclude_constitutive`` subfolder of each structures directory.

    Both ``structures/protections_highlighted`` and
    ``structures/colored_by_change`` are created, but only the highlighting
    step runs: ``mod_plotting.color_by_change`` is currently disabled.
    """
    # One optional path component replaces the two duplicated if/else blocks
    # (and the unused file_tag local) of the earlier implementation.
    subfolder = ('exclude_constitutive',) if exclude_constitutive else ()
    highlighted_dir = self.rdir_path(
        'structures', 'protections_highlighted', *subfolder)
    mod_utils.make_dir(highlighted_dir)
    mod_utils.make_dir(
        self.rdir_path('structures', 'colored_by_change', *subfolder))

    mod_plotting.highlight_structure(
        self.get_normalizable_libs(), highlighted_dir,
        nucleotides_to_count=self.settings.get_property(
            'affected_nucleotides'),
        exclude_constitutive=exclude_constitutive)
    # Disabled:
    # mod_plotting.color_by_change(self.get_normalizable_libs(),
    #     self.rdir_path('structures', 'colored_by_change', *subfolder),
    #     nucleotides_to_count=self.settings.get_property('affected_nucleotides'),
    #     exclude_constitutive=exclude_constitutive)
def remove_adaptor(self):
    """Trim adaptors from every library, in parallel.

    Returns without doing anything when ``force_retrim`` is off and every
    library already reports adaptorless reads. The trimming itself only
    runs when the ``trim_adaptor`` setting is true.
    """
    if not self.settings.get_property('force_retrim'):
        already_done = all(
            one_lib.adaptorless_reads_exist()
            for one_lib in self.settings.iter_lib_settings())
        if already_done:
            return

    if not self.settings.get_property('trim_adaptor'):
        return

    self.settings.write_to_log('trimming adaptors')
    mod_utils.make_dir(self.rdir_path('adaptor_removed'))
    mod_utils.parmap(
        lambda one_lib: self.remove_adaptor_one_lib(one_lib),
        self.settings.iter_lib_settings(),
        nprocs=self.threads)
    self.settings.write_to_log('trimming adaptors done')
def trim_reads(self):
    """
    Trim reads by the configured amount, one library per worker.

    Per the original author's note: trimming removes potential random
    barcoding sequences from the 5' end, and trimming from the 3' end can
    help problematic mapping by reducing the chance of indels.

    Skipped when ``force_retrim`` is off and every library already has
    trimmed reads.
    :return:
    """
    self.settings.write_to_log('trimming reads')
    nothing_to_do = (
        not self.settings.get_property('force_retrim')
        and all(one_lib.trimmed_reads_exist()
                for one_lib in self.settings.iter_lib_settings()))
    if nothing_to_do:
        return

    mod_utils.make_dir(self.rdir_path('trimmed_reads'))
    mod_utils.parmap(
        lambda one_lib: self.trim_one_lib(one_lib),
        self.settings.iter_lib_settings(),
        nprocs=self.threads)
    self.settings.write_to_log('trimming reads complete')
def remove_adaptor(self):
    """Remove adaptors from every library, splitting threads across libraries.

    If every library already reports adaptorless reads, this logs that the
    existing reads are used and returns. Otherwise at most one worker per
    library is started and ``self.threads`` is divided among the workers.
    """
    self.settings.write_to_log('removing adaptors with skewer')
    if all(one_lib.adaptorless_reads_exist()
           for one_lib in self.settings.iter_lib_settings()):
        self.settings.write_to_log('using existing adaptor-trimmed reads')
        return

    mod_utils.make_dir(self.rdir_path('adaptor_removed'))
    num_datasets = sum(1 for _ in self.settings.iter_lib_settings())
    # Never start more workers than there are libraries.
    num_instances = min(num_datasets, self.threads)
    threads_per_instance = self.threads / num_instances

    def trim_one(one_lib):
        return self.remove_adaptor_one_lib(one_lib, threads_per_instance)

    mod_utils.parmap(trim_one, self.settings.iter_lib_settings(),
                     nprocs=num_instances)
    self.settings.write_to_log('removing adaptors done')
def collapse_identical_reads(self):
    """
    Produce the collapsed-reads files for every library, in parallel.

    With the ``collapse_identical_reads`` setting on, each library goes
    through ``collapse_one_fastq_file`` (original author's note: collapses
    identical reads using the FASTX toolkit); otherwise through
    ``fastq_to_fasta``. Skipped when ``force_recollapse`` is off and every
    library already has collapsed reads.
    :return:
    """
    self.settings.write_to_log('collapsing reads')
    up_to_date = (
        not self.settings.get_property('force_recollapse')
        and all(one_lib.collapsed_reads_exist()
                for one_lib in self.settings.iter_lib_settings()))
    if up_to_date:
        return

    mod_utils.make_dir(self.rdir_path('collapsed_reads'))
    if self.settings.get_property('collapse_identical_reads'):
        per_lib_task = lambda one_lib: self.collapse_one_fastq_file(one_lib)
    else:
        per_lib_task = lambda one_lib: self.fastq_to_fasta(one_lib)
    mod_utils.parmap(per_lib_task, self.settings.iter_lib_settings(),
                     nprocs=self.threads)
    self.settings.write_to_log('collapsing reads complete')
def trim_reads(self):
    """
    Trim reads by the configured amount, splitting threads across libraries.

    Per the original author's note: trimming removes potential random
    barcoding sequences from the 5' end, and trimming from the 3' end can
    help problematic mapping by reducing the chance of indels.

    If every library already has trimmed reads, this logs that the existing
    reads are used and returns.
    :return:
    """
    self.settings.write_to_log('trimming reads with seqtk')
    if all(one_lib.trimmed_reads_exist()
           for one_lib in self.settings.iter_lib_settings()):
        self.settings.write_to_log('using existing trimmed reads')
        return

    mod_utils.make_dir(self.rdir_path('trimmed_reads'))
    num_datasets = sum(1 for _ in self.settings.iter_lib_settings())
    num_instances = min(num_datasets, self.threads)
    # Floor division keeps the thread count an integer; one thread per worker
    # is held back, but each worker always gets at least one.
    threads_per_instance = max((self.threads // num_instances) - 1, 1)
    # Start num_instances workers (not self.threads): the per-worker thread
    # budget above was computed for exactly that many workers, matching the
    # other parallel steps of the pipeline.
    mod_utils.parmap(
        lambda one_lib: self.trim_one_lib(one_lib, threads_per_instance),
        self.settings.iter_lib_settings(),
        nprocs=num_instances)
    self.settings.write_to_log('trimming reads complete')
def run_shapemapper(self):
    """
    Run shapemapper on every library, dividing threads among the workers.

    Nothing is run when ``self.need_to_run_shapemapper()`` is false; that
    case is logged as using the existing output.
    :return:
    """
    self.settings.write_to_log('running shapemapper')
    if not self.need_to_run_shapemapper():
        self.settings.write_to_log('using existing shapemapper output')
    else:
        mod_utils.make_dir(self.rdir_path('shapemapper'))
        all_settings = list(self.settings.iter_lib_settings())
        # At most one worker per library.
        num_instances = min(len(all_settings), self.threads)
        threads_per_instance = self.threads / num_instances

        def run_one(one_lib):
            return self.run_single_shapemapper(one_lib, threads_per_instance)

        mod_utils.parmap(run_one, all_settings, nprocs=num_instances)
    self.settings.write_to_log('done running shapemapper')
def map_reads(self):
    """
    Map the reads of every library (original author's note: using STAR).

    Returns early, without a completion log entry, when every library
    already has mapped reads.
    :return:
    """
    self.settings.write_to_log('mapping reads')
    if all(one_lib.mapped_reads_exist()
           for one_lib in self.settings.iter_lib_settings()):
        return

    mod_utils.make_dir(self.rdir_path('mapped_reads'))
    all_settings = list(self.settings.iter_lib_settings())
    num_instances = min(len(all_settings), self.threads)
    # One thread per worker is held back; each worker gets at least one.
    threads_per_instance = max(self.threads / num_instances - 1, 1)

    def map_one(one_lib):
        return self.map_one_library(one_lib, threads_per_instance)

    mod_utils.parmap(map_one, all_settings, nprocs=num_instances)
    self.settings.write_to_log('finished mapping reads')
def main():
    """Background-subtract pickled mutation-rate datasets and write outputs.

    Command line: ``<outfolder> <genome_fasta> <normalization_pickle>
    [<experimental_pickle> ...]``.

    For the normalization dataset and each experimental dataset a wig file
    is written. Each experimental dataset is additionally
    background-subtracted against the normalization dataset; the result is
    pickled, written as a wig and plotted as a pie chart. Mutation-rate
    histograms are drawn for the raw and the subtracted datasets.

    Dataset names are the input file names with their last two
    dot-separated components removed.

    :raises ValueError: if fewer than three positional arguments are given
    """
    def dataset_name(file_name):
        # drop the last two dot-separated components of the base name
        return '.'.join(os.path.basename(file_name).split('.')[:-2])

    outfolder, genome_fasta, normalization_file_name = sys.argv[1:4]
    experimental_file_names = sys.argv[4:]
    mod_utils.make_dir(outfolder)

    # NOTE(review): unPickle presumably deserializes with pickle; only run
    # this on trusted input files.
    normalization_dict = mod_utils.unPickle(normalization_file_name)
    norm_name = dataset_name(normalization_file_name)
    experimental_dict_names = [dataset_name(file_name)
                               for file_name in experimental_file_names]
    experimental_dicts = [mod_utils.unPickle(file_name)
                          for file_name in experimental_file_names]

    normed_mutation_rate_histogram(
        experimental_dicts, experimental_dict_names,
        os.path.join(outfolder, 'mutation_rate_histogram'),
        title='nonzero positions')

    background_subtracted_sets = []
    write_wig(normalization_dict, norm_name,
              os.path.join(outfolder, norm_name))
    for name, experimental_dict in zip(experimental_dict_names,
                                       experimental_dicts):
        write_wig(experimental_dict, name, os.path.join(outfolder, name))
        background_subtracted = subtract_background(experimental_dict,
                                                    normalization_dict)
        background_subtracted_sets.append(background_subtracted)
        mod_utils.makePickle(
            background_subtracted,
            os.path.join(outfolder, name + '_subtracted.pkl'))
        write_wig(background_subtracted, name + '_subtracted',
                  os.path.join(outfolder, name + '_subtracted'))
        # The pie chart stays best-effort, but failures are now reported and
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        try:
            plot_weighted_nts_pie(
                background_subtracted, genome_fasta,
                '%s backround-subtracted fractions' % name,
                os.path.join(outfolder, name + '_sub_pie'))
        except Exception as error:
            sys.stderr.write('warning: could not plot pie chart for %s: %s\n'
                             % (name, error))

    normed_mutation_rate_histogram(
        background_subtracted_sets, experimental_dict_names,
        os.path.join(outfolder, 'back_subtracted_mutation_rate_histogram'),
        title='nonzero positions, background subtracted')
def make_tables(self, exclude_constitutive=False):
    """Write the pickles, wig tracks and TSV tables for the experiment.

    :param exclude_constitutive: forwarded to the mutation-rate pickles and
        the per-library TSV tables.

    Output directories for both the plain and the ``exclude_constitutive``
    variants are always created. The fold-change pickle is always written
    with ``exclude_constitutive=True``, and the combined TSV is written both
    without and with ``exclude_constitutive=True``, regardless of the
    argument.
    """
    subfolders = ['raw', 'background_subtracted', 'control_subtracted',
                  'fold_change']
    for subfolder in subfolders:
        for extra in ((), ('exclude_constitutive',)):
            for root in ('tables', 'pickles'):
                mod_utils.make_dir(self.rdir_path(root, subfolder, *extra))

    # (file stem, extra keyword arguments) shared by the pickles and the TSVs
    rate_variants = (
        ('mutation_rates', {}),
        ('back_subtracted_mutation_rates', {'subtract_background': True}),
        ('control_subtracted_mutation_rates', {'subtract_control': True}),
    )

    for stem, flags in rate_variants:
        self.pickle_mutation_rates(
            stem + '.pkl', exclude_constitutive=exclude_constitutive, **flags)
    # NOTE(review): exclude_constitutive is fixed to True here rather than
    # taken from the argument - confirm this is intended.
    self.pickle_fold_changes('mutation_rate_fold_changes.pkl',
                             exclude_constitutive=True)

    self.write_wigs('')
    self.write_wigs('back_subtract', subtract_background=True)
    self.write_wigs('control_subtract', subtract_control=True)

    for stem, flags in rate_variants:
        self.write_mutation_rates_tsv(
            stem + '.tsv', exclude_constitutive=exclude_constitutive, **flags)

    self.write_combined_mutation_rates_tsv()
    self.write_combined_mutation_rates_tsv(exclude_constitutive=True)
def make_tables(self, exclude_constitutive=False):
    """Write the wig tracks and TSV tables for mutations and RT stops.

    :param exclude_constitutive: forwarded to the per-library mutation-rate
        TSV tables.

    Output directories for both the plain and the ``exclude_constitutive``
    variants are always created.

    Currently disabled (kept here for reference): the
    'background_subtracted' and 'control_subtracted' subfolders, all pickle
    output (mutation_rates.pkl, back_subtracted_mutation_rates.pkl,
    control_subtracted_mutation_rates.pkl, mutation_rate_fold_changes.pkl),
    the 'back_subtract' and 'control_subtract' wigs, and
    back_subtracted_mutation_rates.tsv.
    """
    for subfolder in ('raw', 'fold_change'):
        for extra in ((), ('exclude_constitutive',)):
            for root in ('rt_stop_tables', 'mutation_tables'):
                mod_utils.make_dir(self.rdir_path(root, subfolder, *extra))

    self.write_wigs('')

    # (file name, extra keyword arguments) for each per-library table
    tsv_jobs = (
        ('mutation_rates.tsv', {}),
        ('control_subtracted_mutation_rates_lowess.tsv',
         {'subtract_control': True, 'lowess_correct': True}),
        ('control_subtracted_mutation_rates.tsv',
         {'subtract_control': True, 'lowess_correct': False}),
    )
    for tsv_name, options in tsv_jobs:
        self.write_mutation_rates_tsv(
            tsv_name, exclude_constitutive=exclude_constitutive, **options)

    self.write_combined_mutation_rates_tsv()
    self.write_combined_mutation_counts_tsv()
    for value_type in ('rpm', 'count', 'score'):
        self.write_combined_rt_stop_tsv(type=value_type)
def annotate_structures(self, exclude_constitutive=False):
    """Highlight protections and color by change on the structures.

    :param exclude_constitutive: forwarded to both plotting functions; when
        true, output goes into an ``exclude_constitutive`` subfolder of each
        structures directory.

    Runs ``mod_plotting.highlight_structure`` into
    ``structures/protections_highlighted`` and ``mod_plotting.color_by_change``
    into ``structures/colored_by_change`` for the normalizable libraries.
    """
    # One optional path component replaces the two duplicated if/else blocks
    # (and the unused file_tag local) of the earlier implementation.
    subfolder = ('exclude_constitutive',) if exclude_constitutive else ()
    highlighted_dir = self.rdir_path(
        'structures', 'protections_highlighted', *subfolder)
    colored_dir = self.rdir_path(
        'structures', 'colored_by_change', *subfolder)
    mod_utils.make_dir(highlighted_dir)
    mod_utils.make_dir(colored_dir)

    mod_plotting.highlight_structure(
        self.get_normalizable_libs(), highlighted_dir,
        nucleotides_to_count=self.settings.get_property(
            'affected_nucleotides'),
        exclude_constitutive=exclude_constitutive)
    mod_plotting.color_by_change(
        self.get_normalizable_libs(), colored_dir,
        nucleotides_to_count=self.settings.get_property(
            'affected_nucleotides'),
        exclude_constitutive=exclude_constitutive)
def process_settings(self, settings_file):
    """
    - reads the settings file and converts str to float, list, etc.
    - stores result in self.settings as a dict()

    Options from all sections are flattened into one dict keyed by option
    name. Also sets self.fqdir, self.sample_names, self.experimentals,
    self.no_mod_controls, self.with_mod_controls, self.exclude_constitutive,
    self.fastq_gz_file_handles (paths, not open files) and self.rdir,
    creates the results directory and copies the settings file into it.

    Inconsistent sample lists are reported on stdout but are not fatal.

    :param settings_file: path of the settings file to read
    :raises KeyError: if a required option is missing
    :raises ValueError: if an int/float/boolean option cannot be converted
    :raises AssertionError: if a listed fastq file does not exist
    """
    int_keys = [
        'first_base_to_keep', 'last_base_to_keep', 'min_post_adaptor_length',
        'min_base_quality', 'min_mapping_quality'
    ]
    float_keys = [
        'confidence_interval_cutoff', 'fold_change_cutoff',
        'winsorization_upper_limit'
    ]
    str_keys = [
        'adaptor_sequence', 'rrna_fasta', 'experiment_name',
        'affected_nucleotides', 'pymol_base_script',
        'pymol_base_script_colorchange', 'tptn_file_18s', 'tptn_file_25s'
    ]
    boolean_keys = ['make_interactive_plots']
    list_str_keys = [
        'fastq_gz_files', 'sample_names', 'experimentals', 'no_mod_controls',
        'with_mod_controls', 'exclude_constitutive'
    ]
    # list_float_keys = ['probe_concentrations']  (not currently parsed)

    config = ConfigParser.ConfigParser()
    config.read(settings_file)
    settings = {}
    for section in config.sections():
        for option in config.options(section):
            settings[option] = config.get(section, option)
            # also record the section name itself as a flag
            settings[section] = True

    for k in int_keys:
        settings[k] = int(settings[k])
    for k in str_keys:
        # values are already strings; only require that the option exists
        if k not in settings:
            raise KeyError(k)
    for k in float_keys:
        settings[k] = float(settings[k])
    for k in boolean_keys:
        lowered = settings[k].lower()
        if lowered not in ('true', 'false'):
            raise ValueError('Boolean value %s must be "true" or "false"' % k)
        settings[k] = lowered == 'true'
    for k in list_str_keys:
        # list options are written as JSON arrays in the settings file
        settings[k] = simplejson.loads(settings[k])

    self.fqdir = settings['fastq_dir']
    self.sample_names = settings['sample_names']
    self.experimentals = settings['experimentals']
    self.no_mod_controls = settings['no_mod_controls']
    self.with_mod_controls = settings['with_mod_controls']
    self.exclude_constitutive = settings['exclude_constitutive']

    # Explicit checks instead of assert/bare-except: they still run under
    # "python -O" and no longer hide unrelated exceptions. As before, these
    # problems are only reported, not raised.
    if not (len(self.experimentals) == len(self.no_mod_controls)
            == len(self.with_mod_controls)):
        print('error: experimentals, no_mod_controls, and with_mod_controls should all be the same length')
        print('for mutation rate purposes, its ok to reuse a dataset here, it really doesnt matter')
    reported = []
    for sample_name in (self.experimentals + self.no_mod_controls
                        + self.with_mod_controls):
        if sample_name not in self.sample_names and sample_name not in reported:
            reported.append(sample_name)
            print('%s not in sample names, make sure you are using regular quotation marks' % sample_name)

    self.fastq_gz_file_handles = [
        os.path.join(self.fqdir, fastq_gz_file)
        for fastq_gz_file in settings['fastq_gz_files']
    ]
    for file_handle in self.fastq_gz_file_handles:
        if not mod_utils.file_exists(file_handle):
            # same exception type as the original assert, but with a message
            # and not disabled by -O
            raise AssertionError('fastq file not found: %s' % file_handle)

    self.settings = settings
    self.rdir = settings['results_dir']
    mod_utils.make_dir(self.rdir)
    shutil.copy(settings_file, self.rdir)
def get_wdir(self):
    """Return ``self.wdir`` after making sure the directory exists."""
    working_dir = self.wdir
    mod_utils.make_dir(working_dir)
    return self.wdir
def process_settings(self, settings_file):
    """
    - reads the settings file and converts str to float, list, etc.
    - stores result in self.settings as a dict()

    Options from all sections are flattened into one dict keyed by option
    name. Also sets self.fqdir, self.sample_names, self.experimentals,
    self.no_mod_controls, self.with_mod_controls, self.exclude_constitutive,
    self.fastq_gz_file_handles (paths, not open files) and self.rdir,
    creates the results directory and copies the settings file into it.

    Inconsistent sample lists are reported on stdout but are not fatal.

    :param settings_file: path of the settings file to read
    :raises KeyError: if a required option is missing
    :raises ValueError: if an int/float/boolean option cannot be converted
    :raises AssertionError: if a listed fastq file does not exist
    """
    int_keys = ['first_base_to_keep', 'last_base_to_keep',
                'min_post_adaptor_length', 'min_base_quality',
                'min_mapping_quality']
    float_keys = ['confidence_interval_cutoff', 'fold_change_cutoff']
    str_keys = ['adaptor_sequence', 'rrna_fasta', 'experiment_name',
                'shapemapper_ref_file', 'affected_nucleotides',
                'pymol_base_script', 'pymol_base_script_colorchange',
                'tptn_file_18s', 'tptn_file_25s', 'functional_groupings']
    boolean_keys = ['collapse_identical_reads', 'force_read_resplit',
                    'force_remapping', 'force_recollapse', 'force_recount',
                    'force_index_rebuild', 'force_retrim', 'trim_adaptor',
                    'discard_untrimmed', 'force_shapemapper',
                    'make_interactive_plots']
    list_str_keys = ['fastq_gz_files', 'sample_names', 'experimentals',
                     'no_mod_controls', 'with_mod_controls',
                     'exclude_constitutive']
    # list_float_keys = ['probe_concentrations']  (not currently parsed)

    config = ConfigParser.ConfigParser()
    config.read(settings_file)
    settings = {}
    for section in config.sections():
        for option in config.options(section):
            settings[option] = config.get(section, option)
            # also record the section name itself as a flag
            settings[section] = True

    for k in int_keys:
        settings[k] = int(settings[k])
    for k in str_keys:
        # values are already strings; only require that the option exists
        if k not in settings:
            raise KeyError(k)
    for k in float_keys:
        settings[k] = float(settings[k])
    for k in boolean_keys:
        lowered = settings[k].lower()
        if lowered not in ('true', 'false'):
            raise ValueError(
                'Boolean value %s must be "true" or "false"' % k)
        settings[k] = lowered == 'true'
    for k in list_str_keys:
        # list options are written as JSON arrays in the settings file
        settings[k] = simplejson.loads(settings[k])

    self.fqdir = settings['fastq_dir']
    self.sample_names = settings['sample_names']
    self.experimentals = settings['experimentals']
    self.no_mod_controls = settings['no_mod_controls']
    self.with_mod_controls = settings['with_mod_controls']
    self.exclude_constitutive = settings['exclude_constitutive']

    # Explicit checks instead of assert/bare-except: they still run under
    # "python -O" and no longer hide unrelated exceptions. As before, these
    # problems are only reported, not raised.
    if not (len(self.experimentals) == len(self.no_mod_controls)
            == len(self.with_mod_controls)):
        print('error: experimentals, no_mod_controls, and with_mod_controls should all be the same length')
        print('for mutation rate purposes, its ok to reuse a dataset here, it really doesnt matter')
    reported = []
    for sample_name in (self.experimentals + self.no_mod_controls
                        + self.with_mod_controls):
        if sample_name not in self.sample_names and sample_name not in reported:
            reported.append(sample_name)
            print('%s not in sample names, make sure you are using regular quotation marks' % sample_name)

    self.fastq_gz_file_handles = [
        os.path.join(self.fqdir, fastq_gz_file)
        for fastq_gz_file in settings['fastq_gz_files']]
    for file_handle in self.fastq_gz_file_handles:
        if not mod_utils.file_exists(file_handle):
            # same exception type as the original assert, but with a message
            # and not disabled by -O
            raise AssertionError('fastq file not found: %s' % file_handle)

    self.settings = settings
    self.rdir = settings['results_dir']
    mod_utils.make_dir(self.rdir)
    shutil.copy(settings_file, self.rdir)
def make_plots(self, exclude_constitutive=False):
    """Generate the mutation and RT-stop summary plots.

    :param exclude_constitutive: forwarded to the plotting functions; when
        true, output goes into the ``exclude_constitutive`` subfolder of
        ``mutation_plots`` / ``rt_stop_plots`` and file names get the
        ``_exclude_constitutive`` tag.

    Interactive plots are only made when the ``make_interactive_plots``
    setting is true, and they are always called with
    ``exclude_constitutive=False``.
    """
    if exclude_constitutive:
        subfolder = ('exclude_constitutive',)
        file_tag = '_exclude_constitutive'
        # TODO: the names for the ROC curve chromosomes are hard coded and
        # need to be changed between samples. ROC curve generation
        # (mod_plotting.generate_roc_curves for the tptn_file_25s /
        # tptn_file_18s settings) is disabled for that reason.
    else:
        subfolder = ()
        file_tag = ''

    mut_dir = self.rdir_path('mutation_plots', *subfolder)
    mod_utils.make_dir(mut_dir)
    mod_utils.make_dir(
        self.rdir_path('mutation_plots', *(subfolder + ('interactive',))))
    stop_dir = self.rdir_path('rt_stop_plots', *subfolder)
    mod_utils.make_dir(stop_dir)
    mod_utils.make_dir(
        self.rdir_path('rt_stop_plots', *(subfolder + ('interactive',))))

    def mut_path(stem):
        return os.path.join(mut_dir, stem + file_tag)

    def stop_path(stem):
        return os.path.join(stop_dir, stem + file_tag)

    def counted(exclude):
        # keyword arguments shared by every nucleotide-aware plot
        return {
            'nucleotides_to_count': self.settings.get_property(
                'affected_nucleotides'),
            'exclude_constitutive': exclude,
        }

    # MUTATION PLOTS
    mod_plotting.plot_mutated_nts_pie(
        self.libs, mut_path('raw_mutation_fractions'),
        exclude_constitutive=exclude_constitutive)
    mod_plotting.plot_mutation_breakdown_pie(
        self.libs, mut_path('raw_mutation_types'),
        exclude_constitutive=exclude_constitutive)
    mod_plotting.plot_mutated_nts_pie(
        self.libs, mut_path('background_sub_mutation_fractions'),
        subtract_background=True,
        exclude_constitutive=exclude_constitutive)
    mod_plotting.plot_mutation_rate_cdfs(
        self.libs, mut_path('mutation_rate_cdf'),
        **counted(exclude_constitutive))
    mod_plotting.plot_mutation_rate_violins(
        self.libs, mut_path('mutation_rate_violin'),
        **counted(exclude_constitutive))
    mod_plotting.ma_plots_by_count(
        self.get_normalizable_libs(), mut_path('MA_raw_counts'),
        **counted(exclude_constitutive))
    mod_plotting.ma_plots_by_count(
        self.get_normalizable_libs(), mut_path('MA_raw_counts_lowess'),
        lowess_correct=True, **counted(exclude_constitutive))
    mod_plotting.mutation_rate_scatter(
        self.get_normalizable_libs(), mut_path('scatter_mismatch_rate'),
        **counted(exclude_constitutive))

    if self.settings.get_property('make_interactive_plots'):
        interactive_dir = os.path.join(mut_dir, 'interactive')
        mod_plotting.scatter_interactive(
            self.get_normalizable_libs(),
            os.path.join(interactive_dir, 'scatter' + file_tag),
            **counted(False))
        mod_plotting.ma_plots_interactive_by_count(
            self.get_normalizable_libs(),
            os.path.join(interactive_dir, 'MA_counts' + file_tag),
            **counted(False))
        mod_plotting.ma_plots_interactive_by_count(
            self.get_normalizable_libs(),
            os.path.join(interactive_dir, 'MA_counts_lowess' + file_tag),
            lowess_correct=True, **counted(False))

    # RT STOP PLOTS
    mod_plotting.plot_rt_stop_pie(
        self.libs, stop_path('raw_rt_stops'),
        exclude_constitutive=exclude_constitutive)
    mod_plotting.plot_rt_stop_pie(
        self.libs, stop_path('back_sub_rt_stops'),
        subtract_background=True,
        exclude_constitutive=exclude_constitutive)
    mod_plotting.plot_rt_stop_cdfs(
        self.libs, stop_path('rt_stop_cdf'),
        **counted(exclude_constitutive))
    mod_plotting.plot_rt_stop_violins(
        self.libs, stop_path('rt_stop_violin'),
        **counted(exclude_constitutive))