def remove_adaptor(self):
    """
    Trim adaptors from the reads of every library, one parallel job per library.

    Nothing is done when all libraries already report adaptor-trimmed reads, unless
    the 'force_retrim' property is set. The trimming itself only runs when the
    'trim_adaptor' property is set.
    :return:
    """
    settings = self.settings
    if not settings.get_property('force_retrim') and all(
            lib.adaptorless_reads_exist() for lib in settings.iter_lib_settings()):
        # every library already has its output and no retrim was requested
        return
    if not settings.get_property('trim_adaptor'):
        return
    settings.write_to_log('trimming adaptors')
    mod_utils.make_dir(self.rdir_path('adaptor_removed'))
    mod_utils.parmap(self.remove_adaptor_one_lib,
                     settings.iter_lib_settings(),
                     nprocs=self.threads)
    settings.write_to_log('trimming adaptors done')
def remove_adaptor(self):
    """
    Remove adaptors from every library in parallel (the log message names skewer
    as the trimming tool).

    When every library already reports adaptor-trimmed reads, the existing output
    is reused and nothing is run. Otherwise the 'adaptor_removed' results directory
    is created and remove_adaptor_one_lib is called once per library through
    mod_utils.parmap, with self.threads shared evenly between the instances that
    run at the same time.
    :return:
    """
    self.settings.write_to_log('removing adaptors with skewer')
    # materialize once so the same libraries are checked, counted and processed
    all_settings = list(self.settings.iter_lib_settings())
    if all(lib_settings.adaptorless_reads_exist() for lib_settings in all_settings):
        self.settings.write_to_log('using existing adaptor-trimmed reads')
        return
    mod_utils.make_dir(self.rdir_path('adaptor_removed'))
    # never start more instances than there are libraries to process
    num_instances = min(len(all_settings), self.threads)
    # floor division: each worker must receive a whole number of threads
    # ('/' yields a float on Python 3)
    threads_per_instance = self.threads // num_instances
    mod_utils.parmap(
        lambda lib_setting: self.remove_adaptor_one_lib(lib_setting, threads_per_instance),
        all_settings,
        nprocs=num_instances)
    self.settings.write_to_log('removing adaptors done')
def trim_reads(self):
    """
    Trim the reads of every library by the given amount, one parallel job per library.

    Trimming the 5' end removes potential random barcoding sequences. Trimming the
    3' end can also help when mapping is problematic, because it reduces the chance
    that indels prevent mapping.

    The work is skipped when all libraries already have trimmed reads, unless the
    'force_retrim' property is set.
    :return:
    """
    settings = self.settings
    settings.write_to_log('trimming reads')
    retrim_forced = settings.get_property('force_retrim')
    if not retrim_forced and all(
            lib.trimmed_reads_exist() for lib in settings.iter_lib_settings()):
        # trimmed output is already present for every library
        return
    mod_utils.make_dir(self.rdir_path('trimmed_reads'))
    mod_utils.parmap(self.trim_one_lib,
                     settings.iter_lib_settings(),
                     nprocs=self.threads)
    settings.write_to_log('trimming reads complete')
def collapse_identical_reads(self):
    """
    Collapse all identical reads of every library (FASTX toolkit, per the original
    author's note), one parallel job per library.

    When the 'collapse_identical_reads' property is not set, each library is run
    through fastq_to_fasta instead of collapse_one_fastq_file. The whole step is
    skipped when every library already has collapsed reads, unless the
    'force_recollapse' property is set.
    :return:
    """
    settings = self.settings
    settings.write_to_log('collapsing reads')
    if not settings.get_property('force_recollapse') and all(
            lib.collapsed_reads_exist() for lib in settings.iter_lib_settings()):
        # collapsed output is already present for every library
        return
    mod_utils.make_dir(self.rdir_path('collapsed_reads'))
    # pick the per-library worker once; both branches are dispatched the same way
    if settings.get_property('collapse_identical_reads'):
        process_one_lib = self.collapse_one_fastq_file
    else:
        process_one_lib = self.fastq_to_fasta
    mod_utils.parmap(process_one_lib,
                     settings.iter_lib_settings(),
                     nprocs=self.threads)
    settings.write_to_log('collapsing reads complete')
def trim_reads(self):
    """
    Trim the reads of every library by the given amount (the log message names
    seqtk as the trimming tool).

    Trimming the 5' end removes potential random barcoding sequences. Trimming the
    3' end can also help when mapping is problematic, because it reduces the chance
    that indels prevent mapping.

    When every library already reports trimmed reads, the existing output is reused.
    Otherwise the 'trimmed_reads' results directory is created and trim_one_lib is
    called once per library through mod_utils.parmap.
    :return:
    """
    self.settings.write_to_log('trimming reads with seqtk')
    # materialize once so the same libraries are checked, counted and processed
    all_settings = list(self.settings.iter_lib_settings())
    if all(lib_settings.trimmed_reads_exist() for lib_settings in all_settings):
        self.settings.write_to_log('using existing trimmed reads')
        return
    mod_utils.make_dir(self.rdir_path('trimmed_reads'))
    # never start more instances than there are libraries to process
    num_instances = min(len(all_settings), self.threads)
    # floor division keeps the thread count an integer ('/' yields a float on
    # Python 3); one thread per instance is held back, but never below 1
    threads_per_instance = max(self.threads // num_instances - 1, 1)
    # nprocs matches the instance count the thread budget was computed for
    mod_utils.parmap(
        lambda lib_setting: self.trim_one_lib(lib_setting, threads_per_instance),
        all_settings,
        nprocs=num_instances)
    self.settings.write_to_log('trimming reads complete')
def run_shapemapper(self):
    """
    Run shapemapper (2.0, per the original author's note) on the samples in batches.

    When need_to_run_shapemapper() is true, the 'shapemapper' results directory is
    created and run_single_shapemapper is called once per library through
    mod_utils.parmap, with self.threads shared evenly between the instances that
    run at the same time. Otherwise the existing output is reused.
    :return:
    """
    self.settings.write_to_log('running shapemapper')
    if self.need_to_run_shapemapper():
        mod_utils.make_dir(self.rdir_path('shapemapper'))
        all_settings = list(self.settings.iter_lib_settings())
        # never start more instances than there are libraries to process
        num_instances = min(len(all_settings), self.threads)
        # floor division: each instance must receive a whole number of threads
        # ('/' yields a float on Python 3)
        threads_per_instance = self.threads // num_instances
        mod_utils.parmap(
            lambda lib_setting: self.run_single_shapemapper(lib_setting, threads_per_instance),
            all_settings,
            nprocs=num_instances)
    else:
        self.settings.write_to_log('using existing shapemapper output')
    self.settings.write_to_log('done running shapemapper')
def map_reads(self):
    """
    Map the reads of every library (STAR, per the original author's note).

    Returns right after the initial log message when every library already reports
    mapped reads. Otherwise the 'mapped_reads' results directory is created and
    map_one_library is called once per library through mod_utils.parmap.
    :return:
    """
    self.settings.write_to_log('mapping reads')
    # materialize once so the same libraries are checked, counted and processed
    all_settings = list(self.settings.iter_lib_settings())
    if all(lib_settings.mapped_reads_exist() for lib_settings in all_settings):
        return
    mod_utils.make_dir(self.rdir_path('mapped_reads'))
    # never start more instances than there are libraries to process
    num_instances = min(len(all_settings), self.threads)
    # floor division keeps the thread count an integer ('/' yields a float on
    # Python 3); one thread per instance is held back, but never below 1
    threads_per_instance = max(self.threads // num_instances - 1, 1)
    mod_utils.parmap(
        lambda lib_setting: self.map_one_library(lib_setting, threads_per_instance),
        all_settings,
        nprocs=num_instances)
    self.settings.write_to_log('finished mapping reads')