예제 #1
0
    def remove_adaptor(self):
        """
        Run adaptor removal for every library when the 'trim_adaptor' property is set.

        Unless the 'force_retrim' property is set, the step is skipped entirely
        when every library already reports that its adaptorless reads exist.
        Work is dispatched through mod_utils.parmap with nprocs=self.threads.
        :return:
        """
        cfg = self.settings
        if not cfg.get_property('force_retrim'):
            # Nothing to redo when each library already has its adaptorless reads.
            if all(lib.adaptorless_reads_exist() for lib in cfg.iter_lib_settings()):
                return

        if not cfg.get_property('trim_adaptor'):
            return

        def trim_one(lib_setting):
            return self.remove_adaptor_one_lib(lib_setting)

        cfg.write_to_log( 'trimming adaptors')
        mod_utils.make_dir(self.rdir_path('adaptor_removed'))
        mod_utils.parmap(trim_one, cfg.iter_lib_settings(), nprocs=self.threads)
        cfg.write_to_log( 'trimming adaptors done')
예제 #2
0
 def remove_adaptor(self):
     """
     Remove adaptors from all libraries (the log message names skewer as the tool).

     The step is skipped, with a log entry, when every library already reports
     that its adaptorless reads exist. Otherwise self.threads is divided between
     the libraries that are processed concurrently through mod_utils.parmap.
     :return:
     """
     self.settings.write_to_log('removing adaptors with skewer')
     if all(lib_settings.adaptorless_reads_exist()
            for lib_settings in self.settings.iter_lib_settings()):
         self.settings.write_to_log('using existing adaptor-trimmed reads')
         return
     mod_utils.make_dir(self.rdir_path('adaptor_removed'))
     # Materialize once: the list is needed both for its length and for dispatch.
     all_settings = list(self.settings.iter_lib_settings())
     num_instances = min(len(all_settings), self.threads)
     # Floor division keeps the per-instance thread count an integer on Python 3
     # (plain "/" would hand a float to the per-library worker).
     threads_per_instance = self.threads // num_instances
     mod_utils.parmap(lambda lib_setting: self.remove_adaptor_one_lib(
         lib_setting, threads_per_instance),
                      all_settings,
                      nprocs=num_instances)
     self.settings.write_to_log('removing adaptors done')
예제 #3
0
 def trim_reads(self):
     """
     Trim reads by the configured amount, which removes potential random barcoding
     sequences from the 5' end. Trimming from the 3' end can also help when mapping
     is problematic, by reducing the chance that indels prevent mapping.

     Unless the 'force_retrim' property is set, nothing is redone when every
     library already reports that its trimmed reads exist.
     :return:
     """
     cfg = self.settings
     cfg.write_to_log( 'trimming reads')
     if not cfg.get_property('force_retrim'):
         # Skip the whole step when no library is missing its trimmed reads.
         if all(lib.trimmed_reads_exist() for lib in cfg.iter_lib_settings()):
             return

     def trim_one(lib_setting):
         return self.trim_one_lib(lib_setting)

     mod_utils.make_dir(self.rdir_path('trimmed_reads'))
     mod_utils.parmap(trim_one, cfg.iter_lib_settings(), nprocs = self.threads)
     cfg.write_to_log('trimming reads complete')
예제 #4
0
 def collapse_identical_reads(self):
     """
     Collapse all identical reads using the FASTX toolkit.

     Unless the 'force_recollapse' property is set, nothing is redone when every
     library already reports that its collapsed reads exist. When the
     'collapse_identical_reads' property is off, each library is passed to
     fastq_to_fasta instead of collapse_one_fastq_file.
     :return:
     """
     cfg = self.settings
     cfg.write_to_log('collapsing reads')
     if not cfg.get_property('force_recollapse'):
         # Skip the whole step when no library is missing its collapsed reads.
         if all(lib.collapsed_reads_exist() for lib in cfg.iter_lib_settings()):
             return
     mod_utils.make_dir(self.rdir_path('collapsed_reads'))
     # Pick the per-library worker first so the dispatch is written only once.
     if cfg.get_property('collapse_identical_reads'):
         def process(lib_setting):
             return self.collapse_one_fastq_file(lib_setting)
     else:
         def process(lib_setting):
             return self.fastq_to_fasta(lib_setting)
     mod_utils.parmap(process, cfg.iter_lib_settings(), nprocs = self.threads)
     cfg.write_to_log('collapsing reads complete')
예제 #5
0
 def trim_reads(self):
     """
     Trim reads by the configured amount, which removes potential random barcoding
     sequences from the 5' end. Trimming from the 3' end can also help when mapping
     is problematic, by reducing the chance that indels prevent mapping.
     (The log message names seqtk as the tool.)

     The step is skipped, with a log entry, when every library already reports
     that its trimmed reads exist. Otherwise self.threads is divided between the
     libraries that are processed concurrently through mod_utils.parmap.
     :return:
     """
     self.settings.write_to_log('trimming reads with seqtk')
     if all(lib_settings.trimmed_reads_exist()
            for lib_settings in self.settings.iter_lib_settings()):
         self.settings.write_to_log('using existing trimmed reads')
         return
     mod_utils.make_dir(self.rdir_path('trimmed_reads'))
     # Materialize once: the list is needed both for its length and for dispatch.
     all_settings = list(self.settings.iter_lib_settings())
     num_instances = min(len(all_settings), self.threads)
     # Floor division keeps the count an integer on Python 3. One thread per
     # instance is held back, but every instance gets at least one.
     threads_per_instance = max((self.threads // num_instances) - 1, 1)
     # Start exactly num_instances workers: the per-instance thread budget above
     # was computed for that many, and there is never more work than that needs.
     mod_utils.parmap(lambda lib_setting: self.trim_one_lib(
         lib_setting, threads_per_instance),
                      all_settings,
                      nprocs=num_instances)
     self.settings.write_to_log('trimming reads complete')
예제 #6
0
 def run_shapemapper(self):
     """
     Run shapemapper 2.0 on the samples in batches.

     When need_to_run_shapemapper() is false, only a log entry about reusing the
     existing output is written. Otherwise self.threads is divided between the
     libraries that are processed concurrently through mod_utils.parmap.
     :return:
     """
     self.settings.write_to_log('running shapemapper')
     if self.need_to_run_shapemapper():
         mod_utils.make_dir(self.rdir_path('shapemapper'))
         all_settings = list(self.settings.iter_lib_settings())
         # With no libraries there is nothing to dispatch, and the thread split
         # below would divide by zero.
         if all_settings:
             num_instances = min(len(all_settings), self.threads)
             # Floor division keeps the per-instance thread count an integer
             # on Python 3.
             threads_per_instance = self.threads // num_instances
             mod_utils.parmap(lambda lib_setting: self.run_single_shapemapper(
                 lib_setting, threads_per_instance),
                              all_settings,
                              nprocs=num_instances)
     else:
         self.settings.write_to_log('using existing shapemapper output')
     self.settings.write_to_log('done running shapemapper')
예제 #7
0
 def map_reads(self):
     """
     Map all reads using STAR.

     The step is skipped when every library already reports that its mapped reads
     exist. Otherwise self.threads is divided between the libraries that are
     processed concurrently through mod_utils.parmap.
     :return:
     """
     self.settings.write_to_log('mapping reads')
     if all(lib_settings.mapped_reads_exist()
            for lib_settings in self.settings.iter_lib_settings()):
         return
     mod_utils.make_dir(self.rdir_path('mapped_reads'))
     all_settings = list(self.settings.iter_lib_settings())
     num_instances = min(len(all_settings), self.threads)
     # Floor division keeps the count an integer on Python 3. One thread per
     # instance is held back, but every instance gets at least one.
     threads_per_instance = max(self.threads // num_instances - 1, 1)
     mod_utils.parmap(lambda lib_setting: self.map_one_library(
         lib_setting, threads_per_instance),
                      all_settings,
                      nprocs=num_instances)
     self.settings.write_to_log('finished mapping reads')