Esempio n. 1
0
 def map_for_contaminating_sequences_one_lib(self, lib_settings):
     """
     Run up to two bowtie2 -> samtools mapping passes for a single library.

     Pass 1 maps the library's unmappable reads against the rRNA bowtie index;
     pass 2 maps whatever pass 1 left unmapped against the genome bowtie index.
     Each pass is skipped when tps_utils.file_exists() already reports its
     unmapped-reads output file.

     :param lib_settings: per-library settings object providing the file paths
     :return: None; the exit status of each shell pipeline is not inspected
     """
     # One shell pipeline shared by both passes. Slots, in order: bowtie2 index,
     # thread count, input reads, --un-gz output, file receiving bowtie2's stderr
     # (mapping stats, appended), BAM output, file receiving samtools' stderr (log, appended).
     pipeline = 'bowtie2 -f -D 20 -R 3 -N 1 -L 15 -i S,1,0.50 -x %s -p %d -U %s --un-gz %s 2>>%s | samtools view -bS - > %s 2>>%s '

     # pass 1: map the unmappable reads to rRNA, counting mapping stats
     rrna_pass_done = tps_utils.file_exists(lib_settings.get_rRNA_unmapped_reads())
     if not rrna_pass_done:
         rrna_slots = (
             self.experiment_settings.get_rRNA_bowtie_index(),
             self.threads,
             lib_settings.get_unmappable_reads(),
             lib_settings.get_rRNA_unmapped_reads(),
             lib_settings.get_rRNA_mapping_stats(),
             lib_settings.get_rRNA_mapped_reads(),
             lib_settings.get_log(),
         )
         rrna_job = subprocess.Popen(pipeline % rrna_slots, shell=True)
         rrna_job.wait()

     # pass 2: map the reads still unmapped after pass 1 to the rest of the genome
     genome_pass_done = tps_utils.file_exists(lib_settings.get_genome_unmapped_reads())
     if not genome_pass_done:
         genome_slots = (
             self.experiment_settings.get_genome_bowtie_index(),
             self.threads,
             lib_settings.get_rRNA_unmapped_reads(),
             lib_settings.get_genome_unmapped_reads(),
             lib_settings.get_genome_mapping_stats(),
             lib_settings.get_genome_mapped_reads(),
             lib_settings.get_log(),
         )
         genome_job = subprocess.Popen(pipeline % genome_slots, shell=True)
         genome_job.wait()
Esempio n. 2
0
 def map_for_contaminating_sequences_one_lib(self, lib_settings):
     """
     Map one library's reads against the rRNA index and then the genome index.

     Both stages run the same bowtie2 | samtools shell pipeline and are skipped
     individually when tps_utils.file_exists() reports that the stage's
     unmapped-reads output is already present. The pipelines' exit codes are
     waited for but not checked, and nothing is returned.

     :param lib_settings: per-library settings object that supplies all file paths
     """
     command_template = (
         "bowtie2 -f -D 20 -R 3 -N 1 -L 15 -i S,1,0.50 -x %s -p %d -U %s --un-gz %s 2>>%s"
         " | samtools view -bS - > %s 2>>%s "
     )

     # stage 1: unmappable reads -> rRNA index, counting mapping stats
     if not tps_utils.file_exists(lib_settings.get_rRNA_unmapped_reads()):
         index = self.experiment_settings.get_rRNA_bowtie_index()
         n_threads = self.threads
         reads_in = lib_settings.get_unmappable_reads()
         unmapped_out = lib_settings.get_rRNA_unmapped_reads()
         stats_out = lib_settings.get_rRNA_mapping_stats()
         bam_out = lib_settings.get_rRNA_mapped_reads()
         log_out = lib_settings.get_log()
         command = command_template % (index, n_threads, reads_in, unmapped_out, stats_out, bam_out, log_out)
         subprocess.Popen(command, shell=True).wait()

     # stage 2: reads left unmapped by stage 1 -> genome index, counting mapping stats
     if not tps_utils.file_exists(lib_settings.get_genome_unmapped_reads()):
         index = self.experiment_settings.get_genome_bowtie_index()
         n_threads = self.threads
         reads_in = lib_settings.get_rRNA_unmapped_reads()
         unmapped_out = lib_settings.get_genome_unmapped_reads()
         stats_out = lib_settings.get_genome_mapping_stats()
         bam_out = lib_settings.get_genome_mapped_reads()
         log_out = lib_settings.get_log()
         command = command_template % (index, n_threads, reads_in, unmapped_out, stats_out, bam_out, log_out)
         subprocess.Popen(command, shell=True).wait()
Esempio n. 3
0
 def process_settings(self, settings_file):
     """
     Read the settings file, convert option values to their types and store them.

     - every option of every section is flattened into a single dict; each section
       that has at least one option is additionally stored as a key mapped to True
     - int, boolean and JSON-list options are converted; string options are only
       checked for presence
     - stores the result in self.settings and sets self.fqdir, self.sample_names,
       self.fastq_gz_file_handles, self.wdir and self.rdir
     - copies the settings file into the results directory

     :param settings_file: path of the settings file to read
     :raises KeyError: if a required option is missing
     :raises ValueError: if an int option is not an integer, or a boolean option
         is neither "true" nor "false" (case-insensitive)
     :raises AssertionError: if tps_utils.file_exists() rejects a fastq file or
         one of the files listed in extant_files
     """
     int_keys = ['first_base_to_keep', 'last_base_to_keep', 'max_reads_to_split', 'minimum_reads_for_inclusion',
                 'pool_5trim', 'pool_3trim', 'min_post_adaptor_length']
     str_keys = ['adaptor_sequence', 'rrna_index', 'genome_index', 'pool_append', 'pool_prepend', 'primer_sequence']
     boolean_keys = ['collapse_identical_reads', 'force_read_resplit', 'force_remapping', 'force_recollapse',
                     'force_recount', 'force_index_rebuild', 'force_retrim', 'trim_adaptor']
     list_str_keys = ['fastq_gz_files', 'sample_names']
     extant_files = ['pool_fasta']

     config = ConfigParser.ConfigParser()
     config.read(settings_file)
     settings = {}
     for section in config.sections():
         for option in config.options(section):
             settings[option] = config.get(section, option)
             settings[section] = True

     for k in int_keys:
         settings[k] = int(settings[k])
     for k in str_keys:
         # string options need no conversion, but they are still required
         if k not in settings:
             raise KeyError(k)
     for k in boolean_keys:
         lowered = settings[k].lower()
         if lowered not in ('true', 'false'):
             raise ValueError(
               'Boolean value %s must be "true" or "false"' % k)
         settings[k] = lowered == 'true'
     for k in list_str_keys:
         # list options are written as JSON arrays in the settings file
         settings[k] = simplejson.loads(settings[k])

     self.fqdir = settings['fastq_dir']
     self.sample_names = settings['sample_names']
     self.fastq_gz_file_handles = [os.path.join(self.fqdir, fastq_gz_file) for fastq_gz_file in
                                   settings['fastq_gz_files']]
     # Explicit raises instead of `assert`, so the checks are not stripped when
     # Python runs with -O; AssertionError is kept so callers see the same type.
     for file_handle in self.fastq_gz_file_handles:
         if not tps_utils.file_exists(file_handle):
             raise AssertionError('tps_utils.file_exists failed for fastq file: %s' % file_handle)
     for k in extant_files:
         if not tps_utils.file_exists(settings[k]):
             raise AssertionError('tps_utils.file_exists failed for %s: %s' % (k, settings[k]))

     self.settings = settings
     self.wdir = settings['working_dir']
     self.rdir = settings['results_dir']
     shutil.copy(settings_file, self.rdir)
Esempio n. 4
0
    def get_collapsed_read_fractions(self, lib_settings):
        """
        Return each collapsed sequence's share of the library's total read count.

        The fractions are cached as a pickle under <rdir>/QC/collapsed_fracs/.
        They are recomputed from the gzipped collapsed-reads file when that pickle
        is missing or when the 'force_recollapse' property is set; otherwise the
        pickle is loaded.

        Read counts are parsed from header lines (those starting with '>'): the
        count is the second '-'-separated field of the text after '>'.

        :param lib_settings: per-library settings object (sample_name, collapsed reads path)
        :return: tuple (sample_name, read_fractions)
        :raises IndexError, ValueError: if a header line has no integer second '-' field
        """
        out_name = os.path.join(
            self.experiment_settings.get_rdir(), 'QC', 'collapsed_fracs',
            '%(sample_name)s.collapsed_read_fractions.pkl' %
            {'sample_name': lib_settings.sample_name})
        # A forced recollapse must bypass the cached pickle; without a force the
        # cache is used whenever it exists.
        if not tps_utils.file_exists(
                out_name) or self.experiment_settings.get_property(
                    'force_recollapse'):
            collapsed_reads_file = lib_settings.get_collapsed_reads()
            read_counts = []
            # `with` closes the gzip handle even if a header fails to parse
            with gzip.open(collapsed_reads_file) as f:
                for line in f:
                    # only header lines carry counts; blank, '#' and sequence
                    # lines never start with '>' and are skipped
                    if line.startswith('>'):
                        num_reads = int(line[1:].strip().split('-')[1])
                        read_counts.append(num_reads)
            read_fractions = np.array(read_counts) / float(sum(read_counts))
            bzUtils.makePickle(read_fractions, out_name)
        else:
            read_fractions = bzUtils.unPickle(out_name)

        return (lib_settings.sample_name, read_fractions)
Esempio n. 5
0
    def get_collapsed_read_fractions(self, lib_settings):
        """
        Return each collapsed sequence's share of the library's total read count.

        The fractions are cached as a pickle under <rdir>/QC/collapsed_fracs/.
        They are recomputed from the gzipped collapsed-reads file when that pickle
        is missing or when the "force_recollapse" property is set; otherwise the
        pickle is loaded.

        Read counts are parsed from header lines (those starting with ">"): the
        count is the second "-"-separated field of the text after ">".

        :param lib_settings: per-library settings object (sample_name, collapsed reads path)
        :return: tuple (sample_name, read_fractions)
        :raises IndexError, ValueError: if a header line has no integer second "-" field
        """
        out_name = os.path.join(
            self.experiment_settings.get_rdir(),
            "QC",
            "collapsed_fracs",
            "%(sample_name)s.collapsed_read_fractions.pkl" % {"sample_name": lib_settings.sample_name},
        )
        # A forced recollapse must bypass the cached pickle; without a force the
        # cache is used whenever it exists.
        if not tps_utils.file_exists(out_name) or self.experiment_settings.get_property("force_recollapse"):
            collapsed_reads_file = lib_settings.get_collapsed_reads()
            read_counts = []
            # `with` closes the gzip handle even if a header fails to parse
            with gzip.open(collapsed_reads_file) as f:
                for line in f:
                    # only header lines carry counts; blank, "#" and sequence
                    # lines never start with ">" and are skipped
                    if line.startswith(">"):
                        num_reads = int(line[1:].strip().split("-")[1])
                        read_counts.append(num_reads)
            read_fractions = np.array(read_counts) / float(sum(read_counts))
            bzUtils.makePickle(read_fractions, out_name)
        else:
            read_fractions = bzUtils.unPickle(out_name)

        return (lib_settings.sample_name, read_fractions)
Esempio n. 6
0
 def sequence_counts_exist(self):
     """Return tps_utils.file_exists() for the path given by get_sequence_counts()."""
     return tps_utils.file_exists(self.get_sequence_counts())
Esempio n. 7
0
 def mapped_reads_exist(self):
     """Return tps_utils.file_exists() for the path given by get_mapped_reads()."""
     return tps_utils.file_exists(self.get_mapped_reads())
Esempio n. 8
0
 def trimmed_reads_exist(self):
     """Return tps_utils.file_exists() for the path given by get_trimmed_reads()."""
     return tps_utils.file_exists(self.get_trimmed_reads())
Esempio n. 9
0
 def primerless_reads_exist(self):
     """Return tps_utils.file_exists() for the path given by get_primer_trimmed_reads()."""
     return tps_utils.file_exists(self.get_primer_trimmed_reads())
Esempio n. 10
0
 def adaptorless_reads_exist(self):
     """Return tps_utils.file_exists() for the path given by get_adaptor_trimmed_reads()."""
     return tps_utils.file_exists(self.get_adaptor_trimmed_reads())
Esempio n. 11
0
 def collapsed_reads_exist(self):
     """Return tps_utils.file_exists() for the path given by get_collapsed_reads()."""
     return tps_utils.file_exists(self.get_collapsed_reads())
Esempio n. 12
0
 def split_reads_exist(self):
     """Return tps_utils.file_exists() for the path given by get_split_reads()."""
     return tps_utils.file_exists(self.get_split_reads())
Esempio n. 13
0
 def bowtie_index_exists(self):
     """Return tps_utils.file_exists() for the bowtie index prefix with '.1.bt2' appended."""
     index_prefix = self.get_bowtie_index()
     first_index_file = index_prefix + '.1.bt2'
     return tps_utils.file_exists(first_index_file)