예제 #1
0
 def parse_shapemapper_output_files(self):
     """
     Load the shapemapper mutation table of every rRNA for this library.

     For each name in self.experiment_settings.rRNA_seqs the file
     <shapemapper config dir>/output/counted_mutations_columns/<sample>_<rRNA>.csv
     is wrapped in an rRNA_mutations object, which is stored in
     self.rRNA_mutation_data under the rRNA name.

     Raises:
         AssertionError: if mod_utils.file_exists reports that an expected
             output file is missing.
     """
     config_dir = os.path.dirname(self.experiment_settings.get_shapemapper_config_file())
     shapemapper_output_dir = os.path.join(config_dir, 'output', 'counted_mutations_columns')
     sample_name = self.lib_settings.sample_name
     for rRNA_name in self.experiment_settings.rRNA_seqs:
         shapemapper_output_file = os.path.join(shapemapper_output_dir, sample_name+'_'+rRNA_name+'.csv')
         # Raised explicitly (not via a bare `assert`) so the check still runs
         # under `python -O`; the exception type is kept for existing callers.
         if not mod_utils.file_exists(shapemapper_output_file):
             raise AssertionError('shapemapper output file not found: %s' % shapemapper_output_file)
         self.rRNA_mutation_data[rRNA_name] = rRNA_mutations(self, self.lib_settings, self.experiment_settings,
                                                             shapemapper_output_file)
예제 #2
0
 def need_to_run_shapemapper(self):
     """
     Return True as soon as any expected mutation-count file is missing.

     For every library setting and every rRNA name, the file
     'Pipeline_Modified_<rRNA>_mutation_counts.txt' inside the library's
     shapemapper output directory is checked with mod_utils.file_exists.
     Returns False when every check passes.
     """
     return any(
         not mod_utils.file_exists(
             os.path.join(lib.get_shapemapper_out_dir(),
                          'Pipeline_Modified_' + rRNA + '_mutation_counts.txt'))
         for lib in self.settings.iter_lib_settings()
         for rRNA in self.settings.rRNA_seqs)
예제 #3
0
 def need_to_run_shapemapper(self):
     """
     Decide whether shapemapper has to be (re)run.

     Returns True when the 'force_shapemapper' property is set, or when any
     '<sample>_<rRNA>.csv' file is missing (per mod_utils.file_exists) from
     <shapemapper config dir>/output/counted_mutations_columns for the
     samples listed under 'experimentals', 'no_mod_controls' and
     'with_mod_controls'. Returns False otherwise.
     """
     if self.settings.get_property('force_shapemapper'):
         return True
     config_dir = os.path.dirname(self.settings.get_shapemapper_config_file())
     counts_dir = os.path.join(config_dir, 'output', 'counted_mutations_columns')
     all_samples = (self.settings.get_property('experimentals')
                    + self.settings.get_property('no_mod_controls')
                    + self.settings.get_property('with_mod_controls'))
     return any(
         not mod_utils.file_exists(os.path.join(counts_dir, sample + '_' + rRNA + '.csv'))
         for sample in all_samples
         for rRNA in self.settings.rRNA_seqs)
예제 #4
0
 def star_index_exists(self):
     """Return what mod_utils.file_exists reports for the path from get_star_index()."""
     return mod_utils.file_exists(self.get_star_index())
예제 #5
0
    def process_settings(self, settings_file):
        """
        Read the settings file, convert option values from str to their
        typed form (int, float, bool, list) and store the result in
        self.settings as a dict.

        All options of all sections are flattened into one dict; while a
        section's options are copied, the section name itself is also
        recorded as a key with value True.

        Besides self.settings this sets self.fqdir, self.sample_names,
        self.experimentals, self.no_mod_controls, self.with_mod_controls,
        self.exclude_constitutive, self.fastq_gz_file_handles and self.rdir,
        creates the results directory through mod_utils.make_dir and copies
        the settings file into it.

        Inconsistent sample lists are only reported on stdout; they do not
        abort processing.

        Raises:
            KeyError: if a required option is absent from the settings file.
            ValueError: if an int/float option cannot be converted or a
                boolean option is neither "true" nor "false".
            AssertionError: if mod_utils.file_exists reports a listed
                fastq.gz file as missing.
        """
        int_keys = ['first_base_to_keep', 'last_base_to_keep', 'min_post_adaptor_length', 'min_base_quality',
                    'min_mapping_quality']
        float_keys = ['confidence_interval_cutoff', 'fold_change_cutoff']
        str_keys = ['adaptor_sequence', 'rrna_fasta', 'experiment_name', 'shapemapper_ref_file',
                    'affected_nucleotides', 'pymol_base_script', 'pymol_base_script_colorchange', 'tptn_file_18s',
                    'tptn_file_25s', 'functional_groupings']
        boolean_keys = ['collapse_identical_reads', 'force_read_resplit', 'force_remapping', 'force_recollapse',
                        'force_recount', 'force_index_rebuild', 'force_retrim', 'trim_adaptor', 'discard_untrimmed',
                        'force_shapemapper', 'make_interactive_plots']
        list_str_keys = ['fastq_gz_files', 'sample_names', 'experimentals', 'no_mod_controls', 'with_mod_controls',
                         'exclude_constitutive']
        config = ConfigParser.ConfigParser()
        config.read(settings_file)
        settings = {}
        for section in config.sections():
            for option in config.options(section):
                settings[option] = config.get(section, option)
                settings[section] = True
        for k in int_keys:
            settings[k] = int(settings[k])
        for k in str_keys:
            # Strings need no conversion; the lookup only enforces that the
            # option is present (KeyError otherwise).
            settings[k] = settings[k]
        for k in float_keys:
            settings[k] = float(settings[k])
        for k in boolean_keys:
            lowered = settings[k].lower()
            if lowered not in ('true', 'false'):
                raise ValueError(
                  'Boolean value %s must be "true" or "false"' % k)
            settings[k] = lowered == 'true'
        for k in list_str_keys:
            # List-valued options are written as JSON arrays in the file.
            settings[k] = simplejson.loads(settings[k])
        self.fqdir = settings['fastq_dir']
        self.sample_names = settings['sample_names']
        self.experimentals = settings['experimentals']
        self.no_mod_controls = settings['no_mod_controls']
        self.with_mod_controls = settings['with_mod_controls']
        self.exclude_constitutive = settings['exclude_constitutive']
        # Explicit comparisons instead of assert-in-try/bare-except: the
        # warnings are still printed under `python -O`, and unrelated
        # exceptions are no longer swallowed.
        if (len(self.experimentals) != len(self.no_mod_controls)
                or len(self.experimentals) != len(self.with_mod_controls)):
            print 'error: experimentals, no_mod_controls, and with_mod_controls should all be the same length'
            print 'for mutation rate purposes, its ok to reuse a dataset here, it really doesnt matter'
        for sample_name in self.experimentals+self.no_mod_controls+self.with_mod_controls:
            if sample_name not in self.sample_names:
                print sample_name, ' not in sample names, make sure you are using regular quotation marks'
                # Only the first unknown sample is reported.
                break

        self.fastq_gz_file_handles = [os.path.join(self.fqdir, fastq_gz_file) for fastq_gz_file in
                                      settings['fastq_gz_files']]
        for file_handle in self.fastq_gz_file_handles:
            # Raised explicitly so the check survives `python -O`; the
            # exception type matches the assert it replaces.
            if not mod_utils.file_exists(file_handle):
                raise AssertionError('input fastq.gz file not found: %s' % file_handle)
        self.settings = settings
        self.rdir = settings['results_dir']
        mod_utils.make_dir(self.rdir)
        shutil.copy(settings_file, self.rdir)
예제 #6
0
 def mapped_reads_exist(self):
     """Return what mod_utils.file_exists reports for the path from get_mapped_reads_sam_gz()."""
     return mod_utils.file_exists(self.get_mapped_reads_sam_gz())
예제 #7
0
 def mapped_reads_exist(self):
     """Return what mod_utils.file_exists reports for the path from get_mapped_reads()."""
     return mod_utils.file_exists(self.get_mapped_reads())
예제 #8
0
 def adaptorless_reads_exist(self):
     """Return what mod_utils.file_exists reports for the path from get_adaptor_trimmed_reads()."""
     return mod_utils.file_exists(self.get_adaptor_trimmed_reads())
예제 #9
0
 def adaptorless_reads_exist(self):
     """Return what mod_utils.file_exists reports for the path from get_adaptor_trimmed_reads()."""
     return mod_utils.file_exists(self.get_adaptor_trimmed_reads())
예제 #10
0
 def collapsed_reads_exist(self):
     """Return what mod_utils.file_exists reports for the path from get_collapsed_reads()."""
     return mod_utils.file_exists(self.get_collapsed_reads())
예제 #11
0
 def split_reads_exist(self):
     """Return what mod_utils.file_exists reports for the path from get_split_reads()."""
     return mod_utils.file_exists(self.get_split_reads())
예제 #12
0
 def mutation_counts_exists(self):
     """Return what mod_utils.file_exists reports for the path from get_mutation_counts()."""
     counts_path = self.get_mutation_counts()
     return mod_utils.file_exists(counts_path)
예제 #13
0
 def positional_coverage_exists(self):
     """Return what mod_utils.file_exists reports for the path from get_positional_coverage()."""
     coverage_path = self.get_positional_coverage()
     return mod_utils.file_exists(coverage_path)
예제 #14
0
 def read_5p_counts_exists(self):
     """Return what mod_utils.file_exists reports for the path from get_read_5p_counts()."""
     counts_path = self.get_read_5p_counts()
     return mod_utils.file_exists(counts_path)
예제 #15
0
 def rRNA_bowtie_index_exists(self):
     """
     Return what mod_utils.file_exists reports for the path formed by
     appending '.1.bt2' to the value from get_rRNA_bowtie_index().
     """
     index_prefix = self.get_rRNA_bowtie_index()
     return mod_utils.file_exists(index_prefix + '.1.bt2')
예제 #16
0
 def read_5p_counts_exists(self):
     """Return what mod_utils.file_exists reports for the path from get_read_5p_counts()."""
     counts_path = self.get_read_5p_counts()
     return mod_utils.file_exists(counts_path)
예제 #17
0
 def mutation_counts_exists(self):
     """Return what mod_utils.file_exists reports for the path from get_mutation_counts()."""
     counts_path = self.get_mutation_counts()
     return mod_utils.file_exists(counts_path)
예제 #18
0
 def primerless_reads_exist(self):
     """Return what mod_utils.file_exists reports for the path from get_primer_trimmed_reads()."""
     return mod_utils.file_exists(self.get_primer_trimmed_reads())
예제 #19
0
 def trimmed_reads_exist(self):
     """Return what mod_utils.file_exists reports for the path from get_trimmed_reads()."""
     return mod_utils.file_exists(self.get_trimmed_reads())
예제 #20
0
 def trimmed_reads_exist(self):
     """Return what mod_utils.file_exists reports for the path from get_trimmed_reads()."""
     return mod_utils.file_exists(self.get_trimmed_reads())
예제 #21
0
    def process_settings(self, settings_file):
        """
        Read the settings file, convert option values from str to their
        typed form (int, float, bool, list) and store the result in
        self.settings as a dict.

        All options of all sections are flattened into one dict; while a
        section's options are copied, the section name itself is also
        recorded as a key with value True.

        Besides self.settings this sets self.fqdir, self.sample_names,
        self.experimentals, self.no_mod_controls, self.with_mod_controls,
        self.exclude_constitutive, self.fastq_gz_file_handles and self.rdir,
        creates the results directory through mod_utils.make_dir and copies
        the settings file into it.

        Inconsistent sample lists are only reported on stdout; they do not
        abort processing.

        Raises:
            KeyError: if a required option is absent from the settings file.
            ValueError: if an int/float option cannot be converted or a
                boolean option is neither "true" nor "false".
            AssertionError: if mod_utils.file_exists reports a listed
                fastq.gz file as missing.
        """
        int_keys = [
            'first_base_to_keep', 'last_base_to_keep',
            'min_post_adaptor_length', 'min_base_quality',
            'min_mapping_quality'
        ]
        float_keys = [
            'confidence_interval_cutoff', 'fold_change_cutoff',
            'winsorization_upper_limit'
        ]
        str_keys = [
            'adaptor_sequence', 'rrna_fasta', 'experiment_name',
            'affected_nucleotides', 'pymol_base_script',
            'pymol_base_script_colorchange', 'tptn_file_18s', 'tptn_file_25s'
        ]
        boolean_keys = ['make_interactive_plots']
        list_str_keys = [
            'fastq_gz_files', 'sample_names', 'experimentals',
            'no_mod_controls', 'with_mod_controls', 'exclude_constitutive'
        ]
        config = ConfigParser.ConfigParser()
        config.read(settings_file)
        settings = {}
        for section in config.sections():
            for option in config.options(section):
                settings[option] = config.get(section, option)
                settings[section] = True
        for k in int_keys:
            settings[k] = int(settings[k])
        for k in str_keys:
            # Strings need no conversion; the lookup only enforces that the
            # option is present (KeyError otherwise).
            settings[k] = settings[k]
        for k in float_keys:
            settings[k] = float(settings[k])
        for k in boolean_keys:
            lowered = settings[k].lower()
            if lowered not in ('true', 'false'):
                raise ValueError('Boolean value %s must be "true" or "false"' %
                                 k)
            settings[k] = lowered == 'true'
        for k in list_str_keys:
            # List-valued options are written as JSON arrays in the file.
            settings[k] = simplejson.loads(settings[k])
        self.fqdir = settings['fastq_dir']
        self.sample_names = settings['sample_names']
        self.experimentals = settings['experimentals']
        self.no_mod_controls = settings['no_mod_controls']
        self.with_mod_controls = settings['with_mod_controls']
        self.exclude_constitutive = settings['exclude_constitutive']
        # Explicit comparisons instead of assert-in-try/bare-except: the
        # warnings are still printed under `python -O`, and unrelated
        # exceptions are no longer swallowed.
        if (len(self.experimentals) != len(self.no_mod_controls)
                or len(self.experimentals) != len(self.with_mod_controls)):
            print 'error: experimentals, no_mod_controls, and with_mod_controls should all be the same length'
            print 'for mutation rate purposes, its ok to reuse a dataset here, it really doesnt matter'
        for sample_name in self.experimentals + self.no_mod_controls + self.with_mod_controls:
            if sample_name not in self.sample_names:
                print sample_name, ' not in sample names, make sure you are using regular quotation marks'
                # Only the first unknown sample is reported.
                break

        self.fastq_gz_file_handles = [
            os.path.join(self.fqdir, fastq_gz_file)
            for fastq_gz_file in settings['fastq_gz_files']
        ]
        for file_handle in self.fastq_gz_file_handles:
            # Raised explicitly so the check survives `python -O`; the
            # exception type matches the assert it replaces.
            if not mod_utils.file_exists(file_handle):
                raise AssertionError('input fastq.gz file not found: %s' % file_handle)
        self.settings = settings
        self.rdir = settings['results_dir']
        mod_utils.make_dir(self.rdir)
        shutil.copy(settings_file, self.rdir)
예제 #22
0
 def filtered_reads_exist(self):
     """Return what mod_utils.file_exists reports for the path from get_filtered_reads()."""
     return mod_utils.file_exists(self.get_filtered_reads())