コード例 #1
0
 def get_log(self):
     """
     Return the path of this sample's log file.

     The 'logs' folder under the experiment results directory is handed to
     mod_utils.make_dir before the path is assembled.

     :return: <results dir>/logs/<sample_name>.log
     """
     logs_dir = os.path.join(self.experiment_settings.get_rdir(), 'logs')
     mod_utils.make_dir(logs_dir)
     file_name = '%(sample_name)s.log' % dict(sample_name=self.sample_name)
     return os.path.join(
         self.experiment_settings.get_rdir(), 'logs', file_name)
コード例 #2
0
 def write_wigs(self,
                suffix,
                subtract_background=False,
                subtract_control=False):
     """
     Write mutation-rate and RT-stop wig files for each library, plus a
     tab-separated batch file listing the mutation wigs.

     The batch file is written to
     <mutation_wigs>/mochi_batch_<suffix>.txt; per the original comment it
     exists to make batch import into mochiview easier.

     :param suffix: appended (after '_') to each sample name to form the
         output file names
     :param subtract_background: forwarded to write_mutation_rates_to_wig
     :param subtract_control: forwarded to write_mutation_rates_to_wig
     """
     mutation_dir = self.rdir_path('mutation_wigs')
     rt_stop_dir = self.rdir_path('rt_stop_wigs')
     mod_utils.make_dir(mutation_dir)
     mod_utils.make_dir(rt_stop_dir)
     # Any kind of subtraction restricts the output to the libraries
     # returned by get_normalizable_libs().
     if subtract_background or subtract_control:
         libs_to_write = self.get_normalizable_libs()
     else:
         libs_to_write = self.libs
     batch_path = os.path.join(mutation_dir,
                               'mochi_batch_' + suffix + '.txt')
     # The context manager closes the batch file even if one of the wig
     # writers raises part-way through the loop.
     with open(batch_path, 'w') as batch_file:
         batch_file.write('SEQUENCE_SET\tFILE_NAME\tDATA_TYPE\tNAME\n')
         for lib in libs_to_write:
             track_name = lib.lib_settings.sample_name + '_' + suffix
             batch_file.write('<replace>\t%s\t<replace>\t%s\n' %
                              (track_name + '.wig.gz', track_name))
             lib.write_mutation_rates_to_wig(
                 os.path.join(mutation_dir, track_name),
                 subtract_background=subtract_background,
                 subtract_control=subtract_control)
             # NOTE(review): the subtraction flags are not passed to the
             # RT-stop writer, so these files look identical for every
             # suffix - confirm that is intended.
             lib.write_rt_stops_to_wig(
                 os.path.join(rt_stop_dir, track_name))
コード例 #3
0
ファイル: mod_settings.py プロジェクト: borisz264/mod_seq
 def get_log(self):
     """
     Build the log file path for this sample.

     mod_utils.make_dir is called on the 'logs' folder inside the experiment
     results directory first.

     :return: <results dir>/logs/<sample_name>.log
     """
     mod_utils.make_dir(
         os.path.join(self.experiment_settings.get_rdir(), 'logs'))
     path_parts = [
         self.experiment_settings.get_rdir(),
         'logs',
         '%(sample_name)s.log' % {'sample_name': self.sample_name},
     ]
     return os.path.join(*path_parts)
コード例 #4
0
ファイル: mod_seq_main.py プロジェクト: borisz264/mod_seq
    def make_plots(self, exclude_constitutive=False):
        """
        Create the plot output folders and draw the standard set of plots.

        With exclude_constitutive set, output goes under
        plots/exclude_constitutive, every file name gets an
        '_exclude_constitutive' tag, and ROC curves and functional-group
        plots are drawn in addition to the common plots. Otherwise output
        goes under plots/ with no tag.

        :param exclude_constitutive: selects the output folder and file tag,
            and is forwarded to the plotting functions
        """
        if exclude_constitutive:
            for extra in ((), ('functional_groups',), ('interactive',)):
                mod_utils.make_dir(
                    self.rdir_path('plots', 'exclude_constitutive', *extra))
            rdir = self.rdir_path('plots', 'exclude_constitutive')
            file_tag = '_exclude_constitutive'
            # (settings key of the tptn file, output name, chromosome name)
            # NOTE(review): the 25S chromosome name has a double underscore
            # while the 18S one has a single underscore - confirm.
            roc_targets = (
                ('tptn_file_25s', '25S_ROC_curves', 'S.c.25S__rRNA'),
                ('tptn_file_18s', '18S_ROC_curves', 'S.c.18S_rRNA'),
            )
            for tptn_key, roc_name, chromosome in roc_targets:
                mod_plotting.generate_roc_curves(
                    self.settings.get_property(tptn_key),
                    self.settings.rRNA_seqs,
                    os.path.join(rdir, roc_name),
                    self.get_modified_libs(),
                    chromosome,
                    self.settings.get_property('affected_nucleotides'))
            mod_plotting.plot_functional_group_changes(
                self.get_normalizable_libs(),
                os.path.join(rdir, 'functional_groups', 'group_changes'),
                self.settings.get_property('functional_groupings'),
                nucleotides_to_count=self.settings.get_property(
                    'affected_nucleotides'),
                exclude_constitutive=exclude_constitutive,
                max_fold_reduction=0.001,
                max_fold_increase=100)
        else:
            mod_utils.make_dir(self.rdir_path('plots'))
            mod_utils.make_dir(self.rdir_path('plots', 'interactive'))
            rdir = self.rdir_path('plots')
            file_tag = ''

        def tagged_path(*parts):
            # Path under rdir whose last component carries the file tag.
            return os.path.join(rdir, *parts) + file_tag

        # Pie charts drawn from all libraries.
        mod_plotting.plot_mutated_nts_pie(
            self.libs,
            tagged_path('raw_mutation_fractions'),
            exclude_constitutive=exclude_constitutive)
        mod_plotting.plot_mutation_breakdown_pie(
            self.libs,
            tagged_path('raw_mutation_types'),
            exclude_constitutive=exclude_constitutive)
        mod_plotting.plot_mutated_nts_pie(
            self.libs,
            tagged_path('background_sub_mutation_fractions'),
            subtract_background=True,
            exclude_constitutive=exclude_constitutive)

        # Mutation-rate distributions.
        mod_plotting.plot_mutation_rate_cdfs(
            self.libs,
            tagged_path('mutation_rate_cdf'),
            nucleotides_to_count=self.settings.get_property(
                'affected_nucleotides'),
            exclude_constitutive=exclude_constitutive)
        # NOTE(review): the violin plot is given the same 'mutation_rate_cdf'
        # output name as the CDF plot above - verify the plotting functions
        # add distinct suffixes, otherwise one output replaces the other.
        mod_plotting.plot_mutation_rate_violins(
            self.libs,
            tagged_path('mutation_rate_cdf'),
            nucleotides_to_count=self.settings.get_property(
                'affected_nucleotides'),
            exclude_constitutive=exclude_constitutive)

        # Comparisons restricted to the normalizable libraries.
        mod_plotting.plot_changes_vs_control(
            self.get_normalizable_libs(),
            tagged_path('changes'),
            nucleotides_to_count=self.settings.get_property(
                'affected_nucleotides'),
            exclude_constitutive=exclude_constitutive)
        mod_plotting.ma_plots(
            self.get_normalizable_libs(),
            tagged_path('MA'),
            nucleotides_to_count=self.settings.get_property(
                'affected_nucleotides'),
            exclude_constitutive=exclude_constitutive)

        if not self.settings.get_property('make_interactive_plots'):
            return
        # Only the interactive MA plot is drawn; an interactive
        # changes-vs-control plot was disabled in the original code.
        # NOTE(review): exclude_constitutive is hard-coded to False here even
        # when the tagged output folder is in use - confirm.
        mod_plotting.ma_plots_interactive(
            self.get_normalizable_libs(),
            tagged_path('interactive', 'MA'),
            nucleotides_to_count=self.settings.get_property(
                'affected_nucleotides'),
            exclude_constitutive=False)
コード例 #5
0
 def generate_mapping_index(self):
     """
     Build a STAR index from the rRNA fasta file if no index exists yet.

     STAR is run through the shell with stdout and stderr appended to the
     settings log file. Its exit status is not inspected, and the
     start/complete log lines are written even when the build is skipped.
     """
     write_to_log = self.settings.write_to_log
     write_to_log('building STAR index')
     index_missing = not self.settings.star_index_exists()
     if index_missing:
         mod_utils.make_dir(self.settings.get_star_index())
         command_template = (
             'STAR --runThreadN %d --runMode genomeGenerate --genomeDir %s --genomeFastaFiles %s --genomeSAindexNbases 4 1>>%s 2>>%s'
         )
         # Order: thread count, index folder, fasta file, then the log
         # file for stdout and again for stderr.
         command_values = (
             self.threads,
             self.settings.get_star_index(),
             self.settings.get_rRNA_fasta(),
             self.settings.get_log(),
             self.settings.get_log(),
         )
         star_process = subprocess.Popen(
             command_template % command_values, shell=True)
         star_process.wait()
     write_to_log('building STAR index complete')
コード例 #6
0
ファイル: mod_seq_main.py プロジェクト: borisz264/mod_seq
 def write_wigs(self, suffix, subtract_background=False, subtract_control=False):
     """
     Write a mutation-rate wig file for each library, plus a tab-separated
     batch file listing them.

     The batch file is written to <wigs>/mochi_batch_<suffix>.txt; per the
     original comment it exists to make batch import into mochiview easier.

     :param suffix: appended (after '_') to each sample name to form the
         output file names
     :param subtract_background: forwarded to write_mutation_rates_to_wig
     :param subtract_control: forwarded to write_mutation_rates_to_wig
     """
     wig_dir = self.rdir_path('wigs')
     mod_utils.make_dir(wig_dir)
     # Any kind of subtraction restricts the output to the libraries
     # returned by get_normalizable_libs().
     if subtract_background or subtract_control:
         libs_to_write = self.get_normalizable_libs()
     else:
         libs_to_write = self.libs
     batch_path = os.path.join(wig_dir, 'mochi_batch_'+suffix+'.txt')
     # The context manager closes the batch file even if a wig writer raises
     # part-way through the loop.
     with open(batch_path, 'w') as batch_file:
         batch_file.write('SEQUENCE_SET\tFILE_NAME\tDATA_TYPE\tNAME\n')
         for lib in libs_to_write:
             track_name = lib.lib_settings.sample_name+'_'+suffix
             batch_file.write('<replace>\t%s\t<replace>\t%s\n' % (track_name+'.wig.gz', track_name))
             lib.write_mutation_rates_to_wig(os.path.join(wig_dir, track_name),
                                             subtract_background=subtract_background,
                                             subtract_control=subtract_control)
コード例 #7
0
 def annotate_structures(self, exclude_constitutive=False):
     """
     Create the structure output folders and highlight protections on the
     structure for the normalizable libraries.

     Output goes under structures/<kind>/, with an extra
     'exclude_constitutive' subfolder when exclude_constitutive is set.
     The 'colored_by_change' folder is still created, although the
     color_by_change call is disabled in this version.

     :param exclude_constitutive: selects the output subfolder and is
         forwarded to mod_plotting.highlight_structure
     """
     # Extra trailing path component, present only in exclude mode.
     extra = ('exclude_constitutive',) if exclude_constitutive else ()
     for kind in ('protections_highlighted', 'colored_by_change'):
         mod_utils.make_dir(self.rdir_path('structures', kind, *extra))
     mod_plotting.highlight_structure(
         self.get_normalizable_libs(),
         self.rdir_path('structures', 'protections_highlighted', *extra),
         nucleotides_to_count=self.settings.get_property(
             'affected_nucleotides'),
         exclude_constitutive=exclude_constitutive)
コード例 #8
0
ファイル: mod_seq_main.py プロジェクト: borisz264/mod_seq
    def remove_adaptor(self):
        """
        Trim adaptors from every library in parallel.

        Nothing is done when 'force_retrim' is off and every library
        already has adaptorless reads, or when 'trim_adaptor' is off.
        """
        settings = self.settings
        if not settings.get_property('force_retrim') and all(
                lib_settings.adaptorless_reads_exist()
                for lib_settings in settings.iter_lib_settings()):
            # Every library is already trimmed and no re-trim was requested.
            return
        if not settings.get_property('trim_adaptor'):
            return

        settings.write_to_log( 'trimming adaptors')
        mod_utils.make_dir(self.rdir_path('adaptor_removed'))
        mod_utils.parmap(
            lambda lib_setting: self.remove_adaptor_one_lib(lib_setting),
            settings.iter_lib_settings(),
            nprocs=self.threads)
        settings.write_to_log( 'trimming adaptors done')
コード例 #9
0
ファイル: mod_seq_main.py プロジェクト: borisz264/mod_seq
 def trim_reads(self):
     """
     Trim the reads of every library in parallel.

     Trimming by a given amount removes potential random barcoding
     sequences from the 5' end. Trimming from the 3' end can also help when
     mapping is problematic, by reducing the chance that indels prevent
     mapping.

     The work is skipped when 'force_retrim' is off and trimmed reads
     already exist for every library.
     :return: None
     """
     settings = self.settings
     settings.write_to_log( 'trimming reads')
     if not settings.get_property('force_retrim') and all(
             lib_settings.trimmed_reads_exist()
             for lib_settings in settings.iter_lib_settings()):
         return
     mod_utils.make_dir(self.rdir_path('trimmed_reads'))
     mod_utils.parmap(
         lambda lib_setting: self.trim_one_lib(lib_setting),
         settings.iter_lib_settings(),
         nprocs=self.threads)
     settings.write_to_log('trimming reads complete')
コード例 #10
0
 def remove_adaptor(self):
     """
     Remove adaptors from every library, running several libraries in
     parallel and splitting the available threads between them.

     If every library already has adaptorless reads, the existing files are
     used and nothing is rerun.
     """
     self.settings.write_to_log('removing adaptors with skewer')
     all_settings = list(self.settings.iter_lib_settings())
     if all(lib_settings.adaptorless_reads_exist()
            for lib_settings in all_settings):
         self.settings.write_to_log('using existing adaptor-trimmed reads')
         return
     mod_utils.make_dir(self.rdir_path('adaptor_removed'))
     # all_settings is non-empty here: an empty list takes the early return.
     num_instances = min(len(all_settings), self.threads)
     # Floor division keeps the per-instance thread count an integer even
     # under true division (Python 3 / `from __future__ import division`).
     threads_per_instance = self.threads // num_instances
     mod_utils.parmap(lambda lib_setting: self.remove_adaptor_one_lib(
         lib_setting, threads_per_instance),
                      all_settings,
                      nprocs=num_instances)
     self.settings.write_to_log('removing adaptors done')
コード例 #11
0
ファイル: mod_seq_main.py プロジェクト: borisz264/mod_seq
 def collapse_identical_reads(self):
     """
     Run the read-collapsing step on every library in parallel.

     The original docstring states that identical reads are collapsed using
     the FASTX toolkit. When the 'collapse_identical_reads' setting is off,
     fastq_to_fasta is run on each library instead of
     collapse_one_fastq_file.

     The work is skipped when 'force_recollapse' is off and collapsed reads
     already exist for every library.
     :return: None
     """
     settings = self.settings
     settings.write_to_log('collapsing reads')
     if not settings.get_property('force_recollapse') and all(
             lib_settings.collapsed_reads_exist()
             for lib_settings in settings.iter_lib_settings()):
         return
     mod_utils.make_dir(self.rdir_path('collapsed_reads'))
     # Choose the per-library worker once, then fan out over the libraries.
     if settings.get_property('collapse_identical_reads'):
         per_library_step = self.collapse_one_fastq_file
     else:
         per_library_step = self.fastq_to_fasta
     mod_utils.parmap(lambda lib_setting: per_library_step(lib_setting),
                      settings.iter_lib_settings(),
                      nprocs=self.threads)
     settings.write_to_log('collapsing reads complete')
コード例 #12
0
 def trim_reads(self):
     """
     Trim the reads of every library, running several libraries in parallel
     and splitting the available threads between them.

     Trimming by a given amount removes potential random barcoding
     sequences from the 5' end. Trimming from the 3' end can also help when
     mapping is problematic, by reducing the chance that indels prevent
     mapping.

     If trimmed reads already exist for every library, they are reused.
     :return: None
     """
     self.settings.write_to_log('trimming reads with seqtk')
     all_settings = list(self.settings.iter_lib_settings())
     if all(lib_settings.trimmed_reads_exist()
            for lib_settings in all_settings):
         self.settings.write_to_log('using existing trimmed reads')
         return
     mod_utils.make_dir(self.rdir_path('trimmed_reads'))
     # all_settings is non-empty here: an empty list takes the early return.
     num_instances = min(len(all_settings), self.threads)
     # Floor division keeps the thread count an integer under true division;
     # one thread per instance is held back, with a minimum of one.
     threads_per_instance = max((self.threads // num_instances) - 1, 1)
     # The pool size is num_instances, the value threads_per_instance was
     # derived from (previously self.threads, unlike the sibling steps).
     mod_utils.parmap(lambda lib_setting: self.trim_one_lib(
         lib_setting, threads_per_instance),
                      all_settings,
                      nprocs=num_instances)
     self.settings.write_to_log('trimming reads complete')
コード例 #13
0
 def run_shapemapper(self):
     """
     Run shapemapper on every library, several libraries at a time, with
     the available threads split between the parallel instances.

     Existing output is reused when need_to_run_shapemapper() is false.
     :return: None
     """
     self.settings.write_to_log('running shapemapper')
     if self.need_to_run_shapemapper():
         mod_utils.make_dir(self.rdir_path('shapemapper'))
         all_settings = list(self.settings.iter_lib_settings())
         num_instances = min(len(all_settings), self.threads)
         # Floor division keeps the per-instance thread count an integer
         # even under true division (Python 3 / __future__ division).
         # NOTE(review): this still raises ZeroDivisionError when there are
         # no libraries or self.threads is 0, as the original did.
         threads_per_instance = self.threads // num_instances
         mod_utils.parmap(lambda lib_setting: self.run_single_shapemapper(
             lib_setting, threads_per_instance),
                          all_settings,
                          nprocs=num_instances)
     else:
         self.settings.write_to_log('using existing shapemapper output')
     self.settings.write_to_log('done running shapemapper')
コード例 #14
0
 def map_reads(self):
     """
     Map the reads of every library (with STAR, per the original docstring),
     several libraries at a time, with the available threads split between
     the parallel instances.

     Nothing is rerun when mapped reads already exist for every library.
     :return: None
     """
     self.settings.write_to_log('mapping reads')
     all_settings = list(self.settings.iter_lib_settings())
     if all(lib_settings.mapped_reads_exist()
            for lib_settings in all_settings):
         return
     mod_utils.make_dir(self.rdir_path('mapped_reads'))
     # all_settings is non-empty here: an empty list takes the early return.
     num_instances = min(len(all_settings), self.threads)
     # Floor division keeps the thread count an integer under true division;
     # one thread per instance is held back, with a minimum of one.
     threads_per_instance = max(self.threads // num_instances - 1, 1)
     mod_utils.parmap(lambda lib_setting: self.map_one_library(
         lib_setting, threads_per_instance),
                      all_settings,
                      nprocs=num_instances)
     self.settings.write_to_log('finished mapping reads')
コード例 #15
0
def main():
    """
    Command-line entry point: write wigs, pickles and plots for a set of
    pickled datasets, before and after background subtraction.

    Arguments are taken from sys.argv, in order: output folder, genome
    fasta, normalization pickle, then any number of experimental pickles.
    Fewer than three arguments makes the initial unpacking raise ValueError.

    A failure while drawing one dataset's pie chart is reported on stderr
    and does not stop the remaining datasets from being processed.
    """
    outfolder, genome_fasta, normalization_file_name = sys.argv[1:4]
    experimental_file_names = sys.argv[4:]

    def dataset_name(file_name):
        # Base name with its last two dot-separated pieces removed.
        return '.'.join(os.path.basename(file_name).split('.')[:-2])

    def in_outfolder(name):
        return os.path.join(outfolder, name)

    mod_utils.make_dir(outfolder)
    normalization_dict = mod_utils.unPickle(normalization_file_name)
    norm_name = dataset_name(normalization_file_name)
    experimental_dict_names = [dataset_name(file_name) for file_name in experimental_file_names]
    experimental_dicts = [mod_utils.unPickle(file_name) for file_name in experimental_file_names]

    normed_mutation_rate_histogram(experimental_dicts, experimental_dict_names,
                                   in_outfolder('mutation_rate_histogram'), title='nonzero positions')
    background_subtracted_sets = []
    write_wig(normalization_dict, norm_name, in_outfolder(norm_name))
    for name, experimental_dict in zip(experimental_dict_names, experimental_dicts):
        subtracted_name = name+'_subtracted'
        write_wig(experimental_dict, name, in_outfolder(name))
        background_subtracted = subtract_background(experimental_dict, normalization_dict)
        background_subtracted_sets.append(background_subtracted)
        mod_utils.makePickle(background_subtracted, in_outfolder(subtracted_name+'.pkl'))
        write_wig(background_subtracted, subtracted_name, in_outfolder(subtracted_name))
        try:
            plot_weighted_nts_pie(background_subtracted, genome_fasta,
                                  '%s backround-subtracted fractions' % name,
                                  in_outfolder(name+'_sub_pie'))
        except Exception as error:
            # The pie chart stays best-effort, but the failure is reported,
            # and KeyboardInterrupt/SystemExit are no longer swallowed.
            sys.stderr.write('warning: could not plot pie chart for %s: %s\n' % (name, error))
    normed_mutation_rate_histogram(background_subtracted_sets, experimental_dict_names,
                                   in_outfolder('back_subtracted_mutation_rate_histogram'),
                                   title = 'nonzero positions, background subtracted')
コード例 #16
0
ファイル: mod_seq_main.py プロジェクト: borisz264/mod_seq
 def make_tables(self, exclude_constitutive=False):
     """
     Create the table and pickle output folders, then write every pickle,
     wig and tsv output.

     :param exclude_constitutive: forwarded to the per-variant mutation-rate
         pickles and tsv files. The fold-change pickle always uses
         exclude_constitutive=True, and the combined tsv is written both
         without and with it.
     """
     for subfolder in ('raw', 'background_subtracted', 'control_subtracted', 'fold_change'):
         for extra in ((), ('exclude_constitutive',)):
             for top_level in ('tables', 'pickles'):
                 mod_utils.make_dir(self.rdir_path(top_level, subfolder, *extra))

     # (pickle name, tsv name, wig suffix, subtraction flags) per variant.
     variants = (
         ('mutation_rates.pkl', 'mutation_rates.tsv', '', {}),
         ('back_subtracted_mutation_rates.pkl', 'back_subtracted_mutation_rates.tsv',
          'back_subtract', {'subtract_background': True}),
         ('control_subtracted_mutation_rates.pkl', 'control_subtracted_mutation_rates.tsv',
          'control_subtract', {'subtract_control': True}),
     )
     for pickle_name, _, _, flags in variants:
         self.pickle_mutation_rates(pickle_name, exclude_constitutive=exclude_constitutive, **flags)
     self.pickle_fold_changes('mutation_rate_fold_changes.pkl', exclude_constitutive=True)
     for _, _, wig_suffix, flags in variants:
         self.write_wigs(wig_suffix, **flags)
     for _, tsv_name, _, flags in variants:
         self.write_mutation_rates_tsv(tsv_name, exclude_constitutive=exclude_constitutive, **flags)
     self.write_combined_mutation_rates_tsv()
     self.write_combined_mutation_rates_tsv(exclude_constitutive=True)
コード例 #17
0
    def make_tables(self, exclude_constitutive=False):
        """
        Create the RT-stop and mutation table folders, then write the wig
        and tsv outputs.

        Only the 'raw' and 'fold_change' subfolders are created. The pickle
        outputs, the background-subtracted outputs and the subtracted wig
        files present in the original code were disabled there and are not
        produced.

        :param exclude_constitutive: forwarded to write_mutation_rates_tsv
        """
        for subfolder in ('raw', 'fold_change'):
            for extra in ((), ('exclude_constitutive',)):
                for table_root in ('rt_stop_tables', 'mutation_tables'):
                    mod_utils.make_dir(
                        self.rdir_path(table_root, subfolder, *extra))

        self.write_wigs('')
        self.write_mutation_rates_tsv(
            'mutation_rates.tsv', exclude_constitutive=exclude_constitutive)
        # Control-subtracted rates are written with and without lowess
        # correction.
        control_subtracted_outputs = (
            ('control_subtracted_mutation_rates_lowess.tsv', True),
            ('control_subtracted_mutation_rates.tsv', False),
        )
        for tsv_name, use_lowess in control_subtracted_outputs:
            self.write_mutation_rates_tsv(
                tsv_name,
                subtract_control=True,
                exclude_constitutive=exclude_constitutive,
                lowess_correct=use_lowess)

        self.write_combined_mutation_rates_tsv()
        self.write_combined_mutation_counts_tsv()
        for value_type in ('rpm', 'count', 'score'):
            self.write_combined_rt_stop_tsv(type=value_type)
コード例 #18
0
ファイル: mod_seq_main.py プロジェクト: borisz264/mod_seq
 def annotate_structures(self, exclude_constitutive=False):
     """
     Create the structure output folders, then highlight protections and
     color by change for the normalizable libraries.

     Output goes under structures/<kind>/, with an extra
     'exclude_constitutive' subfolder when exclude_constitutive is set.

     :param exclude_constitutive: selects the output subfolder and is
         forwarded to both mod_plotting calls
     """
     # Extra trailing path component, present only in exclude mode.
     extra = ('exclude_constitutive',) if exclude_constitutive else ()
     for kind in ('protections_highlighted', 'colored_by_change'):
         mod_utils.make_dir(self.rdir_path('structures', kind, *extra))
     mod_plotting.highlight_structure(
         self.get_normalizable_libs(),
         self.rdir_path('structures', 'protections_highlighted', *extra),
         nucleotides_to_count=self.settings.get_property('affected_nucleotides'),
         exclude_constitutive=exclude_constitutive)
     mod_plotting.color_by_change(
         self.get_normalizable_libs(),
         self.rdir_path('structures', 'colored_by_change', *extra),
         nucleotides_to_count=self.settings.get_property('affected_nucleotides'),
         exclude_constitutive=exclude_constitutive)
コード例 #19
0
    def process_settings(self, settings_file):
        """
        Read the settings file, convert each option to its Python type, and
        store the results on this object.

        - options from all sections are flattened into a single dict; every
          section that has at least one option is also stored under its own
          name with the value True
        - the dict is stored in self.settings
        - the results directory is created and the settings file is copied
          into it

        Inconsistent sample lists only produce a printed warning; processing
        continues.

        :param settings_file: path of the settings file to read
        :raises KeyError: if a required option is missing
        :raises ValueError: if an int/float option cannot be converted, or a
            boolean option is neither "true" nor "false"
        :raises AssertionError: if a listed fastq.gz file does not exist
        """
        int_keys = [
            'first_base_to_keep', 'last_base_to_keep',
            'min_post_adaptor_length', 'min_base_quality',
            'min_mapping_quality'
        ]
        float_keys = [
            'confidence_interval_cutoff', 'fold_change_cutoff',
            'winsorization_upper_limit'
        ]
        str_keys = [
            'adaptor_sequence', 'rrna_fasta', 'experiment_name',
            'affected_nucleotides', 'pymol_base_script',
            'pymol_base_script_colorchange', 'tptn_file_18s', 'tptn_file_25s'
        ]
        boolean_keys = ['make_interactive_plots']
        list_str_keys = [
            'fastq_gz_files', 'sample_names', 'experimentals',
            'no_mod_controls', 'with_mod_controls', 'exclude_constitutive'
        ]
        config = ConfigParser.ConfigParser()
        config.read(settings_file)
        settings = {}
        for section in config.sections():
            for option in config.options(section):
                settings[option] = config.get(section, option)
                settings[section] = True
        for k in int_keys:
            settings[k] = int(settings[k])
        for k in str_keys:
            # No conversion needed; the lookup still raises KeyError when a
            # required option is missing.
            settings[k] = settings[k]
        for k in float_keys:
            settings[k] = float(settings[k])
        for k in boolean_keys:
            lowered = settings[k].lower()
            if lowered not in ('true', 'false'):
                raise ValueError('Boolean value %s must be "true" or "false"' %
                                 k)
            settings[k] = lowered == 'true'
        # List-valued options are written as JSON in the settings file.
        for k in list_str_keys:
            settings[k] = simplejson.loads(settings[k])
        self.fqdir = settings['fastq_dir']
        self.sample_names = settings['sample_names']
        self.experimentals = settings['experimentals']
        self.no_mod_controls = settings['no_mod_controls']
        self.with_mod_controls = settings['with_mod_controls']
        self.exclude_constitutive = settings['exclude_constitutive']

        # These checks are explicit conditions rather than assert inside a
        # bare except, so they still run under `python -O` and no longer
        # hide unrelated errors. A single parenthesised string prints the
        # same text under the Python 2 print statement and under print().
        if not (len(self.experimentals) == len(self.no_mod_controls) ==
                len(self.with_mod_controls)):
            print('error: experimentals, no_mod_controls, and with_mod_controls should all be the same length')
            print('for mutation rate purposes, its ok to reuse a dataset here, it really doesnt matter')
        for sample_name in self.experimentals + self.no_mod_controls + self.with_mod_controls:
            if sample_name not in self.sample_names:
                # Only the first unknown sample name is reported.
                print('%s  not in sample names, make sure you are using regular quotation marks' % (sample_name,))
                break

        self.fastq_gz_file_handles = [
            os.path.join(self.fqdir, fastq_gz_file)
            for fastq_gz_file in settings['fastq_gz_files']
        ]
        for file_handle in self.fastq_gz_file_handles:
            # AssertionError is kept as the exception type for existing
            # callers, but raised explicitly so -O cannot skip the check.
            if not mod_utils.file_exists(file_handle):
                raise AssertionError(
                    'fastq.gz file not found: %s' % file_handle)
        self.settings = settings
        self.rdir = settings['results_dir']
        mod_utils.make_dir(self.rdir)
        shutil.copy(settings_file, self.rdir)
コード例 #20
0
 def get_wdir(self):
     """
     Return self.wdir after passing it to mod_utils.make_dir.

     :return: the value of self.wdir
     """
     working_dir = self.wdir
     mod_utils.make_dir(working_dir)
     return working_dir
コード例 #21
0
ファイル: mod_settings.py プロジェクト: borisz264/mod_seq
 def get_wdir(self):
     """
     Pass self.wdir to mod_utils.make_dir, then return it.

     :return: the value of self.wdir
     """
     wdir = self.wdir
     mod_utils.make_dir(wdir)
     return wdir
コード例 #22
0
ファイル: mod_settings.py プロジェクト: borisz264/mod_seq
    def process_settings(self, settings_file):
        """
        Read the settings file, convert each option to its Python type, and
        store the results on this object.

        - options from all sections are flattened into a single dict; every
          section that has at least one option is also stored under its own
          name with the value True
        - the dict is stored in self.settings
        - the results directory is created and the settings file is copied
          into it

        Inconsistent sample lists only produce a printed warning; processing
        continues.

        :param settings_file: path of the settings file to read
        :raises KeyError: if a required option is missing
        :raises ValueError: if an int/float option cannot be converted, or a
            boolean option is neither "true" nor "false"
        :raises AssertionError: if a listed fastq.gz file does not exist
        """
        int_keys = [ 'first_base_to_keep', 'last_base_to_keep', 'min_post_adaptor_length', 'min_base_quality', 'min_mapping_quality']
        float_keys = ['confidence_interval_cutoff', 'fold_change_cutoff']
        str_keys = ['adaptor_sequence', 'rrna_fasta', 'experiment_name', 'shapemapper_ref_file', 'affected_nucleotides', 'pymol_base_script', 'pymol_base_script_colorchange', 'tptn_file_18s', 'tptn_file_25s', 'functional_groupings']
        boolean_keys = ['collapse_identical_reads', 'force_read_resplit', 'force_remapping', 'force_recollapse',
                        'force_recount', 'force_index_rebuild', 'force_retrim', 'trim_adaptor', 'discard_untrimmed', 'force_shapemapper',
                        'make_interactive_plots']
        list_str_keys = ['fastq_gz_files', 'sample_names', 'experimentals', 'no_mod_controls', 'with_mod_controls', 'exclude_constitutive']
        config = ConfigParser.ConfigParser()
        config.read(settings_file)
        settings = {}
        for section in config.sections():
            for option in config.options(section):
                settings[option] = config.get(section, option)
                settings[section] = True
        for k in int_keys:
            settings[k] = int(settings[k])
        for k in str_keys:
            # No conversion needed; the lookup still raises KeyError when a
            # required option is missing.
            settings[k] = settings[k]
        for k in float_keys:
            settings[k] = float(settings[k])
        for k in boolean_keys:
            lowered = settings[k].lower()
            if lowered not in ('true', 'false'):
                raise ValueError(
                  'Boolean value %s must be "true" or "false"' % k)
            settings[k] = lowered == 'true'
        # List-valued options are written as JSON in the settings file.
        for k in list_str_keys:
            settings[k] = simplejson.loads(settings[k])
        self.fqdir = settings['fastq_dir']
        self.sample_names = settings['sample_names']
        self.experimentals = settings['experimentals']
        self.no_mod_controls = settings['no_mod_controls']
        self.with_mod_controls = settings['with_mod_controls']
        self.exclude_constitutive = settings['exclude_constitutive']

        # These checks are explicit conditions rather than assert inside a
        # bare except, so they still run under `python -O` and no longer
        # hide unrelated errors. A single parenthesised string prints the
        # same text under the Python 2 print statement and under print().
        if not (len(self.experimentals) == len(self.no_mod_controls) == len(self.with_mod_controls)):
            print('error: experimentals, no_mod_controls, and with_mod_controls should all be the same length')
            print('for mutation rate purposes, its ok to reuse a dataset here, it really doesnt matter')
        for sample_name in self.experimentals+self.no_mod_controls+self.with_mod_controls:
            if sample_name not in self.sample_names:
                # Only the first unknown sample name is reported.
                print('%s  not in sample names, make sure you are using regular quotation marks' % (sample_name,))
                break

        self.fastq_gz_file_handles = [os.path.join(self.fqdir, fastq_gz_file) for fastq_gz_file in
                                      settings['fastq_gz_files']]
        for file_handle in self.fastq_gz_file_handles:
            # AssertionError is kept as the exception type for existing
            # callers, but raised explicitly so -O cannot skip the check.
            if not mod_utils.file_exists(file_handle):
                raise AssertionError('fastq.gz file not found: %s' % file_handle)
        self.settings = settings
        self.rdir = settings['results_dir']
        mod_utils.make_dir(self.rdir)
        shutil.copy(settings_file, self.rdir)
コード例 #23
0
    def make_plots(self, exclude_constitutive=False):
        """Write the mutation and RT-stop summary plots under the results directory.

        The output folders are created first; each plot is then produced by a
        ``mod_plotting`` function, in a fixed order. When
        ``exclude_constitutive`` is true, the plots go into an
        ``exclude_constitutive`` subfolder of ``mutation_plots`` and
        ``rt_stop_plots`` and every output name gets an
        ``_exclude_constitutive`` suffix. The interactive plots are only made
        when the ``make_interactive_plots`` setting is truthy.

        :param exclude_constitutive: forwarded to the plotting functions
            (the interactive plots always receive ``False`` instead).
        """
        if exclude_constitutive:
            nested = ('exclude_constitutive',)
            file_tag = '_exclude_constitutive'
            #TODO: the names for the ROC curve chromosomes are hard coded and need to be changed between samples
            #mod_plotting.generate_roc_curves(self.settings.get_property('tptn_file_25s'), self.settings.rRNA_seqs, os.path.join(rdir, '23S_ROC_curves'), self.get_modified_libs(), 'E.c.23S_rRNA', self.settings.get_property('affected_nucleotides'))
            #mod_plotting.generate_roc_curves(self.settings.get_property('tptn_file_18s'), self.settings.rRNA_seqs, os.path.join(rdir, '16S_ROC_curves'), self.get_modified_libs(), 'E.c.16S_rRNA', self.settings.get_property('affected_nucleotides'))
        else:
            nested = ()
            file_tag = ''

        # Both plot families use the same layout: <family>[/exclude_constitutive]
        # plus an 'interactive' folder inside it.
        plot_dirs = {}
        for family in ('mutation_plots', 'rt_stop_plots'):
            parts = (family,) + nested
            mod_utils.make_dir(self.rdir_path(*parts))
            mod_utils.make_dir(self.rdir_path(*(parts + ('interactive',))))
            plot_dirs[family] = self.rdir_path(*parts)
        mut_dir = plot_dirs['mutation_plots']
        stop_dir = plot_dirs['rt_stop_plots']

        def mut_out(*parts):
            # Output name inside the mutation plot folder, with the run's tag
            # appended to the last path component.
            return os.path.join(mut_dir, *parts) + file_tag

        def stop_out(stem):
            # Same as mut_out, but for the RT-stop plot folder.
            return os.path.join(stop_dir, stem) + file_tag

        def per_nt(**overrides):
            # Keyword arguments shared by the per-nucleotide plots. The setting
            # is read again for every plot, exactly once per plotting call.
            kwargs = {
                'nucleotides_to_count':
                    self.settings.get_property('affected_nucleotides'),
                'exclude_constitutive': exclude_constitutive,
            }
            kwargs.update(overrides)
            return kwargs

        #MUTATION PLOTS
        mod_plotting.plot_mutated_nts_pie(
            self.libs, mut_out('raw_mutation_fractions'),
            exclude_constitutive=exclude_constitutive)
        mod_plotting.plot_mutation_breakdown_pie(
            self.libs, mut_out('raw_mutation_types'),
            exclude_constitutive=exclude_constitutive)
        mod_plotting.plot_mutated_nts_pie(
            self.libs, mut_out('background_sub_mutation_fractions'),
            subtract_background=True,
            exclude_constitutive=exclude_constitutive)
        mod_plotting.plot_mutation_rate_cdfs(
            self.libs, mut_out('mutation_rate_cdf'), **per_nt())
        mod_plotting.plot_mutation_rate_violins(
            self.libs, mut_out('mutation_rate_violin'), **per_nt())
        mod_plotting.ma_plots_by_count(
            self.get_normalizable_libs(), mut_out('MA_raw_counts'),
            **per_nt())
        mod_plotting.ma_plots_by_count(
            self.get_normalizable_libs(), mut_out('MA_raw_counts_lowess'),
            **per_nt(lowess_correct=True))
        mod_plotting.mutation_rate_scatter(
            self.get_normalizable_libs(), mut_out('scatter_mismatch_rate'),
            **per_nt())

        if self.settings.get_property('make_interactive_plots'):
            # NOTE(review): the interactive plots are always called with
            # exclude_constitutive=False, yet their file names still carry
            # file_tag -- confirm this is intended.
            mod_plotting.scatter_interactive(
                self.get_normalizable_libs(),
                mut_out('interactive', 'scatter'),
                **per_nt(exclude_constitutive=False))
            mod_plotting.ma_plots_interactive_by_count(
                self.get_normalizable_libs(),
                mut_out('interactive', 'MA_counts'),
                **per_nt(exclude_constitutive=False))
            mod_plotting.ma_plots_interactive_by_count(
                self.get_normalizable_libs(),
                mut_out('interactive', 'MA_counts_lowess'),
                **per_nt(exclude_constitutive=False, lowess_correct=True))

        #RT STOP PLOTS
        mod_plotting.plot_rt_stop_pie(
            self.libs, stop_out('raw_rt_stops'),
            exclude_constitutive=exclude_constitutive)
        mod_plotting.plot_rt_stop_pie(
            self.libs, stop_out('back_sub_rt_stops'),
            subtract_background=True,
            exclude_constitutive=exclude_constitutive)
        mod_plotting.plot_rt_stop_cdfs(
            self.libs, stop_out('rt_stop_cdf'), **per_nt())
        mod_plotting.plot_rt_stop_violins(
            self.libs, stop_out('rt_stop_violin'), **per_nt())