def make_genome_mapping_index(self):
    """
    Build the STAR index in the 'star_genome_dir' directory when needed.

    The index is built when the directory does not exist yet (it is created
    first), or when it exists and the 'rebuild_star_index' setting is true.
    With 'transcriptome_mapping_only' set, the index is generated from the
    transcriptome FASTA; otherwise from the shell glob formed by appending
    '*.fa' to the genome sequence directory string. STAR's stdout and stderr
    are appended to the log file.

    Raises:
        RuntimeError: if the STAR command exits with a non-zero status.
    """
    star_genome_dir = self.settings.get_property('star_genome_dir')

    if ribo_utils.file_exists(star_genome_dir):
        self.settings.write_to_log(
            'STAR index exists at %s' % star_genome_dir)
        if self.settings.get_property('rebuild_star_index'):
            self.settings.write_to_log('STAR index rebuild forced')
            make_index = True
        else:
            self.settings.write_to_log('using existing STAR index')
            make_index = False
    else:
        make_index = True
        ribo_utils.make_dir(star_genome_dir)

    if make_index:
        self.settings.write_to_log('building STAR index')
        if self.settings.get_property('transcriptome_mapping_only'):
            fasta_files = self.settings.get_transcriptome_FASTA()
        else:
            # Left unquoted on purpose: the shell has to expand the glob.
            fasta_files = '%s*.fa' % self.settings.get_genome_sequence_dir()
        log_file = self.settings.get_log()
        command_to_run = (
            'STAR --runThreadN %d --runMode genomeGenerate --genomeDir %s '
            '--genomeFastaFiles %s --genomeSAsparseD %d 1>>%s 2>>%s' % (
                self.threads, star_genome_dir, fasta_files,
                self.settings.get_property('star_index_sparsity'),
                log_file, log_file))
        self.settings.write_to_log(command_to_run)
        # shell=True is required for the glob and the log redirections.
        return_code = subprocess.Popen(command_to_run, shell=True).wait()
        if return_code != 0:
            # Do not report the index as ready when STAR failed.
            message = 'STAR index build failed with exit code %d' % return_code
            self.settings.write_to_log(message)
            raise RuntimeError(message)

    self.settings.write_to_log('STAR index ready')
def sequence_counts_exist(self):
    """
    Report whether the path returned by get_transcript_counts() exists,
    as determined by ribo_utils.file_exists.
    """
    return ribo_utils.file_exists(self.get_transcript_counts())
def process_settings(self, settings_file):
    """
    Read the settings file and convert each value to its expected type.

    - reads the settings file and converts str to int, float, bool, list
    - every option of every section goes into one flat dict; each section
      name is also stored as a key with the value True
    - stores result in self.settings as a dict()
    - creates the results directory and copies the settings file into it
    - CRITICAL NOTE: All keys must be lower case

    Prints an error and exits the process with status 1 if a FASTQ file or
    one of the required input paths does not exist.

    Raises:
        KeyError: if a required setting is missing.
        ValueError: if an int or float setting cannot be converted, or a
            boolean setting is not "true" or "false" (case-insensitive).
    """
    # TODO: Add new parameters and comments to settings files
    int_keys = [
        'comparison_read_cutoff', 'min_post_trimming_length',
        'max_post_trimming_length', 'sequence_quality_cutoff', 'trim_5p',
        'star_index_sparsity', 'outfiltermultimapnmax',
        'outfiltermismatchnmax', 'alignsjdboverhangmin',
        'alignsjoverhangmin', 'alignintronmax', 'five_prime_p_offset'
    ]
    float_keys = ['atail_purity_cutoff']
    str_keys = [
        'adaptor_3p_sequence', 'star_genome_dir', 'star_ncrna_dir',
        'genome_sequence_dir', 'ncrna_sequence_dir', 'annotation_gtf_file',
        'qc_annotation_gtf_file', 'alignendstype'
    ]
    # if transcriptome_mapping_only is true, will generate a
    # transcriptome-only genome to map to
    boolean_keys = [
        'transcriptome_mapping_only', 'deduplicate_reads', 'force_remapping',
        'force_recount', 'rebuild_star_index', 'force_retrim',
        'make_interactive_plots', 'reads_reversed', 'add_3_for_stop',
        'forbid_non_polya_soft_clip', 'unique_mapping_only'
    ]
    list_str_keys = ['fastq_gz_files', 'sample_names']
    # Settings whose values are paths that must already exist.
    extant_files = ['genome_sequence_dir', 'annotation_gtf_file']

    config = ConfigParser.ConfigParser()
    config.read(settings_file)
    settings = {}
    for section in config.sections():
        for option in config.options(section):
            settings[option] = config.get(section, option)
            settings[section] = True

    for k in int_keys:
        settings[k] = int(settings[k])
    for k in str_keys:
        # Values are already strings; only their presence is enforced.
        if k not in settings:
            raise KeyError(k)
    for k in float_keys:
        settings[k] = float(settings[k])
    for k in boolean_keys:
        value = settings[k].lower()
        if value not in ('true', 'false'):
            raise ValueError('Boolean value %s must be "true" or "false"' % k)
        settings[k] = value == 'true'
    for k in list_str_keys:
        # List-valued settings are written as JSON arrays.
        settings[k] = simplejson.loads(settings[k])

    self.fqdir = settings['fastq_dir']
    self.sample_names = settings['sample_names']
    self.fastq_gz_files = settings['fastq_gz_files']
    self.fastq_gz_file_handles = [
        os.path.join(self.fqdir, fastq_gz_file)
        for fastq_gz_file in self.fastq_gz_files
    ]

    # Explicit checks instead of assert, which is skipped under `python -O`.
    # Exit with a non-zero status so callers can detect the failure.
    for file_handle in self.fastq_gz_file_handles:
        if not ribo_utils.file_exists(file_handle):
            print('ERROR: nonexistent file  %s' % file_handle)
            sys.exit(1)
    for k in extant_files:
        if not ribo_utils.file_exists(settings[k]):
            print('file %s does not exist' % settings[k])
            sys.exit(1)

    self.settings = settings
    self.rdir = settings['results_dir']
    ribo_utils.make_dir(self.rdir)
    shutil.copy(settings_file, self.rdir)
def genome_mapped_reads_exist(self):
    """
    Report whether the path returned by get_genome_mapped_reads() exists,
    as determined by ribo_utils.file_exists.
    """
    return ribo_utils.file_exists(self.get_genome_mapped_reads())
def qc_pickle_exists(self):
    """
    Report whether the path returned by get_qc_pickle() exists, as
    determined by ribo_utils.file_exists.
    """
    return ribo_utils.file_exists(self.get_qc_pickle())
def ncrna_unmapped_reads_exist(self):
    """
    Report whether the path returned by get_ncrna_unmapped_reads() exists,
    as determined by ribo_utils.file_exists.
    """
    return ribo_utils.file_exists(self.get_ncrna_unmapped_reads())
def ncrna_mapping_finished(self):
    """
    Report whether '<sample_name>Log.final.out' exists in the
    'ncrna_mapped_reads' subdirectory of the experiment's results
    directory, as determined by ribo_utils.file_exists.
    """
    results_dir = self.experiment_settings.get_rdir()
    summary_name = '%(sample_name)sLog.final.out' % {
        'sample_name': self.sample_name
    }
    summary_path = os.path.join(results_dir, 'ncrna_mapped_reads',
                                summary_name)
    return ribo_utils.file_exists(summary_path)
def trimmed_reads_exist(self):
    """
    Report whether the path returned by get_trimmed_reads() exists, as
    determined by ribo_utils.file_exists.
    """
    return ribo_utils.file_exists(self.get_trimmed_reads())
def deduplicated_reads_exist(self):
    """
    Report whether the path returned by get_deduplicated_reads() exists,
    as determined by ribo_utils.file_exists.
    """
    return ribo_utils.file_exists(self.get_deduplicated_reads())
def adaptorless_reads_exist(self):
    """
    Report whether the path returned by get_adaptor_trimmed_reads()
    exists, as determined by ribo_utils.file_exists.
    """
    return ribo_utils.file_exists(self.get_adaptor_trimmed_reads())