def check_paired_end(self):
    """Decide whether the input file is read 1 of a paired-end set.

    The basename of ``self.input_file`` is parsed with ``get_pair_info``
    and, if that yields nothing, with ``get_new_pair_info``.  The result is
    indexed as ``(pair_index, second_name, new_name)``.

    Side effects:
        * ``self.paired_end`` is always set (unless an exception is raised).
        * For read 1, ``self.second_file`` and ``self.protoname`` are set
          before the mate is verified and are left in place even when the
          mate turns out to be missing.

    Raises:
        scripter.InvalidFileException: the input is read 2 of a pair; it is
            expected to be picked up through its read-1 partner instead.
    """
    seqfile_name = os.path.basename(self.input_file)
    pair_info = get_pair_info(seqfile_name)
    if pair_info is None:
        pair_info = get_new_pair_info(seqfile_name)

    if pair_info is None:
        scripter.debug('This file contains single-end reads.')
        self.paired_end = False
        return

    pair_index = pair_info[0]
    second_name = pair_info[1]
    new_name = pair_info[2]
    scripter.debug('NOTICE: Detected paired read file.')

    if pair_index == '2':
        scripter.debug('This is the second file, ignoring it.')
        # Carry the offending path in the exception so the caller can
        # report which file was skipped.
        raise scripter.InvalidFileException(self.input_file)

    if pair_index != '1':
        scripter.debug('Failed to find paired end')
        self.paired_end = False
        return

    scripter.debug('Attempting to find second file.')
    self.second_file = os.sep.join([self.input_dir, second_name])
    self.protoname = os.path.splitext(new_name)[0]
    try:
        scripter.assert_path(self.second_file)
    except IOError:
        scripter.debug('Failed to find paired end file')
        self.paired_end = False
    else:
        # Only claim the mate was found once assert_path has accepted it.
        scripter.debug('Found %s', self.second_file)
        self.paired_end = True
def align_once(fp_obj, flags, ref, use_quality=False,
               path_to_bowtie2=None, path_to_samtools=None, logger=None,
               passthru_args=None, **kwargs):
    """Align one input with bowtie2 and leave a sorted, indexed BAM file.

    Stages: ``bowtie2 | samtools view`` (unsorted BAM), ``samtools sort``,
    removal of the unsorted file, ``samtools index``.  A non-zero exit
    status from any external program is reported with ``logger.critical``
    and ends the function early; no exception is raised for it.

    Args:
        fp_obj: parsed-filename object.  This function reads its
            ``input_file``, ``second_file``, ``paired_end`` and
            ``use_pysam`` attributes and calls ``tmp_filename(refname)``
            and ``check_output_dir(directory)``.
        flags: sequence of bowtie2 command-line flags.  It is copied, so
            the caller's object is never modified.
        ref: bowtie2 index given to ``-x``; its basename is passed to
            ``fp_obj.tmp_filename``.
        use_quality: when true, add ``--phred33`` if ``fp_obj.use_pysam``
            is true, otherwise ``--phred64``.
        path_to_bowtie2: bowtie2 executable.
        path_to_samtools: samtools executable.
        logger: logger for progress and subprocess output; defaults to this
            module's logger.
        passthru_args: optional extra arguments placed between ``flags``
            and the file arguments.
        **kwargs: accepted and ignored.

    Returns:
        None, on success and on every logged failure alike.

    Raises:
        Whatever ``assert_path`` raises when the sorted BAM or its index is
        missing after the corresponding samtools step reported success.
    """
    if logger is None:
        logger = logging.getLogger(__name__)
    # Copy: appending to the caller's list would pile up duplicate quality
    # flags when the same list is reused for several alignments.
    flags = list(flags)
    if use_quality:
        flags.append('--phred33' if fp_obj.use_pysam else '--phred64')

    refname = os.path.basename(ref)
    path_to_unsorted = fp_obj.tmp_filename(refname)
    output_dir, output_file = os.path.split(path_to_unsorted)
    fp_obj.check_output_dir(output_dir)

    filename1 = os.path.abspath(fp_obj.input_file)
    second_file = fp_obj.second_file
    filename2 = None
    if second_file is not None:
        filename2 = os.path.abspath(second_file)

    if fp_obj.paired_end:
        file_args = ['-x', ref, '-1', filename1, '-2', filename2]
    else:
        file_args = ['-x', ref, '-U', filename1]
    bowtie2_args = ([path_to_bowtie2] + flags +
                    list(passthru_args or []) + file_args)

    # Stage 1: bowtie2 piped straight into `samtools view`.
    bowtie2_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    logger.info('Launching bowtie2 (output will be piped to samtools for BAM '
                'encoding)')
    logger.info(' '.join(bowtie2_args))
    bowtie2_aligner = Popen(bowtie2_args, stdout=PIPE,
                            stderr=bowtie2_stderr.w, bufsize=-1)

    samtools_args = [path_to_samtools, 'view', '-b', '-S', '-o',
                     path_to_unsorted, '-']
    logger.info('Launching samtools to encode bowtie2 output as BAM')
    logger.info(' '.join(samtools_args))
    samtools_stdout = PolledPipe(logger=logger, level=logging.WARN)
    samtools_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    samtools_viewer = Popen(samtools_args, stdin=bowtie2_aligner.stdout,
                            stdout=samtools_stdout.w,
                            stderr=samtools_stderr.w, bufsize=-1)
    # Drop our copy of the pipe's read end so bowtie2 gets SIGPIPE (instead
    # of blocking forever on a full pipe) if samtools exits early.
    bowtie2_aligner.stdout.close()

    logger.debug('Waiting for bowtie2 to finish')
    pollables = [bowtie2_stderr, samtools_stdout, samtools_stderr]
    wait_for_job(bowtie2_aligner, pollables, logger)
    if bowtie2_aligner.returncode != 0:
        logger.critical("bowtie2 did not run properly [%d]",
                        bowtie2_aligner.returncode)
        samtools_viewer.terminate()
        samtools_viewer.poll()
        logger.critical("samtools terminated")
        return
    logger.debug('Alignment successfully completed')

    logger.debug('Waiting for samtools to finish')
    wait_for_job(samtools_viewer, [samtools_stdout, samtools_stderr], logger)
    if samtools_viewer.returncode != 0:
        logger.critical("samtools view did not run properly [%d]",
                        samtools_viewer.returncode)
        return
    logger.debug('Unsorted BAM file successfully written')

    # Stage 2: sort.  Runs inside output_dir, so bare file names are used.
    # NOTE(review): this is the `samtools sort <in> <out-prefix>` calling
    # form; the code relies on samtools appending '.bam' to the prefix (see
    # the assert_path below).  Confirm against the installed samtools.
    logger.info('Launching samtools again to sort BAM output')
    bam_file = os.path.splitext(output_file)[0]
    sorted_name = bam_file + '.bam'
    sorter_args = [path_to_samtools, 'sort', output_file, bam_file]
    logger.info(' '.join(sorter_args))
    samtools_stdout = PolledPipe(logger=logger, level=logging.WARN)
    samtools_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    samtools_sorter = Popen(sorter_args, stdout=samtools_stdout.w,
                            stderr=samtools_stderr.w, cwd=output_dir)
    wait_for_job(samtools_sorter, [samtools_stdout, samtools_stderr], logger)
    if samtools_sorter.returncode != 0:
        logger.critical("samtools sort did not run properly [%d]",
                        samtools_sorter.returncode)
        return

    # don't destroy the files until we're sure we succeeded!
    assert_path(os.path.join(output_dir, sorted_name))
    logger.debug('Removing unsorted file %s', path_to_unsorted)
    os.remove(path_to_unsorted)

    # Stage 3: index the sorted BAM.
    logger.debug('Launching samtools again to index sorted BAM output')
    samtools_stdout = PolledPipe(logger=logger, level=logging.WARN)
    samtools_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    index_args = [path_to_samtools, 'index', sorted_name]
    samtools_indexer = Popen(index_args, stdout=samtools_stdout.w,
                             stderr=samtools_stderr.w, cwd=output_dir)
    wait_for_job(samtools_indexer, [samtools_stdout, samtools_stderr], logger)
    if samtools_indexer.returncode != 0:
        logger.critical("samtools index did not run properly [%d]",
                        samtools_indexer.returncode)
        return

    # Make sure indexing succeeds
    assert_path(os.path.join(output_dir, sorted_name + '.bai'))
def align_once(fp_obj, flags, ref, match_type, use_quality=False,
               quals_type='solexa1.3', path_to_bowtie=None,
               path_to_samtools=None, logger=None, **kwargs):
    """Align one input with bowtie and leave a sorted, indexed BAM file.

    Stages: ``converter | bowtie | samtools view`` (unsorted BAM),
    ``samtools sort``, removal of the unsorted file, ``samtools index``.
    Reads reach bowtie on stdin from a ``seriesoftubes.converters.*``
    subprocess chosen from ``fp_obj.use_pysam``, ``fp_obj.paired_end`` and
    ``fp_obj.format``.  Failures of external programs, and an input that no
    converter handles, are reported with ``logger.critical`` and end the
    function early; no exception is raised for them.

    Args:
        fp_obj: parsed-filename object.  This function reads its
            ``input_file``, ``second_file``, ``paired_end``, ``use_pysam``
            and ``format`` attributes and calls
            ``tmp_filename(refname, match_type)`` and
            ``check_output_dir(directory)``.
        flags: sequence of bowtie command-line flags.  It is copied, so the
            caller's object is never modified.
        ref: bowtie index; its basename is passed to
            ``fp_obj.tmp_filename``.
        match_type: forwarded to ``fp_obj.tmp_filename``.
        use_quality: when true, add ``--phred33-quals`` if
            ``fp_obj.use_pysam`` is true, otherwise
            ``--<quals_type>-quals``.
        quals_type: quality-scale name used to build that flag.
        path_to_bowtie: bowtie executable.
        path_to_samtools: samtools executable.
        logger: logger for progress and subprocess output; defaults to this
            module's logger.
        **kwargs: accepted and ignored.

    Returns:
        None, on success and on every logged failure alike.

    Raises:
        Whatever ``assert_path`` raises when the sorted BAM or its index is
        missing after the corresponding samtools step reported success.
    """
    if logger is None:
        logger = logging.getLogger(__name__)
    # Copy: appending to the caller's list would pile up duplicate quality
    # flags when the same list is reused for several alignments.
    flags = list(flags)

    refname = os.path.basename(ref)
    path_to_unsorted = fp_obj.tmp_filename(refname, match_type)
    output_dir, output_file = os.path.split(path_to_unsorted)
    fp_obj.check_output_dir(output_dir)

    filename1 = os.path.abspath(fp_obj.input_file)
    second_file = fp_obj.second_file
    filename2 = None
    if second_file is not None:
        filename2 = os.path.abspath(second_file)

    if use_quality:
        if fp_obj.use_pysam:
            flags.append('--phred33-quals')
        else:
            flags.append('--' + quals_type + '-quals')

    if fp_obj.paired_end:
        file_args = [ref, '--12', '-']
        logger.info('Automagically interpreting %s files', fp_obj.format)
    else:
        logger.info('Automagically interpreting %s file', fp_obj.format)
        file_args = [ref, '-']
    bowtie_args = [path_to_bowtie] + flags + file_args

    # Pick the converter that feeds reads to bowtie's stdin.
    converter = [sys.executable, '-m']
    if fp_obj.use_pysam:
        if fp_obj.paired_end:
            in_args = converter + ['seriesoftubes.converters.bamtotab',
                                   filename1]
        else:
            in_args = converter + ['seriesoftubes.converters.bamtofastq',
                                   '--no-gzip', filename1]
    elif fp_obj.paired_end and fp_obj.format == 'FASTQ':
        in_args = converter + ['seriesoftubes.converters.fastqtotab',
                               filename1, filename2]
    elif fp_obj.format == 'FASTQ':
        in_args = converter + ['seriesoftubes.converters.cat', filename1]
    else:
        logger.critical("Couldn't figure out what to do with file "
                        "%s of format %s",
                        fp_obj.input_file, fp_obj.format)
        # Nothing can be launched without a converter command; stop here
        # rather than failing on the undefined in_args below.
        return

    # Stage 1: converter | bowtie | samtools view.
    input_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    bowtie_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    logger.info(' '.join(in_args))
    input_reader = Popen(in_args, stdout=PIPE, stderr=input_stderr.w,
                         bufsize=-1)
    logger.info('Launching bowtie (output will be piped to samtools)')
    logger.info(' '.join(bowtie_args))
    bowtie_aligner = Popen(bowtie_args, stdin=input_reader.stdout,
                           stdout=PIPE, stderr=bowtie_stderr.w, bufsize=-1)
    # Drop our copy of each pipe's read end once the consumer owns it, so
    # the producer gets SIGPIPE (instead of blocking forever on a full
    # pipe) if its consumer exits early.
    input_reader.stdout.close()

    samtools_args = [path_to_samtools, 'view', '-b', '-S', '-o',
                     path_to_unsorted, '-']
    logger.info('Launching samtools to encode bowtie output as BAM')
    logger.info(' '.join(samtools_args))
    samtools_stdout = PolledPipe(logger=logger, level=logging.WARN)
    samtools_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    samtools_viewer = Popen(samtools_args, stdin=bowtie_aligner.stdout,
                            stdout=samtools_stdout.w,
                            stderr=samtools_stderr.w, bufsize=-1)
    bowtie_aligner.stdout.close()

    logger.debug('Waiting for bowtie to finish')
    pollables = [input_stderr, bowtie_stderr,
                 samtools_stdout, samtools_stderr]
    wait_for_job(bowtie_aligner, pollables, logger)
    if bowtie_aligner.returncode != 0:
        logger.critical("bowtie did not run properly [%d]",
                        bowtie_aligner.returncode)
        samtools_viewer.terminate()
        samtools_viewer.poll()
        logger.critical("samtools terminated")
        return
    logger.debug('Alignment successfully completed')

    logger.debug('Waiting for samtools to finish')
    wait_for_job(samtools_viewer, [samtools_stdout, samtools_stderr], logger)
    if samtools_viewer.returncode != 0:
        logger.critical("samtools view did not run properly [%d]",
                        samtools_viewer.returncode)
        return
    logger.debug('Unsorted BAM file successfully written')

    # Stage 2: sort.  Runs inside output_dir, so bare file names are used.
    # NOTE(review): this is the `samtools sort <in> <out-prefix>` calling
    # form; the code relies on samtools appending '.bam' to the prefix (see
    # the assert_path below).  Confirm against the installed samtools.
    logger.info('Launching samtools again to sort BAM output')
    bam_file = os.path.splitext(output_file)[0]
    sorted_name = bam_file + '.bam'
    sorter_args = [path_to_samtools, 'sort', output_file, bam_file]
    logger.info(' '.join(sorter_args))
    samtools_stdout = PolledPipe(logger=logger, level=logging.WARN)
    samtools_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    samtools_sorter = Popen(sorter_args, stdout=samtools_stdout.w,
                            stderr=samtools_stderr.w, cwd=output_dir)
    wait_for_job(samtools_sorter, [samtools_stdout, samtools_stderr], logger)
    if samtools_sorter.returncode != 0:
        logger.critical("samtools sort did not run properly [%d]",
                        samtools_sorter.returncode)
        return

    # don't destroy the files until we're sure we succeeded!
    assert_path(os.path.join(output_dir, sorted_name))
    logger.debug('Removing unsorted file %s', path_to_unsorted)
    os.remove(path_to_unsorted)

    # Stage 3: index the sorted BAM.
    logger.debug('Launching samtools again to index sorted BAM output')
    samtools_stdout = PolledPipe(logger=logger, level=logging.WARN)
    samtools_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    index_args = [path_to_samtools, 'index', sorted_name]
    samtools_indexer = Popen(index_args, stdout=samtools_stdout.w,
                             stderr=samtools_stderr.w, cwd=output_dir)
    wait_for_job(samtools_indexer, [samtools_stdout, samtools_stderr], logger)
    if samtools_indexer.returncode != 0:
        logger.critical("samtools index did not run properly [%d]",
                        samtools_indexer.returncode)
        return

    # Make sure indexing succeeds
    assert_path(os.path.join(output_dir, sorted_name + '.bai'))
def __init__(self, filename, verbose=False, *args, **kwargs):
    """Parse *filename* and work out whether it is read 1 of a pair.

    After the parent initialiser has run, the basename of
    ``self.input_file`` is examined in two ways:

    * new style: ``get_new_pair_info`` returns something whose first item
      is the read tag (``'R1'``/``'R2'``) and whose second item is the
      mate's file name;
    * otherwise, a name with at least three underscores is split on ``_``
      and its third field (``'1'``/``'2'``) is taken as the read number.

    When the mate of a read-1 file exists, ``self.second_file`` and
    ``self.protoname2`` are set.  ``self.paired_end`` is always set.
    ``verbose`` is accepted but not used here.

    Raises:
        scripter.InvalidFileException: the file is read 2 of a pair.
    """
    super(BarcodeFilenameParser, self).__init__(filename, *args, **kwargs)

    # check for old-style
    # NOTE(review): os.path.splitext returns a 2-tuple, so this slice is a
    # tuple and never equals 'all'; the trimmed name is not used afterwards
    # either.  Kept as written -- confirm the intended behaviour.
    stem = self.protoname
    if os.path.splitext(stem)[-3:] == 'all':
        stem = stem[0:-4]

    source_path = self.input_file
    base = os.path.basename(source_path)

    # Path of the candidate mate file; stays None unless this is read 1.
    mate_path = None

    # try new style first
    pair = get_new_pair_info(base)
    if pair is not None:
        scripter.debug('NOTICE: Detected new-style paired read file.')
        read_tag = pair[0]
        if read_tag == 'R2':
            scripter.debug('This is the second file, ignoring it.')
            raise scripter.InvalidFileException(source_path)
        if read_tag == 'R1':
            mate_path = os.path.join(self.input_dir, pair[1])
        else:
            scripter.debug('Failed to find paired end')
    elif base.count('_') >= 3:
        scripter.debug('NOTICE: Detected paired read file.')
        fields = base.split('_')
        read_number = fields[2]
        if read_number == '2':
            scripter.debug('This is the second file, ignoring it.')
            raise scripter.InvalidFileException(source_path)
        if read_number == '1':
            scripter.debug('Attempting to find second file.')
            # Same name with the read-number field switched to '2'.
            mate_name = '_'.join(fields[0:2] + ['2'] + fields[3:])
            mate_path = os.sep.join([self.input_dir, mate_name])
        else:
            scripter.debug('Failed to find paired end')

    has_mate = False
    if mate_path is not None:
        try:
            scripter.assert_path(mate_path)
            scripter.debug('Found %s', mate_path)
            self.second_file = mate_path
            mate_base = os.path.basename(mate_path)
            self.protoname2 = os.path.splitext(mate_base)[0]
            has_mate = True
        except IOError:
            scripter.debug('Failed to find paired end file')
    self.paired_end = has_mate
output_dir, output_file = os.path.split(path_to_unsorted) bam_file = os.path.splitext(output_file)[0] sorter_args = [path_to_samtools, 'sort', output_file, bam_file] logger.info(' '.join(sorter_args)) samtools_stdout = PolledPipe(logger=logger, level=logging.WARN) samtools_stderr = PolledPipe(logger=logger, level=logging.ERROR) samtools_sorter = Popen(sorter_args, stdout=samtools_stdout.w, stderr=samtools_stderr.w, cwd=output_dir) wait_for_job(samtools_sorter, [samtools_stdout, samtools_stderr], logger) if not samtools_sorter.returncode == 0: logger.critical("samtools sort did not run properly [%d]", samtools_sorter.returncode) return # don't destroy the files until we're sure we succeeded! assert_path(os.path.join(output_dir, bam_file + '.bam')) logger.debug('Removing unsorted file %s', path_to_unsorted) os.remove(path_to_unsorted) logger.debug('Launching samtools again to index sorted BAM output') samtools_stdout = PolledPipe(logger=logger, level=logging.WARN) samtools_stderr = PolledPipe(logger=logger, level=logging.ERROR) index_args = [path_to_samtools, 'index', bam_file + '.bam'] samtools_indexer = Popen(index_args, stdout=samtools_stdout.w, stderr=samtools_stderr.w, cwd=output_dir) wait_for_job(samtools_indexer, [samtools_stdout, samtools_stderr], logger) if not samtools_indexer.returncode == 0: logger.critical("samtools index did not run properly [%d]", samtools_indexer.returncode) return