Esempio n. 1
0
    def check_paired_end(self):
        """Detect whether ``self.input_file`` belongs to a paired-end set.

        The base name of the input file is passed to ``get_pair_info`` and,
        if that yields ``None``, to ``get_new_pair_info``.  A non-``None``
        result is indexed as ``(pair_index, second_name, new_name)``.

        Effects on the instance:

        * ``self.paired_end`` is always set; it is ``True`` only when this is
          the first file (``pair_index == '1'``) and the partner file passes
          ``scripter.assert_path``.
        * For a first file, ``self.second_file`` (partner path inside
          ``self.input_dir``) and ``self.protoname`` (``new_name`` without
          its last extension) are set even when the partner turns out to be
          missing.

        Raises:
            scripter.InvalidFileException: if this is the second file of a
                pair (``pair_index == '2'``).
        """
        seqfile_name = os.path.basename(self.input_file)
        pair_info = get_pair_info(seqfile_name)
        if pair_info is None:
            pair_info = get_new_pair_info(seqfile_name)

        if pair_info is None:
            scripter.debug('This file contains single-end reads.')
            self.paired_end = False
            return

        pair_index = pair_info[0]
        second_name = pair_info[1]
        new_name = pair_info[2]
        scripter.debug('NOTICE: Detected paired read file.')

        if pair_index == '2':
            scripter.debug('This is the second file, ignoring it.')
            # Carry the offending path in the exception so whoever handles
            # it can report which file was skipped.
            raise scripter.InvalidFileException(self.input_file)

        if pair_index != '1':
            scripter.debug('Failed to find paired end')
            self.paired_end = False
            return

        scripter.debug('Attempting to find second file.')
        # os.path.join (rather than os.sep.join) keeps the path relative
        # when input_dir is empty instead of producing '/<name>'.
        self.second_file = os.path.join(self.input_dir, second_name)
        self.protoname = os.path.splitext(new_name)[0]
        try:
            scripter.assert_path(self.second_file)
        except IOError:
            scripter.debug('Failed to find paired end file')
            self.paired_end = False
        else:
            # Only report the partner as found once it has been verified.
            scripter.debug('Found %s', self.second_file)
            self.paired_end = True
Esempio n. 2
0
def align_once(fp_obj, flags, ref, use_quality=False,
               path_to_bowtie2=None, path_to_samtools=None, logger=None,
               passthru_args=None,
               **kwargs):
    """Align one input against ``ref`` with bowtie2 and produce an indexed BAM.

    Steps, each run as a subprocess whose stderr/stdout are routed to
    ``logger`` through ``PolledPipe`` objects:

    1. ``bowtie2`` piped into ``samtools view -b -S`` to write the unsorted
       BAM at ``fp_obj.tmp_filename(basename(ref))``.
    2. ``samtools sort`` on that file (run inside its directory).
    3. Removal of the unsorted file once the sorted ``.bam`` exists.
    4. ``samtools index`` on the sorted file.

    Args:
        fp_obj: parsed-filename object; this function reads ``input_file``,
            ``second_file``, ``paired_end`` and ``use_pysam`` and calls
            ``tmp_filename`` and ``check_output_dir`` on it.
        flags: list of bowtie2 command-line flags.  It is copied, never
            modified in place.
        ref: bowtie2 index passed to ``-x``; its base name is used to derive
            the output file name.
        use_quality: when true, append ``--phred33`` if ``fp_obj.use_pysam``
            is set, otherwise ``--phred64``.
        path_to_bowtie2: bowtie2 executable.
        path_to_samtools: samtools executable.
        logger: logger receiving progress and error messages.
        passthru_args: optional list of extra arguments inserted between
            ``flags`` and the file arguments.
        **kwargs: ignored.

    Returns:
        None in every case.  A non-zero exit status from any tool is reported
        with ``logger.critical`` and ends the function early.

    Raises:
        Whatever ``assert_path`` raises when the sorted BAM or its ``.bai``
        index is missing after the corresponding step.
    """
    # Copy so that the quality flag appended below does not accumulate in the
    # caller's list when the same ``flags`` object is reused across calls.
    flags = list(flags)
    if use_quality:
        flags.append('--phred33' if fp_obj.use_pysam else '--phred64')

    refname = os.path.basename(ref)
    path_to_unsorted = fp_obj.tmp_filename(refname)
    output_dir, output_file = os.path.split(path_to_unsorted)
    fp_obj.check_output_dir(output_dir)
    filename1 = os.path.abspath(fp_obj.input_file)
    second_file = fp_obj.second_file
    filename2 = None if second_file is None else os.path.abspath(second_file)

    if fp_obj.paired_end:
        file_args = ['-x', ref, '-1', filename1, '-2', filename2]
    else:
        file_args = ['-x', ref, '-U', filename1]

    bowtie2_args = ([path_to_bowtie2] + flags + (passthru_args or [])
                    + file_args)

    # --- bowtie2 | samtools view -------------------------------------------
    bowtie2_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    logger.info('Launching bowtie2 (output will be piped to samtools for BAM '
                'encoding)')
    logger.info(' '.join(bowtie2_args))
    bowtie2_aligner = Popen(bowtie2_args, stdout=PIPE, stderr=bowtie2_stderr.w,
                            bufsize=-1)

    samtools_args = [path_to_samtools, 'view', '-b', '-S', '-o',
                     path_to_unsorted, '-']
    logger.info('Launching samtools to encode bowtie2 output as BAM')
    logger.info(' '.join(samtools_args))
    samtools_stdout = PolledPipe(logger=logger, level=logging.WARN)
    samtools_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    samtools_viewer = Popen(samtools_args, stdin=bowtie2_aligner.stdout,
                            stdout=samtools_stdout.w,
                            stderr=samtools_stderr.w, bufsize=-1)
    # Drop the parent's copy of the pipe's read end so that bowtie2 receives
    # SIGPIPE (instead of blocking forever) if samtools exits early.
    bowtie2_aligner.stdout.close()

    logger.debug('Waiting for bowtie2 to finish')
    pollables = [bowtie2_stderr, samtools_stdout, samtools_stderr]
    wait_for_job(bowtie2_aligner, pollables, logger)

    if bowtie2_aligner.returncode != 0:
        logger.critical("bowtie2 did not run properly [%d]",
                        bowtie2_aligner.returncode)
        samtools_viewer.terminate()
        samtools_viewer.poll()
        logger.critical("samtools terminated")
        return

    logger.debug('Alignment successfully completed')
    logger.debug('Waiting for samtools to finish')
    wait_for_job(samtools_viewer, [samtools_stdout, samtools_stderr], logger)
    if samtools_viewer.returncode != 0:
        logger.critical("samtools view did not run properly [%d]",
                        samtools_viewer.returncode)
        return

    logger.debug('Unsorted BAM file successfully written')

    # --- samtools sort -----------------------------------------------------
    logger.info('Launching samtools again to sort BAM output')
    bam_file = os.path.splitext(output_file)[0]
    # NOTE(review): this is the legacy ``samtools sort <in.bam> <out.prefix>``
    # calling convention; newer samtools releases expect ``-o`` -- confirm
    # the samtools version this pipeline targets.
    sorter_args = [path_to_samtools, 'sort', output_file, bam_file]
    logger.info(' '.join(sorter_args))
    samtools_stdout = PolledPipe(logger=logger, level=logging.WARN)
    samtools_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    samtools_sorter = Popen(sorter_args, stdout=samtools_stdout.w,
                            stderr=samtools_stderr.w, cwd=output_dir)
    wait_for_job(samtools_sorter, [samtools_stdout, samtools_stderr], logger)
    if samtools_sorter.returncode != 0:
        logger.critical("samtools sort did not run properly [%d]",
                        samtools_sorter.returncode)
        return

    # don't destroy the files until we're sure we succeeded!
    assert_path(os.path.join(output_dir, bam_file + '.bam'))
    logger.debug('Removing unsorted file %s', path_to_unsorted)
    os.remove(path_to_unsorted)

    # --- samtools index ----------------------------------------------------
    logger.debug('Launching samtools again to index sorted BAM output')
    samtools_stdout = PolledPipe(logger=logger, level=logging.WARN)
    samtools_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    index_args = [path_to_samtools, 'index', bam_file + '.bam']
    samtools_indexer = Popen(index_args, stdout=samtools_stdout.w,
                             stderr=samtools_stderr.w, cwd=output_dir)
    wait_for_job(samtools_indexer, [samtools_stdout, samtools_stderr], logger)
    if samtools_indexer.returncode != 0:
        logger.critical("samtools index did not run properly [%d]",
                        samtools_indexer.returncode)
        return

    # Make sure indexing succeeds
    assert_path(os.path.join(output_dir, bam_file + '.bam.bai'))
    return
Esempio n. 3
0
def align_once(fp_obj, flags, ref, match_type, use_quality=False,
               quals_type='solexa1.3',
               path_to_bowtie=None, path_to_samtools=None, logger=None,
               **kwargs):
    """Align one input against ``ref`` with bowtie and produce an indexed BAM.

    A converter subprocess feeds reads to bowtie on stdin; bowtie's output is
    piped into ``samtools view -b -S`` which writes the unsorted BAM at
    ``fp_obj.tmp_filename(basename(ref), match_type)``.  The file is then
    sorted with ``samtools sort``, the unsorted file is removed, and the
    sorted file is indexed with ``samtools index``.

    The converter is chosen as follows (each is run as
    ``sys.executable -m <module>``):

    * ``fp_obj.use_pysam`` and paired-end:
      ``seriesoftubes.converters.bamtotab``
    * ``fp_obj.use_pysam`` and single-end:
      ``seriesoftubes.converters.bamtofastq --no-gzip``
    * paired-end ``'FASTQ'``: ``seriesoftubes.converters.fastqtotab``
    * single-end ``'FASTQ'``: ``seriesoftubes.converters.cat``

    Args:
        fp_obj: parsed-filename object; this function reads ``input_file``,
            ``second_file``, ``paired_end``, ``use_pysam`` and ``format`` and
            calls ``tmp_filename`` and ``check_output_dir`` on it.
        flags: list of bowtie command-line flags.  It is copied, never
            modified in place.
        ref: bowtie index; its base name is used to derive the output name.
        match_type: forwarded to ``fp_obj.tmp_filename``.
        use_quality: when true, append ``--phred33-quals`` if
            ``fp_obj.use_pysam`` is set, otherwise ``--<quals_type>-quals``.
        quals_type: quality-encoding name used to build the flag above.
        path_to_bowtie: bowtie executable.
        path_to_samtools: samtools executable.
        logger: logger receiving progress and error messages.
        **kwargs: ignored.

    Returns:
        None in every case.  An unsupported input format or a non-zero exit
        status from bowtie/samtools is reported with ``logger.critical`` and
        ends the function early.

    Raises:
        Whatever ``assert_path`` raises when the sorted BAM or its ``.bai``
        index is missing after the corresponding step.
    """
    refname = os.path.basename(ref)
    path_to_unsorted = fp_obj.tmp_filename(refname, match_type)
    output_dir, output_file = os.path.split(path_to_unsorted)
    fp_obj.check_output_dir(output_dir)
    filename1 = os.path.abspath(fp_obj.input_file)
    second_file = fp_obj.second_file
    filename2 = None if second_file is None else os.path.abspath(second_file)

    # Copy so that the quality flag appended below does not accumulate in the
    # caller's list when the same ``flags`` object is reused across calls.
    flags = list(flags)
    if use_quality:
        if fp_obj.use_pysam:
            flags.append('--phred33-quals')
        else:
            flags.append(''.join(['--', quals_type, '-quals']))

    # bowtie always reads from stdin ('-'); '--12' is added for paired input.
    if fp_obj.paired_end:
        file_args = [ref, '--12', '-']
        logger.info('Automagically interpreting %s files', fp_obj.format)
    else:
        logger.info('Automagically interpreting %s file', fp_obj.format)
        file_args = [ref, '-']
    bowtie_args = [path_to_bowtie] + flags + file_args

    # Pick the converter that will stream reads into bowtie.  This is decided
    # before any pipe is created so that an unsupported input can bail out
    # without leaving pipes or processes behind.
    python_module = [sys.executable, '-m']
    if fp_obj.use_pysam:
        if fp_obj.paired_end:
            in_args = python_module + ['seriesoftubes.converters.bamtotab',
                                       filename1]
        else:
            in_args = python_module + ['seriesoftubes.converters.bamtofastq',
                                       '--no-gzip',
                                       filename1]
    elif fp_obj.paired_end and fp_obj.format == 'FASTQ':
        in_args = python_module + ['seriesoftubes.converters.fastqtotab',
                                   filename1, filename2]
    elif fp_obj.format == 'FASTQ':
        in_args = python_module + ['seriesoftubes.converters.cat',
                                   filename1]
    else:
        logger.critical("Couldn't figure out what to do with file "
                        "%s of format %s",
                        fp_obj.input_file, fp_obj.format)
        # Without a converter there is nothing to run; previously execution
        # fell through and crashed on the unbound ``in_args``.
        return

    # --- converter | bowtie | samtools view --------------------------------
    input_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    bowtie_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    logger.info(' '.join(in_args))
    input_reader = Popen(in_args, stdout=PIPE, stderr=input_stderr.w,
                         bufsize=-1)
    logger.info('Launching bowtie (output will be piped to samtools)')
    logger.info(' '.join(bowtie_args))
    bowtie_aligner = Popen(bowtie_args, stdin=input_reader.stdout,
                           stdout=PIPE, stderr=bowtie_stderr.w,
                           bufsize=-1)
    # Drop the parent's copy of the read end so the converter receives
    # SIGPIPE (instead of blocking forever) if bowtie exits early.
    input_reader.stdout.close()

    samtools_args = [path_to_samtools, 'view', '-b', '-S', '-o',
                     path_to_unsorted, '-']
    logger.info('Launching samtools to encode bowtie output as BAM')
    logger.info(' '.join(samtools_args))
    samtools_stdout = PolledPipe(logger=logger, level=logging.WARN)
    samtools_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    samtools_viewer = Popen(samtools_args, stdin=bowtie_aligner.stdout,
                            stdout=samtools_stdout.w,
                            stderr=samtools_stderr.w, bufsize=-1)
    # Same reasoning as above, for the bowtie -> samtools pipe.
    bowtie_aligner.stdout.close()

    logger.debug('Waiting for bowtie to finish')
    pollables = [input_stderr, bowtie_stderr, samtools_stdout, samtools_stderr]
    wait_for_job(bowtie_aligner, pollables, logger)

    if bowtie_aligner.returncode != 0:
        logger.critical("bowtie did not run properly [%d]",
                        bowtie_aligner.returncode)
        samtools_viewer.terminate()
        samtools_viewer.poll()
        logger.critical("samtools terminated")
        return

    logger.debug('Alignment successfully completed')
    logger.debug('Waiting for samtools to finish')
    wait_for_job(samtools_viewer, [samtools_stdout, samtools_stderr], logger)
    if samtools_viewer.returncode != 0:
        logger.critical("samtools view did not run properly [%d]",
                        samtools_viewer.returncode)
        return

    logger.debug('Unsorted BAM file successfully written')

    # --- samtools sort -----------------------------------------------------
    logger.info('Launching samtools again to sort BAM output')
    bam_file = os.path.splitext(output_file)[0]
    # NOTE(review): this is the legacy ``samtools sort <in.bam> <out.prefix>``
    # calling convention; newer samtools releases expect ``-o`` -- confirm
    # the samtools version this pipeline targets.
    sorter_args = [path_to_samtools, 'sort', output_file, bam_file]
    logger.info(' '.join(sorter_args))
    samtools_stdout = PolledPipe(logger=logger, level=logging.WARN)
    samtools_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    samtools_sorter = Popen(sorter_args, stdout=samtools_stdout.w,
                            stderr=samtools_stderr.w, cwd=output_dir)
    wait_for_job(samtools_sorter, [samtools_stdout, samtools_stderr], logger)
    if samtools_sorter.returncode != 0:
        logger.critical("samtools sort did not run properly [%d]",
                        samtools_sorter.returncode)
        return

    # don't destroy the files until we're sure we succeeded!
    assert_path(os.path.join(output_dir, bam_file + '.bam'))
    logger.debug('Removing unsorted file %s', path_to_unsorted)
    os.remove(path_to_unsorted)

    # --- samtools index ----------------------------------------------------
    logger.debug('Launching samtools again to index sorted BAM output')
    samtools_stdout = PolledPipe(logger=logger, level=logging.WARN)
    samtools_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    index_args = [path_to_samtools, 'index', bam_file + '.bam']
    samtools_indexer = Popen(index_args, stdout=samtools_stdout.w,
                             stderr=samtools_stderr.w, cwd=output_dir)
    wait_for_job(samtools_indexer, [samtools_stdout, samtools_stderr], logger)
    if samtools_indexer.returncode != 0:
        logger.critical("samtools index did not run properly [%d]",
                        samtools_indexer.returncode)
        return

    # Make sure indexing succeeds
    assert_path(os.path.join(output_dir, bam_file + '.bam.bai'))
    return
Esempio n. 4
0
    def __init__(self, filename, verbose=False, *args, **kwargs):
        """Initialise the parser and look for a paired-end partner file.

        After delegating to the parent constructor, the base name of
        ``self.input_file`` is examined in two ways:

        * new style -- ``get_new_pair_info`` returns a non-``None`` value
          whose first item is the read label (``'R1'`` or ``'R2'``) and whose
          second item is the partner's file name;
        * old style -- the name contains at least three underscores and its
          third ``_``-separated field is ``'1'`` or ``'2'``; the partner name
          is the same name with that field replaced by ``'2'``.

        When this is a first-read file and the partner passes
        ``scripter.assert_path``, ``self.second_file`` is set to the partner
        path (inside ``self.input_dir``), ``self.protoname2`` to the
        partner's base name without its last extension, and
        ``self.paired_end`` to ``True``.  In every other non-raising case
        only ``self.paired_end = False`` is set here.

        Args:
            filename: forwarded to the parent constructor.
            verbose: accepted but not used by this constructor.
            *args: forwarded to the parent constructor.
            **kwargs: forwarded to the parent constructor.

        Raises:
            scripter.InvalidFileException: if the file is the second read of
                a pair (``'R2'`` new style, field ``'2'`` old style).
        """
        super(BarcodeFilenameParser, self).__init__(filename,
                                                    *args, **kwargs)
        # NOTE(review): the previous code tried to strip a trailing 'all'
        # suffix from a local copy of self.protoname here, but it compared a
        # slice of the os.path.splitext() tuple against the string 'all'
        # (always False) and never stored the result.  That no-op has been
        # dropped; confirm whether old-style '*_all' names should have the
        # suffix removed from self.protoname.

        input_file = self.input_file
        illumina_name = os.path.basename(input_file)

        # Work out where the partner file would be, if this is a first read.
        candidate = None
        new_info = get_new_pair_info(illumina_name)  # try new style first
        if new_info is not None:
            scripter.debug('NOTICE: Detected new-style paired read file.')
            read = new_info[0]
            if read == 'R2':
                scripter.debug('This is the second file, ignoring it.')
                raise scripter.InvalidFileException(input_file)
            if read == 'R1':
                candidate = os.path.join(self.input_dir, new_info[1])
            else:
                scripter.debug('Failed to find paired end')
        elif illumina_name.count('_') >= 3:
            scripter.debug('NOTICE: Detected paired read file.')
            iln_parts = illumina_name.split('_')
            read = iln_parts[2]
            if read == '2':
                scripter.debug('This is the second file, ignoring it.')
                raise scripter.InvalidFileException(input_file)
            if read == '1':
                scripter.debug('Attempting to find second file.')
                partner_name = '_'.join(iln_parts[0:2] + ['2']
                                        + iln_parts[3:])
                # os.path.join, as in the new-style branch, so an empty
                # input_dir does not turn into a '/<name>' path.
                candidate = os.path.join(self.input_dir, partner_name)
            else:
                scripter.debug('Failed to find paired end')

        # Only treat the input as paired once the partner is known to exist.
        paired_end = False
        if candidate is not None:
            try:
                scripter.assert_path(candidate)
            except IOError:
                scripter.debug('Failed to find paired end file')
            else:
                scripter.debug('Found %s', candidate)
                self.second_file = candidate
                self.protoname2 = os.path.splitext(
                    os.path.basename(candidate))[0]
                paired_end = True
        self.paired_end = paired_end
Esempio n. 5
0
    output_dir, output_file = os.path.split(path_to_unsorted)
    bam_file = os.path.splitext(output_file)[0]
    sorter_args = [path_to_samtools, 'sort', output_file, bam_file]
    logger.info(' '.join(sorter_args))
    samtools_stdout = PolledPipe(logger=logger, level=logging.WARN)
    samtools_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    samtools_sorter = Popen(sorter_args, stdout=samtools_stdout.w,
                            stderr=samtools_stderr.w, cwd=output_dir)
    wait_for_job(samtools_sorter, [samtools_stdout, samtools_stderr], logger)
    if not samtools_sorter.returncode == 0:
        logger.critical("samtools sort did not run properly [%d]",
                        samtools_sorter.returncode)
        return

    # don't destroy the files until we're sure we succeeded!
    assert_path(os.path.join(output_dir, bam_file + '.bam'))
    logger.debug('Removing unsorted file %s', path_to_unsorted)
    os.remove(path_to_unsorted)

    logger.debug('Launching samtools again to index sorted BAM output')
    samtools_stdout = PolledPipe(logger=logger, level=logging.WARN)
    samtools_stderr = PolledPipe(logger=logger, level=logging.ERROR)
    index_args = [path_to_samtools, 'index', bam_file + '.bam']
    samtools_indexer = Popen(index_args, stdout=samtools_stdout.w,
                             stderr=samtools_stderr.w, cwd=output_dir)
    wait_for_job(samtools_indexer, [samtools_stdout, samtools_stderr], logger)
    if not samtools_indexer.returncode == 0:
        logger.critical("samtools index did not run properly [%d]",
                        samtools_indexer.returncode)
        return