Exemple #1
0
    def build_ctab_files(self,
                         ref_genome_path,
                         alignment_path,
                         output_dir,
                         num_threads=2):
        """
        Runs tablemaker and generates the ctab files in the specified output_dir

        :param ref_genome_path: File path to a reference genome or transcriptome assembly in GTF
                                 format
        :param alignment_path: File path to a reads alignment in BAM format
        :param output_dir: directory to which the ctab files need to be written
        :param num_threads: Number of processing threads (default=2)

        :returns 0 if successful, else 1 (any exception raised while running
                 tablemaker is logged and reported as 1, never propagated)
        """
        # Single-argument print(...) calls behave identically whether print is
        # the Python 2 statement or the Python 3 function.
        print('Running tablemaker...')
        print("Args passed : ref_genome_path: {0} , alignment_path: {1} , output_dir: {2} , "
              "num_threads: {3} ".format(ref_genome_path, alignment_path, output_dir,
                                         num_threads))
        tm_args = " -p {0} -o {1} -q -W -G {2} {3}".format(
            str(num_threads), output_dir, ref_genome_path, alignment_path)
        try:
            print("Executing: tablemaker {0}".format(tm_args))
            runProgram(logger=self.logger,
                       progName="tablemaker",
                       argStr=tm_args)
        except Exception as ex:
            # str(ex) instead of ex.message: the attribute is deprecated since
            # Python 2.6 and absent in Python 3, where reading it would raise
            # inside this handler and hide the original failure.
            log(
                "Error executing tablemaker {0}. {1}".format(
                    tm_args, str(ex)), logging.ERROR)
            return 1

        return 0
Exemple #2
0
    def get_stats(self, ifile, ipath):
        """
        Generate simple statistics from a BAM file. The statistics collected include
        counts of aligned and unaligned reads as well as all records with no start
        coordinate.

        :param ifile: bam file name
        :param ipath: absolute path to bam file.

        :returns whatever self._extractAlignmentStatsInfo builds from the
                 'samtools flagstat' output; an empty dict if running samtools
                 or parsing its output raised (the failure is logged)
        :raises Exception: if ipath does not start with '/'
        :raises RuntimeError: if the bam file does not exist
        """
        if not ipath.startswith('/'):
            raise Exception("Input path must be an absolute path. Provided: " +
                            str(ipath))

        ifile = os.path.join(ipath, ifile)

        # check if input file exists
        if not os.path.exists(ifile):
            raise RuntimeError(None,
                               'Input bam file does not exist: ' + str(ifile))

        # get stats
        self._check_prog()

        # Bound up front so the failure path below returns a value instead of
        # raising UnboundLocalError at the final return.
        # TODO send back a dictionary with -1 values
        result = {}
        try:
            #   samtools flagstat ifile
            # samtools appears to operate on a garbage-in-garbage-out policy, i.e.
            # it does not validate input and always returns True. Hence the
            # exit status is not being checked.
            # An argument list (no shell) keeps paths containing spaces or
            # shell metacharacters intact.
            flagstat = Popen(['samtools', 'flagstat', ifile],
                             stdin=PIPE,
                             stdout=PIPE)
            # stderr is not piped, so the second element is always None
            stats, _ = flagstat.communicate()

            result = self._extractAlignmentStatsInfo(stats)

        except Exception as ex:
            # str(ex) instead of the deprecated / Python-2-only ex.message
            log(
                'failed to get stats from {0}'.format(ifile) + '. ' +
                str(ex), logging.ERROR)

        return result
Exemple #3
0
    def validate(self,
                 ifile,
                 ipath,
                 ignore=[
                     'MATE_NOT_FOUND', 'MISSING_READ_GROUP',
                     'INVALID_MAPPING_QUALITY'
                 ]):
        """
        Validates the input bam file with Picard ValidateSamFile (MODE=SUMMARY).
        Logs the errors if errors are found

        :param ifile: bam file name
        :param ipath: absolute path to bam file.
        :param ignore: list of errors to ignore (see
        http://broadinstitute.github.io/picard/command-line-overview.html#ValidateSamFile).
        The default list is shared between calls; it is only passed through to
        self._is_valid here and must not be mutated.
        :returns 0 if successful, else 1 (also 1 if running the validator raised)
        :raises Exception: if ipath does not start with '/'
        :raises RuntimeError: if the input file does not exist
        """
        if not ipath.startswith('/'):
            raise Exception("Input path must be an absolute path. Provided: " +
                            str(ipath))

        ifile = os.path.join(ipath, ifile)

        # check if input file exists
        if not os.path.exists(ifile):
            raise RuntimeError(None,
                               'Input file does not exist: ' + str(ifile))

        # java -jar picard.jar ValidateSamFile I=ifile MODE=SUMMARY
        # Passed as an argument list (no shell) so a path containing spaces or
        # shell metacharacters reaches Picard as a single I=... argument.
        command = [
            '/usr/lib/jvm/java-8-oracle/bin/java', '-jar',
            '/opt/picard/build/libs/picard.jar', 'ValidateSamFile',
            'I={0}'.format(ifile), 'MODE=SUMMARY'
        ]

        try:
            validation = Popen(command, stdin=PIPE, stdout=PIPE)
            # stderr is not piped, so the second element is always None
            result, _ = validation.communicate()

            if self._is_valid(result, ignore):
                log('{0} passed validation'.format(ifile), logging.INFO,
                    self.logger)
                return 0

            log(
                '{0} failed validation with errors: {1}'.format(
                    ifile, result), logging.ERROR, self.logger)
            return 1

        except Exception as ex:
            # str(ex) instead of the deprecated / Python-2-only ex.message
            log('{0} failed validation'.format(ifile) + '. ' + str(ex),
                logging.ERROR, self.logger)
            return 1
Exemple #4
0
    def create_bai_from_bam(self,
                            ifile,
                            ipath,
                            ofile=None,
                            opath=None,
                            validate=False,
                            ignore=[
                                'MATE_NOT_FOUND', 'MISSING_READ_GROUP',
                                'INVALID_MAPPING_QUALITY'
                            ]):
        """
        creates a bai file from a bam file

        throws exceptions if input file is missing, invalid or if the output file could
        not be written to disk

        :param ifile: bam file name
        :param ipath: absolute path to bam file
        :param ofile: bai file name. If None, ifile name is used with the
        extension '.bam' (if any) replaced with '.bai'
        :param opath: path to bai file. If None, ipath will be used
        :param validate: set to true if bam file needs to be validated. Default=False
        :param ignore: see validate() method param. The default list is shared
        between calls; it is only passed through here and must not be mutated.

        :returns 0 if successful, else 1 (1 when validation fails or when
                 launching samtools raised)
        :raises RuntimeError: if the input bam file does not exist
        """
        # prepare input and output file paths
        ifile, ofile, opath = self._prepare_paths(ifile, ipath, ofile, opath,
                                                  '.bam', '.bai')

        # check if input file exists
        if not os.path.exists(ifile):
            raise RuntimeError(None,
                               'Input bam file does not exist: ' + str(ifile))

        # validate input bam file
        if validate and self.validate(ifile, ipath, ignore=ignore) == 1:
            return 1

        # convert
        self._check_prog()

        #   samtools index ifile ofile
        # samtools appears to operate on a garbage-in-garbage-out policy, i.e.
        # it does not validate input and always returns True. Hence the
        # exit status is not being checked.
        try:
            log('Creating bai from bam for file: ' + str(ifile) +
                ' with output file: ' + str(ofile) + ' and cwd: ' + str(opath))
            # Argument list (no shell): file names with spaces or shell
            # metacharacters are passed to samtools unchanged.
            create = Popen(['samtools', 'index', ifile, ofile],
                           stdin=PIPE,
                           stdout=PIPE,
                           cwd=opath)
            create.communicate()
        except Exception as ex:
            # str(ex) instead of the deprecated / Python-2-only ex.message
            log(
                'failed to convert {0} to {1}'.format(ifile, ofile) + '. ' +
                str(ex), logging.ERROR)
            return 1

        return 0
Exemple #5
0
    def convert_sam_to_sorted_bam(self,
                                  ifile,
                                  ipath,
                                  ofile=None,
                                  opath=None,
                                  validate=False,
                                  ignore=[
                                      'MATE_NOT_FOUND', 'MISSING_READ_GROUP',
                                      'INVALID_MAPPING_QUALITY'
                                  ]):
        """
        Converts the specified sam file to a sorted bam file.

        throws exceptions if input file is missing, invalid or if the output file could
        not be written to disk

        :param ifile: sam file name
        :param ipath: absolute path to sam file.
        :param ofile: sorted bam file name. If None, ifile name is used with the
        extension '.sam' (if any) replaced with '.bam'
        :param opath: absolute path to sorted bam file. If None, ipath will be used
        :param validate: set to true if sam file needs to be validated. Default=False
        :param ignore: see validate() method param. The default list is shared
        between calls; it is only passed through here and must not be mutated.

        :returns 0 if successful, else 1 (1 when validation fails or when
                 setting up / running the samtools pipeline raised)
        :raises RuntimeError: if the input sam file does not exist
        """
        # prepare input and output file paths
        ifile, ofile, opath = self._prepare_paths(ifile, ipath, ofile, opath,
                                                  '.sam', '.bam')

        # check if input file exists
        if not os.path.exists(ifile):
            raise RuntimeError(None,
                               'Input sam file does not exist: ' + str(ifile))

        # validate input sam file
        if validate and self.validate(ifile, ipath, ignore=ignore) == 1:
            return 1

        # convert
        self._check_prog()

        #   samtools view -bS ifile | samtools sort -l 9 -O BAM > ofile
        # samtools appears to operate on a garbage-in-garbage-out policy, i.e.
        # it does not validate input and always returns True. Hence the
        # exit status is not being checked.
        try:
            log('Converting sam to sorted bam for file: ' + str(ifile) +
                ' with cwd: ' + str(opath))
            # The pipeline is wired up without a shell so file names with
            # spaces or metacharacters survive. A relative ofile is resolved
            # against opath, matching the working directory of both commands.
            target = ofile if opath is None else os.path.join(opath, ofile)
            with open(target, 'wb') as bam_out:
                view = Popen(['samtools', 'view', '-bS', ifile],
                             stdout=PIPE,
                             cwd=opath)
                sort = Popen(['samtools', 'sort', '-l', '9', '-O', 'BAM'],
                             stdin=view.stdout,
                             stdout=bam_out,
                             cwd=opath)
                # Drop the parent's handle on the pipe so that sort is the
                # only reader and view is signalled if sort exits early.
                view.stdout.close()
                sort.communicate()
                view.wait()
        except Exception as ex:
            # str(ex) instead of the deprecated / Python-2-only ex.message
            log(
                'failed to convert {0} to {1}'.format(ifile, ofile) + '. ' +
                str(ex), logging.ERROR)
            # Report the failure; previously execution fell through to the
            # success return below.
            return 1

        return 0