def build_ctab_files(self, ref_genome_path, alignment_path, output_dir, num_threads=2):
    """
    Runs tablemaker and generates the ctab files in the specified output_dir

    :param ref_genome_path: File path to a reference genome or transcriptome
                            assembly in GTF format
    :param alignment_path: File path to a reads alignment in BAM format
    :param output_dir: directory to which the ctab files need to be written
    :param num_threads: Number of processing threads (default=2)
    :returns 0 if successful, else 1 (any exception raised while running
             tablemaker is logged and reported as 1)
    """
    # Call-style print with a single argument behaves the same under
    # Python 2 and Python 3, unlike the print statement.
    print('Running tablemaker...')
    print("Args passed : ref_genome_path: {0} , alignment_path: {1} , output_dir: {2} , "
          "num_threads: {3} ".format(ref_genome_path, alignment_path, output_dir,
                                     num_threads))

    tm_args = " -p {0} -o {1} -q -W -G {2} {3}".format(
        str(num_threads), output_dir, ref_genome_path, alignment_path)

    try:
        print("Executing: tablemaker {0}".format(tm_args))
        runProgram(logger=self.logger, progName="tablemaker", argStr=tm_args)
    except Exception as ex:
        # Format the exception itself rather than ex.message: the latter is
        # deprecated and empty for multi-argument exceptions such as OSError.
        log("Error executing tablemaker {0}. {1}".format(tm_args, ex),
            logging.ERROR)
        return 1

    return 0
def get_stats(self, ifile, ipath):
    """
    Generate simple statistics from a BAM file by running
    ``samtools flagstat`` on it.

    The statistics collected include counts of aligned and unaligned reads
    as well as all records with no start coordinate.

    :param ifile: bam file name
    :param ipath: absolute path to the directory holding the bam file
    :returns the value produced by ``self._extractAlignmentStatsInfo`` from
             the flagstat output, or an empty dict if running samtools or
             parsing its output fails
    :raises Exception: if ``ipath`` does not start with '/'
    :raises RuntimeError: if the bam file does not exist
    """
    if not ipath.startswith('/'):
        raise Exception("Input path must be an absolute path. Provided: " + str(ipath))

    ifile = os.path.join(ipath, ifile)

    # check if input file exists
    if not os.path.exists(ifile):
        raise RuntimeError(None, 'Input bam file does not exist: ' + str(ifile))

    # get stats
    self._check_prog()
    try:
        # samtools flagstat ifile
        # samtools appears to operate on a garbage-in-garbage-out policy, i.e.
        # it does not validate input and always returns True. Hence the exit
        # status is not being checked.
        # The command is passed as an argument list (no shell) so that paths
        # containing spaces or shell metacharacters are handled correctly.
        flagstat = Popen(['samtools', 'flagstat', ifile],
                         stdin=PIPE, stdout=PIPE)
        output, _ = flagstat.communicate()
        return self._extractAlignmentStatsInfo(output)
    except Exception as ex:
        log('failed to get stats from {0}. {1}'.format(ifile, ex),
            logging.ERROR)
        # Previously this path fell through to `return result` with `result`
        # unbound, raising UnboundLocalError.
        # TODO send back a dictionary with -1 values
        return {}
def validate(self, ifile, ipath, ignore=None):
    """
    Validates the input bam file with Picard ValidateSamFile (SUMMARY mode).
    Logs the errors if errors are found

    :param ifile: bam file name
    :param ipath: absolute path to the directory holding the bam file
    :param ignore: list of errors to ignore (see
                   http://broadinstitute.github.io/picard/command-line-overview.html#ValidateSamFile).
                   If None, defaults to ['MATE_NOT_FOUND',
                   'MISSING_READ_GROUP', 'INVALID_MAPPING_QUALITY']
    :returns 0 if successful, else 1
    :raises Exception: if ``ipath`` does not start with '/'
    :raises RuntimeError: if the input file does not exist
    """
    # Build the default per call so that no list object is shared between
    # invocations.
    if ignore is None:
        ignore = [
            'MATE_NOT_FOUND', 'MISSING_READ_GROUP',
            'INVALID_MAPPING_QUALITY'
        ]

    if not ipath.startswith('/'):
        raise Exception("Input path must be an absolute path. Provided: " + str(ipath))

    ifile = os.path.join(ipath, ifile)

    # check if input file exists
    if not os.path.exists(ifile):
        raise RuntimeError(None, 'Input file does not exist: ' + str(ifile))

    # java -jar picard.jar ValidateSamFile I=ifile MODE=SUMMARY
    # Passed as an argument list (no shell) so that paths containing spaces
    # or shell metacharacters are handled correctly.
    command = [
        '/usr/lib/jvm/java-8-oracle/bin/java', '-jar',
        '/opt/picard/build/libs/picard.jar', 'ValidateSamFile',
        'I={0}'.format(ifile), 'MODE=SUMMARY'
    ]

    try:
        validation = Popen(command, stdin=PIPE, stdout=PIPE)
        result, _ = validation.communicate()

        if self._is_valid(result, ignore):
            log('{0} passed validation'.format(ifile), logging.INFO,
                self.logger)
            return 0

        log('{0} failed validation with errors: {1}'.format(ifile, result),
            logging.ERROR, self.logger)
        return 1
    except Exception as ex:
        # Format the exception itself rather than ex.message, which is
        # deprecated and empty for multi-argument exceptions such as OSError.
        log('{0} failed validation. {1}'.format(ifile, ex),
            logging.ERROR, self.logger)
        return 1
def create_bai_from_bam(self, ifile, ipath, ofile=None, opath=None,
                        validate=False, ignore=None):
    """
    creates a bai file from a bam file

    throws exceptions if the input file is missing

    :param ifile: bam file name
    :param ipath: absolute path to bam file
    :param ofile: bai file name. If None, ifile name is used with the
                  extension '.bam' (if any) replaced with '.bai'
    :param opath: path to bai file. If None, ipath will be used
    :param validate: set to true if the input bam file needs to be
                     validated. Default=False
    :param ignore: see validate() method param. If None, defaults to
                   ['MATE_NOT_FOUND', 'MISSING_READ_GROUP',
                   'INVALID_MAPPING_QUALITY']
    :returns 0 if successful, else 1
    :raises RuntimeError: if the input bam file does not exist
    """
    # Build the default per call so that no list object is shared between
    # invocations; it is resolved here so validate() always gets a list.
    if ignore is None:
        ignore = [
            'MATE_NOT_FOUND', 'MISSING_READ_GROUP',
            'INVALID_MAPPING_QUALITY'
        ]

    # prepare input and output file paths
    ifile, ofile, opath = self._prepare_paths(ifile, ipath, ofile, opath,
                                              '.bam', '.bai')

    # check if input file exists
    if not os.path.exists(ifile):
        raise RuntimeError(None, 'Input bam file does not exist: ' + str(ifile))

    # validate input bam file
    if validate and self.validate(ifile, ipath, ignore=ignore) == 1:
        return 1

    # convert
    self._check_prog()
    # samtools index ifile ofile
    # samtools appears to operate on a garbage-in-garbage-out policy, i.e.
    # it does not validate input and always returns True. Hence the exit
    # status is not being checked.
    try:
        log('Creating bai from bam for file: ' + str(ifile) +
            ' with output file: ' + str(ofile) + ' and cwd: ' + str(opath))
        # Passed as an argument list (no shell) so that paths containing
        # spaces or shell metacharacters are handled correctly.
        create = Popen(['samtools', 'index', ifile, ofile],
                       stdin=PIPE, stdout=PIPE, cwd=opath)
        create.communicate()
    except Exception as ex:
        # Format the exception itself rather than ex.message, which is
        # deprecated and empty for multi-argument exceptions such as OSError.
        log('failed to convert {0} to {1}. {2}'.format(ifile, ofile, ex),
            logging.ERROR)
        return 1

    return 0
def convert_sam_to_sorted_bam(self, ifile, ipath, ofile=None, opath=None,
                              validate=False, ignore=None):
    """
    Converts the specified sam file to a sorted bam file.

    throws exceptions if the input file is missing

    :param ifile: sam file name
    :param ipath: absolute path to sam file.
    :param ofile: sorted bam file name. If None, ifile name is used with the
                  extension '.sam' (if any) replaced with '.bam'
    :param opath: absolute path to sorted bam file. If None, ipath will be
                  used
    :param validate: set to true if sam file needs to be validated.
                     Default=False
    :param ignore: see validate() method param. If None, defaults to
                   ['MATE_NOT_FOUND', 'MISSING_READ_GROUP',
                   'INVALID_MAPPING_QUALITY']
    :returns 0 if successful, else 1
    :raises RuntimeError: if the input sam file does not exist
    """
    # Build the default per call so that no list object is shared between
    # invocations; it is resolved here so validate() always gets a list.
    if ignore is None:
        ignore = [
            'MATE_NOT_FOUND', 'MISSING_READ_GROUP',
            'INVALID_MAPPING_QUALITY'
        ]

    # prepare input and output file paths
    ifile, ofile, opath = self._prepare_paths(ifile, ipath, ofile, opath,
                                              '.sam', '.bam')

    # check if input file exists
    if not os.path.exists(ifile):
        raise RuntimeError(None, 'Input sam file does not exist: ' + str(ifile))

    # validate input sam file
    if validate and self.validate(ifile, ipath, ignore=ignore) == 1:
        return 1

    # convert
    self._check_prog()
    # samtools view -bS ifile | samtools sort -l 9 -O BAM > ofile
    # samtools appears to operate on a garbage-in-garbage-out policy, i.e.
    # it does not validate input and always returns True. Hence the exit
    # status is not being checked.
    try:
        log('Converting sam to sorted bam for file: ' + str(ifile) +
            ' with cwd: ' + str(opath))

        # The shell redirection used to resolve a relative ofile against the
        # working directory (opath); reproduce that when opening the file.
        out_path = os.path.join(opath, ofile) if opath else ofile

        # Both commands are passed as argument lists (no shell) so that paths
        # containing spaces or shell metacharacters are handled correctly.
        with open(out_path, 'wb') as out_handle:
            view = Popen(['samtools', 'view', '-bS', ifile],
                         stdout=PIPE, cwd=opath)
            sort = Popen(['samtools', 'sort', '-l', '9', '-O', 'BAM'],
                         stdin=view.stdout, stdout=out_handle, cwd=opath)
            # Drop the parent's copy of the pipe so that `view` receives
            # SIGPIPE if `sort` exits early instead of blocking forever.
            view.stdout.close()
            sort.communicate()  # samtools always returns success
            view.wait()
    except Exception as ex:
        # Format the exception itself rather than ex.message, which is
        # deprecated and empty for multi-argument exceptions such as OSError.
        log('failed to convert {0} to {1}. {2}'.format(ifile, ofile, ex),
            logging.ERROR)
        # Report the failure; previously this path fell through to return 0.
        return 1

    return 0