def bwa_return_code(self, output):
    '''
    Check that every read in the input file(s) was reported as processed.

    Every progress line in output of the form

        [M::main_mem] read 100 sequences (111350 bp)...

    contributes its sequence count to a running total. That total is
    compared with the number of reads seqio.reads_in_file counts in
    self.args[1], plus self.args[2] when exactly three args were given
    (the mates file).

    The closing line of the output, e.g.

        [main] Version: 0.7.4-r385

    is not inspected here.
    NOTE(review): presumably the parent bwa_return_code checks it - confirm.

    :param output: text to scan for the progress lines
    :return: 1 when the input holds no reads or the processed total does
        not match the expected total; otherwise the value returned by the
        parent class's bwa_return_code for the same output
    '''
    # Raw string so the regex escapes are not parsed as (invalid) string
    # escapes. The trailing dots stay unescaped wildcards so that matching
    # is unchanged.
    read_line_pat = r'\[M::main_mem\] read (\d+) sequences \((\d+) bp\)...'
    cpat = re.compile(read_line_pat)

    # findall yields one (reads, bp) tuple per progress line; only the
    # read count is needed for the comparison below.
    total_reads = sum(int(reads) for reads, _bp in cpat.findall(output))

    # Count num of read sequences
    expected_reads = seqio.reads_in_file(self.args[1])
    # If mates file was given count them too
    if len(self.args) == 3:
        expected_reads += seqio.reads_in_file(self.args[2])

    # No lines found in input file?
    if expected_reads == 0:
        return 1

    if total_reads != expected_reads:
        logger.warning(
            "Expecting BWA to process {} reads but processed {}".format(
                expected_reads, total_reads
            )
        )
        return 1

    return super(BWAMem, self).bwa_return_code(output)
def required_args(self):
    '''
    Check the arguments supplied for indexing.

    Exactly one argument is accepted. It is handed to self.validate_input
    and must then contain at least one read according to
    seqio.reads_in_file.

    :raises ValueError: if the number of args is not 1, or if
        seqio.reads_in_file reports 0 reads for the argument
    '''
    arg_count = len(self.args)
    if arg_count != 1:
        raise ValueError('BWAIndex needs 1 parameter: the input fasta file')

    self.validate_input(self.args[0])

    read_total = seqio.reads_in_file(self.args[0])
    if read_total == 0:
        message = '{0} is not a valid file to index'.format(self.args[0])
        raise ValueError(message)
def required_args(self):
    '''
    Validate the arguments given for indexing.

    Exactly one argument is allowed. That argument is passed to
    self.validate_input, and seqio.reads_in_file must then report at
    least one read for it.

    :raises ValueError: if the number of args is not 1, or if
        seqio.reads_in_file returns 0 for the argument
    '''
    has_single_arg = len(self.args) == 1
    if not has_single_arg:
        raise ValueError("bwa index needs only 1 parameter")

    self.validate_input(self.args[0])

    # A file in which no reads can be counted cannot be indexed
    num_reads = seqio.reads_in_file(self.args[0])
    if num_reads == 0:
        raise ValueError(
            "{} is not a valid file to index".format(self.args[0])
        )