def testMinScoreNoScores(self):
    """
    Filtering on a minimum alignment score must yield no alignments at
    all when none of the alignments in the input has a score.
    """
    samLines = (
        '@SQ SN:ref1 LN:10',
        'query1 0 ref1 2 60 2=2X2M * 0 0 TCTAGG ZZZZZZ',
        'query2 0 ref1 2 60 2= * 0 0 TC ZZ',
    )
    # The lines above use spaces for readability; convert them to the
    # tab-separated form before handing the text to dataFile.
    samText = '\n'.join(samLines).replace(' ', '\t')
    with dataFile(samText) as path:
        samFilter = SAMFilter(path, minScore=6)
        self.assertEqual([], list(samFilter.alignments()))
def testDropSecondary(self):
    """
    When dropSecondary is set, an alignment flagged as secondary must
    not be returned.
    """
    samLines = (
        '@SQ SN:ref1 LN:10',
        'query1 0 ref1 2 60 2=2X2M * 0 0 TCTAGG ZZZZZZ',
        # Flag 256 marks this record as a secondary alignment.
        'query2 256 ref1 2 60 2= * 0 0 TC ZZ',
    )
    samText = '\n'.join(samLines).replace(' ', '\t')
    with dataFile(samText) as path:
        samFilter = SAMFilter(path, dropSecondary=True)
        # Unpacking a single element also checks that exactly one
        # alignment was produced.
        [kept] = list(samFilter.alignments())
        self.assertEqual('query1', kept.query_name)
def testAlignmentCount(self):
    """
    Once every alignment has been consumed, the filter's alignmentCount
    attribute must equal the number of alignments in the input.
    """
    samLines = (
        '@SQ SN:ref1 LN:10',
        'query1 0 ref1 2 60 2=2X2M * 0 0 TCTAGG 123456',
        'query2 0 ref1 2 60 2= * 0 0 TC XY',
    )
    samText = '\n'.join(samLines).replace(' ', '\t')
    with dataFile(samText) as path:
        samFilter = SAMFilter(path)
        # Exhaust the alignments; only the resulting count is of
        # interest here.
        for _ in samFilter.alignments():
            pass
        self.assertEqual(2, samFilter.alignmentCount)
def testStoreQueryIds(self):
    """
    With storeQueryIds set, the filter must record the set of query ids
    it has seen (a repeated id appears only once).
    """
    samLines = (
        '@SQ SN:ref1 LN:10',
        'query1 0 ref1 2 60 2=2X2M * 0 0 TCTAGG 123456',
        'query2 0 ref1 2 60 2= * 0 0 TC XY',
        # query2 is deliberately present twice.
        'query2 0 ref1 2 60 2= * 0 0 TC XY',
    )
    samText = '\n'.join(samLines).replace(' ', '\t')
    with dataFile(samText) as path:
        samFilter = SAMFilter(path, storeQueryIds=True)
        # Exhaust the alignments so that all ids have been seen.
        for _ in samFilter.alignments():
            pass
        self.assertEqual({'query1', 'query2'}, samFilter.queryIds)
def testMinLength(self):
    """
    Passing a read filter built with minLength=6 must keep the
    6-nucleotide query and drop the 2-nucleotide one.
    """
    samLines = (
        '@SQ SN:ref1 LN:10',
        'query1 0 ref1 2 60 2=2X2M * 0 0 TCTAGG ZZZZZZ',
        'query2 0 ref1 2 60 2= * 0 0 TC ZZ',
    )
    samText = '\n'.join(samLines).replace(' ', '\t')
    with dataFile(samText) as path:
        readFilter = ReadFilter(minLength=6)
        samFilter = SAMFilter(path, filterRead=readFilter.filter)
        # Unpacking a single element also checks that exactly one
        # alignment was produced.
        [kept] = list(samFilter.alignments())
        self.assertEqual('query1', kept.query_name)
def testNoQuality(self):
    """
    An alignment whose quality string is * must come back from the
    filter with a quality value of C{None}.
    """
    samLines = (
        '@SQ SN:ref LN:10',
        'query1 4 * 0 0 6M * 0 0 TCTAGG *',
    )
    samText = '\n'.join(samLines).replace(' ', '\t')
    with dataFile(samText) as path:
        samFilter = SAMFilter(path)
        [found] = list(samFilter.alignments())
        self.assertEqual('query1', found.query_name)
        self.assertEqual('TCTAGG', found.query_sequence)
        self.assertIsNone(found.query_qualities)
def testMaxScore(self):
    """
    Filtering on a maximum alignment score must return only the
    alignments whose score does not exceed it, when just some of the
    alignments have scores.
    """
    samLines = (
        '@SQ SN:ref1 LN:10',
        'query1 0 ref1 2 60 2=2X2M * 0 0 TCTAGG ZZZZZZ AS:i:10',
        # query2 has no AS tag, i.e., no alignment score.
        'query2 0 ref1 2 60 2= * 0 0 TC ZZ',
        'query3 0 ref1 2 60 2=2X2M * 0 0 TCTAGG ZZZZZZ AS:i:3',
    )
    samText = '\n'.join(samLines).replace(' ', '\t')
    with dataFile(samText) as path:
        samFilter = SAMFilter(path, maxScore=6)
        # Unpacking a single element also checks that exactly one
        # alignment was produced.
        [kept] = list(samFilter.alignments())
        self.assertEqual('query3', kept.query_name)
def testKeepQualityControlFailures(self):
    """
    When keepQCFailures is set, an alignment flagged as a quality
    control failure must still be returned.
    """
    samLines = (
        '@SQ SN:ref1 LN:10',
        'query1 0 ref1 2 60 2=2X2M * 0 0 TCTAGG ZZZZZZ',
        # Flag 512 marks this record as failing quality control.
        'query2 512 ref1 4 60 2= * 0 0 TC ZZ',
    )
    samText = '\n'.join(samLines).replace(' ', '\t')
    with dataFile(samText) as path:
        samFilter = SAMFilter(path, keepQCFailures=True)
        # Unpacking two elements also checks that exactly two
        # alignments were produced.
        [first, second] = list(samFilter.alignments())
        self.assertEqual('query1', first.query_name)
        self.assertEqual('query2', second.query_name)
def testCloseButNoCIGAR(self):
    """
    If dropUnmapped is not given, an unmapped query that has no CIGAR
    string must come through the filter with its id, sequence and
    quality intact.
    """
    samLines = (
        '@SQ SN:ref LN:10',
        'query1 4 * 0 0 * * 0 0 TCTAGG ZZZZZZ',
    )
    samText = '\n'.join(samLines).replace(' ', '\t')
    with dataFile(samText) as path:
        samFilter = SAMFilter(path)
        [found] = list(samFilter.alignments())
        self.assertEqual('query1', found.query_name)
        self.assertEqual('TCTAGG', found.query_sequence)
        # Turn the numeric qualities back into their character form
        # (offset 33) so they can be compared with the input string.
        qualityString = ''.join(
            chr(quality + 33) for quality in found.query_qualities)
        self.assertEqual('ZZZZZZ', qualityString)
def testMinAndMaxScore(self):
    """
    Filtering on both a minimum and a maximum alignment score must
    return only the alignments whose score lies within those bounds
    (inclusive).
    """
    samLines = (
        '@SQ SN:ref1 LN:10',
        'query1 0 ref1 2 60 2=2X2M * 0 0 TCTAGG ZZZZZZ AS:i:10',
        'query2 0 ref1 2 60 2=2X2M * 0 0 TCTAGG ZZZZZZ AS:i:12',
        # query3 has no AS tag, i.e., no alignment score.
        'query3 0 ref1 2 60 2= * 0 0 TC ZZ',
        'query4 0 ref1 2 60 2=2X2M * 0 0 TCTAGG ZZZZZZ AS:i:3',
        'query5 0 ref1 2 60 2=2X2M * 0 0 TCTAGG ZZZZZZ AS:i:2',
    )
    samText = '\n'.join(samLines).replace(' ', '\t')
    with dataFile(samText) as path:
        samFilter = SAMFilter(path, minScore=3, maxScore=10)
        # Unpacking two elements also checks that exactly two
        # alignments were produced.
        [first, second] = list(samFilter.alignments())
        self.assertEqual('query1', first.query_name)
        self.assertEqual('query4', second.query_name)