Ejemplo n.º 1
0
    def testMinScoreNoScores(self):
        """
        Filtering on a minimum alignment score must yield nothing when
        none of the alignments in the input carries a score.
        """
        # Spaces stand in for the TAB field separators and are swapped below.
        samLines = (
            '@SQ SN:ref1 LN:10',
            'query1 0 ref1 2 60 2=2X2M * 0 0 TCTAGG ZZZZZZ',
            'query2 0 ref1 2 60 2= * 0 0 TC ZZ',
        )
        data = '\n'.join(samLines).replace(' ', '\t')

        with dataFile(data) as filename:
            samFilter = SAMFilter(filename, minScore=6)
            found = list(samFilter.alignments())
            self.assertEqual([], found)
Ejemplo n.º 2
0
    def testDropSecondary(self):
        """
        With dropSecondary set, an alignment flagged as secondary (SAM
        flag 256) must be omitted and only the primary one returned.
        """
        # Spaces stand in for the TAB field separators and are swapped below.
        samLines = (
            '@SQ SN:ref1 LN:10',
            'query1 0 ref1 2 60 2=2X2M * 0 0 TCTAGG ZZZZZZ',
            'query2 256 ref1 2 60 2= * 0 0 TC ZZ',
        )
        data = '\n'.join(samLines).replace(' ', '\t')

        with dataFile(data) as filename:
            samFilter = SAMFilter(filename, dropSecondary=True)
            # The unpacking itself checks that exactly one alignment remains.
            [kept] = list(samFilter.alignments())
            self.assertEqual('query1', kept.query_name)
Ejemplo n.º 3
0
    def testAlignmentCount(self):
        """
        Once every alignment has been consumed, the filter's alignmentCount
        attribute must equal the number of alignments in the input.
        """
        # Spaces stand in for the TAB field separators and are swapped below.
        samLines = (
            '@SQ SN:ref1 LN:10',
            'query1 0 ref1 2 60 2=2X2M * 0 0 TCTAGG 123456',
            'query2 0 ref1 2 60 2= * 0 0 TC XY',
        )
        data = '\n'.join(samLines).replace(' ', '\t')

        with dataFile(data) as filename:
            samFilter = SAMFilter(filename)
            # Exhaust the iterator; only the resulting count is of interest.
            for _ in samFilter.alignments():
                pass
            self.assertEqual(2, samFilter.alignmentCount)
Ejemplo n.º 4
0
    def testStoreQueryIds(self):
        """
        When storeQueryIds is requested, the filter must record the set of
        query ids it has seen (a repeated id appears only once).
        """
        # Spaces stand in for the TAB field separators and are swapped below.
        # query2 is deliberately present twice.
        samLines = (
            '@SQ SN:ref1 LN:10',
            'query1 0 ref1 2 60 2=2X2M * 0 0 TCTAGG 123456',
            'query2 0 ref1 2 60 2= * 0 0 TC XY',
            'query2 0 ref1 2 60 2= * 0 0 TC XY',
        )
        data = '\n'.join(samLines).replace(' ', '\t')
        expectedIds = {'query1', 'query2'}

        with dataFile(data) as filename:
            samFilter = SAMFilter(filename, storeQueryIds=True)
            # Exhaust the iterator so that all ids have been collected.
            for _ in samFilter.alignments():
                pass
            self.assertEqual(expectedIds, samFilter.queryIds)
Ejemplo n.º 5
0
    def testMinLength(self):
        """
        Passing a read filter with a minimum length must keep only the
        alignments whose read is at least that long.
        """
        # Spaces stand in for the TAB field separators and are swapped below.
        # query1 has a 6-base sequence, query2 only 2 bases.
        samLines = (
            '@SQ SN:ref1 LN:10',
            'query1 0 ref1 2 60 2=2X2M * 0 0 TCTAGG ZZZZZZ',
            'query2 0 ref1 2 60 2= * 0 0 TC ZZ',
        )
        data = '\n'.join(samLines).replace(' ', '\t')

        with dataFile(data) as filename:
            samFilter = SAMFilter(
                filename, filterRead=ReadFilter(minLength=6).filter)
            # The unpacking itself checks that exactly one alignment remains.
            [kept] = list(samFilter.alignments())
            self.assertEqual('query1', kept.query_name)
Ejemplo n.º 6
0
    def testNoQuality(self):
        """
        An alignment whose quality field is * must come back from the
        filter with a C{None} quality value.
        """
        # Spaces stand in for the TAB field separators and are swapped below.
        samLines = (
            '@SQ SN:ref LN:10',
            'query1 4 * 0 0 6M * 0 0 TCTAGG *',
        )
        data = '\n'.join(samLines).replace(' ', '\t')

        with dataFile(data) as filename:
            samFilter = SAMFilter(filename)
            # The unpacking itself checks that exactly one alignment exists.
            [result] = list(samFilter.alignments())
            self.assertEqual('query1', result.query_name)
            self.assertEqual('TCTAGG', result.query_sequence)
            self.assertIsNone(result.query_qualities)
Ejemplo n.º 7
0
    def testMaxScore(self):
        """
        Filtering on a maximum alignment score must return only the
        alignments that have a score not exceeding that maximum.
        """
        # Spaces stand in for the TAB field separators and are swapped below.
        # Scores (AS tags): query1 = 10, query2 = none, query3 = 3.
        samLines = (
            '@SQ SN:ref1 LN:10',
            'query1 0 ref1 2 60 2=2X2M * 0 0 TCTAGG ZZZZZZ AS:i:10',
            'query2 0 ref1 2 60 2= * 0 0 TC ZZ',
            'query3 0 ref1 2 60 2=2X2M * 0 0 TCTAGG ZZZZZZ AS:i:3',
        )
        data = '\n'.join(samLines).replace(' ', '\t')

        with dataFile(data) as filename:
            samFilter = SAMFilter(filename, maxScore=6)
            # The unpacking itself checks that exactly one alignment remains.
            [kept] = list(samFilter.alignments())
            self.assertEqual('query3', kept.query_name)
Ejemplo n.º 8
0
    def testKeepQualityControlFailures(self):
        """
        With keepQCFailures set, an alignment flagged as a quality control
        failure (SAM flag 512) must be returned along with the others.
        """
        # Spaces stand in for the TAB field separators and are swapped below.
        samLines = (
            '@SQ SN:ref1 LN:10',
            'query1 0 ref1 2 60 2=2X2M * 0 0 TCTAGG ZZZZZZ',
            'query2 512 ref1 4 60 2= * 0 0 TC ZZ',
        )
        data = '\n'.join(samLines).replace(' ', '\t')

        with dataFile(data) as filename:
            samFilter = SAMFilter(filename, keepQCFailures=True)
            # The unpacking itself checks that exactly two alignments exist.
            [first, second] = list(samFilter.alignments())
            self.assertEqual('query1', first.query_name)
            self.assertEqual('query2', second.query_name)
Ejemplo n.º 9
0
    def testCloseButNoCIGAR(self):
        """
        When dropUnmapped is not given, an unmapped query that has no CIGAR
        string must come through the filter with its name, sequence and
        qualities intact.
        """
        # Spaces stand in for the TAB field separators and are swapped below.
        samLines = (
            '@SQ SN:ref LN:10',
            'query1 4 * 0 0 * * 0 0 TCTAGG ZZZZZZ',
        )
        data = '\n'.join(samLines).replace(' ', '\t')

        with dataFile(data) as filename:
            samFilter = SAMFilter(filename)
            # The unpacking itself checks that exactly one alignment exists.
            [result] = list(samFilter.alignments())
            self.assertEqual('query1', result.query_name)
            self.assertEqual('TCTAGG', result.query_sequence)
            # Adding 33 to each numeric quality and converting to a
            # character must reproduce the quality string from the input.
            qualityString = ''.join(
                chr(quality + 33) for quality in result.query_qualities)
            self.assertEqual('ZZZZZZ', qualityString)
Ejemplo n.º 10
0
    def testMinAndMaxScore(self):
        """
        Filtering on both a minimum and a maximum alignment score must
        return only the alignments whose score lies within those bounds
        (the bounds themselves included).
        """
        # Spaces stand in for the TAB field separators and are swapped below.
        # Scores (AS tags): query1 = 10, query2 = 12, query3 = none,
        # query4 = 3, query5 = 2.
        samLines = (
            '@SQ SN:ref1 LN:10',
            'query1 0 ref1 2 60 2=2X2M * 0 0 TCTAGG ZZZZZZ AS:i:10',
            'query2 0 ref1 2 60 2=2X2M * 0 0 TCTAGG ZZZZZZ AS:i:12',
            'query3 0 ref1 2 60 2= * 0 0 TC ZZ',
            'query4 0 ref1 2 60 2=2X2M * 0 0 TCTAGG ZZZZZZ AS:i:3',
            'query5 0 ref1 2 60 2=2X2M * 0 0 TCTAGG ZZZZZZ AS:i:2',
        )
        data = '\n'.join(samLines).replace(' ', '\t')

        with dataFile(data) as filename:
            samFilter = SAMFilter(filename, minScore=3, maxScore=10)
            # The unpacking itself checks that exactly two alignments remain.
            [first, second] = list(samFilter.alignments())
            self.assertEqual('query1', first.query_name)
            self.assertEqual('query4', second.query_name)