Beispiel #1
0
    def testLeftInsert(self):
        """
        A query that starts under two leading gap ('-') columns of the
        reference must yield a two-base insertion when edge insertions are
        explicitly permitted, and a two-base soft clip otherwise.
        """
        reference = '--ACGT  '
        query = 'ATAC'

        # Edge insertions explicitly allowed: the two bases under the gap
        # columns are reported as an insertion.
        withInsertions = makeCigar(reference, query, noEdgeInsertions=False)
        self.assertEqual('2I2M', withInsertions)

        # Without the noEdgeInsertions argument, the same two leading bases
        # are expected to be soft-clipped instead.
        self.assertEqual('2S2M', makeCigar(reference, query))
Beispiel #2
0
 def testRightClippingPaddedOne(self):
     """
     With a reference that has one column of padding on its right, a query
     that extends one base past the reference must have that single base
     soft-clipped.
     """
     reference = '  ACGT '
     query = '    ATA'
     self.assertEqual('2M1S', makeCigar(reference, query))
Beispiel #3
0
 def testRightClippingPaddedTwo(self):
     """
     With a reference that has two columns of padding on its right, a query
     that extends two bases past the reference must have those two bases
     soft-clipped.
     """
     reference = '  ACGT  '
     query = '    ATAA'
     self.assertEqual('2M2S', makeCigar(reference, query))
Beispiel #4
0
 def testEqualStrings(self):
     """
     When the query covers exactly the same columns as the reference, the
     CIGAR must be a single 'M' run whose length is the sequence length
     (here the bases differ, but every column is still an alignment match).
     """
     reference = 'ACGT'
     query = 'ATGG'
     self.assertEqual('4M', makeCigar(reference, query))
Beispiel #5
0
 def testRightClippingUnpaddedTwo(self):
     """
     When the reference string is shorter than the query string (it has no
     right padding), the two query bases that overhang the end of the
     reference must still be soft-clipped.
     """
     reference = '  ACGT'
     query = '    ATAA'
     self.assertEqual('2M2S', makeCigar(reference, query))
Beispiel #6
0
 def testTwoBasesFinalMatch(self):
     """
     A two-base query aligned with the last two columns of the reference
     must produce '2M'.
     """
     reference = 'ACGT'
     query = '  AT'
     self.assertEqual('2M', makeCigar(reference, query))
Beispiel #7
0
 def testOneBaseFinalMatch(self):
     """
     A one-base query aligned with the final column of the reference must
     produce '1M'.
     """
     reference = 'ACGT'
     query = '   A'
     self.assertEqual('1M', makeCigar(reference, query))
Beispiel #8
0
 def testLeftClippingTwo(self):
     """
     A query whose first two bases fall under the reference's two columns
     of left padding must have those two bases soft-clipped.
     """
     reference = '  ACGT'
     query = 'TCA'
     self.assertEqual('2S1M', makeCigar(reference, query))
Beispiel #9
0
 def testLeftClippingOne(self):
     """
     A query whose first base falls under the reference's single column of
     left padding must have that one base soft-clipped.
     """
     reference = ' ACGT'
     query = 'TAT'
     self.assertEqual('1S2M', makeCigar(reference, query))
Beispiel #10
0
def makeBAM(template, bamReferences=None, fastaReferences=None):
    """
    Build a small sorted, indexed BAM file and a FASTA reference file from a
    template of aligned strings. This is a generator that yields once and
    removes its temporary directory when it is resumed or closed (it is
    presumably meant to be used as a context manager). Note that this code
    invokes samtools.

    @param template: A sequence of C{str} (it must support C{len} and
        indexing). The first string is the reference; any that follow are
        taken in pairs as a read and its quality string. Reads and quality
        strings can be indented with spaces to show where the read aligns
        with the reference. '-' characters are dropped from the reference
        sequence, and read positions holding '-' are dropped (with their
        quality characters) from the SAM SEQ and QUAL fields.
    @param bamReferences: If C{None}, a single C{DNARead} with id C{REF_ID}
        and the template's reference sequence is used. Otherwise, an
        indexable collection of reads, the first of which must have the same
        sequence as the template reference. One C{@SQ} header line is written
        per reference, and every read is aligned against the first one.
    @param fastaReferences: The reads to save to the FASTA file. If C{None},
        the BAM references are saved.
    @raise ValueError: If C{template} does not contain an odd number of
        strings.
    @return: A generator yielding a single 2-tuple containing the C{Path} of
        the FASTA reference file and the C{Path} of the BAM file.
    """
    if len(template) % 2 != 1:
        raise ValueError(
            'The template must have an odd number of strings, specifying the '
            'reference sequence, then zero or more read/quality pairs.')

    paddedReference = template[0]
    referenceSequence = paddedReference.lstrip().replace('-', '')

    if bamReferences is not None:
        primaryReference = bamReferences[0]
        # Sanity check: the first BAM reference has to agree with the
        # reference given in the template.
        assert primaryReference.sequence == referenceSequence
        bamReferences = Reads(bamReferences)
    else:
        primaryReference = DNARead(REF_ID, referenceSequence)
        bamReferences = Reads([primaryReference])

    if fastaReferences is None:
        fastaReferences = Reads(bamReferences)
    else:
        fastaReferences = Reads(fastaReferences)

    # The template length is odd, so this is the number of read/quality
    # pairs that follow the reference.
    pairCount = (len(template) - 1) // 2
    dirname = mkdtemp(prefix='test-consensus-')
    e = Executor()

    try:
        workDir = Path(dirname)
        fastaFile = workDir / 'references.fasta'
        samFile = workDir / 'file.sam'
        bamFile = workDir / 'file.bam'

        fastaReferences.save(fastaFile)

        with open(samFile, 'w') as fp:
            # SAM header: one @SQ line for each BAM reference.
            for reference in bamReferences:
                print(f'@SQ\tSN:{reference.id}\tLN:{len(reference)}', file=fp)

            for count in range(pairCount):
                queryIndex = 2 * count + 1
                paddedQuery = template[queryIndex].rstrip()
                paddedQuality = template[queryIndex + 1].rstrip()
                assert len(paddedQuery) == len(paddedQuality)

                # Keep only the (base, quality) pairs whose base is not a
                # gap character.
                kept = [
                    (base, score)
                    for base, score in zip(paddedQuery.lstrip(),
                                           paddedQuality.lstrip())
                    if base != '-'
                ]
                sequence = ''.join(base for base, _ in kept)
                qualities = ''.join(score for _, score in kept)

                # SAM positions are 1-based, hence the + 1 (matchOffset
                # presumably returns a 0-based offset).
                position = matchOffset(paddedReference, paddedQuery) + 1
                cigar = makeCigar(paddedReference, paddedQuery)

                print(
                    f'read{count}',  # QNAME (query name)
                    0,  # FLAGS
                    primaryReference.id,  # RNAME (reference name)
                    position,  # POS
                    30,  # MAPQ (mapping quality)
                    cigar,  # CIGAR
                    '*',  # MRNM (mate reference name)
                    0,  # MPOS (mate position)
                    0,  # ISIZE (insert size)
                    sequence,  # SEQ
                    qualities,  # QUAL
                    sep='\t', file=fp)

        sortCommand = (
            f'samtools sort -O BAM --write-index -o {str(bamFile)!r} '
            f'{str(samFile)!r}')
        e.execute(sortCommand)
        yield (fastaFile, bamFile)
    finally:
        # Always remove the temporary directory, even if something failed.
        e.execute(f'rm -fr {dirname!r}')