コード例 #1
0
ファイル: test_cigar.py プロジェクト: TaliVeith/dark-matter
 def testMatchTwoAfter(self):
     """
     A soft-clipped base lying two positions past a matched position must
     be given an offset two beyond that position's reference offset.
     """
     # One matched base at reference offset 10, followed by two
     # soft-clipped bases that have no reference offset of their own.
     pairs = ((0, 10), (1, None), (2, None))
     operations = (CMATCH, CSOFT_CLIP, CSOFT_CLIP)
     offset = softClippedOffset(2, pairs, operations)
     self.assertEqual(12, offset)
コード例 #2
0
ファイル: test_cigar.py プロジェクト: TaliVeith/dark-matter
 def testMatchTwoBefore(self):
     """
     A soft-clipped base lying two positions ahead of a matched position
     must be given an offset two below that position's reference offset.
     """
     # Two soft-clipped bases with no reference offset, followed by one
     # matched base at reference offset 10.
     pairs = ((0, None), (1, None), (2, 10))
     operations = (CSOFT_CLIP, CSOFT_CLIP, CMATCH)
     offset = softClippedOffset(0, pairs, operations)
     self.assertEqual(8, offset)
コード例 #3
0
ファイル: test_cigar.py プロジェクト: TaliVeith/dark-matter
 def testMatchOneAfter(self):
     """
     A soft-clipped base immediately following a matched position must be
     given an offset one beyond that position's reference offset.
     """
     # One matched base at reference offset 10, then a single
     # soft-clipped base with no reference offset.
     pairs = ((0, 10), (1, None))
     operations = (CMATCH, CSOFT_CLIP)
     offset = softClippedOffset(1, pairs, operations)
     self.assertEqual(11, offset)
コード例 #4
0
ファイル: test_cigar.py プロジェクト: TaliVeith/dark-matter
 def testMatchTwoAfterThenHardClips(self):
     """
     A soft-clipped base lying two positions past a matched position must
     be given an offset two beyond that position's reference offset, even
     when hard-clipped positions follow the soft-clipped ones.
     """
     # One matched base at reference offset 10, two soft-clipped bases,
     # then two trailing hard-clipped positions.
     pairs = ((0, 10), (1, None), (2, None), (3, None), (4, None))
     operations = (CMATCH, CSOFT_CLIP, CSOFT_CLIP, CHARD_CLIP, CHARD_CLIP)
     offset = softClippedOffset(2, pairs, operations)
     self.assertEqual(12, offset)
コード例 #5
0
ファイル: consensus.py プロジェクト: TaliVeith/dark-matter
def addPairsInfo(pairs, cigarOperations, query, qualities, referenceLength,
                 includeSoftClipped, correspondences, deletions, insertions):
    """
    Record what one aligned read contributes at each aligned position.

    C{pairs} and C{cigarOperations} are walked in step, and each position is
    recorded as a deletion, an insertion, a soft-clipped base, or a
    query/reference correspondence. Nothing is returned; the results are
    written into C{correspondences}, C{deletions} and C{insertions}.

    @param pairs: A sequence of 2-C{tuple}s of (query offset, reference
        offset). One member of a tuple (never both) may be C{None}.
    @param cigarOperations: A sequence of CIGAR operations, one for each
        element of C{pairs} (the two must have the same length).
    @param query: The query sequence, indexed by query offset to obtain a
        base.
    @param qualities: The quality scores, indexed by query offset.
    @param referenceLength: Not used by this function.
    @param includeSoftClipped: If true, soft-clipped bases are added to
        C{correspondences} at the offset given by C{softClippedOffset}.
        Otherwise soft-clipped bases are ignored.
    @param correspondences: A mapping from reference offset to an object
        whose C{append(base, quality)} method is called for each base placed
        at that offset. Note the two-argument C{append}: the values are not
        plain C{list}s.
    @param deletions: A mapping from reference offset to a count, incremented
        once for each deleted reference position. Because C{+= 1} is used,
        the mapping must provide a default for missing keys (presumably a
        C{Counter} or C{defaultdict(int)} -- confirm against the caller).
    @param insertions: A mapping from offset to C{Insertion} instance. An
        entry is created here when an insertion is first seen at an offset.
    @raise AssertionError: If the lengths of C{pairs} and C{cigarOperations}
        differ, if a pair is C{(None, None)}, or if a CIGAR operation is
        inconsistent with the C{None} pattern of its pair.
    """
    assert len(pairs) == len(cigarOperations)
    assert not any(pair == (None, None) for pair in pairs)

    # True while consecutive CINS positions are being processed, so that an
    # insertion is only started once per run.
    insideInsertion = False

    for index, (pair, operation) in enumerate(zip(pairs, cigarOperations)):
        queryOffset, referenceOffset = pair

        if queryOffset is None:
            # The reference has a position that the query lacks: a deletion.
            assert operation == CDEL
            assert referenceOffset is not None
            deletions[referenceOffset] += 1
            insideInsertion = False
            continue

        if referenceOffset is not None:
            # Both offsets are present: the query base aligns to the
            # reference.
            assert operation in CONSUMES_REFERENCE
            insideInsertion = False
            nucleotide = query[queryOffset]
            score = qualities[queryOffset]
            correspondences[referenceOffset].append(nucleotide, score)
            continue

        # Only the query offset is present: either an insertion or a
        # soft-clipped base.
        nucleotide = query[queryOffset]
        score = qualities[queryOffset]

        if operation != CINS:
            assert operation == CSOFT_CLIP
            insideInsertion = False
            if includeSoftClipped:
                clippedOffset = softClippedOffset(index, pairs,
                                                  cigarOperations)
                correspondences[clippedOffset].append(nucleotide, score)
            continue

        # The query has an insertion relative to the reference. (Arguably a
        # CIGAR string should not begin with an insertion but with
        # soft-clipped bases; that is not enforced here.)
        foundBehind, insertAt = insertionOffset(index, pairs,
                                                cigarOperations)
        if not insideInsertion:
            insideInsertion = True
            if insertAt not in insertions:
                insertions[insertAt] = Insertion(insertAt)
            # The flag returned by insertionOffset decides whether start()
            # is given the offset or None.
            anchor = insertAt if foundBehind else None
            insertions[insertAt].start(anchor)

        insertions[insertAt].append(nucleotide, score)