Example #1
0
 def test_insert(self):
     # Each case pairs a variant of 'abc' carrying extra 'x' characters with
     # the distance compare() is expected to report: 1 for a single extra
     # character and 2 for two, at every position exercised below.
     expectations = (
         ('xabc', 1),
         ('axbc', 1),
         ('abxc', 1),
         ('abcx', 1),
         ('xxabc', 2),
         ('axxbc', 2),
         ('abxxc', 2),
         ('abcxx', 2),
         ('xabcx', 2),
     )
     # Plain loop (no subTest) so the first mismatch aborts the test.
     for longer, expected in expectations:
         self.assertEqual(compare('abc', longer), expected)
Example #2
0
 def test_delete(self):
     # (first argument, second argument, expected distance).
     # The first three cases drop one character from 'abc'; the last three
     # compare a single character against the full 'abc', two characters apart.
     table = (
         ('abc', 'ab', 1),
         ('abc', 'ac', 1),
         ('abc', 'bc', 1),
         ('a', 'abc', 2),
         ('b', 'abc', 2),
         ('c', 'abc', 2),
     )
     # Plain loop (no subTest) so the first mismatch aborts the test.
     for left, right, expected in table:
         self.assertEqual(compare(left, right), expected)
Example #3
0
 def test_replace(self):
     # Same-length variants of 'abc' with one or two characters replaced by
     # 'x', paired with the distance compare() is expected to report.
     expectations = (
         ('xbc', 1),
         ('axc', 1),
         ('abx', 1),
         ('xxc', 2),
         ('axx', 2),
         ('xbx', 2),
     )
     # Plain loop (no subTest) so the first mismatch aborts the test.
     for replaced, expected in expectations:
         self.assertEqual(compare('abc', replaced), expected)
Example #4
0
    def compress(self):
        """Merge each to-be-merged sequence into its closest templates.

        For every entry of ``self.tbmergedList``, the templates stored under
        the entry's first amplicon ID in ``self.templateNestedList`` are scored
        with ``compare``. Templates scoring exactly 1 take priority; when there
        are none, every other template whose score is not -1 is used instead.
        The entry's read count is divided equally among the chosen templates
        and added to both ``template[1][0]`` and ``template[1][1]``.

        Entries without any candidate are appended to ``self.leftoverList``
        when their read count reaches ``self.j3x_readDeletorThreshold`` and are
        recorded as discarded otherwise. The merge/discard counters on ``self``
        are updated along the way.

        Returns:
            The concatenation ``self.templateFlatList + self.leftoverList``.
        """
        for seq in tqdm(self.tbmergedList):
            reads = seq[1][0]
            # Only the first amplicon ID is used; the second value is ignored.
            amp_id, _ = extractAmpID(seq[1][2])

            self.numMergeAttempts += reads

            # Bucket the applicable templates by their score against this sequence.
            # NOTE(review): every score other than -1 and 1 -- including 0 --
            # lands in the distance-2 bucket; confirm that is intended.
            dist_one = []
            dist_two = []
            for template in self.templateNestedList[amp_id]:
                dist = compare(seq[0], template[0])
                if dist == -1:  # -1 is treated as "not a merge candidate"
                    continue
                bucket = dist_one if dist == 1 else dist_two
                bucket.append(template)

            candidate_total = len(dist_one) + len(dist_two)

            if candidate_total == 0:
                # Nothing to merge with: keep sequences with enough reads,
                # discard the rest.
                if reads >= self.j3x_readDeletorThreshold:
                    self.leftoverList.append(seq)
                else:
                    self.discardedList.append(seq)
                    self.discardCountList[amp_id] += 1
                    self.failedMergeAndDiscarded += 1
                continue

            self.mergedCount += reads
            if candidate_total > 1:
                # More than one possible target, so the merge is ambiguous.
                self.mergedUnsureCount += reads

            # Distance-1 templates win over distance-2 templates.
            if dist_one:
                targets = dist_one
                self.mergedD1Count += reads
            else:
                targets = dist_two
                self.mergedD2Count += reads

            # Share the read count equally among the equally similar templates.
            share = reads / len(targets)
            for template in targets:
                template[1][0] += share  # total read count
                template[1][1] += share  # read count gained through merges

        # The reinforced templates plus the unmerged leftovers form the j3x set.
        return self.templateFlatList + self.leftoverList
Example #5
0
 def test_damerau_levenshtein(self):
     # Compare every ordered pair drawn from PATTERNS, passing True as the
     # third argument, against the matching MATRIX_DL[row][col] entry.
     for row, left in enumerate(PATTERNS):
         for col, right in enumerate(PATTERNS):
             actual = compare(left, right, True)
             expected = MATRIX_DL[row][col]
             self.assertEqual(actual, expected)
Example #6
0
 def test_beyond(self):
     # 'abc' and 'def' differ in all three positions; compare() is expected
     # to report -1 for this pair.
     distance = compare('abc', 'def')
     self.assertEqual(distance, -1)
Example #7
0
 def test_emptystr(self):
     # (first argument, second argument, expected result) with an empty
     # string on at least one side. Up to two characters yields the length
     # of the other string; three characters is expected to yield -1.
     table = (
         ('', '', 0),
         ('', 'a', 1),
         ('', 'ab', 2),
         ('', 'abc', -1),
         ('abc', '', -1),
     )
     # Plain loop (no subTest) so the first mismatch aborts the test.
     for left, right, expected in table:
         self.assertEqual(compare(left, right), expected)
Example #8
0
 def test_transpose(self):
     # Rearranged (and, in the last two cases, shortened) variants of 'abc'
     # with the result expected when True is passed as the third argument
     # (presumably the switch that counts transpositions -- confirm
     # against compare()'s definition).
     expectations = (
         ('bac', 1),
         ('acb', 1),
         ('cba', 2),
         ('ba', 2),
         ('ca', 2),
     )
     # Plain loop (no subTest) so the first mismatch aborts the test.
     for shuffled, expected in expectations:
         self.assertEqual(compare('abc', shuffled, True), expected)
Example #9
0
 def test_equal(self):
     # Identical strings are expected to give a result of 0.
     distance = compare('abc', 'abc')
     self.assertEqual(distance, 0)
Example #10
0
 def test_insert_delete(self):
     # (first argument, second argument, expected result). Each second
     # string has the same length as the first with one character moved
     # (or, for 'ababa', the whole string shifted by one); all are
     # expected to give 2.
     table = (
         ('abcde', 'eabcd', 2),
         ('abcde', 'acdeb', 2),
         ('abcde', 'abdec', 2),
         ('ababa', 'babab', 2),
     )
     # Plain loop (no subTest) so the first mismatch aborts the test.
     for left, right, expected in table:
         self.assertEqual(compare(left, right), expected)