def multiPatternMatching(sequence, patterns):
    """Yield a suffix-array entry for every hit of every pattern.

    The Burrows-Wheeler transform and the suffix array of ``sequence`` are
    built once.  ``findPatterns(bwt, patterns)`` is then iterated; each item
    it produces is itself iterated, and every element is used as an index
    into the suffix array.  The looked-up entries are yielded in that order.

    Presumably the yielded values are start positions within ``sequence``;
    confirm against ``buildSuffixArray``.
    """
    transformed = burrowWheelerTransform(sequence)
    suffixes = buildSuffixArray(sequence)
    # Flatten the per-match index collections lazily into one stream.
    rows = (row
            for hits in findPatterns(transformed, patterns)
            for row in hits)
    for row in rows:
        yield suffixes[row]
def patternMatchWithMismatches(sequence, patterns, maxMismatches):
    """Yield positions where a pattern matches within ``maxMismatches``.

    For each pattern, ``findSeedPositions2`` supplies ``(offset, indexes)``
    pairs.  Every index is looked up in the suffix array and shifted back by
    its offset, and the resulting candidate positions are tallied.  Only
    candidates tallied more than once are kept; each of those is checked
    with ``isSimilar`` against the equally long slice of ``sequence`` and
    yielded when the check passes.
    """
    bwt = burrowWheelerTransform(sequence)
    suffixArray = buildSuffixArray(sequence)
    _, lastToFirst = firstLast(bwt)
    for pattern in patterns:
        # Materialise all seeds first, before any suffix-array lookups.
        seeds = list(findSeedPositions2(bwt, lastToFirst, pattern,
                                        maxMismatches))
        shifted = []
        for offset, rows in seeds:
            for row in rows:
                shifted.append(suffixArray[row] - offset)
        votes = Counter(shifted)

        # most_common() is sorted by descending count, so the first
        # candidate with a count of at most 1 ends the useful prefix.
        # NOTE(review): single-vote candidates are never verified; confirm
        # this threshold is intended for the seeds findSeedPositions2 emits.
        confirmed = []
        for candidate, count in votes.most_common():
            if not count > 1:
                break
            confirmed.append(candidate)

        width = len(pattern)
        for start in confirmed:
            # NOTE(review): ``start`` could be negative if an offset exceeds
            # the suffix-array entry; verify how isSimilar treats that slice.
            if isSimilar(sequence[start:start + width], pattern,
                         maxMismatches):
                yield start
def patternMatchWithMismatches(sequence, patterns, maxMismatches):
    """Yield positions where a pattern matches within ``maxMismatches``.

    For each pattern, ``findSeedPositions2`` supplies ``(offset, indexes)``
    pairs.  Every index is looked up in the suffix array and shifted back by
    its offset, and the resulting candidate positions are tallied.  Only
    candidates tallied more than once are kept; each of those is checked
    with ``isSimilar`` against the equally long slice of ``sequence`` and
    yielded when the check passes.
    """
    bwt = burrowWheelerTransform(sequence)
    suffixArray = buildSuffixArray(sequence)
    _, lastToFirst = firstLast(bwt)
    for pattern in patterns:
        # Materialise all seeds first, before any suffix-array lookups.
        seeds = list(findSeedPositions2(bwt, lastToFirst, pattern,
                                        maxMismatches))
        shifted = []
        for offset, rows in seeds:
            for row in rows:
                shifted.append(suffixArray[row] - offset)
        votes = Counter(shifted)

        # most_common() is sorted by descending count, so the first
        # candidate with a count of at most 1 ends the useful prefix.
        # NOTE(review): single-vote candidates are never verified; confirm
        # this threshold is intended for the seeds findSeedPositions2 emits.
        confirmed = []
        for candidate, count in votes.most_common():
            if not count > 1:
                break
            confirmed.append(candidate)

        width = len(pattern)
        for start in confirmed:
            # NOTE(review): ``start`` could be negative if an offset exceeds
            # the suffix-array entry; verify how isSimilar treats that slice.
            if isSimilar(sequence[start:start + width], pattern,
                         maxMismatches):
                yield start
def partialSuffixArray(sequence, freq):
    """Return the suffix-array entries that are multiples of ``freq``.

    The result is a list of ``(index, entry)`` tuples, where ``index`` is
    the entry's position in ``buildSuffixArray(sequence)`` and ``entry`` is
    the value stored there.  Only entries with ``entry % freq == 0`` are
    kept, in suffix-array order.
    """
    sampled = []
    for rank, entry in enumerate(buildSuffixArray(sequence)):
        if entry % freq != 0:
            continue
        sampled.append((rank, entry))
    return sampled
def testBuildSuffixArray(self):
    """Build the suffix array of the first input sequence and record it.

    Reads 'data/suffix/buildsuffixarray.txt', takes its first line
    (stripped) as the sequence, and joins the items returned by
    ``buildSuffixArray`` with ', '.  The resulting line is printed to
    stdout and also written to 'data/suffix/buildsuffixarray.out'.

    Raises IndexError if the input file has no lines.
    """
    with open('data/suffix/buildsuffixarray.txt') as fp:
        with open('data/suffix/buildsuffixarray.out', 'w') as output:
            sequences = [x.strip() for x in fp.readlines()]
            result = ', '.join(
                [str(x) for x in buildSuffixArray(sequences[0])])
            # Single-argument parenthesised print behaves exactly like the
            # print statement on Python 2 and is also valid on Python 3.
            print(result)
            # The output file used to be opened (and truncated) but never
            # written to; persist the same line that goes to stdout.
            output.write(result + '\n')
def partialSuffixArray(sequence, freq):
    """Return the suffix-array entries that are multiples of ``freq``.

    The result is a list of ``(index, entry)`` tuples, where ``index`` is
    the entry's position in ``buildSuffixArray(sequence)`` and ``entry`` is
    the value stored there.  Only entries with ``entry % freq == 0`` are
    kept, in suffix-array order.
    """
    sampled = []
    for rank, entry in enumerate(buildSuffixArray(sequence)):
        if entry % freq != 0:
            continue
        sampled.append((rank, entry))
    return sampled
def testBuildSuffixArray(self):
    """Build the suffix array of the first input sequence and record it.

    Reads 'data/suffix/buildsuffixarray.txt', takes its first line
    (stripped) as the sequence, and joins the items returned by
    ``buildSuffixArray`` with ', '.  The resulting line is printed to
    stdout and also written to 'data/suffix/buildsuffixarray.out'.

    Raises IndexError if the input file has no lines.
    """
    with open('data/suffix/buildsuffixarray.txt') as fp:
        with open('data/suffix/buildsuffixarray.out', 'w') as output:
            sequences = [x.strip() for x in fp.readlines()]
            result = ', '.join(
                [str(x) for x in buildSuffixArray(sequences[0])])
            # Single-argument parenthesised print behaves exactly like the
            # print statement on Python 2 and is also valid on Python 3.
            print(result)
            # The output file used to be opened (and truncated) but never
            # written to; persist the same line that goes to stdout.
            output.write(result + '\n')