예제 #1
0
파일: test.py 프로젝트: sethjust/bwt_markov
  def test_list_built(self):
    """Build a BWT from the list [1, 2] and check its table, transform, C and F."""
    transformer = BWT([1, 2])

    # The first row of the table is expected to start with the END marker.
    first_row = transformer.table[0]
    self.assertEqual(first_row, [BWT.END, 2, 1])

    # The transformed sequence for [1, 2] is expected to end with END.
    transformed = transformer.transform()
    self.assertEqual(transformed, [1, 2, BWT.END])

    # Author's note: there is one 1 in the string.
    self.assertEqual(transformer.C(1), 1)
    # Author's note: there is one 1 within the last one character.
    self.assertEqual(transformer.F(1, 1), 1)
예제 #2
0
파일: test.py 프로젝트: sethjust/bwt_markov
  def test_alignment(self):
    """Search for "ana" in a BWT built over "banana" and check L, U and the hits."""
    index = BWT(list("banana"))
    # Debug aid (left disabled): index.print_table()

    # A plain str is fine as the query: it supports random access and slicing.
    query = "ana"

    self.assertEqual(index.L(query), 3)
    self.assertEqual(index.U(query), 4)

    # Each reported index is the index of the last token in a match; the
    # result is sorted before comparing.
    hits = sorted(index.get_start_indices(query))
    self.assertEqual(hits, [3, 5])
예제 #3
0
class MarkovModel:
  """Looks up the tokens that follow a given context in a source sequence.

  The source sequence is kept in ``self.string`` and indexed by a ``BWT``
  instance stored in ``self.bwt``.
  """

  def __init__(self, string):
    """Store the source sequence and build a BWT over it."""
    self.string = string
    self.bwt = BWT(self.string)

  def get_n_tokens(self, context, n):
    """Return up to n tokens following one randomly chosen match of context.

    ``context`` is handed to ``self.bwt.get_start_indices``. When that call
    yields an empty list, None is returned; otherwise one index is picked
    with ``choice`` and the tokens after it are returned.
    """
    candidates = self.bwt.get_start_indices(context)

    if candidates == []:
      return None

    picked = choice(candidates)
    return self.get_n_gram_at_index(n, picked)

  def get_n_gram_at_index(self, n, index):
    """Return the slice of up to n tokens that starts right after index.

    The slice end is capped at ``len(self.string) - 1``, so the final
    element of the sequence is never included.
    NOTE(review): presumably the last element is a terminator -- confirm.
    """
    start = index + 1
    last_allowed = len(self.string) - 1
    stop = min(start + n, last_allowed)
    return self.string[start:stop]

  def get_all_possible_n_grams(self, context, n):
    """Return a generator of the n-grams following every match of context.

    The match indices are looked up immediately; the n-grams themselves are
    produced lazily as the generator is consumed.
    """
    candidates = self.bwt.get_start_indices(context)
    return (self.get_n_gram_at_index(n, position) for position in candidates)
예제 #4
0
class ApproximateMatcher:
    """Finds matches of a pattern in a target string with up to d mismatches.

    Seeds taken from the pattern are looked up exactly through a BWT built
    over the target, and the resulting candidates are then filtered.
    """

    def __init__(self, target):
        """Append '$' to target and build a BWT over the resulting text."""
        self._text = target + '$'
        self._bwt = BWT(self._text)

    def get_matches(self, pattern, d):
        """Return indices in the target that contain matches of pattern.

        Matches may have up to d mismatches. The result is whatever
        ``SeedChecker.filter_candidates`` returns for the stored text.
        """
        # The checker object both produces the seeds and collects candidates.
        checker = SeedChecker(pattern, d)

        # Look up every seed k-mer of the pattern exactly via the BWT and
        # register the hits as candidate approximate matches.
        for kmer, kmer_offset in checker.enumerate():
            exact_hits = self._bwt.get_matches(kmer)
            checker.add_candidates(exact_hits, kmer_offset)

        # Keep only candidates the checker verifies against the full text.
        return checker.filter_candidates(self._text)
예제 #5
0
class ApproximateMatcher:
    """Approximate pattern matcher using seed lookups against a BWT.

    The target text is stored with a trailing "$" and indexed once at
    construction time.
    """

    def __init__(self, target):
        """Store target with "$" appended and build the BWT over that text."""
        self._text = target + "$"
        self._bwt = BWT(self._text)

    def get_matches(self, pattern, d):
        """Return indices in the target matching pattern with up to d mismatches.

        Each seed from ``SeedChecker.enumerate`` is matched exactly with the
        BWT; the collected candidates are then filtered by the checker.
        """
        verifier = SeedChecker(pattern, d)

        for seed_text, seed_pos in verifier.enumerate():
            # Exact occurrences of this seed, found through the BWT.
            occurrences = self._bwt.get_matches(seed_text)
            # Record them as candidate approximate matches for this seed.
            verifier.add_candidates(occurrences, seed_pos)

        # Final verification of the candidates against the stored text.
        verified = verifier.filter_candidates(self._text)
        return verified
예제 #6
0
 def __init__(self, string):
   """Keep the source sequence and build a BWT over it."""
   source = string
   self.string = source
   # The BWT is constructed from the attribute that was just stored.
   self.bwt = BWT(self.string)
예제 #7
0
 def __init__(self, target):
     """Store target with '$' appended and build a BWT over that text."""
     terminated = target + '$'
     self._text = terminated
     # The BWT is constructed from the attribute that was just stored.
     self._bwt = BWT(self._text)
예제 #8
0
 def __init__(self, target):
     """Append "$" to target, keep the result, and index it with a BWT."""
     text_with_marker = target + "$"
     self._text = text_with_marker
     # The index is built over the stored, "$"-terminated text.
     self._bwt = BWT(self._text)