Esempio n. 1
0
 def set_color(self, line):
     """Store a highlighted copy of ``self.line`` in ``self.colored_line``.

     ``self.line`` is compared against *line* with ``StringMatcher``.  The
     stretches of ``self.line`` that lie between consecutive matching blocks
     are wrapped in the ANSI SGR 33 / SGR 0 escape pair; the matching
     stretches are copied through unchanged.  A wrapper pair is emitted
     before every block, even when the stretch it surrounds is empty.
     """
     matcher = StringMatcher(seq1=self.line, seq2=line)
     pieces = []
     cursor = 0  # end of the previously copied matching stretch
     for start, _, size in matcher.get_matching_blocks():
         stop = start + size
         # Text skipped since the last match has no counterpart in `line`.
         pieces.append("\x1b[33m%s\x1b[0m" % self.line[cursor:start])
         pieces.append(self.line[start:stop])
         cursor = stop
     self.colored_line = "".join(pieces)
Esempio n. 2
0
  def testEqual(self):
    """Each fixture pair must score exactly 100 after ``intr(100 * ratio)``."""
    # Attribute names are resolved lazily so the fixtures are read in the
    # same order as the checks are performed.
    fixture_pairs = (("s1", "s1a"), ("s8", "s8a"), ("s9", "s9a"))
    for left_name, right_name in fixture_pairs:
      matcher = StringMatcher(None,
                              getattr(self, left_name),
                              getattr(self, right_name))
      similarity = matcher.ratio()
      self.assertEqual(intr(100*similarity), 100)
def compare_sentences(new_sentences, old_sentences):
    """Score every new sentence against the collection of old sentences.

    For each entry of *new_sentences* the ``StringMatcher`` ratio against the
    old sentences is tracked.  Scanning of *old_sentences* stops as soon as
    the best ratio seen so far exceeds 0.75, so the reported score is the
    running maximum at that point and not necessarily the global maximum.

    Args:
        new_sentences: iterable of indexable items; element 0 is the sentence
            text and element 1 is the sentence identifier.
        old_sentences: iterable of sentence texts to compare against.  It is
            iterated once per new sentence.

    Returns:
        A list with one dict per new sentence, holding ``sentence_id``,
        ``max_score`` (``0`` when nothing scored higher) and
        ``sentence_length`` (number of pieces obtained by splitting the text
        on a single space).
    """
    rows = []
    for new_sentence in new_sentences:
        text = new_sentence[0]
        max_score = 0
        for old_sentence in old_sentences:
            # Build the matcher and compute the ratio once per pair; the
            # ratio is the expensive step and its value is reused below.
            score = StringMatcher(None, text, old_sentence).ratio()
            if score > max_score:
                max_score = score
            if max_score > .75:
                # Good enough: skip the remaining old sentences.
                break
        rows.append(
            dict(sentence_id=new_sentence[1],
                 max_score=max_score,
                 sentence_length=len(text.split(" "))))
    return rows
def compare_single_sentence(new_sentence, old_sentences):
    """Find the old sentence most similar to *new_sentence*.

    The ``StringMatcher`` ratio is computed against each old sentence in
    order; the first sentence to reach a strictly higher ratio than any
    before it is remembered.  Scanning stops early once the best ratio
    equals 1.

    Args:
        new_sentence: mapping with the keys ``'sentence'`` (text) and
            ``'id'``.
        old_sentences: iterable of mappings with the same two keys.

    Returns:
        A dict with ``sentence_id`` (id of the new sentence),
        ``old_sentence_id`` (id of the best match, ``''`` if nothing scored
        above 0), ``max_score`` and ``sentence_length`` (number of pieces
        obtained by splitting the new text on a single space).
    """
    text = new_sentence['sentence']
    max_score = 0
    old_sentence_id = ''
    for old_sentence in old_sentences:
        # Build the matcher and compute the ratio once per pair; the ratio
        # is the expensive step and its value is reused below.
        score = StringMatcher(None, text, old_sentence['sentence']).ratio()
        if score > max_score:
            max_score = score
            old_sentence_id = old_sentence['id']
        if max_score == 1:
            # A perfect match cannot be improved on.
            break
    results = dict(sentence_id=new_sentence['id'],
                   old_sentence_id=old_sentence_id,
                   max_score=max_score,
                   sentence_length=len(text.split(" ")))
    return results
Esempio n. 5
0
  def testPartialRatio(self):
    """Check that the best window of the longer fixture scores 100.

    The shorter of ``self.s1`` / ``self.s3`` is compared against
    same-length windows of the longer one, one window per matching block.
    """
    if len(self.s1) <= len(self.s3):
      shorter, longer = self.s1, self.s3
    else:
      shorter, longer = self.s3, self.s1

    window = len(shorter)
    matcher = StringMatcher(None, shorter, longer)

    # Every matching block is a triple (index in shorter, index in longer,
    # length).  The best partial match lines up with at least one of them:
    #   shorter = "abcd", longer = "XXXbcdeEEE"  ->  block (1, 3, 3)
    #   the window therefore starts at 3 - 1 = 2 and the score is
    #   ratio("abcd", "Xbcd")
    scores = []
    for short_idx, long_idx, _size in matcher.get_matching_blocks():
      # Clamp at 0 when the block sits further into `shorter` than `longer`.
      long_start = max(long_idx - short_idx, 0)
      candidate = longer[long_start:long_start + window]

      score = StringMatcher(None, shorter, candidate).ratio()
      if score > .995:
        # NOTE(review): returning a value from a test method looks like a
        # leftover from the function this was adapted from - confirm.
        return 100
      scores.append(score)

    self.assertEqual(intr(100*max(scores)), 100)
Esempio n. 6
0
def partial_ratio2(s1, s2):
    """Score ``s1`` against its best-aligned window of ``s2``.

    The arguments are not reordered by length: ``s1`` is always used as the
    pattern and ``s2`` as the text from which ``len(s1)``-sized windows are
    cut, one window per matching block.

    Returns:
        ``0`` if either input is empty after type normalisation, ``100`` as
        soon as a window's ratio exceeds 0.995, otherwise the best window
        ratio multiplied by 100 and truncated with ``int``.

    Raises:
        TypeError: if ``s1`` or ``s2`` is ``None``.
    """
    if s1 is None:
        raise TypeError("s1 is None")
    if s2 is None:
        raise TypeError("s2 is None")

    s1, s2 = utils.make_type_consistent(s1, s2)
    if not (len(s1) and len(s2)):
        return 0

    pattern, text = s1, s2
    width = len(pattern)

    # Every matching block is a triple (index in pattern, index in text,
    # length).  The best partial match lines up with at least one of them:
    #   pattern = "abcd", text = "XXXbcdeEEE"  ->  block (1, 3, 3)
    #   the window therefore starts at 3 - 1 = 2 and the score is
    #   ratio("abcd", "Xbcd")
    window_scores = []
    for pat_idx, text_idx, _size in SequenceMatcher(
            None, pattern, text).get_matching_blocks():
        # Clamp at 0 when the block sits further into the pattern than
        # into the text.
        begin = max(text_idx - pat_idx, 0)
        window = text[begin:begin + width]

        score = SequenceMatcher(None, pattern, window).ratio()
        if score > .995:
            return 100
        window_scores.append(score)

    return int(100 * max(window_scores))
Esempio n. 7
0
 def testCaseInsensitive(self):
   """``self.s1`` and ``self.s2`` must not score 100 after ``intr(100 * ratio)``."""
   similarity = StringMatcher(None, self.s1, self.s2).ratio()
   percent = intr(100*similarity)
   self.assertNotEqual(percent, 100)