def set_color(self, line):
    """Store a highlighted copy of ``self.line`` in ``self.colored_line``.

    ``self.line`` is compared against ``line`` with ``StringMatcher``. The
    stretches of ``self.line`` that fall between matching blocks are wrapped
    in the ANSI escape sequence ``\\x1b[33m`` ... ``\\x1b[0m``; the matching
    stretches are copied through unchanged.

    Args:
        line: The sequence to compare ``self.line`` against.
    """
    matcher = StringMatcher(seq1=self.line, seq2=line)
    pieces = []
    cursor = 0  # index in self.line just past the previous matching block
    for block in matcher.get_matching_blocks():
        start, size = block[0], block[2]
        stop = start + size
        # Everything between the previous match and this one is a difference.
        pieces.append("\x1b[33m%s\x1b[0m" % self.line[cursor:start])
        pieces.append(self.line[start:stop])
        cursor = stop
    self.colored_line = "".join(pieces)
def testEqual(self):
    """Check that three fixture pairs each score a rounded ratio of 100.

    The pairs are (s1, s1a), (s8, s8a) and (s9, s9a); each ratio is scaled
    by 100 and passed through ``intr`` before being compared with 100.
    """
    first = StringMatcher(None, self.s1, self.s1a)
    self.assertEqual(intr(100 * first.ratio()), 100)

    second = StringMatcher(None, self.s8, self.s8a)
    self.assertEqual(intr(100 * second.ratio()), 100)

    third = StringMatcher(None, self.s9, self.s9a)
    self.assertEqual(intr(100 * third.ratio()), 100)
def compare_sentences(new_sentences, old_sentences):
    """Score every new sentence against the old sentences.

    For each new sentence the best ``StringMatcher`` ratio against the old
    sentences is recorded. The search for a given new sentence stops as soon
    as the best ratio exceeds 0.75, so the reported score is "first score
    above 0.75" rather than necessarily the global maximum.

    Args:
        new_sentences: Iterable of indexable items where item ``[0]`` is the
            sentence text and item ``[1]`` is the sentence id.
        old_sentences: Iterable of sentence texts to compare against.

    Returns:
        A list with one dict per new sentence, holding ``sentence_id``,
        ``max_score`` (0 when nothing scored above zero) and
        ``sentence_length`` (number of pieces after splitting the text on a
        single space).
    """
    rows = []
    for new_sentence in new_sentences:
        text = new_sentence[0]
        max_score = 0
        for old_sentence in old_sentences:
            # Compute the ratio once per pair; the matcher is not reused, so
            # calling ratio() a second time would redo the whole comparison.
            score = StringMatcher(None, text, old_sentence).ratio()
            if score > max_score:
                max_score = score
                if max_score > .75:
                    # Good enough match found; skip the remaining candidates.
                    break
        rows.append(
            dict(sentence_id=new_sentence[1],
                 max_score=max_score,
                 sentence_length=len(text.split(" "))))
    return rows
def compare_single_sentence(new_sentence, old_sentences):
    """Find the old sentence that best matches a single new sentence.

    Candidates are scored with ``StringMatcher(...).ratio()``; the first
    candidate with a strictly higher score than all earlier ones wins, and
    the search stops early once a score equal to 1 is found.

    Args:
        new_sentence: Mapping with ``'id'`` and ``'sentence'`` keys.
        old_sentences: Iterable of mappings with ``'id'`` and ``'sentence'``
            keys.

    Returns:
        A dict with ``sentence_id`` (id of the new sentence),
        ``old_sentence_id`` (id of the best match, or ``''`` when no
        candidate scored above zero), ``max_score`` and ``sentence_length``
        (number of pieces after splitting the new sentence on a single
        space).
    """
    text = new_sentence['sentence']
    max_score = 0
    old_sentence_id = ''
    for old_sentence in old_sentences:
        # Compute the ratio once per candidate instead of building a second
        # matcher just to read the same value again.
        score = StringMatcher(None, text, old_sentence['sentence']).ratio()
        if score > max_score:
            max_score = score
            old_sentence_id = old_sentence['id']
            if max_score == 1:
                # Exact match; no later candidate can beat it.
                break
    results = dict(sentence_id=new_sentence['id'],
                   old_sentence_id=old_sentence_id,
                   max_score=max_score,
                   sentence_length=len(text.split(" ")))
    return results
def testPartialRatio(self):
    """Check that the best partial match between ``s1`` and ``s3`` is 100.

    The shorter of the two fixtures is slid along the longer one at the
    offsets suggested by the matching blocks, and the best window ratio
    (scaled by 100 and passed through ``intr``) must equal 100.
    """
    if len(self.s1) <= len(self.s3):
        shorter, longer = self.s1, self.s3
    else:
        shorter, longer = self.s3, self.s1

    blocks = StringMatcher(None, shorter, longer).get_matching_blocks()

    # Each block represents a sequence of matching characters in a string,
    # of the form (idx_1, idx_2, len).
    # The best partial match will block-align with at least one of those
    # blocks, e.g. shorter = "abcd", longer = "XXXbcdeEEE":
    #   block = (1, 3, 3)
    #   best score === ratio("abcd", "Xbcd")
    scores = []
    for block in blocks:
        # Align the start of `shorter` with this block, clamped to index 0.
        long_start = max(block[1] - block[0], 0)
        long_substr = longer[long_start:long_start + len(shorter)]

        r = StringMatcher(None, shorter, long_substr).ratio()
        if r > .995:
            # A near-perfect window already satisfies the test. Return None
            # rather than a value: test runners (unittest since 3.11,
            # pytest) warn when a test method returns something non-None.
            return
        scores.append(r)

    self.assertEqual(intr(100 * max(scores)), 100)
def partial_ratio2(s1, s2):
    """Return the ratio of the most similar substring as a number between 0 and 100.

    ``s1`` is always used as the pattern and ``s2`` as the text it is slid
    along; the two are not swapped by length.

    Args:
        s1: Pattern string.
        s2: String searched for the best-aligned window of ``len(s1)`` items.

    Returns:
        0 if either string is empty after ``utils.make_type_consistent``,
        100 as soon as a window scores above 0.995, otherwise the best
        window ratio multiplied by 100 and truncated with ``int``.

    Raises:
        TypeError: If ``s1`` or ``s2`` is None.
    """
    if s1 is None:
        raise TypeError("s1 is None")
    if s2 is None:
        raise TypeError("s2 is None")
    s1, s2 = utils.make_type_consistent(s1, s2)

    if len(s1) == 0 or len(s2) == 0:
        return 0

    needle, haystack = s1, s2
    window = len(needle)
    blocks = SequenceMatcher(None, needle, haystack).get_matching_blocks()

    # Each block represents a sequence of matching characters in a string,
    # of the form (idx_1, idx_2, len).
    # The best partial match will block-align with at least one of those
    # blocks, e.g. shorter = "abcd", longer = "XXXbcdeEEE":
    #   block = (1, 3, 3)
    #   best score === ratio("abcd", "Xbcd")
    ratios = []
    for block in blocks:
        # Start of the window in `haystack` that lines this block up with
        # its position in `needle`, clamped so it never goes below 0.
        offset = max(block[1] - block[0], 0)
        candidate = haystack[offset:offset + window]

        ratio = SequenceMatcher(None, needle, candidate).ratio()
        if ratio > .995:
            return 100
        ratios.append(ratio)

    return int(100 * max(ratios))
def testCaseInsensitive(self):
    """Check that fixtures ``s1`` and ``s2`` do NOT score a rounded ratio of 100."""
    matcher = StringMatcher(None, self.s1, self.s2)
    scaled = intr(100 * matcher.ratio())
    self.assertNotEqual(scaled, 100)