def test_unicode_marks():
    """Check the matching blocks for two Myanmar strings differing mid-text.

    The second string has extra characters inserted into the middle of the
    first; the matcher is run without a junk predicate and its blocks are
    compared against a fixed expected list.
    """
    source = u"ဥပမာ။ ကာမှန်လား။"
    target = u"ဥပမာ။ ကောင်းမှန်လား။"

    # Shared prefix, shared suffix, then the zero-length terminator block.
    expected = [
        (0, 0, 6),
        (8, 12, 8),
        (len(source), len(target), 0),
    ]

    matcher = thanlwinsoft.translate.textdiff.SequenceMatcher(
        None, source, target)
    blocks = matcher.get_matching_blocks()

    # Dump the actual blocks so a failing run is easy to diagnose.
    sys.stderr.write(str(blocks) + "\n")

    assert blocks == expected
def test_unicode_marks():
    """Verify get_matching_blocks() on a pair of Myanmar strings.

    Both strings share the same beginning and ending; only the middle
    differs. No junk predicate is supplied to the matcher.
    """
    original_text = u"ဥပမာ။ ကာမှန်လား။"
    revised_text = u"ဥပမာ။ ကောင်းမှန်လား။"

    differ = thanlwinsoft.translate.textdiff.SequenceMatcher(
        None, original_text, revised_text)
    found = differ.get_matching_blocks()

    # Emit the computed blocks on stderr for debugging.
    sys.stderr.write(str(found) + "\n")

    # The final entry is the zero-length block at the end of both strings.
    terminator = (len(original_text), len(revised_text), 0)
    wanted = [(0, 0, 6), (8, 12, 8), terminator]
    assert found == wanted
def test_unicode_with_junk():
    """Check matching blocks when the inputs carry extra whitespace.

    The first string has leading/trailing whitespace (including a tab) that
    the second string lacks. The matcher is run with no junk predicate; a
    variant using ``lambda x : x in " \\t"`` as the predicate is disabled.
    """
    padded = u" ဥပမာ။ ကာမှန်လား။\t "
    compact = u"ဥပမာ။ကောင်းမှန်လား။ "

    matcher = thanlwinsoft.translate.textdiff.SequenceMatcher(
        None, padded, compact)
    blocks = matcher.get_matching_blocks()

    # Dump the actual blocks so a failing run is easy to diagnose.
    sys.stderr.write(str(blocks) + "\n")

    # The space at the end could legitimately match just after the tab
    # as well.
    expected = [
        Match(1, 0, 5),
        Match(9, 11, 8),
        Match(31, 19, 1),
        Match(len(padded), len(compact), 0),
    ]
    assert blocks == expected
def test_unicode_with_junk():
    """Verify get_matching_blocks() on Myanmar text with stray whitespace.

    Only the first input contains the leading space and the tab. No junk
    predicate is passed (the ``lambda x : x in " \\t"`` variant is not
    exercised here).
    """
    with_whitespace = u" ဥပမာ။ ကာမှန်လား။\t "
    without_whitespace = u"ဥပမာ။ကောင်းမှန်လား။ "

    differ = thanlwinsoft.translate.textdiff.SequenceMatcher(
        None, with_whitespace, without_whitespace)
    found = differ.get_matching_blocks()

    # Emit the computed blocks on stderr for debugging.
    sys.stderr.write(str(found) + "\n")

    # The space at the end could legitimately match just after the tab
    # as well.
    terminator = Match(len(with_whitespace), len(without_whitespace), 0)
    wanted = [Match(1, 0, 5), Match(9, 11, 8), Match(31, 19, 1), terminator]
    assert found == wanted
def run_test(junk, a, b):
    """Compare textdiff's matcher against difflib's on the same inputs.

    Both matchers are built with the same ``junk`` predicate and the same
    pair of sequences. Their matching blocks are written to stderr and the
    total matched length of each is asserted to be equal.

    Returns a two-element list: ``[textdiff_matches, difflib_matches]``.
    """
    reference = difflib.SequenceMatcher(junk, a, b)
    candidate = thanlwinsoft.translate.textdiff.SequenceMatcher(junk, a, b)

    textdiff_matches = candidate.get_matching_blocks()
    difflib_matches = reference.get_matching_blocks()

    # Log both results (textdiff first) to help diagnose a failure.
    for blocks in (textdiff_matches, difflib_matches):
        sys.stderr.write(str(blocks) + "\n")

    # Only the summed block sizes are compared; a stricter check that the
    # two block lists are identical is intentionally not enforced here.
    textdiff_total = sum(block[2] for block in textdiff_matches)
    difflib_total = sum(block[2] for block in difflib_matches)
    assert textdiff_total == difflib_total

    return [textdiff_matches, difflib_matches]
def run_test(junk, a, b):
    """Run difflib and textdiff matchers side by side and compare totals.

    ``junk`` is forwarded unchanged to both SequenceMatcher constructors,
    along with sequences ``a`` and ``b``. The matching blocks from each are
    written to stderr, and the sum of the block sizes must agree.

    Returns ``[textdiff_matches, difflib_matches]``.
    """

    def matched_length(blocks):
        # Third field of every block is its size.
        return sum(entry[2] for entry in blocks)

    stdlib_matcher = difflib.SequenceMatcher(junk, a, b)
    project_matcher = thanlwinsoft.translate.textdiff.SequenceMatcher(
        junk, a, b)

    textdiff_matches = project_matcher.get_matching_blocks()
    difflib_matches = stdlib_matcher.get_matching_blocks()

    sys.stderr.write(str(textdiff_matches) + "\n")
    sys.stderr.write(str(difflib_matches) + "\n")

    # The block lists themselves are not required to be identical; only the
    # overall amount of matched content is checked.
    assert matched_length(textdiff_matches) == matched_length(difflib_matches)

    return [textdiff_matches, difflib_matches]