# Example: diff an empty token list against a tokenized sentence and
# pretty-print each operation produced by sequence_matcher.diff().
from pprint import pprint

from diffengine.difference import sequence_matcher
from diffengine.tokenization import wikitext_split

# Tokenize the sample sentence once, then compare it against "nothing".
sentence_tokens = wikitext_split.tokenize("Foo bar derp.")
operations = sequence_matcher.diff([], sentence_tokens)

for op in operations:
    pprint(op)
# Example: tokenize sample strings with wikitext_split.tokenize(), print the
# token list, then pretty-print each item yielded by
# hierarchical_matcher.cluster(tokens, wikitext_split). A dashed separator is
# printed after each sample.
# NOTE(review): all statements of this example are fused onto one physical
# line, and the last triple-quoted literal is never closed in this view; the
# remainder of the example appears to be cut off. Restore the missing text and
# the line breaks before running.
# NOTE(review): `input` shadows the builtin of the same name; consider
# renaming it (e.g. `text`) when the snippet is restored.
# NOTE(review): hierarchical_matcher is imported from diffengine.engines here;
# confirm that this is the intended module path.
from pprint import pprint from diffengine.engines import hierarchical_matcher from diffengine.tokenization import wikitext_split input = "Foo bar derp." tokens = wikitext_split.tokenize(input) print(tokens) for cluster in hierarchical_matcher.cluster(tokens, wikitext_split): pprint(cluster) print("-----------------------") input = """ This is a sentence. This is the end. This is another paragraph. """ tokens = wikitext_split.tokenize(input) print(tokens) for cluster in hierarchical_matcher.cluster(tokens, wikitext_split): pprint(cluster) print("-----------------------") input = """ This is a sentence. This is the end.
# Example: run hierarchical_matcher.diff() over three successive revisions of
# a sentence and print the numbered operations for each step.
from pprint import pprint

from diffengine.difference import hierarchical_matcher
from diffengine.tokenization import wikitext_split

# Visual divider printed between the individual diff runs.
SEPARATOR = "-----------------------"


def _print_diff(before, after):
    """Print the tokens of `after`, then each operation of the diff.

    Args:
        before: token sequence of the older revision (may be empty).
        after: token sequence of the newer revision.

    Operations are numbered from 1 and shown via their repr().
    """
    print(after)
    # enumerate(..., 1) replaces the manual `i+1` of the original loops.
    for number, op in enumerate(hierarchical_matcher.diff(before, after), 1):
        print("#{0}: {1}".format(number, repr(op)))


# Revision 1: everything is new relative to an empty token list.
tokens1 = wikitext_split.tokenize("Foo bar derp.")
_print_diff([], tokens1)
print(SEPARATOR)

# Revision 2: the sentence is repeated.
tokens2 = wikitext_split.tokenize("Foo bar derp. Foo bar derp.")
_print_diff(tokens1, tokens2)
print(SEPARATOR)

# Revision 3: words are inserted into the second sentence.
tokens3 = wikitext_split.tokenize("Foo bar derp. Foo this is a bar derp.")
_print_diff(tokens2, tokens3)