Python tokenize Examples

Programming Language: Python

Namespace/Package Name: diffengine.tokenization.wikitext_split

Method/Function: tokenize

Examples at hotexamples.com: 3

Python tokenize - 3 examples found. These are the top-rated real-world Python examples of diffengine.tokenization.wikitext_split.tokenize, extracted from open-source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: test.sequence_matcher.py Project: halfak/Difference-Engine

from pprint import pprint

from diffengine.difference import sequence_matcher
from diffengine.tokenization import wikitext_split

# Diff an empty token sequence against the tokenized sample sentence and
# pretty-print each operation the matcher produces.
tokens = wikitext_split.tokenize("Foo bar derp.")
operations = sequence_matcher.diff([], tokens)
for operation in operations:
    pprint(operation)

Example #2

Show file

File: test.hierarchical_matcher.token_cluster.py Project: halfak/Difference-Engine

from pprint import pprint

from diffengine.engines import hierarchical_matcher
from diffengine.tokenization import wikitext_split


# Sample texts to tokenize and cluster: a single sentence, followed by a
# snippet that spans two paragraphs.  Kept in a list so the same steps run
# for each one, and so the builtin `input` is no longer shadowed.
samples = [
    "Foo bar derp.",
    """
This is a sentence.  This is the end.

This is another paragraph.
""",
]

for text in samples:
    tokens = wikitext_split.tokenize(text)
    print(tokens)
    for cluster in hierarchical_matcher.cluster(tokens, wikitext_split):
        pprint(cluster)

    # Visual separator printed after each sample's output.
    print("-----------------------")

input = """
This is a sentence.  This is the end.

Example #3

Show file

File: test.hierarchical_matcher.py Project: halfak/Difference-Engine

from pprint import pprint

from diffengine.difference import hierarchical_matcher
from diffengine.tokenization import wikitext_split

def _report(previous, current):
    """Print `current`, then every diff operation from `previous` to it.

    Operations are numbered starting at 1 and shown via their repr.
    """
    print(current)
    operations = hierarchical_matcher.diff(previous, current)
    for number, operation in enumerate(operations, start=1):
        print("#{0}: {1!r}".format(number, operation))


# First revision, diffed against an empty token list.
tokens1 = wikitext_split.tokenize("Foo bar derp.")
_report([], tokens1)

print("-----------------------")

# Second revision repeats the sentence; diffed against the first.
tokens2 = wikitext_split.tokenize("Foo bar derp. Foo bar derp.")
_report(tokens1, tokens2)


print("-----------------------")

# Third revision inserts words into the repeated sentence; diffed against
# the second.
tokens3 = wikitext_split.tokenize("Foo bar derp. Foo this is a bar derp.")
_report(tokens2, tokens3)