import pytest
from mathy_core import Tokenizer


def test_tokenizer_errors() -> None:
    # The trailing backslash is not a valid token, so tokenize() raises
    text = "4x + 2x^3 * 7\\"
    tokenizer = Tokenizer()
    with pytest.raises(ValueError):
        tokenizer.tokenize(text)
from typing import List

from mathy_core import Token, Tokenizer


def test_tokenizer_ignore_padding() -> None:
    """When specified, the tokenizer will return tokens for padding characters
    to allow exact reproduction of the input string from its tokens."""
    text = "4x + 2y^7 - 6"
    # exclude_padding is True by default
    no_padding: List[Token] = Tokenizer().tokenize(text)
    # When including padding, spaces are preserved
    padding: List[Token] = Tokenizer(exclude_padding=False).tokenize(text)
    # The input contains four spaces, so four extra padding tokens appear
    assert len(padding) == len(no_padding) + 4
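# A minimal follow-up sketch (an assumption, not part of the test suite above):
# since padded token streams preserve spaces and the EOF token's value is the
# empty string, joining the token values should rebuild the original input.
from typing import List

from mathy_core import Token, Tokenizer

source = "4x + 2y^7 - 6"
padded: List[Token] = Tokenizer(exclude_padding=False).tokenize(source)
assert "".join(t.value for t in padded) == source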
from typing import List

from mathy_core import Token, TOKEN_TYPES, Tokenizer


def test_tokenizer_tokenize() -> None:
    text = "4x + 2x^3 * 7!"
    tokenizer = Tokenizer()
    tokens: List[Token] = tokenizer.tokenize(text)
    for token in tokens:
        print(token)
        # EOF is the largest token type value, so every type is at or below it
        assert token.type <= TOKEN_TYPES.EOF
        assert token.value is not None
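# A quick sanity sketch (assumes, per the assertion above and the
# manual-verification example below, that every token stream is
# terminated by an EOF token):
from mathy_core import TOKEN_TYPES, Tokenizer

stream = Tokenizer().tokenize("4x + 2")
assert stream[-1].type == TOKEN_TYPES.EOF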
from typing import List

from mathy_core import Token, TOKEN_TYPES, Tokenizer


def test_tokenizer_manual_verification() -> None:
    """Simplest conceptual example verifying the tokenizer maps to expectations."""
    manual_tokens: List[Token] = [
        Token("4", TOKEN_TYPES.Constant),
        Token("x", TOKEN_TYPES.Variable),
        Token("+", TOKEN_TYPES.Plus),
        Token("2", TOKEN_TYPES.Constant),
        Token("", TOKEN_TYPES.EOF),
    ]
    auto_tokens: List[Token] = Tokenizer().tokenize("4x + 2")
    for i, token in enumerate(manual_tokens):
        assert auto_tokens[i].value == token.value
        assert auto_tokens[i].type == token.type
from typing import List

from mathy_core import Token, Tokenizer

text = "4x + 2x^3 * 7x"
tokenizer = Tokenizer()
tokens: List[Token] = tokenizer.tokenize(text)

for token in tokens:
    print(f"type: {token.type}, value: {token.value}")
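# A follow-up sketch (hypothetical, not from the docs): token types can be
# compared against TOKEN_TYPES members to filter the stream, e.g. to pull
# every variable token out of the expression above.
from typing import List

from mathy_core import Token, TOKEN_TYPES, Tokenizer

tokens: List[Token] = Tokenizer().tokenize("4x + 2x^3 * 7x")
variables = [t.value for t in tokens if t.type == TOKEN_TYPES.Variable]
assert variables == ["x", "x", "x"]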
from typing import List

from mathy_core import Token, TOKEN_TYPES, Tokenizer

manual_tokens: List[Token] = [
    Token("4", TOKEN_TYPES.Constant),
    Token("x", TOKEN_TYPES.Variable),
    Token("+", TOKEN_TYPES.Plus),
    Token("2", TOKEN_TYPES.Constant),
    Token("", TOKEN_TYPES.EOF),
]
auto_tokens: List[Token] = Tokenizer().tokenize("4x + 2")

for i, token in enumerate(manual_tokens):
    assert auto_tokens[i].value == token.value
    assert auto_tokens[i].type == token.type
from mathy_core import (
    BinaryExpression,
    ExpressionParser,
    MathExpression,
    Token,
    Tokenizer,
    TreeLayout,
    TreeMeasurement,
    VariableExpression,
    testing,
)
from wasabi import msg

from mathy_envs import MathyEnvState, MathyObservation

tokenizer = Tokenizer()
parser = ExpressionParser()

expression_re = r"<code>([a-z\_]*):([\d\w\^\*\+\-\=\/\.\s\(\)\[\]]*)<\/code>"
rules_matcher_re = r"`rule_tests:([a-z\_]*)`"
snippet_matcher_re = r"```[pP]ython[\n]+{!\.(\/snippets\/[0-9a-z\_\/]+).py!}[\n]+```"

# Add animations? http://zulko.github.io/blog/2014/09/20/vector-animations-with-python/
# TODO: add links to code highlight blocks next to clipboard
link_template = "https://colab.research.google.com/github/justindujardin/mathy/blob/master/libraries/website/docs{}.ipynb"  # noqa


def to_math_ml_fragment(match):
    global parser
    match = match.group(1)
    try:
        # NOTE: the original fragment is truncated at `try:`; the body below
        # is a plausible completion (assuming MathExpression exposes
        # to_math_ml_fragment()) that parses the match and renders MathML.
        expression: MathExpression = parser.parse(match)
        return expression.to_math_ml_fragment()
    except BaseException as error:
        return f"Failed to parse: '{match}' with error: {error}"
from typing import List

from mathy_core import Token, TOKEN_TYPES, Tokenizer


def test_tokenizer_identify_functions() -> None:
    """The tokenizer can deal with known function expressions"""
    tokens: List[Token] = Tokenizer().tokenize("sgn(-3)")
    assert tokens[0].type == TOKEN_TYPES.Function
    assert tokens[0].value == "sgn"
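# A follow-up sketch (an assumption: that mathy_core's ExpressionParser
# supports sgn and that parsed expressions expose evaluate()): a recognized
# function token can flow through the parser and be evaluated numerically.
from mathy_core import ExpressionParser

expression = ExpressionParser().parse("sgn(-3)")
assert expression.evaluate() == -1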