def test_label_name_containing_whitespaces():
    commands = ["foo bar:", "execute run", "    say baz"]
    try:
        tokenize([(i + 1, command) for i, command in enumerate(commands)])
    except ValueError as error:
        assert error.args[0].startswith("Unknown or incomplete command")
    else:
        assert False

def test_unknown_label():
    commands = ["execute run", "    redo foo"]
    try:
        tokenize([(i + 1, command) for i, command in enumerate(commands)])
    except NameError as error:
        assert error.args[0].startswith("Unknown label")
    else:
        assert False

def test_label_name_beginning_with_number():
    commands = ["1foo:", "execute run", "    say bar"]
    try:
        tokenize([(i + 1, command) for i, command in enumerate(commands)])
    except SyntaxError as error:
        assert error.args[0].startswith("Label names must satisfy")
    else:
        assert False

def test_invalid_indentation():
    commands = ["say foo", "    say bar"]
    try:
        tokenize([(i + 1, command) for i, command in enumerate(commands)])
    except IndentationError:
        assert True
    else:
        assert False

def test_invalid_command():
    commands = ["foo bar"]
    try:
        tokenize([(i + 1, command) for i, command in enumerate(commands)])
    except ValueError as error:
        assert error.args[0].startswith("Unknown or incomplete command")
    else:
        assert False

def test_excess_indentation():
    commands = ["execute as @a run", "        say foo"]
    try:
        tokenize([(i + 1, command) for i, command in enumerate(commands)])
    except IndentationError:
        assert True
    else:
        assert False

def test_invalid_indentation_space():
    commands = ["execute as @a run", "   say abc"]
    try:
        tokenize([(i + 1, command) for i, command in enumerate(commands)])
    except IndentationError:
        assert True
    else:
        assert False

def test_non_indented_redo():
    commands = ["say abc", "redo"]
    try:
        tokenize([(i + 1, command) for i, command in enumerate(commands)])
    except ValueError as error:
        assert error.args[0].startswith(
            "Command 'redo' must be indented in an execute command")
    else:
        assert False

def test_redo_having_more_arguments():
    commands = ["foo:", "execute run", "    redo foo bar"]
    try:
        tokenize([(i + 1, command) for i, command in enumerate(commands)])
    except SyntaxError as error:
        assert error.args[0].startswith(
            "Command 'redo' has more than 1 argument at line ")
    else:
        assert False

def test_tokenizer():
    pred = tokenize("これはテストのテキストです。ファミリーマート")
    assert ["これ", "は", "テスト", "の", "テキスト", "です", "。", "ファミリーマート"] == pred
    pred = tokenize("「これは、テストのテキストです。ファミリーマート」")
    assert ["これ", "は", "テスト", "の", "テキスト", "です", "。", "ファミリーマート"] == pred
    pred = tokenize("これは、テストのテキストです。ファミリーマート🍎")
    assert ["これ", "は", "テスト", "の", "テキスト", "です", "。", "ファミリーマート"] == pred

def char_tokenization_2(input):
    tokens = []
    i = 0
    # A while loop is needed here: incrementing `i` inside a `for i in range(...)`
    # loop has no effect, so the original version never skipped the merged character.
    while i < len(input):
        if input[i] == ' ' and i + 1 < len(input):
            # Use the token for the next character with a prepended space,
            # then skip the character consumed together with the space.
            token = tokenize([input[i] + input[i + 1]])[0][0]
            i += 1
        else:
            token = tokenize([input[i]])[0][0]
        tokens.append(token)
        i += 1
    return tokens

def get_response(self, user_input):
    """Given `user_input`, tokenize it and return the response that is
    most likely to be an appropriate reply to the user.
    """
    tokens = tokenize(user_input)
    latest_response = LatestResponse(None, 0)
    if not self.data:
        raise ValueError("Data must have at least one item")
    response = self._run_thread(tokens)
    if response:
        return response
    # Normal handling
    for response in self.data:
        response_weight = self._get_weight(response, tokens)
        # Check whether this response should become the most weighted one;
        # ties are broken by a coin flip.
        if response_weight == latest_response.weight:
            if random.random() > 0.5:
                latest_response = LatestResponse(response, response_weight)
        elif response_weight > latest_response.weight:
            latest_response = LatestResponse(response, response_weight)
    self.last_response = latest_response
    # Fall back to a generic response (e.g. "I don't understand") if
    # nothing matched.
    if latest_response.weight == 0:
        return random.choice(self.generics)
    return random.choice(latest_response.response["value"])

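# A minimal, hypothetical sketch of the tie-breaking selection used above.
# `LatestResponseSketch` and `pick_weighted` are illustrative stand-ins, not
# part of the class; the real code computes weights via self._get_weight.
from collections import namedtuple
import random

LatestResponseSketch = namedtuple("LatestResponseSketch", ["response", "weight"])

def pick_weighted(responses, weights):
    # Keep the heaviest response seen so far; on a tie, flip a coin.
    best = LatestResponseSketch(None, 0)
    for response, weight in zip(responses, weights):
        if weight == best.weight:
            if random.random() > 0.5:
                best = LatestResponseSketch(response, weight)
        elif weight > best.weight:
            best = LatestResponseSketch(response, weight)
    return best

# pick_weighted(["hi", "hey"], [2, 2]) returns either response with weight 2.
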
def _process_file(fin, fout):
    print(f"Expanding {fin} to {fout}...")
    with open(fin, "r") as src:
        code = src.read()
    with open(fout, "w") as dest:
        dest.write(code)
    clear(fout)
    if CLEAN:
        return
    _env = copy.deepcopy(ENV)
    _env["XGLSL"] = LANG_CURRENT == "glsl"
    _env["XHLSL"] = LANG_CURRENT in ["hlsl9", "hlsl11"]
    _env["XHLSL9"] = LANG_CURRENT == "hlsl9"
    _env["XHLSL11"] = LANG_CURRENT == "hlsl11"
    tokens = tokenize(fout)
    tree = make_tree(tokens)
    processed = process_tree(tree, _env, XPATH, XPATH_DEFAULT, CLEAR_PRAGMA_INCLUDES)
    with open(fout, "w") as f:
        processed = handle_compatibility(processed, LANG_CURRENT)
        processed = re.sub(r"\n{3,}", "\n\n", processed)
        if MINIFY:
            processed = minify(processed)
        f.write(processed)
    print("-" * 80)

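# For example, with LANG_CURRENT == "hlsl11" the flags set above become:
#   _env["XGLSL"]   = False
#   _env["XHLSL"]   = True
#   _env["XHLSL9"]  = False
#   _env["XHLSL11"] = True
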
def test_indentation_children():
    commands = [
        "execute as @a run",
        "    say foo",
        "    execute as @p run",
        "        say bar"
    ]
    tokens = tokenize([(i + 1, command) for i, command in enumerate(commands)])
    children = tokens[0].get_children()
    assert isinstance(children[0], LiteralToken)
    assert isinstance(children[1], IndentationToken)

def char_tokenization(input):
    tokens = []
    for char in input:
        if char == ' ':
            # Spaces are replaced with the underscore token id.
            token = 62
        else:
            token = tokenize([char])[0][0]
        tokens.append(token)
    return tokens

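# Hypothetical usage sketch contrasting the two helpers above. `tokenize` is
# assumed to map a list of strings to rows of token ids, as both helpers
# imply; `_stub_tokenize` and its vocabulary are made up for illustration.
def _stub_tokenize(chunks):
    vocab = {"a": 1, "b": 2, " b": 101}
    return [[vocab.get(chunk, 0)] for chunk in chunks]

# With tokenize = _stub_tokenize:
#   char_tokenization("a b")   -> [1, 62, 2]   (space mapped to id 62)
#   char_tokenization_2("a b") -> [1, 101]     (space fused with next char)
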
def _pre_process(doc_dict):
    """
    Pre-process a document: 1) generate an id, 2) tokenize the document.

    :param doc_dict: A document stored in a dictionary with the keys
        'title', 'desc' and 'tag'.
    :return: A list [job_id, title, desc, tag, title_seg, desc_seg, False].
    """
    from src import tokenizer
    import tltk
    import hashlib
    import time

    def tltk_tokenize(text):
        ret = tltk.segment(text).replace('<u/>', '').replace('<s/>', '').split('|')
        return ret

    cleaner = tokenizer.cleaner_generator('../Resource/charset')
    title = doc_dict['title']
    desc = doc_dict['desc']
    title_seg = tokenizer.tokenize(title, cleaner, tltk_tokenize, 5)
    desc_seg = tokenizer.tokenize(desc, cleaner, tltk_tokenize, 5)
    tag = doc_dict['tag']
    in_str = str(time.time()) + title + desc
    job_id = hashlib.md5(bytes(in_str, 'utf-8')).hexdigest()
    return [job_id, title, desc, tag, title_seg, desc_seg, False]

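# Hypothetical example of the shape _pre_process works with. The document
# values are invented, and the segmented fields depend on tltk and the
# charset resource, so they are not shown literally:
#
#   doc = {'title': 'Data engineer', 'desc': 'Build ETL pipelines', 'tag': 'IT'}
#   job_id, title, desc, tag, title_seg, desc_seg, indexed = _pre_process(doc)
#   # job_id  -> md5 hex digest of str(time.time()) + title + desc
#   # indexed -> False (the document has not been processed downstream yet)
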
def wrapper_tokenize(text):
    return tt.tokenize(text, tltk_tokenize, ngram, './Dict/charset', cleaner)

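# Note: wrapper_tokenize relies on module-level state; `tt`, `tltk_tokenize`,
# `ngram`, and `cleaner` are assumed to be defined elsewhere in the module,
# for example (hypothetical setup):
#
#   from src import tokenizer as tt
#   cleaner = tt.cleaner_generator('./Dict/charset')
#   ngram = 5
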
def test_label_assignment():
    commands = ["foo:", "execute run", "    say bar"]
    tokenize([(i + 1, command) for i, command in enumerate(commands)])

def test_new_lines(self):
    text = "\n\n"
    tokens = tokenize(text)
    assert len(tokens) == 1
    assert isinstance(tokens[0], Spacing)

def test_just_text(self):
    text = 'how are you'
    tokens = tokenize(text)
    for x in tokens:
        print(x)

def test_display_math(self):
    text = r'$$\sigma$$'
    tokens = tokenize(text)
    print(tokens)
    self._print_tokens(tokens)

def test_non_inline_redo():
    commands = ["execute as @a run", "    say abc"]
    tokens = tokenize([(i + 1, command) for i, command in enumerate(commands)])
    assert tokens[0].get_redo_condition() == ""

def parse(program):
    "Read a Scheme expression from a string."
    return create_ast(tokenize(program))

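# `tokenize` and `create_ast` are not shown here; a minimal sketch of what a
# Scheme reader like this might look like (an assumption, not the actual
# implementation used by parse):
def tokenize_sketch(program):
    # Pad parentheses with spaces so "(+ 1 2)" splits into '(', '+', '1', '2', ')'.
    return program.replace('(', ' ( ').replace(')', ' ) ').split()

def create_ast_sketch(tokens):
    # Build a nested list from the token stream, consuming tokens in place.
    token = tokens.pop(0)
    if token == '(':
        ast = []
        while tokens[0] != ')':
            ast.append(create_ast_sketch(tokens))
        tokens.pop(0)  # drop the closing ')'
        return ast
    return token

# create_ast_sketch(tokenize_sketch("(+ 1 (* 2 3))"))
#   -> ['+', '1', ['*', '2', '3']]
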
def test_indentation_command_token():
    commands = ["execute as @a run", "    say abc"]
    tokens = tokenize([(i + 1, command) for i, command in enumerate(commands)])
    assert isinstance(tokens[0], IndentationToken)

def test_indentation_exit():
    commands = ["execute as @a run", "    say foo", "say bar"]
    tokens = tokenize([(i + 1, command) for i, command in enumerate(commands)])
    assert isinstance(tokens[0], IndentationToken)
    assert isinstance(tokens[1], LiteralToken)

def test_token_value():
    commands = ["say foo", "execute as @a run", "    say bar"]
    tokens = tokenize([(i + 1, command) for i, command in enumerate(commands)])
    assert tokens[0].get_command() == "say foo"
    assert tokens[1].get_command() == "execute as @a run"

def test_indentation_children_length():
    commands = ["execute as @a run", "    say foo", "    say bar"]
    tokens = tokenize([(i + 1, command) for i, command in enumerate(commands)])
    assert len(tokens[0].get_children()) == 2

def test_should_not_redo():
    commands = ["execute as @a run", "    say abc"]
    tokens = tokenize([(i + 1, command) for i, command in enumerate(commands)])
    assert not tokens[0].should_redo()

def test_ignore_empty_lines():
    commands = ["say foo", "", "say bar", ""]
    tokens = tokenize([(i + 1, command) for i, command in enumerate(commands)])
    assert len(tokens) == 2
    assert tokens[0].get_command() == "say foo"
    assert tokens[1].get_command() == "say bar"

def test_some_latex(self):
    text = r'$\epsilon$ is awesome!'
    tokens = tokenize(text)
    assert tokens[0].content == r'$\epsilon$'

def test_simple_command_token():
    commands = ["say abc"]
    tokens = tokenize([(i + 1, command) for i, command in enumerate(commands)])
    assert isinstance(tokens[0], LiteralToken)

def test_escapes(self):
    text = r'$\$$'
    tokens = tokenize(text)
    print(tokens)
    assert tokens[0].content == r'$\$$'

def test_output_length():
    commands = ["say foo", "say bar", "say far", "say boo"]
    tokens = tokenize([(i + 1, command) for i, command in enumerate(commands)])
    assert len(tokens) == 4

def test_indentation():
    commands = ["execute as @a run", "    say abc"]
    tokens = tokenize([(i + 1, command) for i, command in enumerate(commands)])
    assert tokens[0].get_children()[0].get_command() == "say abc"