Example no. 1
def test_label_name_containing_whitespaces():
    commands = ["foo bar:", "execute run", "    say baz"]
    try:
        tokenize([(i + 1, command) for i, command in enumerate(commands)])
    except ValueError as error:
        assert error.args[0].startswith("Unknown or incomplete command")
    else:
        assert False
Example no. 2
def test_unknown_label():
    commands = ["execute run", "    redo foo"]
    try:
        tokenize([(i + 1, command) for i, command in enumerate(commands)])
    except NameError as error:
        assert error.args[0].startswith("Unknown label")
    else:
        assert False
Example no. 3
def test_label_name_beginning_with_number():
    commands = ["1foo:", "execute run", "    say bar"]
    try:
        tokenize([(i + 1, command) for i, command in enumerate(commands)])
    except SyntaxError as error:
        assert error.args[0].startswith("Label names must satisfy")
    else:
        assert False
Example no. 4
def test_invalid_indentation():
    commands = ["say foo", "    say bar"]
    try:
        tokenize([(i + 1, command) for i, command in enumerate(commands)])
    except IndentationError:
        assert True
    else:
        assert False
Example no. 5
def test_invalid_command():
    commands = ["foo bar"]
    try:
        tokenize([(i + 1, command) for i, command in enumerate(commands)])
    except ValueError as error:
        assert error.args[0].startswith("Unknown or incomplete command")
    else:
        assert False
Example no. 6
def test_excess_indentation():
    commands = ["execute as @a run", "        say foo"]
    try:
        tokenize([(i + 1, command) for i, command in enumerate(commands)])
    except IndentationError:
        assert True
    else:
        assert False
Example no. 7
def test_invalid_indentation_space():
    commands = ["execute as @a run", " say abc"]
    try:
        tokenize([(i + 1, command) for i, command in enumerate(commands)])
    except IndentationError:
        assert True
    else:
        assert False
Example no. 8
def test_non_indented_redo():
    commands = ["say abc", "redo"]
    try:
        tokenize([(i + 1, command) for i, command in enumerate(commands)])
    except ValueError as error:
        assert error.args[0].startswith(
            "Command 'redo' must be indented in an execute command")
    else:
        assert False
Example no. 9
def test_redo_having_more_arguments():
    commands = ["foo:", "execute run", "    redo foo bar"]
    try:
        tokenize([(i + 1, command) for i, command in enumerate(commands)])
    except SyntaxError as error:
        assert error.args[0].startswith(
            "Command 'redo' has more than 1 argument at line ")
    else:
        assert False
Example no. 10
def test_tokinzer():
    pred = tokenize("これはテストのテキストです。ファミリーマート")
    assert ["これ", "は", "テスト", "の", "テキスト", "です", "。", "ファミリーマート"] == pred

    pred = tokenize("「これは、テストのテキストです。ファミリーマート」")
    assert ["これ", "は", "テスト", "の", "テキスト", "です", "。", "ファミリーマート"] == pred

    pred = tokenize("これは、テストのテキストです。ファミリーマート🍎")
    assert ["これ", "は", "テスト", "の", "テキスト", "です", "。", "ファミリーマート"] == pred
Example no. 11
def char_tokenization_2(input):
    tokens = []
    i = 0
    while i < len(input):
        if input[i] == ' ' and i + 1 < len(input):
            # tokenize the space together with the following character;
            # a bare `i += 1` inside a for-loop would not actually skip it
            token = tokenize([input[i] + input[i + 1]])[0][0]
            i += 2
        else:
            token = tokenize([input[i]])[0][0]
            i += 1
        tokens.append(token)
    return tokens
Example no. 12
    def get_response(self, user_input):
        """Given `user_input`, this method tokenizes it, and
        returns a response that is most likely to respond to the
        user in an appropriate way.
        """
        tokens = tokenize(user_input)
        latest_response = LatestResponse(None, 0)

        if not self.data:
            raise ValueError("Data must have at least one item")

        response = self._run_thread(tokens)
        if response:
            return response

        # Normal handling
        for response in self.data:
            response_weight = self._get_weight(response, tokens)

            # Check whether the response should become the most weighted or not, if they are tied it's a random chance
            if response_weight == latest_response.weight:
                if random.random() > 0.5:
                    latest_response = LatestResponse(response, response_weight)
            elif response_weight > latest_response.weight:
                latest_response = LatestResponse(response, response_weight)

        # Set the response to a generic one if nothing matched, generic meaning something like "I don't understand"
        self.last_response = latest_response
        if latest_response.weight == 0:
            return random.choice(self.generics)

        return random.choice(latest_response.response["value"])
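The method above relies on a LatestResponse container with response and weight fields that the snippet does not define. A minimal sketch of what it could be, inferred only from the attribute access above (a hypothetical helper, not the project's actual definition):

from collections import namedtuple

# Hypothetical helper inferred from get_response above: a (response, weight)
# pair read back as latest_response.response and latest_response.weight.
LatestResponse = namedtuple("LatestResponse", ["response", "weight"])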
Example no. 13
    def _process_file(fin, fout):
        print(f"Expanding {fin} to {fout}...")

        with open(fin, "r") as src:
            code = src.read()
        with open(fout, "w") as dest:
            dest.write(code)

        clear(fout)

        if CLEAN:
            return

        _env = copy.deepcopy(ENV)
        _env["XGLSL"] = LANG_CURRENT == "glsl"
        _env["XHLSL"] = LANG_CURRENT in ["hlsl9", "hlsl11"]
        _env["XHLSL9"] = LANG_CURRENT == "hlsl9"
        _env["XHLSL11"] = LANG_CURRENT == "hlsl11"

        tokens = tokenize(fout)
        tree = make_tree(tokens)
        processed = process_tree(tree, _env, XPATH, XPATH_DEFAULT,
                                 CLEAR_PRAGMA_INCLUDES)
        with open(fout, "w") as f:
            processed = handle_compatibility(processed, LANG_CURRENT)
            processed = re.sub(r"\n{3,}", "\n\n", processed)
            if MINIFY:
                processed = minify(processed)
            f.write(processed)

        print("-" * 80)
Example no. 14
def test_indentation_children():
    commands = [
        "execute as @a run", "    say foo", "    execute as @p run",
        "        say bar"
    ]
    tokens = tokenize([(i + 1, command) for i, command in enumerate(commands)])
    children = tokens[0].get_children()
    assert isinstance(children[0], LiteralToken)
    assert isinstance(children[1], IndentationToken)
Example no. 15
def char_tokenization(input):
    tokens = []
    for char in input:
        if char == ' ':
            # replaces spaces with underscores
            token = 62
        else:
            token = tokenize([char])[0][0]
        tokens.append(token)
    return tokens
Example no. 16
    def _pre_process(doc_dict):
        """
        Pre-process a document data; 1) generate id, 2) tokenize the document.
        :param doc_dict: A document stored in a dictionary object with the following keys; 'title', 'desc', 'tag'.
        :return: A list of tokenized keys; job_id, title, desc, tag, title_seg, desc_seg, False.
        """
        from src import tokenizer
        import tltk
        import hashlib
        import time

        def tltk_tokenize(text):
            ret = tltk.segment(text).replace('<u/>', '').replace('<s/>', '').split('|')
            return ret

        cleaner = tokenizer.cleaner_generator('../Resource/charset')
        title = doc_dict['title']
        desc = doc_dict['desc']
        title_seg = tokenizer.tokenize(title, cleaner, tltk_tokenize, 5)
        desc_seg = tokenizer.tokenize(desc, cleaner, tltk_tokenize, 5)
        tag = doc_dict['tag']
        in_str = str(time.time()) + title + desc
        job_id = hashlib.md5(bytes(in_str, 'utf-8')).hexdigest()
        return [job_id, title, desc, tag, title_seg, desc_seg, False]
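A hedged usage sketch for the pre-processing step above, assuming _pre_process is reachable as a plain function and using only the dictionary keys named in its docstring; the field values here are invented for illustration:

# Hypothetical call; the doc_dict keys follow the docstring, the values are made up.
doc = {"title": "Data engineer", "desc": "Build and maintain ETL pipelines", "tag": "jobs"}
job_id, title, desc, tag, title_seg, desc_seg, flag = _pre_process(doc)
# job_id is an MD5 hex digest; title_seg and desc_seg are the tokenized fields.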
Example no. 17
def wrapper_tokenize(text):
    return tt.tokenize(text, tltk_tokenize, ngram, './Dict/charset', cleaner)
Example no. 18
def test_label_assignment():
    commands = ["foo:", "execute run", "    say bar"]
    tokenize([(i + 1, command) for i, command in enumerate(commands)])
Example no. 19
 def test_new_lines(self):
     text = "\n\n"
     tokens = tokenize(text)
     assert len(tokens) == 1
     assert isinstance(tokens[0], Spacing)
Example no. 20
 def test_just_text(self):
     text = 'how are you'
     tokens = tokenize(text)
     for x in tokens:
         print(x)
Example no. 22
 def test_display_math(self):
     text = '$$\sigma$$'
     tokens = tokenize(text)
     print(tokens)
     self._print_tokens(tokens)
Example no. 23
def test_non_inline_redo():
    commands = ["execute as @a run", "    say abc"]
    tokens = tokenize([(i + 1, command) for i, command in enumerate(commands)])
    assert tokens[0].get_redo_condition() == ""
Example no. 24
def parse(program):
    "Read a Scheme expression from a string."
    return create_ast(tokenize(program))
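This is the classic Scheme-reader pattern: tokenize splits the program text into parentheses and atoms, and create_ast folds the flat token list into nested lists. The snippet does not show tokenize itself; a minimal sketch of the kind of tokenizer such a reader usually assumes (an illustration, not the snippet's actual implementation):

def tokenize(chars):
    # Pad parentheses with spaces, then split on whitespace:
    # "(+ 1 2)" -> ['(', '+', '1', '2', ')']
    return chars.replace('(', ' ( ').replace(')', ' ) ').split()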
Example no. 25
def test_indentation_command_token():
    commands = ["execute as @a run", "    say abc"]
    tokens = tokenize([(i + 1, command) for i, command in enumerate(commands)])
    assert isinstance(tokens[0], IndentationToken)
Example no. 26
def test_indentation_exit():
    commands = ["execute as @a run", "    say foo", "say bar"]
    tokens = tokenize([(i + 1, command) for i, command in enumerate(commands)])
    assert isinstance(tokens[0], IndentationToken)
    assert isinstance(tokens[1], LiteralToken)
Example no. 27
def test_token_value():
    commands = ["say foo", "execute as @a run", "    say bar"]
    tokens = tokenize([(i + 1, command) for i, command in enumerate(commands)])
    assert tokens[0].get_command() == "say foo"
    assert tokens[1].get_command() == "execute as @a run"
Example no. 28
def test_indentation_children_length():
    commands = ["execute as @a run", "    say foo", "    say bar"]
    tokens = tokenize([(i + 1, command) for i, command in enumerate(commands)])
    assert len(tokens[0].get_children()) == 2
Example no. 29
def test_should_not_redo():
    commands = ["execute as @a run", "    say abc"]
    tokens = tokenize([(i + 1, command) for i, command in enumerate(commands)])
    assert tokens[0].should_redo() == False
Example no. 30
def test_ignore_empty_lines():
    commands = ["say foo", "", "say bar", ""]
    tokens = tokenize([(i + 1, command) for i, command in enumerate(commands)])
    assert len(tokens) == 2
    assert tokens[0].get_command() == "say foo"
    assert tokens[1].get_command() == "say bar"
Example no. 31
 def test_some_latex(self):
     text = '$\epsilon$ is awesome!'
     tokens = tokenize(text)
     assert tokens[0].content == '$\epsilon$'
Example no. 32
def test_simple_command_token():
    commands = ["say abc"]
    tokens = tokenize([(i + 1, command) for i, command in enumerate(commands)])
    assert isinstance(tokens[0], LiteralToken)
Example no. 33
 def test_escapes(self):
     text = '$\$$'
     tokens = tokenize(text)
     print(tokens)
     assert tokens[0].content == '$\$$'
Example no. 34
def test_output_length():
    commands = ["say foo", "say bar", "say far", "say boo"]
    tokens = tokenize([(i + 1, command) for i, command in enumerate(commands)])
    assert len(tokens) == 4
Example no. 35
def test_indentation():
    commands = ["execute as @a run", "    say abc"]
    tokens = tokenize([(i + 1, command) for i, command in enumerate(commands)])
    assert tokens[0].get_children()[0].get_command() == "say abc"