Example no. 1
def test_label_name_containing_whitespaces():
    commands = ["foo bar:", "execute run", "    say baz"]
    try:
        tokenize([(i + 1, command) for i, command in enumerate(commands)])
    except ValueError as error:
        assert error.args[0].startswith("Unknown or incomplete command")
    else:
        assert False
Example no. 2
def test_unknown_label():
    commands = ["execute run", "    redo foo"]
    try:
        tokenize([(i + 1, command) for i, command in enumerate(commands)])
    except NameError as error:
        assert error.args[0].startswith("Unknown label")
    else:
        assert False
Example no. 3
def test_label_name_beginning_with_number():
    commands = ["1foo:", "execute run", "    say bar"]
    try:
        tokenize([(i + 1, command) for i, command in enumerate(commands)])
    except SyntaxError as error:
        assert error.args[0].startswith("Label names must satisfy")
    else:
        assert False
Example no. 4
def test_invalid_indentation():
    commands = ["say foo", "    say bar"]
    try:
        tokenize([(i + 1, command) for i, command in enumerate(commands)])
    except IndentationError:
        assert True
    else:
        assert False
Example no. 5
def test_invalid_command():
    commands = ["foo bar"]
    try:
        tokenize([(i + 1, command) for i, command in enumerate(commands)])
    except ValueError as error:
        assert error.args[0].startswith("Unknown or incomplete command")
    else:
        assert False
Example no. 6
def test_excess_indentation():
    commands = ["execute as @a run", "        say foo"]
    try:
        tokenize([(i + 1, command) for i, command in enumerate(commands)])
    except IndentationError:
        assert True
    else:
        assert False
Example no. 7
def test_invalid_indentation_space():
    commands = ["execute as @a run", " say abc"]
    try:
        tokenize([(i + 1, command) for i, command in enumerate(commands)])
    except IndentationError:
        assert True
    else:
        assert False
Example no. 8
def test_non_indented_redo():
    commands = ["say abc", "redo"]
    try:
        tokenize([(i + 1, command) for i, command in enumerate(commands)])
    except ValueError as error:
        assert error.args[0].startswith(
            "Command 'redo' must be indented in an execute command")
    else:
        assert False
Example no. 9
def test_redo_having_more_arguments():
    commands = ["foo:", "execute run", "    redo foo bar"]
    try:
        tokenize([(i + 1, command) for i, command in enumerate(commands)])
    except SyntaxError as error:
        assert error.args[0].startswith(
            "Command 'redo' has more than 1 argument at line ")
    else:
        assert False
Example no. 10
def test_tokinzer():
    pred = tokenize("これはテストのテキストです。ファミリーマート")
    assert ["これ", "は", "テスト", "の", "テキスト", "です", "。", "ファミリーマート"] == pred

    pred = tokenize("「これは、テストのテキストです。ファミリーマート」")
    assert ["これ", "は", "テスト", "の", "テキスト", "です", "。", "ファミリーマート"] == pred

    pred = tokenize("これは、テストのテキストです。ファミリーマート🍎")
    assert ["これ", "は", "テスト", "の", "テキスト", "です", "。", "ファミリーマート"] == pred
Example no. 11
def char_tokenization_2(input):
    tokens = []
    i = 0
    while i < len(input):
        if input[i] == ' ' and i + 1 < len(input):
            # tokenize the space together with the following character;
            # a bare `i += 1` inside a for-loop would not actually skip it
            token = tokenize([input[i] + input[i + 1]])[0][0]
            i += 2
        else:
            token = tokenize([input[i]])[0][0]
            i += 1
        tokens.append(token)
    return tokens
Example no. 12
    def get_response(self, user_input):
        """Given `user_input`, this method tokenizes it, and
        returns a response that is most likely to respond to the
        user in an appropriate way.
        """
        tokens = tokenize(user_input)
        latest_response = LatestResponse(None, 0)

        if not self.data:
            raise ValueError("Data must have at least one item")

        response = self._run_thread(tokens)
        if response:
            return response

        # Normal handling
        for response in self.data:
            response_weight = self._get_weight(response, tokens)

            # Check whether the response should become the most weighted or not, if they are tied it's a random chance
            if response_weight == latest_response.weight:
                if random.random() > 0.5:
                    latest_response = LatestResponse(response, response_weight)
            elif response_weight > latest_response.weight:
                latest_response = LatestResponse(response, response_weight)

        # Set the response to a generic one if nothing matched, generic meaning something like "I don't understand"
        self.last_response = latest_response
        if latest_response.weight == 0:
            return random.choice(self.generics)

        return random.choice(latest_response.response["value"])
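The method above relies on a LatestResponse container with response and weight fields that the snippet does not define. A minimal sketch of what it could be, inferred only from the attribute access above (a hypothetical helper, not the project's actual definition):

from collections import namedtuple

# Hypothetical helper inferred from get_response above: a (response, weight)
# pair read back as latest_response.response and latest_response.weight.
LatestResponse = namedtuple("LatestResponse", ["response", "weight"])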
Example no. 13
    def _process_file(fin, fout):
        print(f"Expanding {fin} to {fout}...")

        with open(fin, "r") as src:
            code = src.read()
        with open(fout, "w") as dest:
            dest.write(code)

        clear(fout)

        if CLEAN:
            return

        _env = copy.deepcopy(ENV)
        _env["XGLSL"] = LANG_CURRENT == "glsl"
        _env["XHLSL"] = LANG_CURRENT in ["hlsl9", "hlsl11"]
        _env["XHLSL9"] = LANG_CURRENT == "hlsl9"
        _env["XHLSL11"] = LANG_CURRENT == "hlsl11"

        tokens = tokenize(fout)
        tree = make_tree(tokens)
        processed = process_tree(tree, _env, XPATH, XPATH_DEFAULT,
                                 CLEAR_PRAGMA_INCLUDES)
        with open(fout, "w") as f:
            processed = handle_compatibility(processed, LANG_CURRENT)
            processed = re.sub(r"\n{3,}", "\n\n", processed)
            if MINIFY:
                processed = minify(processed)
            f.write(processed)

        print("-" * 80)
Example no. 14
def test_indentation_children():
    commands = [
        "execute as @a run", "    say foo", "    execute as @p run",
        "        say bar"
    ]
    tokens = tokenize([(i + 1, command) for i, command in enumerate(commands)])
    children = tokens[0].get_children()
    assert isinstance(children[0], LiteralToken)
    assert isinstance(children[1], IndentationToken)
Example no. 15
def char_tokenization(input):
    tokens = []
    for char in input:
        if char == ' ':
            # replaces spaces with underscores
            token = 62
        else:
            token = tokenize([char])[0][0]
        tokens.append(token)
    return tokens
Example no. 16
    def _pre_process(doc_dict):
        """
        Pre-process a document data; 1) generate id, 2) tokenize the document.
        :param doc_dict: A document stored in a dictionary object with the following keys; 'title', 'desc', 'tag'.
        :return: A list of tokenized keys; job_id, title, desc, tag, title_seg, desc_seg, False.
        """
        from src import tokenizer
        import tltk
        import hashlib
        import time

        def tltk_tokenize(text):
            ret = tltk.segment(text).replace('<u/>', '').replace('<s/>', '').split('|')
            return ret

        cleaner = tokenizer.cleaner_generator('../Resource/charset')
        title = doc_dict['title']
        desc = doc_dict['desc']
        title_seg = tokenizer.tokenize(title, cleaner, tltk_tokenize, 5)
        desc_seg = tokenizer.tokenize(desc, cleaner, tltk_tokenize, 5)
        tag = doc_dict['tag']
        in_str = str(time.time()) + title + desc
        job_id = hashlib.md5(bytes(in_str, 'utf-8')).hexdigest()
        return [job_id, title, desc, tag, title_seg, desc_seg, False]
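A hedged usage sketch for the pre-processing step above, assuming _pre_process is reachable as a plain function and using only the dictionary keys named in its docstring; the field values here are invented for illustration:

# Hypothetical call; the doc_dict keys follow the docstring, the values are made up.
doc = {"title": "Data engineer", "desc": "Build and maintain ETL pipelines", "tag": "jobs"}
job_id, title, desc, tag, title_seg, desc_seg, flag = _pre_process(doc)
# job_id is an MD5 hex digest; title_seg and desc_seg are the tokenized fields.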
Example no. 17
def wrapper_tokenize(text):
    return tt.tokenize(text, tltk_tokenize, ngram, './Dict/charset', cleaner)
Example no. 18
def test_label_assignment():
    commands = ["foo:", "execute run", "    say bar"]
    tokenize([(i + 1, command) for i, command in enumerate(commands)])
Example no. 19
 def test_new_lines(self):
     text = "\n\n"
     tokens = tokenize(text)
     assert len(tokens) == 1
     assert isinstance(tokens[0], Spacing)
Example no. 20
 def test_just_text(self):
     text = 'how are you'
     tokens = tokenize(text)
     for x in tokens:
         print(x)
Example no. 22
 def test_display_math(self):
     text = '$$\sigma$$'
     tokens = tokenize(text)
     print(tokens)
     self._print_tokens(tokens)
Example no. 23
def test_non_inline_redo():
    commands = ["execute as @a run", "    say abc"]
    tokens = tokenize([(i + 1, command) for i, command in enumerate(commands)])
    assert tokens[0].get_redo_condition() == ""
Example no. 24
def parse(program):
    "Read a Scheme expression from a string."
    return create_ast(tokenize(program))
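This is the classic Scheme-reader pattern: tokenize splits the program text into parentheses and atoms, and create_ast folds the flat token list into nested lists. The snippet does not show tokenize itself; a minimal sketch of the kind of tokenizer such a reader usually assumes (an illustration, not the snippet's actual implementation):

def tokenize(chars):
    # Pad parentheses with spaces, then split on whitespace:
    # "(+ 1 2)" -> ['(', '+', '1', '2', ')']
    return chars.replace('(', ' ( ').replace(')', ' ) ').split()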
Example no. 25
def test_indentation_command_token():
    commands = ["execute as @a run", "    say abc"]
    tokens = tokenize([(i + 1, command) for i, command in enumerate(commands)])
    assert isinstance(tokens[0], IndentationToken)
Example no. 26
def test_indentation_exit():
    commands = ["execute as @a run", "    say foo", "say bar"]
    tokens = tokenize([(i + 1, command) for i, command in enumerate(commands)])
    assert isinstance(tokens[0], IndentationToken)
    assert isinstance(tokens[1], LiteralToken)
Example no. 27
def test_token_value():
    commands = ["say foo", "execute as @a run", "    say bar"]
    tokens = tokenize([(i + 1, command) for i, command in enumerate(commands)])
    assert tokens[0].get_command() == "say foo"
    assert tokens[1].get_command() == "execute as @a run"
Example no. 28
def test_indentation_children_length():
    commands = ["execute as @a run", "    say foo", "    say bar"]
    tokens = tokenize([(i + 1, command) for i, command in enumerate(commands)])
    assert len(tokens[0].get_children()) == 2
Example no. 29
def test_should_not_redo():
    commands = ["execute as @a run", "    say abc"]
    tokens = tokenize([(i + 1, command) for i, command in enumerate(commands)])
    assert tokens[0].should_redo() == False
Example no. 30
def test_ignore_empty_lines():
    commands = ["say foo", "", "say bar", ""]
    tokens = tokenize([(i + 1, command) for i, command in enumerate(commands)])
    assert len(tokens) == 2
    assert tokens[0].get_command() == "say foo"
    assert tokens[1].get_command() == "say bar"
Example no. 31
 def test_some_latex(self):
     text = '$\epsilon$ is awesome!'
     tokens = tokenize(text)
     assert tokens[0].content == '$\epsilon$'
Example no. 32
def test_simple_command_token():
    commands = ["say abc"]
    tokens = tokenize([(i + 1, command) for i, command in enumerate(commands)])
    assert isinstance(tokens[0], LiteralToken)
Example no. 33
 def test_escapes(self):
     text = '$\$$'
     tokens = tokenize(text)
     print(tokens)
     assert tokens[0].content == '$\$$'
Example no. 34
def test_output_length():
    commands = ["say foo", "say bar", "say far", "say boo"]
    tokens = tokenize([(i + 1, command) for i, command in enumerate(commands)])
    assert len(tokens) == 4
Example no. 35
def test_indentation():
    commands = ["execute as @a run", "    say abc"]
    tokens = tokenize([(i + 1, command) for i, command in enumerate(commands)])
    assert tokens[0].get_children()[0].get_command() == "say abc"