Example #1
    def test_build_empty_operator(self):
        built_phrase = phrase_builder(self.expression_context, PhraseClass.operator, [
            Token(TokenClass.word, "delay")
        ], 0)
        expected_expr = Phrase(PhraseClass.operator, phrase_subclass=None,
                               keyword=Token(TokenClass.word, "delay"), params=[])
        self.assertTrue(are_phrases_equal(built_phrase, expected_expr))
Example #2
    def test_build_parametrised_label(self):
        built_phrase = phrase_builder(self.expression_context, PhraseClass.label,
                                      [Token(TokenClass.word, "label"),
                                       Token(TokenClass.parameter, "@")], 0)
        expected_expr = Phrase(PhraseClass.label, phrase_subclass=None,
                               keyword=Token(TokenClass.word, "label"),
                               params=[Token(TokenClass.parameter, "@")])
        self.assertTrue(are_phrases_equal(built_phrase, expected_expr))
Example #3
def tokenize_line(code_line, line_number):
    line_tokens = []
    line = peekable(code_line)
    ch = next(line)

    while ch != "\n":

        # Case 0: whitespace
        if ch == " ":
            pass

        # Case 1: doublet (two-character) symbols
        elif ch in doublet_pieces and ch + line.peek('') in Symbols.DOUBLETS:
            line_tokens.append(Token(ch + next(line), Symbols.NAME))

        # Case 2: singlet (single-character) symbols
        elif ch in Symbols.SINGLETS:
            line_tokens.append(Token(ch, Symbols.NAME))

        # Case 3: identifier or keyword
        elif ch.isalpha():
            token_chars = [ch]
            while line.peek('').isalnum():
                token_chars.append(next(line))
            token = ''.join(token_chars)
            name = Keywords.NAME if token in Keywords.VALUES else Identifiers.NAME
            line_tokens.append(Token(token, name))

        # Case 4: number (integer or decimal)
        elif ch.isdigit():
            token_chars = [ch]
            while line.peek('').isdigit():
                token_chars.append(next(line))
            if line.peek('') == '.':
                token_chars.append(next(line))
                while line.peek('').isdigit():
                    token_chars.append(next(line))

            # A letter immediately after a number is a syntax error
            if line.peek('').isalpha():
                raise SyntaxError(
                    "Malformed number at line {}".format(line_number))

            line_tokens.append(Token(''.join(token_chars), Numbers.NAME))

        else:
            raise SyntaxError("Unexpected token {} at line {}".format(
                ch, line_number))

        ch = next(line)

    return line_tokens
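
The function above relies on a peekable iterator and several project-local
tables that this excerpt does not show. A minimal sketch of those assumed
pieces, enough to run tokenize_line standalone, could look like the
following; the names and values are illustrative stand-ins, not the
project's actual definitions.

from collections import namedtuple

from more_itertools import peekable  # supplies the peek(default) API used above

# Stand-ins for the project's token tables (illustrative values only)
Token = namedtuple("Token", ["value", "name"])

class Symbols:
    NAME = "symbol"
    SINGLETS = set("()[]+-*/")
    DOUBLETS = {"..", "=="}

class Keywords:
    NAME = "keyword"
    VALUES = {"if", "while"}

class Identifiers:
    NAME = "identifier"

class Numbers:
    NAME = "number"

# First characters of the two-character symbols
doublet_pieces = {d[0] for d in Symbols.DOUBLETS}

print(tokenize_line("echo([1 .. 4])\n", 1))  # mirrors Example #12's input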
Example #4
from typing import List  # needed for the List[Token] annotations below

def process_line(string: str) -> List[Token]:
    """
    Split the input string into tokens.

    :param string: string to process
    :return: list of recognized tokens
    """
    tokens: List[Token] = []
    index = 0
    i = 0
    active_machines = False
    machine_found = False

    while i < len(string):
        char = string[i]
        # Feed the current symbol to every machine
        for machine in machines:
            machine.process_object(char)
            if machine.state != State.undefined:
                active_machines = True

        if not active_machines:
            # All machines reached the undefined state and the current
            # sequence is non-empty
            if i - index > 0:
                for machine in machines:
                    # Take the first machine whose previous state was a valid
                    # (non-begin, non-undefined) accepting state
                    if machine.prevState != State.undefined and machine.prevState != State.begin and not machine_found:
                        token = Token(machine.name, string[index:i])
                        tokens.append(token)
                        machine_found = True
                    machine.reset_state()
                index = i
                # Roll back one symbol: it drove the machines into the
                # undefined state and is part of the next token
                i -= 1
                machine_found = False
            # All machines reached the undefined state on the very first
            # symbol of the sequence, so the symbol itself is unrecognized
            else:
                # Classify the symbol as undefined
                tokens.append(Token(TokenClass.undefined, string[i]))
                index = i
        # Reset active machines flag
        active_machines = False
        i += 1

    # Recognize final token
    for machine in machines:
        if machine.state != State.undefined and machine.state != State.begin and not machine_found:
            token = Token(machine.name, string[index:i])
            tokens.append(token)
            machine_found = True
        machine.reset_state()

    return tokens
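
process_line drives a set of finite-state machines whose interface is only
implied by the code above. The sketch below shows one way that assumed
interface could look (a State enum, machines exposing process_object,
prevState and reset_state, plus a TokenClass stub); the real project's
classes are certainly richer.

from collections import namedtuple
from enum import Enum

Token = namedtuple("Token", ["token_class", "value"])

class TokenClass:
    undefined = "undefined"  # stub for the project's TokenClass enum

class State(Enum):
    undefined = 0
    begin = 1
    active = 2

class DigitMachine:
    # Illustrative machine that accepts runs of decimal digits
    name = "num"  # stands in for the machine's token class

    def __init__(self):
        self.state = State.begin
        self.prevState = State.begin

    def process_object(self, char):
        self.prevState = self.state
        if self.state != State.undefined and char.isdigit():
            self.state = State.active
        else:
            self.state = State.undefined

    def reset_state(self):
        self.state = State.begin
        self.prevState = State.begin

machines = [DigitMachine()]

# With this stub, process_line("12a") yields a "num" token for "12"
# followed by an undefined token for "a".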
Example #5
    def test_types(self):

        tokens = []
        line = "Integer Double Boolean String Void"
        put_line_in_lexer_text_source(self.lexer, line)

        while not self.lexer.is_eot_token():
            token = self.lexer.get_token()
            tokens.append(token)

        # Reading past the end of the source should keep returning EOT
        token = self.lexer.get_token()
        tokens.append(token)

        token = self.lexer.get_token()
        tokens.append(token)

        expected = [
            Token(TokenType.K_INTEGER),
            Token(TokenType.K_DOUBLE),
            Token(TokenType.K_BOOLEAN),
            Token(TokenType.K_STRING),
            Token(TokenType.K_VOID),
            Token(TokenType.EOT),
            Token(TokenType.EOT),
            Token(TokenType.EOT)
        ]
        self.assertEqual(expected, tokens)
Example #6
    def test_build_operator_with_parameters(self):
        built_phrase = phrase_builder(self.expression_context, PhraseClass.operator, [
            Token(TokenClass.word, "delay"),
            Token(TokenClass.num, "1"),
            Token(TokenClass.word, "two"),
            Token(TokenClass.string, "\"3\""),
            Token(TokenClass.parameter, "@4")
        ], 0)
        expected_expr = Phrase(PhraseClass.operator, phrase_subclass=None,
                               keyword=Token(TokenClass.word, "delay"),
                               params=[Token(TokenClass.num, "1"),
                                       Token(TokenClass.word, "two"),
                                       Token(TokenClass.string, "\"3\""),
                                       Token(TokenClass.parameter, "@4")])
        self.assertTrue(are_phrases_equal(built_phrase, expected_expr))
Example #7
    def test_logic_operators(self):

        tokens = []
        line = "| & ! "
        put_line_in_lexer_text_source(self.lexer, line)

        while not self.lexer.is_eot_token():
            token = self.lexer.get_token()
            tokens.append(token)

        expected = [
            Token(TokenType.VERTICAL_LINE),
            Token(TokenType.AMPERSAND),
            Token(TokenType.EXCLAMATION),
            Token(TokenType.EOT)
        ]
        self.assertEqual(expected, tokens)
Example #8
    def test_math_operators(self):

        tokens = []
        line = "+ - * / "
        put_line_in_lexer_text_source(self.lexer, line)

        while not self.lexer.is_eot_token():
            token = self.lexer.get_token()
            tokens.append(token)

        expected = [
            Token(TokenType.PLUS_OR_CONC),
            Token(TokenType.MINUS),
            Token(TokenType.MUL_OR_REFER),
            Token(TokenType.DIV),
            Token(TokenType.EOT)
        ]
        self.assertEqual(expected, tokens)
Example #9
    def offset(self, a):
        # Translate a[i][j]... into a flat byte offset: each index is
        # multiplied by the width of the element type at that nesting
        # level and the products are summed.
        type_ = a.type_
        self.match('[')
        i = self.bool_()
        self.match(']')
        type_ = type_.of
        w = Constant(i=type_.width)
        t1 = Arith(Token('*'), i, w)
        loc = t1
        while self.look.tag == '[':
            self.match('[')
            i = self.bool_()
            self.match(']')
            type_ = type_.of
            w = Constant(i=type_.width)
            t1 = Arith(Token('*'), i, w)
            t2 = Arith(Token('+'), loc, t1)
            loc = t2
        return Access(a, loc, type_)
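
The loop above emits a weighted sum of indices: each subscript is
multiplied by the byte width of the element type at that nesting level and
the products are added. A small standalone illustration of that address
arithmetic (the widths here are made up for the example):

def flat_offset(indices, widths):
    # widths[k] is the byte width of the element type after k+1 subscripts
    return sum(i * w for i, w in zip(indices, widths))

# For int a[2][3] with 4-byte ints, width(int[3]) == 12 and width(int) == 4,
# so a[1][2] sits at byte offset 1*12 + 2*4 == 20.
assert flat_offset([1, 2], [12, 4]) == 20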
Example #10
    def test_comment_eot_handling(self):

        tokens = []
        line = "//to jest komentarz "
        put_line_in_lexer_text_source(self.lexer, line)

        while not self.lexer.is_eot_token():
            token = self.lexer.get_token()
            tokens.append(token)

        expected = [Token(TokenType.EOT)]
        self.assertEqual(expected, tokens)
Example #11
    def test_build_comment(self):
        built_phrase = phrase_builder(self.body_context, PhraseClass.comment,
                                      [Token(TokenClass.word, "w1"),
                                       Token(TokenClass.word, "w2"),
                                       Token(TokenClass.word, "w3")], 0)
        expected_expr = Phrase(PhraseClass.comment, phrase_subclass=None,
                               params=[Token(TokenClass.word, "w1"),
                                       Token(TokenClass.word, "w2"),
                                       Token(TokenClass.word, "w3")])
        self.assertTrue(are_phrases_equal(built_phrase, expected_expr))
Example #12
    def test_valid_input_single_line(self):
        code_mock = "echo([1 .. 4])\n"

        expected_result = [[
            Token('echo', Identifiers.NAME),
            Token('(', Symbols.NAME),
            Token('[', Symbols.NAME),
            Token('1', Numbers.NAME),
            Token('..', Symbols.NAME),
            Token('4', Numbers.NAME),
            Token(']', Symbols.NAME),
            Token(')', Symbols.NAME)
        ]]

        with patch('lexer.tokenizer.open',
                   new=mock_open(read_data=code_mock)) as _file:
            result = tokenize('path')
            self.assertListEqual(expected_result, result)
Example #13
    def test_zero_values(self):

        tokens = []
        line = "0 0.0 0.000001"
        put_line_in_lexer_text_source(self.lexer, line)

        while not self.lexer.is_eot_token():
            token = self.lexer.get_token()
            tokens.append(token)

        expected = [
            TokenWithValue(TokenType.VALUE_INT, 0),
            TokenWithDoubleValue(TokenType.VALUE_DOUBLE, 0, None, 0, 1),
            TokenWithDoubleValue(TokenType.VALUE_DOUBLE, 0, None, 1, 6),
            Token(TokenType.EOT)
        ]
        self.assertEqual(expected, tokens)
Example #14
    def test_values(self):

        tokens = []
        line = "\"string\" 5 2.5 "
        put_line_in_lexer_text_source(self.lexer, line)

        while not self.lexer.is_eot_token():
            token = self.lexer.get_token()
            tokens.append(token)

        expected = [
            TokenWithValue(TokenType.VALUE_STRING, 'string'),
            TokenWithValue(TokenType.VALUE_INT, 5),
            TokenWithDoubleValue(TokenType.VALUE_DOUBLE, 2, None, 5, 1),
            Token(TokenType.EOT)
        ]
        self.assertEqual(expected, tokens)
Example #15
    def test_ident(self):

        tokens = []
        line = "var_name x y z "
        put_line_in_lexer_text_source(self.lexer, line)

        while not self.lexer.is_eot_token():
            token = self.lexer.get_token()
            tokens.append(token)

        expected = [
            TokenWithValue(TokenType.VALUE_ID, "var_name"),
            TokenWithValue(TokenType.VALUE_ID, "x"),
            TokenWithValue(TokenType.VALUE_ID, "y"),
            TokenWithValue(TokenType.VALUE_ID, "z"),
            Token(TokenType.EOT)
        ]
        self.assertEqual(expected, tokens)
Example #16
    def test_other_tokens(self):

        tokens = []
        line = "if else true false return while = "
        put_line_in_lexer_text_source(self.lexer, line)

        while not self.lexer.is_eot_token():
            token = self.lexer.get_token()
            tokens.append(token)

        expected = [
            Token(TokenType.K_IF),
            Token(TokenType.K_ELSE),
            Token(TokenType.K_TRUE),
            Token(TokenType.K_FALSE),
            Token(TokenType.K_RETURN),
            Token(TokenType.K_WHILE),
            Token(TokenType.ASSIGN_OP),
            Token(TokenType.EOT)
        ]
        self.assertEqual(expected, tokens)
Example #17
    def test_punctuation(self):

        tokens = []
        line = ", . ; { } ( )  "
        put_line_in_lexer_text_source(self.lexer, line)

        while not self.lexer.is_eot_token():
            token = self.lexer.get_token()
            tokens.append(token)

        expected = [
            Token(TokenType.COMMA),
            Token(TokenType.DOT),
            Token(TokenType.SEMICOLON),
            Token(TokenType.LEFT_BRACKET),
            Token(TokenType.RIGHT_BRACKET),
            Token(TokenType.LEFT_PARENT),
            Token(TokenType.RIGHT_PARENT),
            Token(TokenType.EOT)
        ]
        self.assertEqual(expected, tokens)
Example #18
    def _compose_expression(self, phrase: Phrase, signature: Signature):
        if signature.contains_param or signature.output == "":
            # Build a new parameter list, replacing parametrised arguments
            # with the actual parameters
            params = []
            for param in phrase.params:
                # Check whether the token is a parameter
                if param.token_class == TokenClass.parameter:
                    # A bare "@" is replaced with the expression's
                    # occurrence number
                    if param.value == "@":
                        params.append(
                            Token(TokenClass.num, str(self.expr_uses)))
                        break
                    # "@N" is replaced with the N-th actual parameter
                    param_num = int(param.value[1:]) - 1
                    params.append(self.param_list[param_num])
                else:
                    # Otherwise keep the operator's own parameter
                    params.append(param)

            self.expr_gen(phrase.keyword.value, params)

        self.line = signature.output
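
The substitution rule implemented above is: a bare "@" becomes the
expression's occurrence number (and, in this method, ends the scan), "@N"
pulls the N-th actual parameter (1-based), and any non-parameter token
passes through unchanged. A standalone sketch of just that mapping, with
plain strings standing in for Token objects:

def substitute(params, actual_params, expr_uses):
    out = []
    for p in params:
        if p == "@":
            # Bare "@" -> occurrence number of the expression
            out.append(str(expr_uses))
        elif p.startswith("@"):
            # "@N" -> N-th actual parameter, 1-based
            out.append(actual_params[int(p[1:]) - 1])
        else:
            # Anything else is kept as-is
            out.append(p)
    return out

assert substitute(["x", "@2", "@"], ["a", "b"], 3) == ["x", "b", "3"]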
Example #19
    def test_equality_operators(self):

        tokens = []
        line = "<= < >= > == !="
        put_line_in_lexer_text_source(self.lexer, line)

        while not self.lexer.is_eot_token():
            token = self.lexer.get_token()
            tokens.append(token)

        expected = [
            Token(TokenType.LESS_EQUAL),
            Token(TokenType.LESS),
            Token(TokenType.GREATER_EQUAL),
            Token(TokenType.GREATER),
            Token(TokenType.EQUAL),
            Token(TokenType.NOT_EQUAL),
            Token(TokenType.EOT)
        ]
        self.assertEqual(expected, tokens)
Example #20
def process_tokens(tree: ParseTree, table: SymbolTable, lang_dict: LangDict,
                   tokens: List[Token]):
    active_machines: bool = False
    machine_found: bool = False
    token_index: int = 0
    phrase_start_line: int = 1
    temp_phrase: List[Token] = []
    sem_analyzer = SemanticAnalyzer(tree, table, lang_dict)

    while token_index < len(tokens):
        token: Token = tokens[token_index]

        # New line check
        if token.token_class == TokenClass.newline:
            sem_analyzer.add_line()

        # Process token with parser machines
        for machine in machines:
            machine.process_object(token)
            if machine.state != State.undefined:
                active_machines = True

        # All machines reached the undefined state
        if not active_machines:
            # Try to find a machine that recognized the phrase
            for machine in machines:
                if not machine_found and machine.is_sequence_recognized():
                    recognized_phrase = phrase_builder(tree.get_context(),
                                                       machine.name,
                                                       temp_phrase,
                                                       phrase_start_line)
                    sem_analyzer.process_phrase(recognized_phrase,
                                                phrase_start_line)
                    machine_found = True
                    temp_phrase.clear()

            # The token wasn't recognized by any machine
            if not machine_found:
                for machine in machines:
                    if machine.prevState != State.undefined:
                        raise InterpretationError(
                            PeaceError(
                                f"Unexpected token {repr(token.value)}, expected {machine.name.name}.",
                                ErrorType.syntax_error,
                                sem_analyzer.get_line(), token.value))

            # Reset machine states
            for machine in machines:
                machine.reset_state()

            # Get new phrase start line
            phrase_start_line = sem_analyzer.get_line()

            # If the current token is a newline, decrease the line counter
            if token.token_class == TokenClass.newline:
                sem_analyzer.remove_line()

            # Roll back one token: it led to the undefined state and is part
            # of the next phrase
            token_index = token_index - 1
            machine_found = False
        else:
            # If the token belongs to a phrase, add it to the temp phrase
            if token.token_class not in (TokenClass.space,
                                         TokenClass.newline,
                                         TokenClass.undefined,
                                         TokenClass.sign):
                temp_phrase.append(token)

        token_index += 1
        active_machines = False

    # Recognize final phrase
    for machine in machines:
        machine.process_object(Token(TokenClass.undefined, ""))
        if not machine_found and machine.is_sequence_recognized():
            recognized_phrase = phrase_builder(tree.get_context(),
                                               machine.name, temp_phrase,
                                               phrase_start_line)
            sem_analyzer.process_phrase(recognized_phrase, phrase_start_line)
            machine_found = True

    if not sem_analyzer.composer.is_tree_valid():
        raise InterpretationError(
            PeaceError("Missing '}'.", ErrorType.syntax_error,
                       phrase_start_line))
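
For context, a hypothetical driver wiring a tokenizer such as Example #4's
process_line to this function could look like the following; the
no-argument constructors and the sample source text are assumptions, not
the project's actual setup.

tree = ParseTree()        # assumed no-argument constructors
table = SymbolTable()
lang_dict = LangDict()

source_text = "program main {\n}\n"   # made-up input
tokens = []
for code_line in source_text.splitlines(keepends=True):
    tokens.extend(process_line(code_line))   # tokenizer from Example #4

process_tokens(tree, table, lang_dict, tokens)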
Example #21
    def test_build_device_in_body(self):
        built_phrase = phrase_builder(self.body_context, PhraseClass.block,
                                      [Token(TokenClass.word, "device_in_body")], 0)
        expected_expr = Phrase(PhraseClass.block, PhraseSubclass.device,
                               keyword=Token(TokenClass.word, "device_in_body"))
        self.assertTrue(are_phrases_equal(built_phrase, expected_expr))
Example #22
    def test_build_expression(self):
        built_phrase = phrase_builder(self.program_context, PhraseClass.block,
                                      [Token(TokenClass.word, "expression")], 0)
        expected_expr = Phrase(PhraseClass.block, PhraseSubclass.expression,
                               keyword=Token(TokenClass.word, "expression"))
        self.assertTrue(are_phrases_equal(built_phrase, expected_expr))
Example #23
    def next_token(self):
        token = Token()
        peek = self.buffer[self.active_buffer].next_buffer_char()
        # Skip whitespace before the next token
        while peek == ' ' or peek == '\n':
            peek = self.buffer[self.active_buffer].next_buffer_char()