def test_build_empty_operator(self):
    built_phrase = phrase_builder(self.expression_context, PhraseClass.operator,
                                  [Token(TokenClass.word, "delay")], 0)
    expected_expr = Phrase(PhraseClass.operator, phrase_subclass=None,
                           keyword=Token(TokenClass.word, "delay"), params=[])
    self.assertTrue(are_phrases_equal(built_phrase, expected_expr))
def test_build_parametrised_label(self):
    built_phrase = phrase_builder(self.expression_context, PhraseClass.label,
                                  [Token(TokenClass.word, "label"),
                                   Token(TokenClass.parameter, "@")], 0)
    expected_expr = Phrase(PhraseClass.label, phrase_subclass=None,
                           keyword=Token(TokenClass.word, "label"),
                           params=[Token(TokenClass.parameter, "@")])
    self.assertTrue(are_phrases_equal(built_phrase, expected_expr))
def tokenize_line(code_line, line_number):
    line_tokens = []
    line = peekable(code_line)
    ch = next(line)
    while ch != "\n":
        # Case 0: whitespace
        if ch == " ":
            pass
        # Case 1: doublet symbols
        elif ch in doublet_pieces and ch + line.peek('') in Symbols.DOUBLETS:
            line_tokens.append(Token(ch + next(line), Symbols.NAME))
        # Case 2: singlet symbols
        elif ch in Symbols.SINGLETS:
            line_tokens.append(Token(ch, Symbols.NAME))
        # Case 3: identifier or keyword
        elif ch.isalpha():
            token_chars = [ch]
            while ''.join(line.peek('')).isalnum():
                token_chars.append(next(line))
            token = ''.join(token_chars)
            line_tokens.append(
                Token(token,
                      Keywords.NAME if token in Keywords.VALUES else Identifiers.NAME))
        # Case 4: number
        elif ch.isdigit():
            token_chars = [ch]
            while ''.join(line.peek('')).isdigit():
                token_chars.append(next(line))
            # Optional fractional part
            if line.peek('') == '.':
                token_chars.append(next(line))
                while ''.join(line.peek('')).isdigit():
                    token_chars.append(next(line))
            # A letter immediately after a number is a syntactic error
            if ''.join(line.peek('')).isalpha():
                raise SyntaxError(
                    "Malformed number at line {}".format(line_number))
            line_tokens.append(Token(''.join(token_chars), Numbers.NAME))
        else:
            raise SyntaxError("Unexpected token {} at line {}".format(
                ch, line_number))
        ch = next(line)
    return line_tokens
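# Usage sketch: the expected tokens below mirror test_valid_input_single_line
# further down, assuming the Symbols/Keywords/Identifiers/Numbers tables and
# more_itertools.peekable used by this module:
#
#   tokenize_line("echo([1 .. 4])\n", 1)
#   # -> [Token('echo', Identifiers.NAME), Token('(', Symbols.NAME),
#   #     Token('[', Symbols.NAME), Token('1', Numbers.NAME),
#   #     Token('..', Symbols.NAME), Token('4', Numbers.NAME),
#   #     Token(']', Symbols.NAME), Token(')', Symbols.NAME)]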
def process_line(string: str) -> List[Token]:
    """
    Split the input string into tokens.

    :param string: string to process
    :return: list containing the recognized tokens
    """
    tokens: List[Token] = []
    index = 0
    i = 0
    active_machines = False
    machine_found = False
    while i < len(string):
        char = string[i]
        # Feed the current symbol to each machine
        for machine in machines:
            machine.process_object(char)
            if machine.state != State.undefined:
                active_machines = True
        if not active_machines:
            # All machines reached the undefined state and the sequence length is not zero
            if i - index > 0:
                # Find the machine whose previous state was not undefined
                for machine in machines:
                    if (machine.prevState != State.undefined
                            and machine.prevState != State.begin
                            and not machine_found):
                        token = Token(machine.name, string[index:i])
                        tokens.append(token)
                        machine_found = True
                    machine.reset_state()
                index = i
                # Roll back one symbol: the symbol that led to the undefined state is part of the next token
                i -= 1
                machine_found = False
            # All machines reached the undefined state and the current symbol was not recognized
            else:
                # Classify the symbol as undefined
                tokens.append(Token(TokenClass.undefined, string[i]))
                index = i
        # Reset the active machines flag
        active_machines = False
        i += 1
    # Recognize the final token
    for machine in machines:
        if (machine.state != State.undefined
                and machine.state != State.begin
                and not machine_found):
            token = Token(machine.name, string[index:i])
            tokens.append(token)
            machine_found = True
        machine.reset_state()
    return tokens
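# Rough usage sketch (the exact output depends on the configured machines; the
# token classes below are an assumption based on the TokenClass values used
# elsewhere in this project):
#
#   process_line("delay 1\n")
#   # -> [Token(TokenClass.word, "delay"), Token(TokenClass.space, " "),
#   #     Token(TokenClass.num, "1"), Token(TokenClass.newline, "\n")]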
def test_types(self):
    tokens = []
    line = "Integer Double Boolean String Void"
    put_line_in_lexer_text_source(self.lexer, line)
    while not self.lexer.is_eot_token():
        token = self.lexer.get_token()
        tokens.append(token)
    # Requesting tokens past the end should keep returning EOT
    token = self.lexer.get_token()
    tokens.append(token)
    token = self.lexer.get_token()
    tokens.append(token)
    expected = [
        Token(TokenType.K_INTEGER),
        Token(TokenType.K_DOUBLE),
        Token(TokenType.K_BOOLEAN),
        Token(TokenType.K_STRING),
        Token(TokenType.K_VOID),
        Token(TokenType.EOT),
        Token(TokenType.EOT),
        Token(TokenType.EOT)
    ]
    self.assertEqual(expected, tokens)
def test_build_operator_with_parameters(self):
    built_phrase = phrase_builder(self.expression_context, PhraseClass.operator,
                                  [Token(TokenClass.word, "delay"),
                                   Token(TokenClass.num, "1"),
                                   Token(TokenClass.word, "two"),
                                   Token(TokenClass.string, "\"3\""),
                                   Token(TokenClass.parameter, "@4")], 0)
    expected_expr = Phrase(PhraseClass.operator, phrase_subclass=None,
                           keyword=Token(TokenClass.word, "delay"),
                           params=[Token(TokenClass.num, "1"),
                                   Token(TokenClass.word, "two"),
                                   Token(TokenClass.string, "\"3\""),
                                   Token(TokenClass.parameter, "@4")])
    self.assertTrue(are_phrases_equal(built_phrase, expected_expr))
def test_logic_operators(self):
    tokens = []
    line = "| & ! "
    put_line_in_lexer_text_source(self.lexer, line)
    while not self.lexer.is_eot_token():
        token = self.lexer.get_token()
        tokens.append(token)
    expected = [
        Token(TokenType.VERTICAL_LINE),
        Token(TokenType.AMPERSAND),
        Token(TokenType.EXCLAMATION),
        Token(TokenType.EOT)
    ]
    self.assertEqual(expected, tokens)
def test_math_operators(self):
    tokens = []
    line = "+ - * / "
    put_line_in_lexer_text_source(self.lexer, line)
    while not self.lexer.is_eot_token():
        token = self.lexer.get_token()
        tokens.append(token)
    expected = [
        Token(TokenType.PLUS_OR_CONC),
        Token(TokenType.MINUS),
        Token(TokenType.MUL_OR_REFER),
        Token(TokenType.DIV),
        Token(TokenType.EOT)
    ]
    self.assertEqual(expected, tokens)
def offset(self, a):
    type_ = a.type_
    self.match('[')
    i = self.bool_()
    self.match(']')
    type_ = type_.of
    w = Constant(i=type_.width)
    t1 = Arith(Token('*'), i, w)
    loc = t1
    while self.look.tag == '[':
        self.match('[')
        i = self.bool_()
        self.match(']')
        type_ = type_.of
        w = Constant(i=type_.width)
        t1 = Arith(Token('*'), i, w)
        t2 = Arith(Token('+'), loc, t1)
        loc = t2
    return Access(a, loc, type_)
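# Worked example (a sketch, assuming the usual 4-byte int width): for
# "int a[2][3]" the access a[i][j] builds loc = i*12 + j*4, i.e. the first
# index is scaled by the width of the remaining row (type_.of.width == 12)
# and each further index by the width of its element type (4).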
def test_comment_eot_handling(self):
    tokens = []
    line = "//this is a comment "
    put_line_in_lexer_text_source(self.lexer, line)
    while not self.lexer.is_eot_token():
        token = self.lexer.get_token()
        tokens.append(token)
    expected = [Token(TokenType.EOT)]
    self.assertEqual(expected, tokens)
def test_build_comment(self):
    built_phrase = phrase_builder(self.body_context, PhraseClass.comment,
                                  [Token(TokenClass.word, "w1"),
                                   Token(TokenClass.word, "w2"),
                                   Token(TokenClass.word, "w3")], 0)
    expected_expr = Phrase(PhraseClass.comment, phrase_subclass=None,
                           params=[Token(TokenClass.word, "w1"),
                                   Token(TokenClass.word, "w2"),
                                   Token(TokenClass.word, "w3")])
    self.assertTrue(are_phrases_equal(built_phrase, expected_expr))
def test_valid_input_single_line(self):
    code_mock = "echo([1 .. 4])\n"
    expected_result = [[
        Token('echo', Identifiers.NAME),
        Token('(', Symbols.NAME),
        Token('[', Symbols.NAME),
        Token('1', Numbers.NAME),
        Token('..', Symbols.NAME),
        Token('4', Numbers.NAME),
        Token(']', Symbols.NAME),
        Token(')', Symbols.NAME)
    ]]
    with patch('lexer.tokenizer.open', new=mock_open(read_data=code_mock)) as _file:
        result = tokenize('path')
    self.assertListEqual(expected_result, result)
def test_zero_values(self):
    tokens = []
    line = "0 0.0 0.000001"
    put_line_in_lexer_text_source(self.lexer, line)
    while not self.lexer.is_eot_token():
        token = self.lexer.get_token()
        tokens.append(token)
    expected = [
        TokenWithValue(TokenType.VALUE_INT, 0),
        TokenWithDoubleValue(TokenType.VALUE_DOUBLE, 0, None, 0, 1),
        TokenWithDoubleValue(TokenType.VALUE_DOUBLE, 0, None, 1, 6),
        Token(TokenType.EOT)
    ]
    self.assertEqual(expected, tokens)
def test_values(self):
    tokens = []
    line = "\"string\" 5 2.5 "
    put_line_in_lexer_text_source(self.lexer, line)
    while not self.lexer.is_eot_token():
        token = self.lexer.get_token()
        tokens.append(token)
    expected = [
        TokenWithValue(TokenType.VALUE_STRING, 'string'),
        TokenWithValue(TokenType.VALUE_INT, 5),
        TokenWithDoubleValue(TokenType.VALUE_DOUBLE, 2, None, 5, 1),
        Token(TokenType.EOT)
    ]
    self.assertEqual(expected, tokens)
def test_ident(self):
    tokens = []
    line = "var_name x y z "
    put_line_in_lexer_text_source(self.lexer, line)
    while not self.lexer.is_eot_token():
        token = self.lexer.get_token()
        tokens.append(token)
    expected = [
        TokenWithValue(TokenType.VALUE_ID, "var_name"),
        TokenWithValue(TokenType.VALUE_ID, "x"),
        TokenWithValue(TokenType.VALUE_ID, "y"),
        TokenWithValue(TokenType.VALUE_ID, "z"),
        Token(TokenType.EOT)
    ]
    self.assertEqual(expected, tokens)
def test_other_tokens(self):
    tokens = []
    line = "if else true false return while = "
    put_line_in_lexer_text_source(self.lexer, line)
    while not self.lexer.is_eot_token():
        token = self.lexer.get_token()
        tokens.append(token)
    expected = [
        Token(TokenType.K_IF),
        Token(TokenType.K_ELSE),
        Token(TokenType.K_TRUE),
        Token(TokenType.K_FALSE),
        Token(TokenType.K_RETURN),
        Token(TokenType.K_WHILE),
        Token(TokenType.ASSIGN_OP),
        Token(TokenType.EOT)
    ]
    self.assertEqual(expected, tokens)
def test_punctuation(self):
    tokens = []
    line = ", . ; { } ( ) "
    put_line_in_lexer_text_source(self.lexer, line)
    while not self.lexer.is_eot_token():
        token = self.lexer.get_token()
        tokens.append(token)
    expected = [
        Token(TokenType.COMMA),
        Token(TokenType.DOT),
        Token(TokenType.SEMICOLON),
        Token(TokenType.LEFT_BRACKET),
        Token(TokenType.RIGHT_BRACKET),
        Token(TokenType.LEFT_PARENT),
        Token(TokenType.RIGHT_PARENT),
        Token(TokenType.EOT)
    ]
    self.assertEqual(expected, tokens)
def _compose_expression(self, phrase: Phrase, signature: Signature):
    if signature.contains_param or signature.output == "":
        # Build a new parameter list, replacing parametrised arguments with actual parameters
        params = list()
        for param in phrase.params:
            # Check whether the argument is a parameter token
            if param.token_class == TokenClass.parameter:
                # A bare "@" is replaced with the expression occurrence number
                if param.value == "@":
                    params.append(Token(TokenClass.num, str(self.expr_uses)))
                    break
                # Otherwise insert the corresponding value from the parameter list
                param_num = int(param.value[1:]) - 1
                params.append(self.param_list[param_num])
            else:
                # Non-parameter arguments are taken from the operator as-is
                params.append(param)
        self.expr_gen(phrase.keyword.value, params)
    self.line = signature.output
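# Example (sketch): for an operator argument "@2", int("2") - 1 == 1 selects the
# second entry of self.param_list, while a bare "@" is replaced with the current
# expression occurrence count (self.expr_uses); non-parameter arguments are
# forwarded unchanged.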
def test_equality_operators(self):
    tokens = []
    line = "<= < >= > == !="
    put_line_in_lexer_text_source(self.lexer, line)
    while not self.lexer.is_eot_token():
        token = self.lexer.get_token()
        tokens.append(token)
    expected = [
        Token(TokenType.LESS_EQUAL),
        Token(TokenType.LESS),
        Token(TokenType.GREATER_EQUAL),
        Token(TokenType.GREATER),
        Token(TokenType.EQUAL),
        Token(TokenType.NOT_EQUAL),
        Token(TokenType.EOT)
    ]
    self.assertEqual(expected, tokens)
def process_tokens(tree: ParseTree, table: SymbolTable, lang_dict: LangDict, tokens: List[Token]):
    active_machines: bool = False
    machine_found: bool = False
    token_index: int = 0
    phrase_start_line: int = 1
    temp_phrase: List[Token] = []
    sem_analyzer = SemanticAnalyzer(tree, table, lang_dict)
    while token_index < len(tokens):
        token: Token = tokens[token_index]
        # New line check
        if token.token_class == TokenClass.newline:
            sem_analyzer.add_line()
        # Process the token with the parser machines
        for machine in machines:
            machine.process_object(token)
            if machine.state != State.undefined:
                active_machines = True
        # All machines reached the undefined state
        if not active_machines:
            # Try to find a machine that recognized the phrase
            for machine in machines:
                if not machine_found and machine.is_sequence_recognized():
                    recognized_phrase = phrase_builder(tree.get_context(), machine.name,
                                                       temp_phrase, phrase_start_line)
                    sem_analyzer.process_phrase(recognized_phrase, phrase_start_line)
                    machine_found = True
                    temp_phrase.clear()
            # The token was not recognized by any machine
            if not machine_found:
                for machine in machines:
                    if machine.prevState != State.undefined:
                        raise InterpretationError(
                            PeaceError(f"Unexpected token {repr(token.value)}, expected {machine.name.name}.",
                                       ErrorType.syntax_error,
                                       sem_analyzer.get_line(), token.value))
            # Reset machine states
            for machine in machines:
                machine.reset_state()
            # Record the start line of the next phrase
            phrase_start_line = sem_analyzer.get_line()
            # If the current token is a newline, decrease the line counter
            if token.token_class == TokenClass.newline:
                sem_analyzer.remove_line()
            # Roll back one token: the token that led to the undefined state is part of the next phrase
            token_index = token_index - 1
            machine_found = False
        else:
            # If the token belongs to a phrase, add it to the temporary phrase
            if (token.token_class != TokenClass.space
                    and token.token_class != TokenClass.newline
                    and token.token_class != TokenClass.undefined
                    and token.token_class != TokenClass.sign):
                temp_phrase.append(token)
        token_index += 1
        active_machines = False
    # Recognize the final phrase
    for machine in machines:
        machine.process_object(Token(TokenClass.undefined, ""))
        if not machine_found and machine.is_sequence_recognized():
            recognized_phrase = phrase_builder(tree.get_context(), machine.name,
                                               temp_phrase, phrase_start_line)
            sem_analyzer.process_phrase(recognized_phrase, phrase_start_line)
            machine_found = True
    if not sem_analyzer.composer.is_tree_valid():
        raise InterpretationError(
            PeaceError("Missing '}}'.", ErrorType.syntax_error, phrase_start_line))
    return
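# Typical driver flow (a sketch, not shown in this module): read the source text,
# run process_line() on it to obtain the flat token list, then pass that list to
# process_tokens() together with a fresh ParseTree, SymbolTable and LangDict so
# phrases are built and semantically checked in a single pass.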
def test_build_device_in_body(self):
    built_phrase = phrase_builder(self.body_context, PhraseClass.block,
                                  [Token(TokenClass.word, "device_in_body")], 0)
    expected_expr = Phrase(PhraseClass.block, PhraseSubclass.device,
                           keyword=Token(TokenClass.word, "device_in_body"))
    self.assertTrue(are_phrases_equal(built_phrase, expected_expr))
def test_build_expression(self):
    built_phrase = phrase_builder(self.program_context, PhraseClass.block,
                                  [Token(TokenClass.word, "expression")], 0)
    expected_expr = Phrase(PhraseClass.block, PhraseSubclass.expression,
                           keyword=Token(TokenClass.word, "expression"))
    self.assertTrue(are_phrases_equal(built_phrase, expected_expr))
def next_token(self):
    token = Token()
    peek = self.buffer[self.active_buffer].next_buffer_char()
    # Skip whitespace and newlines before the next token
    while peek == ' ' or peek == '\n':
        peek = self.buffer[self.active_buffer].next_buffer_char()