def test_parse_simple_tokens():
    tokens = tokenize('(add 1 2)')
    result = parse(tokens)
    assert result.name == 'add' and result.params == [
        Token('word', '1'), Token('word', '2')
    ]

def formula(self, excel_formula):
    '''
    Setting the Excel formula triggers creation of the RPN formula and tree.
    @param excel_formula: Excel formula as a string
    @return: RPN formula
    '''
    self._formula = excel_formula
    logging.debug("Processing RPN for formula {} at cell {}".format(
        excel_formula, self))
    # First check whether the formula starts with a valid operator
    if str(excel_formula).startswith(('=', '+')):
        self.rpn = self.make_rpn(excel_formula)
        # create the list of precedents (who do I depend on)
        self.createPrec()
    else:
        # Otherwise the formula must be a hardcoded value
        logging.debug(
            "Formula does not start with = or +. Creating a hardcode cell")
        if isinstance(fast_real(self.address), str):
            tok = Token(self.address, Token.OPERAND, "TEXT")
            self.rpn.append(OperandNode(tok))
            self.needs_calc = False
        else:
            tok = Token(self.address, Token.OPERAND, "NUMBER")
            self.rpn.append(OperandNode(tok))
    logging.info("RPN is: {}".format(self.rpn))

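# A minimal, self-contained sketch of the dispatch used in the setter above:
# text starting with '=' or '+' is treated as a formula, anything else as a
# hardcoded value whose type is sniffed numerically. DemoCell and the
# to-float sniffing are illustrative stand-ins for the real Cell class and
# fast_real, not the actual implementation.
class DemoCell:
    def __init__(self):
        self.kind = None
        self.value = None

    def set_formula(self, text):
        if str(text).startswith(('=', '+')):
            self.kind = 'formula'      # would go through make_rpn()/createPrec()
            self.value = text
        else:
            try:                       # stand-in for fast_real()
                self.value = float(text)
                self.kind = 'number'
            except ValueError:
                self.value = text
                self.kind = 'text'

c = DemoCell()
c.set_formula('=SUM(B1:B3)')
assert c.kind == 'formula'
c.set_formula('42')
assert c.kind == 'number'
c.set_formula('hello')
assert c.kind == 'text'
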
def test_middle_nonalpha(self):
    s = list(self.t.tokenize_gen_alpha_digit(
        'я иду в кино00000 111 00000cinema'))
    self.assertEqual(len(s), 8)
    self.assertEqual(s[4], Token('00000', 'digit', 12, 17))
    self.assertEqual(s[5], Token('111', 'digit', 18, 21))

def is_statement(self):
    return self.token in (
        Token('keyword', 'let'),
        Token('keyword', 'if'),
        Token('keyword', 'while'),
        Token('keyword', 'do'),
        Token('keyword', 'return')
    )

def test_stemmer_flex(self):
    line = "мамочка свари суп"
    fact = list(Stemmer().stem_flex(Token(0, 8, line, "a")))
    check = [Token(0, 8, line, 'a'), Token(0, 7, line, 'a')]
    self.assertEqual(fact, check)

def test_exception(self):
    expr = "a > 3and3>5"
    tokenizer = Tokenizer(expr)
    self.assertEqual(Token(TokenType.FIELD, 'a'), tokenizer.next_token())
    self.assertEqual(Token(TokenType.COMPARISON_OPERATOR, '>'),
                     tokenizer.next_token())
    self.assertEqual(Token(TokenType.INTEGER, 3), tokenizer.next_token())
    self.assertRaises(SyntaxError, tokenizer.next_token)

def test_split(self):
    text = 'This is a test'
    tokenz = self._tokz.tokenize(text)
    self.assertEqual(tokenz[0], Token(text, Span(0, 4)))
    self.assertEqual(tokenz[1], Token(text, Span(5, 7)))
    self.assertEqual(tokenz[2], Token(text, Span(8, 9)))
    self.assertEqual(tokenz[3], Token(text, Span(10, 14)))

def test_parse_list():
    tokens = tokenize("'(1 2 3)")
    result = parse(tokens)
    expected_list_items = [
        Token('word', '1'),
        Token('word', '2'),
        Token('word', '3')
    ]
    assert result.token_type == 'list' and all(
        a == b for a, b in zip(result.val, expected_list_items))

def test_tokenize_basic01(self):
    result = tokenize("(+ 5 2)")
    expected = [
        Token(TokenType.LPAREN, None),
        Token(TokenType.PLUS, None),
        Token(TokenType.INTEGER, 5),
        Token(TokenType.INTEGER, 2),
        Token(TokenType.RPAREN, None)
    ]
    self.assertListEqual(result, expected)

def test_tokenize_eq(self):
    result = tokenize("(eq? 2 2)")
    expected = [
        Token(TokenType.LPAREN, None),
        Token(TokenType.EQ, None),
        Token(TokenType.INTEGER, 2),
        Token(TokenType.INTEGER, 2),
        Token(TokenType.RPAREN, None)
    ]
    self.assertEqual(result, expected)

def test_specials(self):
    text = 'This Dr. is a test!'  # 0123456789012345678
    tokenz = self._tokz.tokenize(text)
    self.assertEqual(tokenz, [Token(text, Span(0, 4)),
                              Token(text, Span(5, 8)),
                              Token(text, Span(9, 11)),
                              Token(text, Span(12, 13)),
                              Token(text, Span(14, 19))])

def test_tokenize_basic02(self):
    result = tokenize("(* 3 4)")
    expected = [
        Token(TokenType.LPAREN, None),
        Token(TokenType.MULTIPLY, None),
        Token(TokenType.INTEGER, 3),
        Token(TokenType.INTEGER, 4),
        Token(TokenType.RPAREN, None)
    ]
    self.assertListEqual(result, expected)

def test_tokenize_define(self):
    result = tokenize("(define cat 5)")
    expected = [
        Token(TokenType.LPAREN, None),
        Token(TokenType.DEFINE, None),
        Token(TokenType.ID, 'cat'),
        Token(TokenType.INTEGER, 5),
        Token(TokenType.RPAREN, None)
    ]
    self.assertEqual(result, expected)

def parse(self, line):
    parsed = []
    buffer = []
    if len(line) > 0:
        parsed_to_return = []
        for index, token in enumerate(line):
            if token.type == Type.MEAN:
                rest = line[index + 1:]
                for j, next_token in enumerate(rest):
                    if next_token.type in (Type.NUMBER, Type.VAR):
                        # collect the operands that the AVERAGE node will hold
                        buffer.append(next_token)
                    if next_token.type not in (Type.NUMBER, Type.VAR) \
                            or j == len(rest) - 1:
                        p = Token(type=Type.AVERAGE)
                        p.val = buffer.copy()
                        parsed.clear()
                        parsed.append(p)
                        buffer.clear()
                        # rebuild the line: tokens before MEAN, the AVERAGE
                        # node, then the remaining tokens
                        for tok in line[:index]:
                            parsed_to_return.append(tok)
                        parsed_to_return.append(p)
                        for tok in line[len(rest):]:
                            parsed_to_return.append(tok)
                        return parsed_to_return
    for token in line:
        # push the token to the buffer
        buffer.append(token)
        # find syntactical matches
        matches = self.find_matches(buffer)
        while len(matches) == 0:
            # no matches, probably due to an extra token; for example,
            # ARITH VAL VAL is legal but VAL must be converted to a number
            # first. Simply move the leading token to the parsed list.
            if len(buffer) > 0:
                parsed.append(buffer.pop(0))
                matches = self.find_matches(buffer)
            else:
                break
        for m in matches:
            for x in matches[m][1]:
                # complete match: fold the buffered tokens into one node
                if len(x) == len(buffer):
                    # TODO: convert the token
                    t = Token(type=matches[m][0])
                    t.val = buffer.copy()
                    parsed.append(t)
                    buffer.clear()
    return parsed

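# A self-contained sketch of the MEAN-folding step above: scan for a MEAN
# marker, absorb the run of NUMBER/VAR operands that follows it into a single
# AVERAGE node, and pass the surrounding tokens through untouched. The
# tuple-based tokens here are illustrative, not the parser's Token/Type classes.
def fold_mean(tokens):
    out, i = [], 0
    while i < len(tokens):
        kind, val = tokens[i]
        if kind == 'MEAN':
            j, operands = i + 1, []
            while j < len(tokens) and tokens[j][0] in ('NUMBER', 'VAR'):
                operands.append(tokens[j][1])
                j += 1
            out.append(('AVERAGE', operands))  # one node replaces the whole run
            i = j
        else:
            out.append((kind, val))
            i += 1
    return out

assert fold_mean([('MEAN', None), ('NUMBER', 1), ('NUMBER', 3),
                  ('PLUS', None), ('NUMBER', 2)]) == \
    [('AVERAGE', [1, 3]), ('PLUS', None), ('NUMBER', 2)]
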
def test_parse_cascaded_parens():
    tokens = tokenize('(add (mul 3 4) 5)')
    result = parse(tokens)
    expected_params = [
        Func('mul', [Token('word', '3'), Token('word', '4')]),
        Token('word', '5')
    ]
    assert result.name == 'add' and all(
        a == b for a, b in zip(result.params, expected_params))

def test_tokenize_list():
    result = tokenize("'(1 2 3)")
    expected = [
        Token('list_open'),
        Token('word', '1'),
        Token('word', '2'),
        Token('word', '3'),
        Token('list_close')
    ]
    assert all(a == b for a, b in zip(result, expected))

def test_parse_parameter_list_empty(self):
    """ ( (type identifier) (',' type identifier)* )? """
    tokens = (Token('symbol', ')'),)
    expected = Token('parameterList', [])
    parser = Parser(tokens)
    parser.parse_type = self._mock_parse(parser)
    actual = parser.parse_parameter_list()
    self.assertEqual(expected, actual)

def parse_expression_list(self):
    """ (expression (',' expression)* )?
    We should always expect a trailing ')'.
    """
    node = Token('expressionList', [])
    if self.token.value != ')':
        assert Token('symbol', ')') in self.tokens[self.idx:], \
            'Expression list must close'
        node.append(self.parse_expression())
        while self.token.value != ')':
            self.try_add(node, 'symbol', value=',')
            node.append(self.parse_expression())
    return node

def process(tokenlist):
    i = 0
    while i < len(tokenlist.tokens):
        token = tokenlist.tokens[i]
        t = token.type
        if t == TYPE_CALL:
            token2 = token.value[1]
            t2 = token2.type
            if t2 == TYPE_FUNCTION:
                v2 = token2.value
                if v2 == "if":
                    # First, determine that there is in fact a body
                    # following this function.
                    if i + 1 < len(tokenlist.tokens) and \
                            tokenlist.tokens[i + 1].type == TYPE_BLOCK_START and \
                            tokenlist.tokens[i + 1].value == BLOCK_START_CHAR:
                        pass
                    else:
                        error_format(
                            token, "\"if\" should be followed by a block.")
                    # Only if the body following this if-function has an
                    # "else" will this goto be added.
                    index = find_endblock_token_index(tokenlist.tokens, i + 2)
                    if index + 1 < len(tokenlist.tokens) and \
                            tokenlist.tokens[index + 1].type == TYPE_TERM and \
                            tokenlist.tokens[index + 1].value == "else":
                        end_of_chain = find_endblock_token_index(
                            tokenlist.tokens, i)
                        tokenlist.tokens.insert(
                            index, Token(TYPE_GOTO, end_of_chain, None, None))
                        increment_gotos_pointing_after_here(tokenlist, index)
                elif v2 == "while":
                    # First, determine that there is in fact a body
                    # following this function.
                    if i + 1 < len(tokenlist.tokens) and \
                            tokenlist.tokens[i + 1].type == TYPE_BLOCK_START and \
                            tokenlist.tokens[i + 1].value == BLOCK_START_CHAR:
                        pass
                    else:
                        error_format(
                            token, "\"while\" should be followed by a body.")
                    # Next, place a goto at the end of that body pointing
                    # back at this while-function's args.
                    index = find_endblock_token_index(tokenlist.tokens, i + 1)
                    goto = find_startblock_token_index(tokenlist.tokens, i - 3)
                    tokenlist.tokens.insert(
                        index - 1, Token(TYPE_GOTO, goto, None, None))
                    increment_gotos_pointing_after_here(tokenlist, index)
        i += 1

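# A self-contained sketch of the block scanning that process() leans on:
# walk the stream keeping a nesting depth and return the index of the '}'
# that closes the block opened at `start`. The real find_endblock_token_index
# works on Token objects and may differ in detail; this only illustrates the
# depth-counting idea.
def find_matching_end(tokens, start):
    depth = 0
    for i in range(start, len(tokens)):
        if tokens[i] == '{':
            depth += 1
        elif tokens[i] == '}':
            depth -= 1
            if depth == 0:
                return i
    raise ValueError('unbalanced blocks')

assert find_matching_end(['if', '{', 'a', '{', 'b', '}', '}', 'else'], 1) == 6
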
def test_tokenize_with_spaces():
    result = tokenize('''(mul 2
                          3 )''')
    expected = [
        Token('open'),
        Token('word', 'mul'),
        Token('word', '2'),
        Token('word', '3'),
        Token('close')
    ]
    assert all(a == b for a, b in zip(result, expected))

def execute(self, scope):
    # scope[self.dest.value] = self.val.execute(scope)
    val = self.val.execute(scope)
    if type(val) is AtomAST:
        scope[self.dest.value] = self.val
    elif type(val) is int:
        scope[self.dest.value] = AtomAST(Token(TokenType.NUM, val))
    elif type(val) is str:
        scope[self.dest.value] = AtomAST(Token(TokenType.STR, val))
    else:
        # fall back to storing the value as a string atom
        scope[self.dest.value] = AtomAST(Token(TokenType.STR, val))
        print('Warning: value of type {} is not supported!'.format(type(val)))
    return self.val

def compile_do(self, parse_tree):
    log.info('Compiling do statement')
    assert parse_tree.type == 'doStatement'
    # Strip the leading 'do' and trailing ';' to leave just the subroutine call
    subroutine_term = Token('term', parse_tree.value[1:-1])
    return self.compile_expression(subroutine_term) + (
        'pop temp 0\n'  # discard the unused return value
    )

def parse_return_statement(self):
    node = Token('returnStatement', [])
    self.try_add(node, 'keyword', value='return')
    if self.token.value != ';':
        node.append(self.parse_expression())
    self.try_add(node, 'symbol', value=';')
    return node

def _statement_list(self, additional_syncset=frozenset()):
    self._check_for_starter(self.stmt_starter_label,
                            self._follow_dl | additional_syncset, self._ID)
    sl = Atom(Token(self.sl_label, self.list_category, self._curr.location))
    first_set = self._first_loop_sl if 'rompe' in additional_syncset else self._first_sl
    while self._curr.lexeme in first_set or self._check_id_num(self._ID):
        if self._curr.lexeme == 'if':
            self._selection(additional_syncset).parent = sl
        elif self._curr.lexeme == 'while':
            self._iteration(additional_syncset).parent = sl
        elif self._curr.lexeme == 'repeat':
            self._repetition(additional_syncset).parent = sl
        elif self._curr.lexeme == 'cin':
            self._cin_stmt(additional_syncset).parent = sl
        elif self._curr.lexeme in ('cout', 'coutln'):
            self._cout_stmt(additional_syncset).parent = sl
        elif self._curr.lexeme == '{':
            self._block(additional_syncset).parent = sl
        elif self._curr.lexeme == 'rompe':
            self._break_stmt(additional_syncset).parent = sl
        elif self._check_id_num(self._ID):
            self._assignment(additional_syncset).parent = sl
        elif self._curr.lexeme in self._first_una:
            self._pre(additional_syncset).parent = sl
        self._check_for_starter(self.stmt_starter_label,
                                self._follow_dl | additional_syncset, self._ID)
    return sl

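# A minimal sketch of the panic-mode recovery idea behind the
# _check_for_starter/_sync calls above: on an unexpected token, skip ahead
# until something that can start a statement (or legally follow the list)
# shows up, so one bad token does not derail the whole parse. The sets and
# token shape here are illustrative only.
def synchronize(tokens, pos, starters, followers):
    while pos < len(tokens) and tokens[pos] not in starters | followers:
        pos += 1  # discard tokens that can neither start nor end a statement
    return pos

assert synchronize(['@', '#', 'if', 'x'], 0, {'if', 'while'}, {'}'}) == 2
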
def parse_do_statement(self):
    node = Token('doStatement', [])
    self.try_add(node, 'keyword', value='do')
    for token in self.parse_subroutine_call():
        node.append(token)
    self.try_add(node, 'symbol', value=';')
    return node

def get_control(self, token_list):
    tokens = token_list[:]
    if token_list[0].get_type() not in (Token.STOP, Token.RETURN,
                                        Token.JUMPOVER, Token.HALT):
        return None, tokens
    tok = token_list[0]
    token_list = token_list[1:]
    if tok.get_type() in (Token.STOP, Token.JUMPOVER):
        # stop/jumpover carry no expression; use a Null literal placeholder
        sub = Literal(Token(Token.NULL, "Null"))
    else:
        sub, token_list = self.get_expression(token_list)
        if sub is None:
            sub = Literal(Token(Token.NULL, "Null"))
    return Control(tok, parent=None, child=sub), token_list

from collections import deque

def parse(tokens):
    stack = deque()
    for k in tokens:
        if k.token_type in ('open', 'word', 'list_open'):
            stack.append(k)
        elif k.token_type == 'close':
            # pop back to the matching 'open' and build a function node
            sub = []
            while True:
                cur = stack.pop()
                if cur.token_type == 'open':
                    break
                sub = [cur] + sub
            result = Func(sub[0].val, sub[1:])
            stack.append(result)
        elif k.token_type == 'list_close':
            # pop back to the matching 'list_open' and build a list token
            sub = []
            while True:
                cur = stack.pop()
                if cur.token_type == 'list_open':
                    break
                sub = [cur] + sub
            result = Token('list', sub)
            stack.append(result)
    if len(stack) == 1:
        return stack.pop()
    return list(stack)

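# A self-contained illustration of the pop-until-marker trick parse() uses:
# children pile up on the stack and get folded into a single node when the
# matching close arrives, so nesting falls out for free. Plain strings and
# tuples stand in for the Token/Func objects above.
from collections import deque

def fold(tokens):
    stack = deque()
    for tok in tokens:
        if tok == ')':
            sub = []
            while True:
                cur = stack.pop()
                if cur == '(':
                    break
                sub.insert(0, cur)
            stack.append(tuple(sub))  # one folded node replaces its children
        else:
            stack.append(tok)
    return list(stack)

assert fold(['(', 'add', '(', 'mul', '3', '4', ')', '5', ')']) == \
    [('add', ('mul', '3', '4'), '5')]
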
def _pre(self, additional_syncset):
    operator = Atom(self._curr, self._curr.lexeme[0])
    self._get_token()
    var = Atom(self._curr, f'{self.assignment_label} Ø')
    temp = self._curr.lexeme
    if self._sync(self._ID, frozenset({';'}) | self._follow_dl | additional_syncset,
                  self._ID):
        var.lexeme = f'{self.assignment_label} {temp}'
        operator.parent = var
        Atom(Token(var.lexeme.split()[-1], var.category, operator.location),
             parent=operator, _inc_dec=True)
        Atom(Token('1', self.tokenizer.int_label, operator.location),
             parent=operator)
    self._sync(';', self._follow_dl | additional_syncset, self._ID)
    return var

def test_parse_return_statement_empty(self):
    """ 'return' expression? ';' """
    tokens = (
        Token('keyword', 'return'),
        Token('symbol', ';'),
    )
    expected = Token('returnStatement', [
        Token('keyword', 'return'),
        Token('symbol', ';'),
    ])
    parser = Parser(tokens)
    actual = parser.parse_return_statement()
    self.assertEqual(expected, actual)

def parse_type(self):
    """ 'int' | 'char' | 'boolean' | identifier """
    standard_types = (
        Token('keyword', 'int'),
        Token('keyword', 'char'),
        Token('keyword', 'boolean')
    )
    token = self.token
    if self.token not in standard_types:
        # Hack to validate the identifier
        self.try_add(Token('dummy', []), 'identifier')
    else:
        self.idx += 1
    return token