Example #1
def test_parse_simple_tokens():
    tokens = tokenize('(add 1 2)')
    result = parse(tokens)

    assert result.name == 'add' and result.params == [
        Token('word', '1'), Token('word', '2')
    ]
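
The Lisp-style parser tests in this example and in Examples #8, #15, #16, #20 and #27 compare tokens and function nodes by value. The original class definitions are not part of this listing; a minimal sketch that would satisfy those comparisons (field names inferred from the tests, so treat it as an assumption rather than the real implementation) could be:

# Hypothetical value objects inferred from the tests; not the original source.
from dataclasses import dataclass, field
from typing import Any, List


@dataclass(frozen=True)
class Token:
    token_type: str   # e.g. 'open', 'close', 'word', 'list_open', 'list_close'
    val: Any = None   # payload, e.g. '1' in Token('word', '1')


@dataclass
class Func:
    name: str                                         # function name, e.g. 'add'
    params: List[Any] = field(default_factory=list)   # Token or nested Func nodes
    token_type: str = 'func'   # lets stack-based parsers treat Func like a token (see Example #27)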
Example #2
    def formula(self, excel_formula):
        '''
        Setting the Excel formula triggers creation of the RPN formula and tree.
        @param excel_formula: Excel formula as a string
        @return: RPN formula
        '''
        self._formula = excel_formula
        logging.debug("Processing RPN for formula {} at cell {}".format(
            excel_formula, self))

        # First check whether the formula starts with a formula operator ('=' or '+')
        if str(excel_formula).startswith(('=', '+')):
            self.rpn = self.make_rpn(excel_formula)

            # creates list of precedents (who do I depend on)
            self.createPrec()

        # Otherwise the formula must be a hardcoded value
        else:
            logging.debug(
                "Formula does not start with = or +. Creating a hardcode cell")
            if isinstance(fast_real(self.address), str):
                tok = Token(self.address, Token.OPERAND, "TEXT")
                self.rpn.append(OperandNode(tok))
                self.needs_calc = False
            else:
                tok = Token(self.address, Token.OPERAND, "NUMBER")
                self.rpn.append(OperandNode(tok))

        logging.info("RPN is: {}".format(self.rpn))
Example #3
 def test_middle_nonapha(self):
     s = list(
         self.t.tokenize_gen_alpha_digit(
             'я иду в кино00000 111 00000cinema'))
     self.assertEqual(len(s), 8)
     self.assertEqual(s[4], Token('00000', 'digit', 12, 17))
     self.assertEqual(s[5], Token('111', 'digit', 18, 21))
Example #4
 def is_statement(self):
     return self.token in (
         Token('keyword', 'let'), 
         Token('keyword', 'if'),
         Token('keyword', 'while'),
         Token('keyword', 'do'),   
         Token('keyword', 'return')
     )
Example #5
    def test_stemmer_flex(self): 

        line = "мамочка свари суп"

        fact = list(Stemmer().stem_flex(Token(0, 8, "мамочка свари суп", "a")))
        check = [Token(0, 8, line, 'a'), Token(0, 7, line, 'a')]

        self.assertEqual(fact, check)
Example #6
 def test_exception(self):
     expr = "a > 3and3>5"
     tokenizer = Tokenizer(expr)
     self.assertEqual(Token(TokenType.FIELD, 'a'), tokenizer.next_token())
     self.assertEqual(Token(TokenType.COMPARISON_OPERATOR, '>'),
                      tokenizer.next_token())
     self.assertEqual(Token(TokenType.INTEGER, 3), tokenizer.next_token())
     self.assertRaises(SyntaxError, tokenizer.next_token)
Example #7
    def test_split(self):
        text = 'This is a test'
        tokenz = self._tokz.tokenize(text)

        self.assertEqual(tokenz[0], Token(text, Span(0, 4)))
        self.assertEqual(tokenz[1], Token(text, Span(5, 7)))
        self.assertEqual(tokenz[2], Token(text, Span(8, 9)))
        self.assertEqual(tokenz[3], Token(text, Span(10, 14)))
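
This example (and the later Example #11) builds tokens from the full input string plus a Span of character offsets. The Span and Token classes are not shown in the listing; a rough, purely illustrative sketch consistent with the assertions above might be:

# Hypothetical Span/Token pair inferred from the assertions; not the original source.
from dataclasses import dataclass


@dataclass(frozen=True)
class Span:
    start: int   # inclusive character offset
    end: int     # exclusive character offset


@dataclass(frozen=True)
class Token:
    text: str    # the full source string the span points into
    span: Span

    @property
    def value(self) -> str:
        # The substring this token covers, e.g. 'This' for Span(0, 4).
        return self.text[self.span.start:self.span.end]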
Example #8
def test_parse_list():
    tokens = tokenize("'(1 2 3)")
    result = parse(tokens)
    expected_list_items = [
        Token('word', '1'),
        Token('word', '2'),
        Token('word', '3')
    ]
    assert result.token_type == 'list' and all(
        [a == b for a, b in zip(result.val, expected_list_items)])
Example #9
 def test_tokenize_basic01(self):
     result = tokenize("(+ 5 2)")
     expected = [
         Token(TokenType.LPAREN, None),
         Token(TokenType.PLUS, None),
         Token(TokenType.INTEGER, 5),
         Token(TokenType.INTEGER, 2),
         Token(TokenType.RPAREN, None)
     ]
     self.assertListEqual(result, expected)
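
This test and Examples #10, #12 and #13 exercise the same tokenizer against a small TokenType enumeration. The enum and Token type are not included in the listing; a plausible minimal version covering only the members these tests use (an assumption, not the original code) would be:

# Assumed TokenType members, taken only from these tests; the real enum may differ.
from enum import Enum, auto
from typing import Any, NamedTuple


class TokenType(Enum):
    LPAREN = auto()
    RPAREN = auto()
    PLUS = auto()
    MULTIPLY = auto()
    EQ = auto()
    DEFINE = auto()
    ID = auto()
    INTEGER = auto()


class Token(NamedTuple):
    type: TokenType
    value: Any = None   # None for punctuation and keywords, the literal otherwise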
Example #10
 def test_tokenize_eq(self):
     result = tokenize("(eq? 2 2)")
     expected = [
         Token(TokenType.LPAREN, None),
         Token(TokenType.EQ, None),
         Token(TokenType.INTEGER, 2),
         Token(TokenType.INTEGER, 2),
         Token(TokenType.RPAREN, None)
     ]
     self.assertEqual(result, expected)
Example #11
    def test_specials(self):
        text = 'This Dr. is a test!'
        #       0123456789012345678
        tokenz = self._tokz.tokenize(text)

        self.assertEqual(tokenz, [Token(text, Span(0, 4)),
                                  Token(text, Span(5, 8)),
                                  Token(text, Span(9, 11)),
                                  Token(text, Span(12, 13)),
                                  Token(text, Span(14, 19))])
Example #12
 def test_tokenize_basic02(self):
     result = tokenize("(* 3 4)")
     expected = [
         Token(TokenType.LPAREN, None),
         Token(TokenType.MULTIPLY, None),
         Token(TokenType.INTEGER, 3),
         Token(TokenType.INTEGER, 4),
         Token(TokenType.RPAREN, None)
     ]
     self.assertListEqual(result, expected)
Example #13
 def test_tokenize_define(self):
     result = tokenize("(define cat 5)")
     expected = [
         Token(TokenType.LPAREN, None),
         Token(TokenType.DEFINE, None),
         Token(TokenType.ID, 'cat'),
         Token(TokenType.INTEGER, 5),
         Token(TokenType.RPAREN, None)
     ]
     self.assertEqual(result, expected)
Example #14
    def parse(self, line):
        parsed = []
        buffer = []

        if len(line) > 0:
            parsed_to_return = []
            for index, token in enumerate(line):
                if token.type == Type.MEAN:
                    for j, token in enumerate(line[index + 1:]):
                        if (token.type not in (Type.NUMBER, Type.VAR)
                                or len(line[index + 1:]) - 1 == j):
                            p = Token(type=Type.AVERAGE)
                            p.val = buffer.copy()
                            parsed.clear()
                            parsed.append(p)
                            buffer.clear()

                            for token in line[:index]:
                                parsed_to_return.append(token)
                            parsed_to_return.append(p)
                            for token in line[len(line[index + 1:]):]:
                                parsed_to_return.append(token)
                            return parsed_to_return

        for token in line:
            # push the token to the buffer
            buffer.append(token)

            # find syntactical matches
            matches = self.find_matches(buffer)

            while len(matches) == 0:
                # no matches probably due to an extra token
                # for example, ARITH VAL VAL is legal but VAL must be converted to number first
                # simply move the token to the parsed list
                if len(buffer) > 0:
                    parsed.append(buffer.pop(0))
                    matches = self.find_matches(buffer)
                else:
                    break

            for m in matches:
                for x in matches[m][1]:
                    # complete match
                    if len(x) == len(buffer):
                        # todo convert the token
                        t = Token(type=matches[m][0])
                        t.val = buffer.copy()

                        # then pop it
                        parsed.append(t)
                        buffer.clear()

        return parsed
Example #15
def test_parse_cascaded_parans():
    tokens = tokenize('(add (mul 3 4) 5)')
    result = parse(tokens)

    expected_params = [
        Func('mul',
             [Token('word', '3'), Token('word', '4')]),
        Token('word', '5')
    ]

    assert result.name == 'add' and all(
        [a == b for a, b in zip(result.params, expected_params)])
Example #16
def test_tokenize_list():
    result = tokenize("'(1 2 3)")

    expected = [
        Token('list_open'),
        Token('word', '1'),
        Token('word', '2'),
        Token('word', '3'),
        Token('list_close')
    ]

    assert all([a == b for a, b in zip(result, expected)])
Example #17
    def test_parse_parameter_list_empty(self):
        """
        ( (type identifier) (',' type identifier)*)?
        """
        tokens = (Token('symbol',')'), )
        expected = Token('parameterList', [])

        parser = Parser(tokens)
        parser.parse_type = self._mock_parse(parser)
        
        actual = parser.parse_parameter_list()
        self.assertEqual(expected, actual)
Example #18
 def parse_expression_list(self):
     """
     (expression (',' expression)* )?
     We should always expect a trailing ')'
     """
     node = Token('expressionList', [])
     if self.token.value != ')':
         assert Token('symbol', ')') in self.tokens[self.idx:], 'Expression list must close'
         node.append(self.parse_expression())
         while self.token.value != ')':
             self.try_add(node, 'symbol', value=',')
             node.append(self.parse_expression())
     return node
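
The parser methods in this example and in Examples #23 and #25 (and the tests in Examples #17 and #29) use Token both as a leaf such as Token('symbol', ';') and as a parse node holding a list of children, and they call a try_add helper that is never shown. The following is a hypothetical reading of those contracts, based only on how they are called here, not the original implementation:

# Illustrative guesses at the node type and helper these parser methods rely on.
from dataclasses import dataclass
from typing import Any


@dataclass
class Token:
    type: str    # 'keyword', 'symbol', 'expressionList', 'returnStatement', ...
    value: Any   # a lexeme for leaves, a list of child Tokens for nodes

    def append(self, child: 'Token') -> None:
        # Only meaningful for node tokens whose value is a list of children.
        self.value.append(child)


class Parser:
    # Sketch of the bookkeeping the methods above appear to assume.
    def __init__(self, tokens):
        self.tokens = list(tokens)
        self.idx = 0

    @property
    def token(self) -> Token:
        return self.tokens[self.idx]

    def try_add(self, node: Token, type_: str, value: str = None) -> None:
        # Presumed contract: the current token must match the expected type
        # (and value, when given); it is appended to `node` and then consumed.
        current = self.token
        assert current.type == type_ and (value is None or current.value == value)
        node.append(current)
        self.idx += 1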
Example #19
    def process(tokenlist):
        i = 0
        while i < len(tokenlist.tokens):
            token = tokenlist.tokens[i]
            t = token.type

            if t == TYPE_CALL:
                token2 = token.value[1]
                t2 = token2.type
                if t2 == TYPE_FUNCTION:
                    v2 = token2.value
                    if v2 == "if":
                        # First we must determine that there is in fact a body following this function.
                        if not (i + 1 < len(tokenlist.tokens)
                                and tokenlist.tokens[i + 1].type == TYPE_BLOCK_START
                                and tokenlist.tokens[i + 1].value == BLOCK_START_CHAR):
                            error_format(
                                token, "\"if\" should be followed by a block.")

                        # Only if the body following this if-function has an "else" will this goto be added.
                        index = find_endblock_token_index(
                            tokenlist.tokens, i + 2)
                        if (index + 1 < len(tokenlist.tokens)
                                and tokenlist.tokens[index + 1].type == TYPE_TERM
                                and tokenlist.tokens[index + 1].value == "else"):
                            end_of_chain = find_endblock_token_index(
                                tokenlist.tokens, i)
                            tokenlist.tokens.insert(
                                index,
                                Token(TYPE_GOTO, end_of_chain, None, None))
                            increment_gotos_pointing_after_here(
                                tokenlist, index)
                    elif v2 == "while":
                        # First we must determine that there is in fact a body following this function.
                        if not (i + 1 < len(tokenlist.tokens)
                                and tokenlist.tokens[i + 1].type == TYPE_BLOCK_START
                                and tokenlist.tokens[i + 1].value == BLOCK_START_CHAR):
                            error_format(
                                token,
                                "\"while\" should be followed by a body.")

                        # Next we place a goto at the end of that body to point back at this while-function's args.
                        index = find_endblock_token_index(
                            tokenlist.tokens, i + 1)
                        goto = find_startblock_token_index(
                            tokenlist.tokens, i - 3)
                        tokenlist.tokens.insert(
                            index - 1, Token(TYPE_GOTO, goto, None, None))
                        increment_gotos_pointing_after_here(tokenlist, index)
            i += 1
Example #20
def test_tokenize_with_spaces():
    result = tokenize('''(mul 
                            2 
                            3
                         )''')
    expected = [
        Token('open'),
        Token('word', 'mul'),
        Token('word', '2'),
        Token('word', '3'),
        Token('close')
    ]

    assert all([a == b for a, b in zip(result, expected)])
Example #21
    def execute(self, scope):
        # scope[self.dest.value] = self.val.execute(scope)
        val = self.val.execute(scope)
        if type(val) is AtomAST:
            scope[self.dest.value] = self.val
        elif type(val) is int:
            scope[self.dest.value] = AtomAST(Token(TokenType.NUM, val))
        elif type(val) is str:
            scope[self.dest.value] = AtomAST(Token(TokenType.STR, val))
        else:
            scope[self.dest.value] = AtomAST(Token(TokenType.STR, val))
            print('Warning: value of type {} is not supported!'.format(type(val)))

        return self.val
Example #22
 def compile_do(self, parse_tree):
     log.info('Compiling do statement')
     assert parse_tree.type == 'doStatement'
     subroutine_term = Token('term', parse_tree.value[1:-1])
     return self.compile_expression(subroutine_term) + (
         'pop temp 0\n'  # Clear return value
     )
Example #23
 def parse_return_statement(self):
     node = Token('returnStatement', [])
     self.try_add(node, 'keyword', value='return')
     if self.token.value != ';':
         node.append(self.parse_expression())
     self.try_add(node, 'symbol', value=';')
     return node
Example #24
 def _statement_list(self, additional_syncset=frozenset()):
     self._check_for_starter(self.stmt_starter_label,
                             self._follow_dl | additional_syncset, self._ID)
     sl = Atom(Token(self.sl_label, self.list_category,
                     self._curr.location))
     first_set = self._first_loop_sl if 'rompe' in additional_syncset else self._first_sl
     while self._curr.lexeme in first_set or self._check_id_num(self._ID):
         if self._curr.lexeme == 'if':
             self._selection(additional_syncset).parent = sl
         elif self._curr.lexeme == 'while':
             self._iteration(additional_syncset).parent = sl
         elif self._curr.lexeme == 'repeat':
             self._repetition(additional_syncset).parent = sl
         elif self._curr.lexeme == 'cin':
             self._cin_stmt(additional_syncset).parent = sl
         elif self._curr.lexeme in ('cout', 'coutln'):
             self._cout_stmt(additional_syncset).parent = sl
         elif self._curr.lexeme == '{':
             self._block(additional_syncset).parent = sl
         elif self._curr.lexeme == 'rompe':
             self._break_stmt(additional_syncset).parent = sl
         elif self._check_id_num(self._ID):
             self._assignment(additional_syncset).parent = sl
         elif self._curr.lexeme in self._first_una:
             self._pre(additional_syncset).parent = sl
         self._check_for_starter(self.stmt_starter_label,
                                 self._follow_dl | additional_syncset,
                                 self._ID)
     return sl
Example #25
 def parse_do_statement(self):
     node = Token('doStatement', [])
     self.try_add(node, 'keyword', value='do')
     for token in self.parse_subroutine_call():
         node.append(token)
     self.try_add(node, 'symbol', value=';')
     return node
Example #26
 def get_control(self, token_list):
     tokens = token_list[::]
     
     if token_list[0].get_type() not in [Token.STOP, Token.RETURN,
                                 Token.JUMPOVER, Token.HALT]:
         return None, tokens
     tok = token_list[0]
     token_list = token_list[1:]
     if tok.get_type() in [Token.STOP, Token.JUMPOVER]:
         sub = Literal(Token(Token.NULL, "Null"))
     else:
         sub, token_list = self.get_expression(token_list)
         if sub is None:
             sub = Literal(Token(Token.NULL, "Null"))
             
     return Control(tok, parent=None, child=sub), token_list
Example #27
def parse(tokens):
    stack = deque()

    for k in tokens:
        if k.token_type == 'open' or k.token_type == 'word' or k.token_type == 'list_open':
            stack.append(k)

        elif k.token_type == 'close':
            # create function
            sub = []
            while True:
                cur = stack.pop()
                if cur.token_type == 'open':
                    break
                else:
                    sub = [cur] + sub
            result = Func(sub[0].val, sub[1:])
            stack.append(result)

        elif k.token_type == 'list_close':
            sub = []
            while True:
                cur = stack.pop()
                if cur.token_type == 'list_open':
                    break
                else:
                    sub = [cur] + sub
            result = Token('list', sub)
            stack.append(result)

    if len(stack) == 1:
        return stack.pop()
    else:
        return list(stack)
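
For reference, a small usage sketch of this stack-based parse(), assuming the tokenize from Example #20 and the Token/Func value objects sketched after Example #1:

# Hypothetical round trip; mirrors the expectation tested in Example #15.
tokens = tokenize('(add (mul 3 4) 5)')
ast = parse(tokens)
# ast should come back as:
#   Func('add', [Func('mul', [Token('word', '3'), Token('word', '4')]),
#                Token('word', '5')])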
Example #28
 def _pre(self, additional_syncset):
     operator = Atom(self._curr, self._curr.lexeme[0])
     self._get_token()
     var = Atom(self._curr, f'{self.assignment_label} Ø')
     temp = self._curr.lexeme
     if self._sync(self._ID,
                   frozenset({';'}) | self._follow_dl | additional_syncset,
                   self._ID):
         var.lexeme = f'{self.assignment_label} {temp}'
     operator.parent = var
     Atom(Token(var.lexeme.split()[-1], var.category, operator.location),
          parent=operator,
          _inc_dec=True)
     Atom(Token('1', self.tokenizer.int_label, operator.location),
          parent=operator)
     self._sync(';', self._follow_dl | additional_syncset, self._ID)
     return var
Example #29
    def test_parse_return_statement_empty(self):
        """
        'return' expression? ';' 
        """
        tokens = (
            Token('keyword', 'return'), 
            Token('symbol', ';'), 
        )

        expected = Token('returnStatement', [
            Token('keyword', 'return'), 
            Token('symbol', ';'), 
        ])

        parser = Parser(tokens)
        actual = parser.parse_return_statement()
        self.assertEqual(expected, actual)
Example #30
    def parse_type(self):
        """
        'int' | 'char' | 'boolean' | identifier 
        """
        standard_types = (
            Token('keyword', 'int'), 
            Token('keyword', 'char'),
            Token('keyword', 'boolean')
        )

        token = self.token
        if self.token not in standard_types:
            # Hack to validate identifier
            self.try_add(Token('dummy', []), 'identifier')
        else:
            self.idx += 1
        return token