Example #1
    def test_multiple_for(self):
        line = "530 NEXTI"
        results = tokenize_line(line)
        self.assertTrue(isinstance(results, ProgramLine))
        self.assertEqual(530, results.line)
        self.assertEqual(1, len(results.stmts))

        result = results.stmts[0]
        self.assertEqual(Keywords.NEXT, result.keyword)
        self.assertEqual('I', result.loop_var)

        line = "530 FORI=1TO9:C(I,1)=0:C(I,2)=37:NEXTI"
        results = tokenize_line(line)
        self.assertTrue(isinstance(results, ProgramLine))
        self.assertEqual(530, results.line)
        self.assertEqual(4, len(results.stmts))

        result = results.stmts[1]
        self.assertEqual(Keywords.LET, result.keyword)
        self.assertEqual([lexer_token(0, "num")], result._tokens)
        self.assertEqual("C(I,1)", result._variable)

        result = results.stmts[2]
        self.assertEqual(Keywords.LET, result.keyword)
        self.assertEqual([lexer_token(37, "num")], result._tokens)
        self.assertEqual("C(I,2)", result._variable)

        result = results.stmts[3]
        self.assertEqual(Keywords.NEXT, result.keyword)
        self.assertEqual('I', result.loop_var)

    def test_def(self):
        executor = self.runit(['100 DEF FNA(X)=X^2+1'])
        self.assertEqual(1, executor.get_symbol_count())
        # self.assert_value(executor, "FNA", "X^2+1")
        value = executor.get_symbol("FNA", symbol_type=SymbolType.FUNCTION)
        expected = [
            lexer_token(token='X', type='id'),
            lexer_token(token='^', type='op'),
            lexer_token(token=2.0, type='num'),
            lexer_token(token='+', type='op'),
            lexer_token(token=1.0, type='num')
        ]
        self.assertEqual(expected, value)

        AT = executor.get_symbol_type("FNA", SymbolType.FUNCTION)
        self.assertEqual(SymbolType.FUNCTION, AT)

    def test_spaces(self):
        stack = []
        tokens = self._lexer.lex(' 10 + 7 ')
        self.assertEqual(3, len(tokens))
        stack.append(tokens[0])
        stack.append(tokens[2])
        binop = get_op(lexer_token("+", "op"))
        answer = binop.eval(stack, op=None)
        self.assertEqual(17, answer.token)

    def test_minus(self):
        stack = []
        tokens = self._lexer.lex('10-7')
        self.assertEqual(3, len(tokens))
        stack.append(tokens[0])
        stack.append(tokens[2])
        binop = get_op(lexer_token("-", "op"))
        answer = binop.eval(stack, op=None)  # op is only needed for DEF FNx; not used here.
        self.assertEqual(3, answer.token)
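
These tests build lexer_token values both positionally and by keyword. A minimal sketch of the shape they imply, assuming a plain (token, type) named tuple (the real definition lives in the lexer module):

from collections import namedtuple

# Hypothetical minimal definition matching the usage in the tests above.
lexer_token = namedtuple("lexer_token", ["token", "type"])

assert lexer_token(37, "num").token == 37
assert lexer_token("+", "op") == lexer_token(token="+", type="op")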
Example #5
    def test_token_exp(self):
        multi_exp = "T=INT(RND(1)*20+20)*100:T0=T:T9=25+INT(RND(1)*10):D0=0:E=3000:E0=E"
        line = f"370 {multi_exp}"
        results = tokenize_line(line)
        self.assertTrue(isinstance(results, ProgramLine))
        self.assertEqual(370, results.line)
        self.assertEqual(6, len(results.stmts))
        expect = multi_exp.split(":")
        self.assertEqual(6, len(expect))
        for i in range(len(expect)):
            self.assertEqual(Keywords.LET, results.stmts[i].keyword)
            #self.assertEqual(expect[i], results.stmts[i].args)

        self.assertEqual([lexer_token(3000, "num")], results.stmts[4]._tokens)
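
The tokenizer tests above only touch results.line and results.stmts on the object that tokenize_line returns. A plausible minimal shape for ProgramLine, stated purely as an assumption for reading the tests:

from collections import namedtuple

# Hypothetical: just the two fields the tests exercise.
ProgramLine = namedtuple("ProgramLine", ["line", "stmts"])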
Example #6
    def test_lex_vars(self):
        # Check that we can handle variable names that run into keywords ("YANDQ1" must lex as Y, AND, Q1).
        tokens = self._lexer.lex("X<>YANDQ1<7")
        expected = [
            lexer_token(token='X', type='id'),
            lexer_token(token='<>', type='op'),
            lexer_token(token='Y', type='id'),
            lexer_token(token='AND', type='op'),
            lexer_token(token='Q1', type='id'),
            lexer_token(token='<', type='op'),
            lexer_token(token=7.0, type='num')
        ]
        self.assertEqual(expected, tokens)
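
For YANDQ1 to split into Y, AND, Q1, the lexer has to spot keywords embedded in a run of letters. The keyword sets are not shown in this excerpt; presumably they look something like this (the values are an assumption, only the names appear in the lexer code below):

BOOLEAN_OPERATORS = {"AND", "OR", "NOT"}   # lexed as "op" tokens
TEXT_OPERATORS = ["AND", "OR", "NOT"]      # scanned for inside identifier runs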
Example #7
    def eval(self, stack, *, op):
        self.check_args(stack)
        first = stack.pop()
        answer = self.eval1(first.token, op=op)
        # Use the operator's declared return type if it has one; otherwise inherit the operand's type.
        return_type = self._return_type if self._return_type is not None else first.type
        return lexer_token(answer, return_type)
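
This is the shared eval for unary operators: pop one operand, delegate to eval1, wrap the result. A self-contained sketch of how a concrete operator might plug in; MinusMonoOp and the namedtuple shape for lexer_token are assumptions, not the project's actual definitions:

from collections import namedtuple

lexer_token = namedtuple("lexer_token", ["token", "type"])  # assumed shape

class MinusMonoOp:
    """Hypothetical unary minus, shaped to fit the eval() above."""
    _return_type = None  # None means: inherit the operand's type.

    def check_args(self, stack):
        assert len(stack) >= 1

    def eval1(self, value, *, op):
        return -value

    def eval(self, stack, *, op):
        self.check_args(stack)
        first = stack.pop()
        answer = self.eval1(first.token, op=op)
        return_type = self._return_type if self._return_type is not None else first.type
        return lexer_token(answer, return_type)

print(MinusMonoOp().eval([lexer_token(5.0, "num")], op=None))
# -> lexer_token(token=-5.0, type='num')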
Example #8
    def eval(self, stack, *, op):
        self.check_args(stack)
        # Operands come off in reverse order: the right-hand side was pushed last.
        second = stack.pop()
        first = stack.pop()
        answer = self.eval2(first.token, second.token)
        return lexer_token(answer, first.type)
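
Here the pop order matters: the right-hand operand comes off the stack first because it was pushed last. A runnable sketch with a hypothetical MinusBinOp, mirroring the flow of test_minus above:

from collections import namedtuple

lexer_token = namedtuple("lexer_token", ["token", "type"])  # assumed shape

class MinusBinOp:
    """Hypothetical binary minus following the eval() pattern above."""

    def check_args(self, stack):
        assert len(stack) >= 2

    def eval2(self, first, second):
        return first - second

    def eval(self, stack, *, op):
        self.check_args(stack)
        second = stack.pop()  # pushed last, so it is the right-hand side
        first = stack.pop()
        answer = self.eval2(first.token, second.token)
        return lexer_token(answer, first.type)

stack = [lexer_token(10.0, "num"), lexer_token(7.0, "num")]
print(MinusBinOp().eval(stack, op=None).token)  # 3.0, as in test_minus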
Example #9
    def lex2(self, text):
        state = None
        token = ""
        back = None
        index = 0

        def cur():
            assert text is not None
            if index == len(text):
                return None
            return text[index]

        def peek():
            if index + 1 == len(text):
                return None
            return text[index + 1]

        def consume():
            nonlocal index
            current = text[index]
            index += 1
            return current  # So we can get and consume in one operation.

        while (c := cur()) is not None:
            if state is None:
                if c in LETTERS:
                    token = ""
                    if (peek() is not None and peek() in NUMBERS) or peek() == '$':
                        # Only consume if on identifier path.
                        token += consume()
                        if cur() in NUMBERS:
                            token += consume()
                        if cur() == '$':
                            token += consume()
                        yield lexer_token(token, "id")
                        continue

                    if peek() is None or peek() not in LETTERS:
                        yield lexer_token(consume(), "id")
                        continue

                    # At this point, we know it's not a variable.
                    found = self.scan_for_keyword(TEXT_OPERATORS, text[index:])
                    if not found:
                        # Can't make an operator from it, so it must be an ID.
                        yield lexer_token(consume(), "id")
                        continue

                    for _ in found:
                        consume()
                    if found in BOOLEAN_OPERATORS:
                        yield lexer_token(found, "op")
                    else:
                        yield lexer_token(found, "id")
                elif c in OPERATORS:
                    first = consume()
                    if first == "<" and cur() == ">":
                        consume()
                        yield lexer_token("<>", "op")
                    elif first == "<" and cur() == "=":
                        consume()
                        yield lexer_token("<=", "op")
                    elif first == ">" and cur() == "=":
                        consume()
                        yield lexer_token(">=", "op")
                    else:
                        yield lexer_token(first, "op")
                elif c in NUMBERS or c == '.':
                    token = ""
                    while (c := cur()) is not None and (c in NUMBERS or c == '.'):
                        token += consume()
                    yield lexer_token(float(token), "num")
                elif c == '"':
                    consume()
                    token = ""
                    while (c := cur()) is not None and (c != '"'):
                        token += consume()
                    if cur() != '"':
                        raise BasicSyntaxError(F"No closing quote char.")
                    consume()
                    yield lexer_token(token, "str")
Example #10
    def eval(self, tokens: list[lexer_token], *, symbols=None) -> lexer_token:
        """
        Evaluates an expression, like "2+3*5-A+RND()"
        :param symbols: Symbols (BASIC variables) to use when evaluating the expression
        :param tokens: the incoming list[lexer_token]
        :return: A lexer token with the result and the type.
        """
        from basic_operators import get_op, get_precedence  # Import it in two places, so the IDE knows it's there.
        # "-" is ambiguous. It can mean subtraction or unary minus.
        # if "-" follows a data item, it's subtraction.
        # if "-" follows an operator, it's unary minus, unless the operator is )
        # Why ")"? I need to be able to express this better.
        is_unary_context = True
        assert not isinstance(symbols, dict)
        if symbols is None:  # Happens during testing.
            symbols = SymbolTable()  # TODO Fix this. No "if test" allowed.

        if len(tokens) == 0:
            raise BasicSyntaxError(F"No expression.")

        data_stack = []
        op_stack: list[OP_TOKEN] = []
        token_index = 0
        while token_index < len(tokens):
            current = tokens[token_index]

            if current.type == "op":
                if current.token == "-" and is_unary_context:
                    current = lexer_token(UNARY_MINUS, current.type)
                # Do anything on the stack that has higher precedence.
                while len(op_stack):
                    top = op_stack[-1]
                    # This makes everything left-associative, which is probably fine but may be
                    # wrong for exponentiation. Visual Basic is left-associative for everything:
                    # https://docs.microsoft.com/en-us/dotnet/visual-basic/language-reference/operators/operator-precedence
                    # and http://www.quitebasic.com/ also shows left-associative exponentiation
                    # (they use **, not ^).
                    if top.token != "(" and get_precedence(top) >= get_precedence(current):
                        # The stacked operator has higher (or equal) precedence: apply it now.
                        self.one_op(op_stack, data_stack)
                    else:
                        break
                if current.token != ")":
                    op_stack.append(
                        OP_TOKEN(current.token,
                                 current.type,
                                 None,
                                 None,
                                 symbols=None))
                else:
                    assert_syntax(top.token == "(", F"Unbalanced parens.")
                    op_stack.pop()
                if current.token == ")":
                    is_unary_context = False
                else:
                    is_unary_context = True
            else:
                if current.type == "id":
                    # TODO Problem: We now need to know the SymbolType of a variable to retrieve it
                    # but we don't know it here. Maybe we can defer referencing it, until it is
                    # used? At that point, we would know array vs function. I think.
                    # I think this works:
                    symbol_type = self.get_type_from_name(current, tokens, token_index)

                    if not symbols.is_symbol_defined(current.token, symbol_type):
                        raise UndefinedSymbol(F"Undefined variable: '{current.token}'")
                    symbol_value = symbols.get_symbol(current.token, symbol_type)
                    symbol_type2 = symbols.get_symbol_type(current.token, symbol_type)
                    # The symbol table implementation changed; check that we are still consistent.
                    assert symbol_type == symbol_type2
                    if symbol_type == SymbolType.VARIABLE:
                        if current.token.endswith("$"):
                            data_stack.append(lexer_token(symbol_value, "str"))
                        else:
                            data_stack.append(lexer_token(symbol_value, "num"))
                    elif symbol_type == SymbolType.FUNCTION:
                        # Handle function as operators. Lower priority than "(", but higher than everything else.
                        # So don't append this to the data stack, append it to the op stack as a function.
                        arg = symbols.get_symbol_arg(current.token, SymbolType.FUNCTION)
                        op_stack.append(OP_TOKEN(current.token, SymbolType.FUNCTION, arg, symbol_value, symbols=symbols))
                    else:
                        # Array access
                        arg = current.token
                        op_stack.append(OP_TOKEN(ARRAY_ACCESS, "array_access", arg, None, symbols=symbols))
                else:
                    data_stack.append(current)
                is_unary_context = False
            token_index += 1

        # Do anything left on the stack
        while len(op_stack):
            self.one_op(op_stack, data_stack)

        assert_syntax(len(op_stack) == 0, "Expression not completed.")
        assert_syntax(len(data_stack) == 1, "Data not consumed.")

        return data_stack[0]  # A lexer_token, per the signature and docstring.
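
A worked trace makes the precedence loop concrete: for 2+3*5, "*" stacks on top of "+" because it binds tighter, and both are applied in the final drain. Hypothetical usage, assuming this eval lives on an Expression class and Lexer comes from the same module:

tokens = Lexer().lex("2+3*5")
result = Expression().eval(tokens, symbols=None)  # None: eval substitutes an empty SymbolTable
# How the stacks evolve (values are floats in practice):
#   data: [2]         ops: []
#   data: [2]         ops: [+]
#   data: [2, 3]      ops: [+]
#   data: [2, 3]      ops: [+, *]   "*" outranks "+", so "+" stays parked
#   data: [2, 3, 5]   ops: [+, *]
#   drain: apply "*" -> [2, 15], then "+" -> [17]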
Example #12
class Lexer:
    def __init__(self):
        pass

    def scan_for_keyword(self, array, text):
        """
        Find any strings matching an element of array in text.

        :param array:
        :param text:
        :return:
        """
        match = ""
        for i, c in enumerate(text):
            match += c
            potentials = [op for op in array if i < len(op) and op[i] == match[i]]
            #print(c, potentials)

            if not potentials:
                return None
            for p in potentials:
                if i + 1 == len(p):
                    return p
            array = potentials
        return None
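
    # A quick illustration of scan_for_keyword (the candidate list here is hypothetical):
    #   Lexer().scan_for_keyword(["AND", "OR", "NOT"], "ANDQ1<7")  -> "AND"
    #   Lexer().scan_for_keyword(["AND", "OR", "NOT"], "A1<7")     -> None ("A1" can't complete "AND")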

    def lex(self, text):
        return list(self.lex2(text))

    def lex2(self, text):
        token = ""
        index = 0

        def cur():
            assert text is not None
            if index == len(text):
                return None
            return text[index]

        def peek():
            if index + 1 == len(text):
                return None
            return text[index+1]

        def consume():
            nonlocal index
            current = text[index]
            index += 1
            return current # So we can get and consume in one operation.
        ST_ANY = 1
        ST_REF = 2 # VAR, FUNC,
        ST_INT = 3
        ST_FLOAT = 4
        ST_STRING = 5
        state = ST_ANY
        while (c := cur()) is not None:
            assert state
            if state == ST_ANY:
                if c == ' ' or c == '\t':  # Ignore unquoted whitespace.
                    consume()
                elif c in LETTERS:
                    token = consume()
                    state = ST_REF
                elif c in NUMBERS:
                    token = consume()
                    state = ST_INT
                elif c == '.':  # A number may start with '.', like ".5"
                    token = "0" + consume()
                    state = ST_FLOAT
                elif c in OP_FIRST:
                    p = peek()
                    if c in OP_TWO_FIRST and p is not None and c + p in OP_TWO:
                        # Two-character operator, like "<=" or "<>".
                        consume()
                        consume()
                        yield lexer_token(c + p, "op")
                    else:
                        consume()
                        yield lexer_token(c, "op")
                    state = ST_ANY
                    token = ""
                elif c =='"':
                    consume()
                    state = ST_STRING
                    token = ""
                else:
                    raise BasicSyntaxError(F"Unexpected char {c} in state {state}")
            elif state == ST_REF:
                if c in NUMBERS:  # TODO: also handle string variable names like A1$.
                    token += consume()
                    yield lexer_token(token, "id")
                    token = ""
                    state = ST_ANY
                elif c in LETTERS:
                    token += consume()
                else:
                    if token in BOOLEAN_OPERATORS:
                        yield lexer_token(token, "op")
                    else:
                        yield lexer_token(token, "id")
                    token = ""
                    state = ST_ANY
            elif state == ST_INT:
                if c in NUMBERS:
                    token += consume()
                elif c == '.':
                    token += consume()
                    state = ST_FLOAT
                else:
                    yield lexer_token(float(token), "num")
                    token = ""
                    state = ST_ANY
            elif state == ST_FLOAT:
                if c in NUMBERS:
                    token += consume()
                else:
                    yield lexer_token(float(token), "num")
                    token = ""
                    state = ST_ANY
            elif state == ST_STRING:
                if c == '"':
                    consume()
                    yield lexer_token(token, "str")
                    token = ""
                    state = ST_ANY
                else:
                    token += consume()
                    if len(token) > 65536:
                        raise BasicSyntaxError(F"String too long (> 65536).")
            else:
                # All five states are handled above; anything else is a lexer bug.
                raise BasicSyntaxError(F"Invalid character {c} in state {state}")

        # Flush any token still in progress at end of line.
        if state == ST_REF:
            yield lexer_token(token, "id")
        elif state == ST_INT:
            yield lexer_token(float(token), "num")  # float, for consistency with the in-line path
        elif state == ST_FLOAT:
            yield lexer_token(float(token), "num")
        elif state == ST_STRING:
            raise BasicSyntaxError("End of line inside a string.")

        return
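
A short end-to-end smoke test, assuming LETTERS, NUMBERS, OP_FIRST, OP_TWO, OP_TWO_FIRST, and BOOLEAN_OPERATORS are defined in the surrounding module (they are not shown in this excerpt):

lexer = Lexer()
print(lexer.lex("10-7"))
# -> [lexer_token(10.0, 'num'), lexer_token('-', 'op'), lexer_token(7.0, 'num')]
print(lexer.lex("X<>Y AND Q1<7"))
# -> ids X, Y, Q1; ops <>, AND, <; num 7.0. Note this state-machine lexer needs
#    the spaces around AND; the keyword-scanning variant in Example #9 does not.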