def assign_variable(executor, variable, value):
    """
    Assign a value to a BASIC variable, including array elements (e.g. "A(3)=7").

    :param executor: Executor providing the symbol table
        (put_symbol / put_symbol_element / _symbols).
    :param variable: The variable name, optionally with a parenthesized
        subscript list for array element assignment.
    :param value: The value to store.
    :return: None
    :raises BasicSyntaxError: On malformed array subscripts or invalid identifiers.
    """
    variable = variable.replace(" ", "")
    # TODO Should move parsing of this to ParsedStatementLet.
    # TODO Need to handle N-dimensional array element assignment.
    i = variable.find("(")
    if i != -1:
        # Array reference
        j = variable.find(")", i + 1)
        if j == -1:
            # BUG FIX: message previously read "in in".
            raise BasicSyntaxError(
                F"Missing ) in array assignment to {variable}")
        if i + 1 == j:
            raise BasicSyntaxError(
                F"Missing array subscript in assignment to {variable}")
        subscripts = variable[i + 1:j].split(",")
        variable = variable[:i]
        is_valid_identifier(variable)
        # BASIC subscripts are 1-based; internal storage is 0-based, hence the -1.
        subscripts = [
            int(eval_expression(executor._symbols, subscript)) - 1
            for subscript in subscripts
        ]
        executor.put_symbol_element(variable, value, subscripts)
    else:
        is_valid_identifier(variable)
        executor.put_symbol(variable, value,
                            symbol_type=SymbolType.VARIABLE, arg=None)
def __init__(self, keyword, args):
    """
    Parse a LET-style statement of the form "<variable> = <expression>".

    :param keyword: The statement keyword token.
    :param args: Statement text after the keyword; must contain '='.
    :raises BasicSyntaxError: If no '=' is present.
    """
    super().__init__(keyword, "")
    try:
        # Split only on the first '='; the expression may itself contain '='.
        variable, value = args.split("=", 1)
    except ValueError as e:
        # Unpacking fails only when no '=' was found. Narrowed from
        # a broad `except Exception`, and chained for debuggability.
        raise BasicSyntaxError(F"Error in expression. No '='.") from e
    lexer = get_lexer()
    self._tokens = lexer.lex(value)
    self._expression = Expression()
    self._variable = variable.strip()
def __init__(self, keyword, args):
    """
    Parse a DEF FN statement of the exact classic form "FNx(y)=<expression>".

    :param keyword: The statement keyword token.
    :param args: Statement text after the keyword; must contain '='.
    :raises BasicSyntaxError: If no '=' is present, or the function
        definition is not of the supported single-letter form.
    """
    super().__init__(keyword, "")
    try:
        # Split only on the first '='; the expression may itself contain '='.
        variable, value = args.split("=", 1)
    except ValueError as e:
        # Unpacking fails only when no '=' was found. Narrowed from
        # a broad `except Exception`, and chained for debuggability.
        raise BasicSyntaxError(F"Error in expression. No '='.") from e
    variable = variable.strip()
    # Only "FN" + one-letter name + "(" + one-letter arg + ")" is supported.
    assert_syntax(
        len(variable) == 6 and
        variable.startswith("FN") and
        variable[3] == '(' and
        variable[5] == ')',
        "Function definition error")
    self._function_arg = variable[4]  # The single-letter parameter name.
    self._variable = variable[:3]     # The function name, e.g. "FNA".
    lexer = get_lexer()
    self._tokens = lexer.lex(value)
    self._value = value.strip()
def get_op(token):
    """
    Look up the handler class for an operation token.

    # TODO should change to "get_op_class"
    # TODO Should subclass, maybe.

    :param token: May be an OP_TOKEN, or a lexer_token.
    :return: An instance of a class that handles that operation.
    """
    functions = basic_functions.PredefinedFunctions()
    if token.type != SymbolType.FUNCTION:
        # Ordinary operator: resolve straight from the operator table.
        return get_op_def(token.token).cls
    name = token.token
    if name in functions.functions:
        return functions.functions[name]
    if name.startswith("FN"):
        # "∫" is the internal marker for user defined functions.
        return get_op_def("∫").cls
    raise BasicSyntaxError("Unknown Function '" + name + "'")
def eval2(self, first, second):
    """
    Apply this operator's binary function to (first, second).

    :param first: Left operand.
    :param second: Right operand; must not be zero.
    :raises BasicSyntaxError: When second is zero.
    :return: The function result, or None when no function is attached.
    """
    if second == 0:
        raise BasicSyntaxError("Division by zero")
    op = self._lambda
    if not op:
        # Matches the original's implicit None return.
        return None
    return op(first, second)
def lex2(self, text):
    """
    Generator that tokenizes text into lexer_token tuples.

    Keyword-scanning variant: single letters (optionally with a digit and/or
    "$" suffix) are identifiers; longer runs of letters are matched against
    TEXT_OPERATORS via scan_for_keyword.

    :param text: The source string to lex; must not be None.
    :return: Yields lexer_token(value, type), type in {"id", "op", "num", "str"}.
    :raises BasicSyntaxError: On an unterminated string or unexpected character.
    """
    state = None  # NOTE(review): never reassigned in this variant.
    token = ""
    back = None   # NOTE(review): appears unused — confirm before removing.
    index = 0

    def cur():
        # Current character, or None at end of input.
        if text is None:
            assert (0)
        if index == len(text):
            return None
        return text[index]

    def peek():
        # Character after the current one, or None at end of input.
        if index + 1 == len(text):
            return None
        return text[index + 1]

    def consume():
        # Advance one character and return it.
        nonlocal index
        current = text[index]
        index += 1
        return current  # So we can get and consume in one operation.

    while (c := cur()) is not None:
        if state is None:
            if c in LETTERS:
                token = ""
                # Identifier forms: letter, letter+digit, letter+$, letter+digit+$.
                if peek() is not None and peek() in NUMBERS or peek(
                ) == '$':  # Only consume if on identifier path.
                    token += consume()
                    if cur() in NUMBERS:
                        token += consume()
                    if cur() == '$':
                        token += consume()
                    yield lexer_token(token, "id")
                    continue
                if peek() is None or peek() not in LETTERS:
                    # A lone letter is a variable.
                    yield lexer_token(consume(), "id")
                    continue
                # At this point, we know it's not a variable.
                found = self.scan_for_keyword(TEXT_OPERATORS, text[index:])
                if not found:
                    # Can't make an operator from it, so much be an ID.
                    yield lexer_token(consume(), "id")
                    continue
                for _ in found:
                    consume()
                if found in BOOLEAN_OPERATORS:
                    yield lexer_token(found, "op")
                else:
                    yield lexer_token(found, "id")
            elif c in OPERATORS:
                # Two-character relational operators first, then single chars.
                first = consume()
                if first == "<" and cur() == ">":
                    consume()
                    yield lexer_token("<>", "op")
                elif first == "<" and cur() == "=":
                    consume()
                    yield lexer_token("<=", "op")
                elif first == ">" and cur() == "=":
                    consume()
                    yield lexer_token(">=", "op")
                else:
                    yield lexer_token(first, "op")
            elif c in NUMBERS or c == '.':
                # Numeric literal; all numbers become floats.
                token = ""
                while (c := cur()) is not None and (c in NUMBERS or c == '.'):
                    token += consume()
                yield lexer_token(float(token), "num")
            elif c == '"':
                # Double-quoted string literal; the quotes are not kept.
                consume()
                token = ""
                while (c := cur()) is not None and (c != '"'):
                    token += consume()
                if cur() != '"':
                    raise BasicSyntaxError(F"No closing quote char.")
                consume()
                yield lexer_token(token, "str")
            elif c == ' ' or c == '\t':
                consume()  # Ignore non quoted whitespace.
            else:
                raise BasicSyntaxError(F"Unexpected char '{c}'")
    return


if __name__ == '__main__':
    # Ad-hoc manual test driver.
    p = Lexer()
    tokens = p.lex("XRND")
    # tokens = p.lex("IFX>YANDQ1<7")
    for t in tokens:
        print("Token: ", t)
    #print(p.consume_from(TEXT_OPERATORS, "AND ABC"))
def eval(self, tokens: list[lexer_token], *, symbols=None) -> lexer_token:
    """
    Evaluates an expression, like "2+3*5-A+RND()".

    Shunting-yard style evaluation: operators accumulate on op_stack,
    operands on data_stack, and self.one_op pops and applies one operator.

    :param symbols: Symbols (BASIC variables) to use when evaluating the expression
    :param tokens: the incoming list[lexer_token]
    :return: A lexer token with the result and the type.
    """
    from basic_operators import get_op, get_precedence  # Import it in two places, so the IDE knows it's there.
    # "-" is ambiguous. It can mean subtraction or unary minus.
    #   if "-" follows a data item, it's subtraction.
    #   if "-" follows an operator, it's unary minus, unless the operator is )
    # Why ")"? I need to be able to express this better.
    is_unary_context = True
    assert type(symbols) != dict  # Must be a SymbolTable, not a raw dict.
    if symbols is None:  # Happens during testing.
        symbols = SymbolTable()  # TODO Fix this. No "if test" allowed.
    if len(tokens) == 0:
        raise BasicSyntaxError(F"No expression.")
    data_stack = []
    op_stack: list[OP_TOKEN] = []
    token_index = 0
    while token_index < len(tokens):
        current = tokens[token_index]
        if current.type == "op":
            if current.token == "-" and is_unary_context:
                current = lexer_token(UNARY_MINUS, current.type)
            # Do anything on the stack that has higher precedence.
            while len(op_stack):
                top = op_stack[-1]
                # This makes everything left associative. I think that's ok. Might be wrong for exponentiation
                # This says visual basic was left associative for everything.
                # https://docs.microsoft.com/en-us/dotnet/visual-basic/language-reference/operators/operator-precedence
                # This shows left associative exponentiation: (they use **, not ^)
                # http://www.quitebasic.com/
                if top.token != "(" and get_precedence(
                        top) >= get_precedence(
                            current):  # Check operator precedence
                    self.one_op(op_stack, data_stack)
                else:
                    break
            if current.token != ")":
                op_stack.append(
                    OP_TOKEN(current.token, current.type, None, None,
                             symbols=None))
            else:
                # NOTE(review): if op_stack was empty (a stray ")"), `top` is
                # unbound here and this raises UnboundLocalError instead of a
                # BasicSyntaxError — confirm and handle.
                assert_syntax(top.token == "(", F"Unbalanced parens.")
                op_stack.pop()
            if current.token == ")":
                is_unary_context = False
            else:
                is_unary_context = True
        else:
            if current.type == "id":
                # TODO Problem: We now need to know the SymbolType of a variable to retrieve it
                # but we don't know it here. Maybe we can defer referencing it, until it is
                # used? At that point, we would know array vs function. I think.
                # I think this works:
                symbol_type = self.get_type_from_name(
                    current, tokens, token_index)
                if not symbols.is_symbol_defined(current.token, symbol_type):
                    raise UndefinedSymbol(
                        F"Undefined variable: '{current.token}'")
                symbol_value = symbols.get_symbol(current.token, symbol_type)
                symbol_type2 = symbols.get_symbol_type(
                    current.token, symbol_type)
                # Changed the way that symbols tables work. Check that we are still consistent.
                assert (symbol_type == symbol_type2)
                if symbol_type == SymbolType.VARIABLE:
                    # A "$" suffix marks a string variable in BASIC.
                    if current.token.endswith("$"):
                        data_stack.append(lexer_token(symbol_value, "str"))
                    else:
                        data_stack.append(lexer_token(symbol_value, "num"))
                elif symbol_type == SymbolType.FUNCTION:
                    # Handle function as operators. Lower priority than "(", but higher than everything else.
                    # So don't append this to the data stack, append it to the op stack as a function.
                    arg = symbols.get_symbol_arg(current.token,
                                                 SymbolType.FUNCTION)
                    op_stack.append(
                        OP_TOKEN(current.token, SymbolType.FUNCTION, arg,
                                 symbol_value, symbols=symbols))
                else:  # Array access
                    arg = current.token
                    op_stack.append(
                        OP_TOKEN(ARRAY_ACCESS, "array_access", arg, None,
                                 symbols=symbols))
            else:
                # Literal number or string: straight onto the data stack.
                data_stack.append(current)
            is_unary_context = False
        token_index += 1
    # Do anything left on the stack
    while len(op_stack):
        self.one_op(op_stack, data_stack)
    assert_syntax(len(op_stack) == 0, F"Expression not completed.")
    assert_syntax(len(data_stack) == 1, F"Data not consumed.")
    # NOTE(review): this returns the .token value, not the lexer_token itself,
    # despite the annotated return type — confirm against callers.
    return data_stack[0].token
def lex2(self, text):
    """
    Generator that tokenizes text with a small state machine.

    States: ST_ANY (between tokens), ST_REF (identifier/keyword),
    ST_INT / ST_FLOAT (numeric literal), ST_STRING (inside double quotes).

    :param text: The source string to lex; must not be None.
    :return: Yields lexer_token(value, type), type in {"id", "op", "num", "str"}.
    :raises BasicSyntaxError: On bad characters, an over-long string
        literal, or end of input inside a string.
    """
    token = ""
    index = 0

    def cur():
        # Current character, or None at end of input.
        assert text is not None
        if index == len(text):
            return None
        return text[index]

    def peek():
        # Character after the current one, or None at end of input.
        if index + 1 == len(text):
            return None
        return text[index + 1]

    def consume():
        # Advance past the current character and return it.
        nonlocal index
        current = text[index]
        index += 1
        return current  # So we can get and consume in one operation.

    ST_ANY = 1     # Between tokens.
    ST_REF = 2     # In an identifier or keyword (VAR, FUNC, ...).
    ST_INT = 3     # In the integer part of a number.
    ST_FLOAT = 4   # After the decimal point of a number.
    ST_STRING = 5  # Inside a double-quoted string.
    state = ST_ANY
    # BUG FIX: removed a stray debug print(state, c) executed per character.
    while (c := cur()) is not None:
        if state == ST_ANY:
            if c == ' ' or c == '\t':
                # BUG FIX: tabs were only "handled" by an unreachable branch
                # in the original; skip them like spaces.
                consume()
            elif c in LETTERS:
                token = consume()
                state = ST_REF
            elif c in NUMBERS:
                token = consume()
                state = ST_INT
            elif c == '.':  # Number starts with ., like ".5"
                token = "0" + consume()
                state = ST_FLOAT
            elif c in OP_FIRST:
                p = peek()
                if c not in OP_TWO_FIRST:
                    consume()
                    yield lexer_token(c, "op")
                elif p is not None and c + p in OP_TWO:
                    # BUG FIX: guard p against None (operator at end of input
                    # previously raised TypeError on c + p).
                    consume()
                    consume()
                    yield lexer_token(c + p, "op")
                else:
                    consume()
                    yield lexer_token(c, "op")
                token = ""
            elif c == '"':
                consume()
                state = ST_STRING
                token = ""
            else:
                raise BasicSyntaxError(F"Unexpected char {c} in state {state}")
        elif state == ST_REF:
            if c in NUMBERS:  # Need to check for A1$
                token += consume()
                yield lexer_token(token, "id")
                token = ""
                state = ST_ANY
            elif c in LETTERS:
                token += consume()
            else:
                if token in BOOLEAN_OPERATORS:
                    yield lexer_token(token, "op")
                else:
                    yield lexer_token(token, "id")
                token = ""
                state = ST_ANY
        elif state == ST_INT:
            if c in NUMBERS:
                token += consume()
            elif c == '.':
                token += consume()
                state = ST_FLOAT
            else:
                yield lexer_token(float(token), "num")
                token = ""
                state = ST_ANY
        elif state == ST_FLOAT:
            if c in NUMBERS:
                token += consume()
            else:
                yield lexer_token(float(token), "num")
                token = ""
                state = ST_ANY
        elif state == ST_STRING:
            if c == '"':
                consume()
                yield lexer_token(token, "str")
                token = ""
                state = ST_ANY
            else:
                token += consume()
                if len(token) > 65536:
                    raise BasicSyntaxError(F"String too long (> 65536).")
    # BUG FIX: flush any token still in progress at end of input; the
    # original silently dropped a trailing identifier or number.
    if state == ST_REF:
        if token in BOOLEAN_OPERATORS:
            yield lexer_token(token, "op")
        else:
            yield lexer_token(token, "id")
    elif state == ST_INT:
        yield lexer_token(float(token), "num")
    elif state == ST_FLOAT:
        yield lexer_token(float(token), "num")
    elif state == ST_STRING:
        raise BasicSyntaxError("END of line in string.")
    return
class Lexer:
    """
    State-machine lexer for BASIC source text.

    Yields lexer_token tuples with types "id" (identifiers), "op" (symbolic
    and textual operators), "num" (numeric literals, as float) and "str"
    (double-quoted strings, quotes stripped).
    """

    def __init__(self):
        pass

    def scan_for_keyword(self, array, text):
        """
        Find any strings matching an element of array in text.

        A match must start at text[0]; the first fully-matched candidate
        is returned.

        :param array: Candidate keyword strings.
        :param text: The text to scan.
        :return: The matched keyword, or None.
        """
        match = ""
        for i, c in enumerate(text):
            match += c
            # Keep only candidates that still agree with the prefix read so far.
            potentials = [op for op in array if i < len(op) and op[i] == match[i]]
            if not potentials:
                return None
            for p in potentials:
                if i + 1 == len(p):
                    return p
            array = potentials
        return None

    def lex(self, text):
        """Tokenize text completely and return the tokens as a list."""
        return list(self.lex2(text))

    def lex2(self, text):
        """
        Generator that tokenizes text with a small state machine.

        :param text: The source string to lex; must not be None.
        :return: Yields lexer_token(value, type),
            type in {"id", "op", "num", "str"}.
        :raises BasicSyntaxError: On bad characters, an over-long string
            literal, or end of input inside a string.
        """
        token = ""
        index = 0

        def cur():
            # Current character, or None at end of input.
            assert text is not None
            if index == len(text):
                return None
            return text[index]

        def peek():
            # Character after the current one, or None at end of input.
            if index + 1 == len(text):
                return None
            return text[index + 1]

        def consume():
            # Advance past the current character and return it.
            nonlocal index
            current = text[index]
            index += 1
            return current  # So we can get and consume in one operation.

        ST_ANY = 1     # Between tokens.
        ST_REF = 2     # In an identifier or keyword (VAR, FUNC, ...).
        ST_INT = 3     # In the integer part of a number.
        ST_FLOAT = 4   # After the decimal point of a number.
        ST_STRING = 5  # Inside a double-quoted string.
        state = ST_ANY
        # BUG FIX: removed a stray debug print(state, c) executed per character.
        while (c := cur()) is not None:
            if state == ST_ANY:
                if c == ' ' or c == '\t':
                    # BUG FIX: tabs were only "handled" by an unreachable
                    # branch in the original; skip them like spaces.
                    consume()
                elif c in LETTERS:
                    token = consume()
                    state = ST_REF
                elif c in NUMBERS:
                    token = consume()
                    state = ST_INT
                elif c == '.':  # Number starts with ., like ".5"
                    token = "0" + consume()
                    state = ST_FLOAT
                elif c in OP_FIRST:
                    p = peek()
                    if c not in OP_TWO_FIRST:
                        consume()
                        yield lexer_token(c, "op")
                    elif p is not None and c + p in OP_TWO:
                        # BUG FIX: guard p against None (operator at end of
                        # input previously raised TypeError on c + p).
                        consume()
                        consume()
                        yield lexer_token(c + p, "op")
                    else:
                        consume()
                        yield lexer_token(c, "op")
                    token = ""
                elif c == '"':
                    consume()
                    state = ST_STRING
                    token = ""
                else:
                    raise BasicSyntaxError(
                        F"Unexpected char {c} in state {state}")
            elif state == ST_REF:
                if c in NUMBERS:  # Need to check for A1$
                    token += consume()
                    yield lexer_token(token, "id")
                    token = ""
                    state = ST_ANY
                elif c in LETTERS:
                    token += consume()
                else:
                    if token in BOOLEAN_OPERATORS:
                        yield lexer_token(token, "op")
                    else:
                        yield lexer_token(token, "id")
                    token = ""
                    state = ST_ANY
            elif state == ST_INT:
                if c in NUMBERS:
                    token += consume()
                elif c == '.':
                    token += consume()
                    state = ST_FLOAT
                else:
                    yield lexer_token(float(token), "num")
                    token = ""
                    state = ST_ANY
            elif state == ST_FLOAT:
                if c in NUMBERS:
                    token += consume()
                else:
                    yield lexer_token(float(token), "num")
                    token = ""
                    state = ST_ANY
            elif state == ST_STRING:
                if c == '"':
                    consume()
                    yield lexer_token(token, "str")
                    token = ""
                    state = ST_ANY
                else:
                    token += consume()
                    if len(token) > 65536:
                        raise BasicSyntaxError(F"String too long (> 65536).")
        # check for tokens in progress
        if state == ST_REF:
            # CONSISTENCY FIX: a trailing keyword (e.g. "AND") is an operator,
            # matching the mid-stream ST_REF handling.
            if token in BOOLEAN_OPERATORS:
                yield lexer_token(token, "op")
            else:
                yield lexer_token(token, "id")
        elif state == ST_INT:
            # CONSISTENCY FIX: yield float, matching every mid-stream numeric
            # token (the original yielded int only in this one spot).
            yield lexer_token(float(token), "num")
        elif state == ST_FLOAT:
            yield lexer_token(float(token), "num")
        elif state == ST_STRING:
            raise BasicSyntaxError("END of line in string.")
        return
def find_line(self, line_number):
    """
    Locate a program line by its BASIC line number.

    :param line_number: The BASIC line number to search for.
    :return: A ControlLocation pointing at that line (offset 0).
    :raises BasicSyntaxError: If no such line exists in the program.
    """
    matches = (
        ControlLocation(index=position, offset=0)
        for position, statement in enumerate(self._program)
        if statement.line == line_number
    )
    for location in matches:
        return location
    raise BasicSyntaxError(F"No line {line_number} found.")