def tokenize_line(program_line: str) -> ProgramLine:
    """
    Converts one source line into a partially digested ProgramLine.

    Tokenizing BASIC is mildly annoying, as there may not be a delimiter between
    the command and its arguments. Example:
        FORI=1TO8:FORJ=1TO8:K3=0:Z(I,J)=0:R1=RND(1)
    The FOR runs right into the I, so statements have to be found by prefix search.

    :param program_line: Raw text of the line: a line number, one space, then the statements.
    :return: A ProgramLine whose third field is the placeholder -1 (tokenize() fills in
        the real value), or None if program_line is empty.
    :raises BasicSyntaxError: Re-raised from tokenize_statements after a diagnostic is printed.
    """
    if len(program_line) == 0:
        return None

    # partition() never fails to unpack. The previous split(" ", 1) raised a raw
    # ValueError for a line without any space (e.g. "100" or "10PRINT"). Such a line now
    # either fails the line number check below, or is handled exactly like "100 ".
    number, _, partial = program_line.partition(" ")
    assert_syntax(str.isdigit(number), F"Line number is not in correct format: {number}")
    number = int(number)

    # Rem commands don't split on colons, other lines do.
    if partial.startswith(Keywords.REM.name):
        commands_text = [partial]
    else:
        commands_text = smart_split(partial)

    try:
        list_of_statements = tokenize_statements(commands_text)
    except BasicSyntaxError as bse:
        print(F"Syntax Error in line {number}: {bse.message}: {program_line}")
        print()
        raise

    return ProgramLine(number, list_of_statements, -1, source=program_line)
def tokenize(program_lines: list[str]) -> list[ProgramLine]:
    """
    Tokenizes a whole program and links each line to its successor.

    :param program_lines: The source lines of the program. Lines for which tokenize_line
        returns None (blank lines) are skipped.
    :return: One ProgramLine per non-blank source line. The third field of each is the
        index (into the returned list) of the line that follows it, or None for the last line.
        An empty program yields an empty list.
    """
    tokenized_lines = []
    last_line = None
    for line in program_lines:
        tokenized_line = tokenize_line(line)
        if tokenized_line is None:
            continue  # Ignore blank lines.
        if last_line is not None:
            # Line numbers must be strictly increasing. The message reports the number of
            # the preceding line (it used to interpolate the current source text instead).
            assert_syntax(
                tokenized_line.line > last_line,
                F"Line {tokenized_line.line} is <= the preceding line {last_line}: {line}")
        tokenized_lines.append(tokenized_line)
        last_line = tokenized_line.line

    # Set default execution of next line: every line falls through to the one after it,
    # and the final line has no successor.
    final_index = len(tokenized_lines) - 1
    return [
        ProgramLine(
            parsed.line,
            parsed.stmts,
            index + 1 if index < final_index else None,
            source=parsed.source)
        for index, parsed in enumerate(tokenized_lines)
    ]
def __init__(self, keyword, args):
    """
    Parses the arguments of an IF statement: "<condition> THEN <clause>".

    :param keyword: The statement keyword, passed through to the base class.
    :param args: The text following the IF keyword.
    """
    # The base class is initialised exactly once. The original repeated this call at the
    # end of the method, which was redundant.
    super().__init__(keyword, "")

    then = args.find("THEN")
    assert_syntax(then != -1, "No THEN found for IF")

    # Everything after THEN is kept as raw, stripped text.
    self._additional = args[then + len("THEN"):].strip()

    # Everything before THEN is the condition; it is lexed once, here.
    lexer = get_lexer()
    self._tokens = lexer.lex(args[:then])
def do_next_peek(self, var):
    """
    Returns the record of the innermost active FOR loop without removing it,
    after checking that it belongs to the given loop variable.

    :param var: The variable named by the NEXT statement.
    :return: The for_record on top of the FOR stack.
    """
    active_loops = self._for_stack
    assert_syntax(len(active_loops) > 0, "NEXT without FOR")

    innermost = active_loops[-1]
    expected = innermost.var
    assert_syntax(expected == var, F"Wrong NEXT. Expected {expected}, got {var}")
    return innermost
def do_for(self, var, start, stop, step, stmt):
    """
    Begins a FOR loop by pushing a record for it on the FOR stack.

    :param var: The index variable of the FOR loop.
    :param start: The starting value. It is not stored in the record by this method.
    :param stop: The upper limit. In BASIC, it is inclusive.
    :param step: The amount to increment the index after each loop.
    :param stmt: Stored in the record as given.
    :return: None
    """
    # Guard against runaway nesting before recording the new loop.
    depth = len(self._for_stack)
    assert_syntax(depth < 1000, "FORs nested too deeply")

    record = ForRecord(var, stop, step, stmt)
    self._for_stack.append(record)
def __init__(self, keyword, args):
    """
    Parses the arguments of a FOR statement: "<index>=<start> TO <limit> [STEP <step>]".

    The clauses are stored as stripped text; the step clause defaults to '1'.

    :param keyword: The statement keyword, passed through to the base class.
    :param args: The text following the FOR keyword.
    """
    super().__init__(keyword, "")

    eq_at = args.find("=")
    to_at = args.find("TO")
    step_at = args.find("STEP")
    assert_syntax(eq_at != -1, "No = found for FOR")
    assert_syntax(to_at != -1, "No TO found for FOR")

    limit_begin = to_at + len("TO")

    self._index_clause = args[:eq_at].strip()  # TODO convert to int here.
    self._start_clause = args[eq_at + 1:to_at].strip()
    if step_at == -1:
        # No STEP: the limit runs to the end of the text.
        self._to_clause = args[limit_begin:].strip()
        self._step_clause = '1'
    else:
        self._to_clause = args[limit_begin:step_at].strip()
        self._step_clause = args[step_at + len("STEP"):].strip()
def put_symbol_element(self, symbol, value, subscripts):
    """
    Stores a value into one element of an array symbol.

    :param symbol: The name of the array.
    :param value: The value to store.
    :param subscripts: A sequence of indexes, one per dimension, used directly to index
        the nested lists that hold the array.
    :return: None
    """
    # TODO Maybe check is_valid_variable here? Have to allow user defined functions, and built-ins, though.
    if self._trace_file:
        print(F"\t\t{symbol}{subscripts}={value}, array element", file=self._trace_file)

    target = self.get_symbol(symbol, SymbolType.ARRAY)
    target_type = self.get_symbol_type(symbol, SymbolType.ARRAY)
    assert_syntax(
        target_type == SymbolType.ARRAY,
        F"Can't subscript non-array {symbol} of type {target_type}")
    assert_syntax(len(subscripts) > 0, F"Missing subscript for array {symbol}")

    # Walk down to the innermost list. Every index is range checked, so a bad subscript
    # is reported as a BASIC error instead of an IndexError, and a negative index can
    # no longer silently address elements from the end of the list.
    container = target
    for subscript in subscripts[:-1]:
        assert_syntax(
            0 <= subscript < len(container),
            F"Array subscript out of bounds for {symbol}")
        container = container[subscript]

    last = subscripts[-1]
    assert_syntax(0 <= last < len(container), F"Array subscript out of bounds for {symbol}")
    container[last] = value
def get_program_lines(self, start=0, count=None) -> list[str]:
    """
    Returns the source text of a range of program lines. Used to implement the LIST command.

    :param start: Index of the first line to return. Must satisfy 0 <= start < len(program).
    :param count: How many lines to return; None means "as many as the program has".
    :return: list[str] of source lines, clipped to the end of the program.
    """
    program = self._program
    length = len(program)
    if count is None:
        count = length
    assert_syntax(0 <= start < length, "Line number out of range.")

    # Never run past the last line.
    stop = min(start + count, length)
    return [entry.source for entry in program[start:stop]]
def __init__(self, keyword, args):
    """
    Parses the arguments of an INPUT statement: an optional prompt followed by ";",
    then a comma separated list of variable names.

    :param keyword: The statement keyword, passed through to the base class.
    :param args: The text following the INPUT keyword.
    """
    super().__init__(keyword, "")

    parts = smart_split(args, split_char=";")
    if len(parts) == 1:
        # No prompt
        self._prompt = ""
        variable_text = parts[0]
    else:
        assert_syntax(len(parts) == 2, "INPUT statment should only have one ;")
        self._prompt = parts[0].strip()
        variable_text = parts[1]

    names = [name.strip() for name in variable_text.split(",")]
    # Called once per name; its return value is not used here.
    for name in names:
        is_valid_identifier(name)
    self._input_vars = names
def __init__(self, keyword, args):
    """
    Parses a user function definition of the exact form "FNx(y)=<expression>".

    :param keyword: The statement keyword, passed through to the base class.
    :param args: The text following the keyword.
    :raises BasicSyntaxError: If args contains no '='.
    """
    super().__init__(keyword, "")

    try:
        variable, value = args.split("=", 1)
    except ValueError:
        # Only the failed two-way unpacking means "no '='". Catching ValueError alone
        # keeps unrelated errors from being disguised as a syntax error.
        raise BasicSyntaxError(F"Error in expression. No '='.") from None

    variable = variable.strip()
    # The header must be exactly six characters: "FN", a name character, "(", one
    # argument character, ")".
    assert_syntax(
        len(variable) == 6
        and variable.startswith("FN")
        and variable[3] == '('
        and variable[5] == ')',
        "Function definition error")
    self._function_arg = variable[4]
    self._variable = variable[:3]

    lexer = get_lexer()
    self._tokens = lexer.lex(value)
    self._value = value.strip()
def __init__(self, keyword, args):
    """
    Parses the arguments of an ON statement: "<expression> GOTO|GOSUB <line>,<line>,...".

    GOTO is looked for first; GOSUB is only tried when no GOTO is present.

    :param keyword: The statement keyword, passed through to the base class.
    :param args: The text following the ON keyword.
    """
    super().__init__(keyword, "")

    self._op = "GOTO"
    delim = args.find(self._op)
    if delim == -1:
        self._op = "GOSUB"
        delim = args.find(self._op)
    assert_syntax(delim != -1, F"No GOTO/GOSUB found for ON statement")

    self._expression = args[:delim].strip()

    # Each comma separated target must be a plain line number; it is stored as an int.
    target_text = args[delim + len(self._op):].strip()
    targets = []
    for piece in target_text.split(","):
        piece = piece.strip()
        assert_syntax(str.isdigit(piece), F"Invalid line {piece} for target of ON GOTO/GOSUB")
        targets.append(int(piece))
    self._target_lines = targets
def stmt_on(executor, stmt):
    """
    Executes an ON ... GOTO / ON ... GOSUB statement.

    The expression is evaluated and truncated to an int, which selects (1-based) one of
    the target lines. If it selects no target, execution just continues.

    :param executor: The executor; its _symbols, goto_line and gosub are used.
    :param stmt: The parsed ON statement (_expression, _op, _target_lines).
    :return: None
    """
    var = stmt._expression
    op = stmt._op
    result = eval_expression(executor._symbols, var)
    assert_syntax(type(result) in (int, float), "Expression not numeric in ON GOTO/GOSUB")
    index = int(result) - 1  # Basic is 1-based.

    # According to this: https://hwiegman.home.xs4all.nl/gw-man/ONGOSUB.html
    # ON GOSUB does NOT generate an error if the value is out of range,
    # it just goes on to the next line.
    # The lower bound matters too: a value of 0 used to produce index -1, which Python
    # treats as "last element", so the statement jumped to the final target.
    # Values below zero are treated the same way here.
    if index < 0 or index >= len(stmt._target_lines):
        # No line matching the index, just go on.
        return

    target = stmt._target_lines[index]
    if op == "GOTO":
        executor.goto_line(target)
    elif op == "GOSUB":
        executor.gosub(target)
    else:
        assert_syntax(False, "Bad format for ON statement.")
def check_args(self, stack):
    """
    Validates the arguments on top of the stack: the count must match self._arg_count,
    the first argument must be a string, and the second and third (when expected) must
    be numbers.

    :param stack: The data stack; only its top entry is inspected here.
    :return: None
    """
    super().check_args(stack)

    # Functions get their arguments in an array of parameters
    args = stack[-1].token
    if not isinstance(args, list):
        # Right now, function args are delivered in a list, only if there is more than one. TODO
        args = [args]

    expected = self._arg_count
    assert_syntax(len(args) == expected, F"Wrong number of arguments {len(args)} for {self._name}")
    assert_syntax(isinstance(args[0], str), F"First operand of {self._name} must be a string.")
    if expected >= 2:
        assert_syntax(
            isinstance(args[1], (int, float)),
            F"Second operand of {self._name} must be a number.")
    if expected == 3:
        assert_syntax(
            isinstance(args[2], (int, float)),
            F"Third operand of {self._name} must be a number.")
def __init__(self, keyword, args):
    """
    Parses the arguments of a PRINT statement into a list of output items.

    Items are separated by ";". A trailing ";" suppresses the carriage return.

    :param keyword: The statement keyword, passed through to the base class.
    :param args: The text following the PRINT keyword.
    """
    super().__init__(keyword, "")

    text = args.strip()
    self._no_cr = text.endswith(";")
    self._outputs = []

    # TODO have a print_arg type, that tells stmt_print whether it is a quoted string or an expression
    # Of course, a quoted string should be an expression, so maybe I don't need both branches.
    for piece in smart_split(text, split_char=";"):
        piece = piece.strip()
        if not piece:
            continue
        if piece.startswith('"'):
            # quoted string: it must also end with a quote.
            assert_syntax(piece.endswith('"'), "String not properly quoted for 'PRINT'")
        # Strings and expressions are both stored as raw text.
        # TODO Parse it here, evaluate in stmt_print
        self._outputs.append(piece)
def eval1(self, first, *, op):
    """
    Evaluates an array access.

    :param first: The subscript: a single number, or a list of numbers for a
        multidimensional access. ARRAY_OFFSET is subtracted from each one.
    :param op: The operator token; op.arg is the array name and op.symbols the symbol table.
    :return: The element (or nested list) found at the given subscripts.
    """
    array_name = op.arg
    variable = op.symbols.get_symbol_value(array_name, SymbolType.ARRAY)
    variable_type = op.symbols.get_symbol_type(array_name, SymbolType.ARRAY)
    # Report the array's name; the message used to interpolate its whole contents.
    assert_syntax(
        variable_type == SymbolType.ARRAY,
        F"Array access to non-array variable '{array_name}'")

    if isinstance(first, list):
        # Multidimensional array access
        # TODO check type and syntax error. No strings, no arrays
        args = [int(arg) - ARRAY_OFFSET for arg in first]
        v = variable
        for arg in args:
            assert_syntax(type(v) is list, F"Too many array dimensions for {array_name} subscript.")
            # The lower bound is checked as well: a negative index would otherwise
            # silently read from the end of the list.
            assert_syntax(0 <= arg < len(v), F"Array subscript out of bounds for {array_name}")
            v = v[arg]
        return v

    # TODO should only need the above.
    assert_syntax(int(first) == first, F"Non-integral array subscript {first}'")
    subscript = int(first) - ARRAY_OFFSET
    # This path previously had no range check at all, so a bad subscript either raised
    # IndexError or, when negative, returned an element counted from the end.
    assert_syntax(
        0 <= subscript < len(variable),
        F"Array subscript out of bounds for {array_name}")
    return variable[subscript]
def __init__(self, keyword, args):
    """
    Parses the arguments of a DIM statement, e.g. "A(8), B1$(3,4)".

    self._dimensions becomes a list of (name, [size, ...]) tuples. A name is one
    character, optionally followed by one character from NUMBERS, optionally followed by "$".

    :param keyword: The statement keyword, passed through to the base class.
    :param args: The text following the DIM keyword.
    """
    super().__init__(keyword, "")
    self._dimensions = []

    stmts = smart_split(args.strip(), enquote="(", dequote=")", split_char=",")
    for s in stmts:
        s = s.strip()
        # Check the length BEFORE indexing, so an empty declaration is reported as a
        # syntax error instead of an IndexError.
        assert_syntax(len(s) > 1, "Missing dimensions")

        # TODO a 'get_identifier' function
        name = s[0]
        if s[1] in NUMBERS:
            name += s[1]
        # Slicing cannot run off the end of the text (e.g. "A1") the way indexing could.
        if s[len(name):len(name) + 1] == "$":
            name += "$"

        dimensions = s[len(name):]
        # startswith/endswith are safe on an empty remainder (e.g. "A$").
        assert_syntax(dimensions.startswith('('), "Missing (")
        assert_syntax(dimensions.endswith(')'), "Missing )")

        dimensions = dimensions[1:-1]  # Remove parens
        try:
            sizes = [int(dimension) for dimension in dimensions.split(",")]
        except ValueError:
            # Not an integer literal, e.g. "A(N)" or "A()".
            sizes = None
        assert_syntax(sizes is not None, F"Invalid dimensions for {name}")
        self._dimensions.append((name, sizes))
def check_args(self, stack):
    """
    Checks that the stack holds at least one operand.

    :param stack: The data stack to check.
    :return: None
    """
    # NOTE(review): the message says "binary operator" although only one operand is
    # required here - confirm which of the two is intended.
    operand_count = len(stack)
    assert_syntax(operand_count >= 1, "Not enough operands for binary operator")
def get_op_def(operator:str):
    """
    Looks up an operator in OP_MAP.

    :param operator: The operator to look up.
    :return: The .value of the OP_MAP entry for the operator.
    """
    is_known = operator in OP_MAP
    assert_syntax(is_known, F"Invalid operator {operator}")
    entry = OP_MAP[operator]
    return entry.value
def check_args(self, stack):
    """
    Checks the two topmost operands for '+': each must be a number or a string,
    and both must be of the same kind.

    :param stack: The data stack; its top two entries are inspected.
    :return: None
    """
    super().check_args(stack)
    allowed = ("num", "str")

    # The operands are looked at one at a time, top of stack first.
    top = stack[-1]
    assert_syntax(top.type in allowed, "Operand not string or number.'")
    below = stack[-2]
    assert_syntax(below.type in allowed, "Operand not string or number.")
    assert_syntax(top.type == below.type, "Operands don't match (string vs number) for '+'")
def check_args(self, stack):
    """
    Checks that the two topmost operands are both numeric.

    :param stack: The data stack; its top two entries are inspected.
    :return: None
    """
    super().check_args(stack)
    # Top of stack first, then the entry below it.
    for position in (-1, -2):
        operand = stack[position]
        assert_syntax(operand.type == "num", "Operand not numeric for binary op")
def do_next_pop(self, var):
    """
    Removes the innermost FOR record and checks that it belonged to the given variable.

    :param var: The variable named by the NEXT statement.
    :return: None
    """
    assert_syntax(len(self._for_stack) > 0, "NEXT without FOR")
    # The record is removed first; the variable check happens afterwards.
    finished = self._for_stack.pop()
    expected = finished.var
    assert_syntax(expected == var, F"Wrong NEXT. Expected {expected}, got {var}")
def do_return(self):
    """
    Executes a RETURN: pops the most recent entry of the GOSUB stack and
    passes it to _goto_location.

    :return: None
    """
    pending = len(self._gosub_stack)
    assert_syntax(pending, "RETURN without GOSUB")
    self._goto_location(self._gosub_stack.pop())
def eval(self, tokens: list[lexer_token], *, symbols=None) -> lexer_token:
    """
    Evaluates an expression, like "2+3*5-A+RND()", using an operator stack and a data stack.

    :param tokens: The incoming list[lexer_token].
    :param symbols: Symbols (BASIC variables) to use when evaluating the expression.
        If None, an empty SymbolTable is used.
    :return: The value of the expression. NOTE: despite the return annotation, this is
        the .token of the single remaining data stack entry, not the lexer_token itself.
    :raises BasicSyntaxError: If tokens is empty.
    :raises UndefinedSymbol: If an identifier is not defined in symbols.
    """
    from basic_operators import get_op, get_precedence  # Import it in two places, so the IDE knows it's there.

    # "-" is ambiguous. It can mean subtraction or unary minus.
    # If "-" follows a data item, it's subtraction.
    # If "-" follows an operator, it's unary minus, unless the operator is ")":
    # a closing paren completes a value, so a following "-" is a subtraction.
    is_unary_context = True
    assert type(symbols) != dict
    if symbols is None:  # Happens during testing.
        symbols = SymbolTable()  # TODO Fix this. No "if test" allowed.

    if len(tokens) == 0:
        raise BasicSyntaxError(F"No expression.")

    data_stack = []
    op_stack: list = []
    token_index = 0
    while token_index < len(tokens):
        current = tokens[token_index]
        if current.type == "op":
            if current.token == "-" and is_unary_context:
                current = lexer_token(UNARY_MINUS, current.type)

            # Do anything on the stack that has higher precedence.
            while len(op_stack):
                top = op_stack[-1]
                # This makes everything left associative. I think that's ok. Might be wrong for exponentiation
                # This says visual basic was left associative for everything.
                # https://docs.microsoft.com/en-us/dotnet/visual-basic/language-reference/operators/operator-precedence
                # This shows left associative exponentiation: (they use **, not ^)
                # http://www.quitebasic.com/
                if top.token != "(" and get_precedence(top) >= get_precedence(current):  # Check operator precedence
                    self.one_op(op_stack, data_stack)
                else:
                    break

            if current.token != ")":
                op_stack.append(OP_TOKEN(current.token, current.type, None, None, symbols=None))
            else:
                # A ")" must be matched by a "(" that is on top of the stack right now.
                # Look at the stack itself rather than at the loop variable above: when
                # the stack is empty that variable is stale or was never assigned, which
                # used to end in an UnboundLocalError or in popping an empty list.
                assert_syntax(
                    len(op_stack) > 0 and op_stack[-1].token == "(",
                    F"Unbalanced parens.")
                op_stack.pop()

            is_unary_context = current.token != ")"
        else:
            if current.type == "id":
                # TODO Problem: We now need to know the SymbolType of a variable to retrieve it
                # but we don't know it here. Maybe we can defer referencing it, until it is
                # used? At that point, we would know array vs function. I think.
                # I think this works:
                symbol_type = self.get_type_from_name(current, tokens, token_index)

                if not symbols.is_symbol_defined(current.token, symbol_type):
                    raise UndefinedSymbol(F"Undefined variable: '{current.token}'")
                symbol_value = symbols.get_symbol(current.token, symbol_type)
                symbol_type2 = symbols.get_symbol_type(current.token, symbol_type)
                # Changed the way that symbols tables work. Check that we are still consistent.
                assert (symbol_type == symbol_type2)

                if symbol_type == SymbolType.VARIABLE:
                    # A trailing "$" marks a string variable.
                    if current.token.endswith("$"):
                        data_stack.append(lexer_token(symbol_value, "str"))
                    else:
                        data_stack.append(lexer_token(symbol_value, "num"))
                elif symbol_type == SymbolType.FUNCTION:
                    # Handle function as operators. Lower priority than "(", but higher than everything else.
                    # So don't append this to the data stack, append it to the op stack as a function.
                    arg = symbols.get_symbol_arg(current.token, SymbolType.FUNCTION)
                    op_stack.append(
                        OP_TOKEN(current.token, SymbolType.FUNCTION, arg, symbol_value, symbols=symbols))
                else:
                    # Array access
                    arg = current.token
                    op_stack.append(OP_TOKEN(ARRAY_ACCESS, "array_access", arg, None, symbols=symbols))
            else:
                data_stack.append(current)
            is_unary_context = False
        token_index += 1

    # Do anything left on the stack
    while len(op_stack):
        self.one_op(op_stack, data_stack)

    assert_syntax(len(op_stack) == 0, F"Expression not completed.")
    assert_syntax(len(data_stack) == 1, F"Data not consumed.")
    return data_stack[0].token
def stmt_gosub(executor, stmt: ParsedStatementGo):
    """
    Executes a GOSUB statement.

    :param executor: The executor; its gosub method is called with the target line number.
    :param stmt: The parsed statement; stmt.destination is the target as text.
    :return: None
    """
    target = stmt.destination
    is_line_number = str.isdigit(target)
    assert_syntax(is_line_number, F"Gosub target is not an int ")

    line_number = int(target)
    executor.gosub(line_number)
    return None
def __init__(self, keyword, args):
    """
    Parses a statement that must not have any arguments.

    :param keyword: The statement keyword.
    :param args: The text following the keyword; it must be empty or whitespace only.
    """
    super().__init__(keyword, "")
    self.keyword = keyword

    remaining = args.strip()
    assert_syntax(len(remaining) == 0, "Command does not take any arguments.")
    self.args = ""
def __init__(self, keyword, args):
    """
    Parses the arguments of a GOTO or GOSUB statement: a single line number.

    :param keyword: The statement keyword, passed through to the base class.
    :param args: The text following the keyword. It is stored, stripped and still as
        text, in self.destination.
    """
    super().__init__(keyword, "")

    target = args.strip()
    self.destination = target
    assert_syntax(str.isdigit(target), F"GOTO/GOSUB target is not an int ")