def validate():
    """Lex the posted `data` form field and echo the token values back.

    Returns a comma-separated string of every token's `token_value`.
    """
    lexer = Lexer(request.form['data'])
    token_values = []
    # Drain the lexer one token at a time, keeping only the values.
    while lexer.has_next():
        token_values.append(lexer.next().token_value)
    return ', '.join(token_values)
def parse(self):
    """Tokenize the stored equation string and build its AST.

    Populates self.tokens, self.vars, self.tree and self.exprs from
    self.eqn_string.
    """
    lexer = Lexer(self.eqn_string)
    self.tokens = lexer.obilisk_lex()
    # Copy so later lexer mutations cannot alias our variable table.
    self.vars = deepcopy(lexer.vars)
    # The presence of variables changes how the tree is assembled.
    builder = (TreeBuilder(self.tokens, has_var=True)
               if self.vars else TreeBuilder(self.tokens))
    self.tree, self.exprs = builder.build_tree()
def test_multiple_control_comes_out_as_2_token(self):
    """Two space-separated control keywords lex into two Control tokens."""
    result = Lexer("""if else\n""").tokenize()
    self.assertListEqual(result, [
        Token(type="Control", value="if"),
        Token(type="Control", value="else"),
    ])
def test_multiple_string_inputs_comes_out_as_2_token(self):
    """Two quoted strings lex into two String tokens without the quotes."""
    result = Lexer(""""abc" "xyz"\n""").tokenize()
    self.assertListEqual(result, [
        Token(type="String", value="abc"),
        Token(type="String", value="xyz"),
    ])
def test_multiple_float_comes_out_as_2_token(self):
    """Two space-separated decimals lex into two Float tokens."""
    result = Lexer("""1.9 2.5\n""").tokenize()
    self.assertListEqual(result, [
        Token(type="Float", value=1.9),
        Token(type="Float", value=2.5),
    ])
def test_multiple_variable_comes_out_as_2_token(self):
    """Two space-separated identifiers lex into two Variable tokens."""
    result = Lexer("""testVariableA testVariableB\n""").tokenize()
    self.assertListEqual(result, [
        Token(type="Variable", value="testVariableA"),
        Token(type="Variable", value="testVariableB"),
    ])
def test_multiple_operator_comes_out_as_2_token(self):
    """Two space-separated operators lex into two Operator tokens."""
    result = Lexer("""+ -\n""").tokenize()
    self.assertListEqual(result, [
        Token(type="Operator", value="+"),
        Token(type="Operator", value="-"),
    ])
def test_arithmetic_expression(self):
    """`5+7` lexes to Int, Operator, Int — no whitespace required."""
    actual = Lexer("""5+7\n""").tokenize()
    expected = [
        Token(type="Int", value=5),
        Token(type="Operator", value="+"),
        Token(type="Int", value=7)
    ]
    # assertListEqual for consistency with the other lexer tests and
    # clearer element-wise diff output on failure.
    self.assertListEqual(actual, expected)
def test_method_call(self):
    """A call expression lexes to Variable, `(`, String, `)`."""
    actual = Lexer("""print("hello world")\n""").tokenize()
    expected = [
        Token(type="Variable", value="print"),
        Token(type="Operator", value="("),
        Token(type="String", value="hello world"),
        Token(type="Operator", value=")")
    ]
    # assertListEqual for consistency with the other lexer tests.
    self.assertListEqual(actual, expected)
def test_arithmetic_assignment_expression(self):
    """Assignment of an arithmetic expression lexes to five tokens."""
    actual = Lexer("""testVar = 5 + 7\n""").tokenize()
    expected = [
        Token(type="Variable", value="testVar"),
        Token(type="Operator", value="="),
        Token(type="Int", value=5),
        Token(type="Operator", value="+"),
        Token(type="Int", value=7)
    ]
    # assertListEqual for consistency with the other lexer tests.
    self.assertListEqual(actual, expected)
def test_assorted_space_characters_between_tokens_are_removed(self):
    """Mixed spaces and tabs between tokens are dropped, not tokenized."""
    actual = Lexer(""" 1 3\t5 \n""").tokenize()
    expected = [
        Token(type="Int", value=1),
        Token(type="Int", value=3),
        Token(type="Int", value=5)
    ]
    # assertListEqual for consistency with the other lexer tests.
    self.assertListEqual(actual, expected)
class ExpressionLexer(Lexer):
    # Lexer for simple arithmetic expressions.
    # Token table is compiled once, at class-definition time.
    # NOTE(review): the table order likely determines match priority
    # (e.g. multi-char patterns before overlapping ones) — do not reorder
    # without confirming how Lexer.compile_regexes tries the patterns.
    Lexer.compile_regexes([
        (r'\d+', 'NUMBER'),
        (r'\+', 'PLUS'),
        (r'-', 'MINUS'),
        (r'/', 'DIVIDE'),
        (r'\*', 'TIMES'),
        (r'\(', 'LEFT_PAREN'),
        (r'\)', 'RIGHT_PAREN'),
        (r'\s+', 'SPACE'),
    ])
def test_all_possible_controls(self):
    """Every reserved control word is recognised as a Control token."""
    keywords = ["if", "elif", "else", "for", "while", "in", "or", "and"]
    result = Lexer("""if elif else for while in or and\n""").tokenize()
    self.assertListEqual(
        result, [Token(type="Control", value=word) for word in keywords])
def test_conditional_expression(self):
    """A conditional with an assignment body lexes into eight tokens."""
    # Todo: should this throw an error?
    actual = Lexer(""" if 5==5: v = 5\n""").tokenize()
    expected = [
        Token(type="Control", value="if"),
        Token(type="Int", value=5),
        Token(type="Operator", value="=="),
        Token(type="Int", value=5),
        Token(type="Operator", value=":"),
        Token(type="Variable", value="v"),
        Token(type="Operator", value="="),
        Token(type="Int", value=5)
    ]
    # assertListEqual for consistency with the other lexer tests.
    self.assertListEqual(actual, expected)
def test_single_variable_comes_out_as_1_token(self):
    """A lone identifier lexes into exactly one Variable token."""
    result = Lexer("""testVariable\n""").tokenize()
    self.assertListEqual(result,
                         [Token(type="Variable", value="testVariable")])
def test_no_infinite_floating_on(self):
    """A number with several decimal points is rejected, not lexed forever."""
    malformed = """1.9.1.3\n"""
    with self.assertRaises(Exception):
        Lexer(malformed).tokenize()
class Parser:
    """Recursive-descent parser over the token stream produced by Lexer.

    Each `parse_*` method carries its grammar fragment as a comment or
    docstring.  Errors are reported by highlighting the offending token,
    printing a message, and exiting the process.
    """

    def __init__(self, source):
        self.lexer = Lexer(source)
        self.reset()

    def reset(self):
        """Rewind parsing to the start of the source."""
        self.token = None
        self.lexer.reset()

    def advance(self):
        """Return the current token and pull the next one from the lexer."""
        lexeme = self.token
        self.token = self.lexer.get_token()
        return lexeme

    def skip_newlines(self):
        """Consume any run of NEWLINE tokens."""
        while self.token.type == TokenType.NEWLINE:
            self.advance()

    def parse_anon_field(self):
        """Parse a struct field; both the name and the type are optional."""
        result = Field(None, None)
        if self.token.type == TokenType.IDENTIFIER:
            result.name = self.advance()
        if self.token.type in [
                TokenType.TYPE_IDENTIFIER, TokenType.LEFT_CURLY_BRACKET
        ]:
            result.type = self.parse_type()
        return result

    def parse_type_struct(self):
        """struct-type ::= '{' nls field ((',' | nl nls) field)* nls '}'"""
        if self.token.type != TokenType.LEFT_CURLY_BRACKET:
            error.highlight(self.token.position)
            print("Error: expected a `{` while parsing struct type")
            exit()
        result = StructTypeNode([self.advance()], [])
        self.skip_newlines()
        if self.token.type != TokenType.RIGHT_CURLY_BRACKET:
            result.fields.append(self.parse_anon_field())
            while self.token.type in [TokenType.COMMA, TokenType.NEWLINE]:
                self.advance()
                self.skip_newlines()
                if self.token.type == TokenType.RIGHT_CURLY_BRACKET:
                    break
                result.fields.append(self.parse_anon_field())
        if self.token.type != TokenType.RIGHT_CURLY_BRACKET:
            error.highlight(self.token.position)
            print("Error: expected a `}` while parsing struct type")
            exit()
        result.tokens.append(self.advance())
        return result

    def parse_type_array(self):
        """array-type ::= '[' type ']'"""
        if self.token.type != TokenType.LEFT_SQUARE_BRACKET:
            error.highlight(self.token.position)
            print("Error: expected a `[` while parsing array type")
            exit()
        result = ArrayTypeNode([self.advance()], self.parse_type())
        if self.token.type != TokenType.RIGHT_SQUARE_BRACKET:
            error.highlight(self.token.position)
            print("Error: expected a `]` while parsing array type")
            exit()
        result.tokens.append(self.advance())
        return result

    def parse_type(self):
        """type ::= type-identifier | array-type | struct-type

        NOTE(review): falls through to None when no type starts here;
        callers appear to gate on the token type first — confirm.
        """
        if self.token.type == TokenType.TYPE_IDENTIFIER:
            return IdenTypeNode([self.advance()])
        if self.token.type == TokenType.LEFT_SQUARE_BRACKET:
            return self.parse_type_array()
        if self.token.type == TokenType.LEFT_CURLY_BRACKET:
            return self.parse_type_struct()

    # factor ::= number | identifier | array
    def parse_factor(self):
        if self.token.type == TokenType.NUMBER:
            result = NumNode([self.advance()])
        elif self.token.type == TokenType.IDENTIFIER:
            result = IdenNode([self.advance()])
        elif self.token.type == TokenType.LEFT_SQUARE_BRACKET:
            result = self.parse_array()
        elif self.token.type == TokenType.LEFT_CURLY_BRACKET:
            result = self.parse_struct()
        elif self.token.type == TokenType.LEFT_ROUND_BRACKET:
            result = self.parse_stmts()
        elif self.token.type in [
                TokenType.VAR, TokenType.CON, TokenType.IF, TokenType.WHILE
        ]:
            result = self.parse_stmt()
        else:
            error.highlight(self.token.position)
            print("Error: expected a number or identifier")
            exit()
        # Postfix chains: `a[...]` indexes, `f{...}` calls.
        while self.token.type in [
                TokenType.LEFT_SQUARE_BRACKET, TokenType.LEFT_CURLY_BRACKET
        ]:
            if self.token.type == TokenType.LEFT_SQUARE_BRACKET:
                result = IndexNode([self.advance()], [result])
                result.nodes.append(self.parse_expr())
                if self.token.type != TokenType.RIGHT_SQUARE_BRACKET:
                    error.highlight(self.token.position)
                    print("Error: expected `]` while parsing index")
                    exit()
                result.tokens.append(self.advance())
            elif self.token.type == TokenType.LEFT_CURLY_BRACKET:
                result = CallNode([], [result, self.parse_struct()])
        return result

    # unary ::= ('+' | '-') unary
    #         | factor
    def parse_oper0(self):
        if self.token.type == TokenType.PLUS:
            return PosNode([self.advance()], self.parse_oper0())
        if self.token.type == TokenType.MINUS:
            return NegNode([self.advance()], self.parse_oper0())
        return self.parse_factor()

    # oper1 ::= oper1 ('*' | '/' | '%' | '<<' | '>>') unary
    #         | unary
    def parse_oper1(self):
        result = self.parse_oper0()
        while self.token.type in [
                TokenType.ASTERISK, TokenType.SLASH, TokenType.PERCENT,
                TokenType.SHIFT_LEFT, TokenType.SHIFT_RIGHT
        ]:
            if self.token.type == TokenType.ASTERISK:
                result = MulNode([self.advance()],
                                 [result, self.parse_oper0()])
            elif self.token.type == TokenType.SLASH:
                result = DivNode([self.advance()],
                                 [result, self.parse_oper0()])
            elif self.token.type == TokenType.PERCENT:
                result = ModNode([self.advance()],
                                 [result, self.parse_oper0()])
            elif self.token.type == TokenType.SHIFT_LEFT:
                result = ShlNode([self.advance()],
                                 [result, self.parse_oper0()])
            elif self.token.type == TokenType.SHIFT_RIGHT:
                result = ShrNode([self.advance()],
                                 [result, self.parse_oper0()])
            else:
                error.highlight(self.token.position)
                print("Error: internal error")
                exit()
        return result

    # oper2 ::= oper2 ('+' | '-') oper1
    #         | oper1
    def parse_oper2(self):
        result = self.parse_oper1()
        while self.token.type in [TokenType.PLUS, TokenType.MINUS]:
            if self.token.type == TokenType.PLUS:
                result = AddNode([self.advance()],
                                 [result, self.parse_oper1()])
            elif self.token.type == TokenType.MINUS:
                result = SubNode([self.advance()],
                                 [result, self.parse_oper1()])
            else:
                error.highlight(self.token.position)
                print("Error: internal error")
                exit()
        return result

    # oper3 ::= oper3 ('|' | '&' | '^') oper2
    #         | oper2
    def parse_oper3(self):
        result = self.parse_oper2()
        while self.token.type in [
                TokenType.BITWISE_OR, TokenType.BITWISE_AND,
                TokenType.BITWISE_XOR
        ]:
            if self.token.type == TokenType.BITWISE_OR:
                result = BitOrNode([self.advance()],
                                   [result, self.parse_oper2()])
            elif self.token.type == TokenType.BITWISE_AND:
                result = BitAndNode([self.advance()],
                                    [result, self.parse_oper2()])
            elif self.token.type == TokenType.BITWISE_XOR:
                result = BitXorNode([self.advance()],
                                    [result, self.parse_oper2()])
            else:
                error.highlight(self.token.position)
                print("Error: internal error")
                exit()
        return result

    # oper4 ::= oper3 ('<' | '>' | '<=' | '>=') oper3
    # Comparisons are deliberately non-associative: at most one per level.
    def parse_oper4(self):
        result = self.parse_oper3()
        if self.token.type == TokenType.LESS_THAN:
            return LtNode([self.advance()], [result, self.parse_oper3()])
        if self.token.type == TokenType.GREATER_THAN:
            return GtNode([self.advance()], [result, self.parse_oper3()])
        if self.token.type == TokenType.LESS_THAN_OR_EQUAL:
            return LteNode([self.advance()], [result, self.parse_oper3()])
        if self.token.type == TokenType.GREATER_THAN_OR_EQUAL:
            return GteNode([self.advance()], [result, self.parse_oper3()])
        return result

    # oper5 ::= oper4 ('==' | '!=') oper4
    def parse_oper5(self):
        result = self.parse_oper4()
        if self.token.type == TokenType.EQUAL:
            return EqNode([self.advance()], [result, self.parse_oper4()])
        if self.token.type == TokenType.NOT_EQUAL:
            return NeqNode([self.advance()], [result, self.parse_oper4()])
        return result

    # expr ::= expr ('||' | '&&') oper5
    #        | oper5
    def parse_expr(self):
        result = self.parse_oper5()
        while self.token.type in [TokenType.OR, TokenType.AND]:
            # BUG FIX: the original `return`ed on the first `||`/`&&`,
            # abandoning the loop and leaving the rest of a chain like
            # `a || b || c` unconsumed.  Fold left-associatively instead.
            if self.token.type == TokenType.OR:
                result = OrNode([self.advance()],
                                [result, self.parse_oper5()])
            elif self.token.type == TokenType.AND:
                result = AndNode([self.advance()],
                                 [result, self.parse_oper5()])
        return result

    # array ::= '[' nls expr ((',' | \n nls) expr) nls ']'
    def parse_array(self):
        if self.token.type != TokenType.LEFT_SQUARE_BRACKET:
            error.highlight(self.token.position)
            print("Error: expected a `[` while parsing an array")
            exit()
        result = ArrayNode([self.advance()], [])
        self.skip_newlines()
        if self.token.type != TokenType.RIGHT_SQUARE_BRACKET:
            result.nodes.append(self.parse_expr())
            while self.token.type in [TokenType.COMMA, TokenType.NEWLINE]:
                self.advance()
                self.skip_newlines()
                if self.token.type == TokenType.RIGHT_SQUARE_BRACKET:
                    break
                result.nodes.append(self.parse_expr())
        if self.token.type != TokenType.RIGHT_SQUARE_BRACKET:
            error.highlight(self.token.position)
            print("Error: expected a `]` while parsing an array")
            exit()
        # BUG FIX: the closing `]` is a token, not a child node — the
        # original appended it to `nodes`, unlike every sibling parser.
        result.tokens.append(self.advance())
        return result

    # struct ::= '{' nls expr ((',' | \n nls) expr) nls '}'
    def parse_struct(self):
        if self.token.type != TokenType.LEFT_CURLY_BRACKET:
            error.highlight(self.token.position)
            print("Error: expected a `{` while parsing a struct")
            exit()
        result = StructNode([self.advance()], [])
        self.skip_newlines()
        if self.token.type != TokenType.RIGHT_CURLY_BRACKET:
            result.nodes.append(self.parse_expr())
            while self.token.type in [TokenType.COMMA, TokenType.NEWLINE]:
                self.advance()
                self.skip_newlines()
                if self.token.type == TokenType.RIGHT_CURLY_BRACKET:
                    break
                result.nodes.append(self.parse_expr())
        if self.token.type != TokenType.RIGHT_CURLY_BRACKET:
            error.highlight(self.token.position)
            print("Error: expected a `}` while parsing a struct")
            exit()
        result.tokens.append(self.advance())
        return result

    def _parse_binding(self, keyword_type, node_cls, keyword):
        """Shared body for `var`/`con`: keyword identifier '=' expr.

        The two statements differ only in the leading keyword token and
        the node class produced.
        """
        if self.token.type != keyword_type:
            error.highlight(self.token.position)
            print(f"Error: expected a `{keyword}` while parsing a "
                  f"{keyword} statement")
            exit()
        result = node_cls([self.advance()], [])
        if self.token.type != TokenType.IDENTIFIER:
            error.highlight(self.token.position)
            print(f"Error: expected an identifier while parsing a "
                  f"{keyword} statement")
            exit()
        result.tokens.append(self.advance())
        if self.token.type != TokenType.SET:
            error.highlight(self.token.position)
            print(f"Error: expected a `=` while parsing a "
                  f"{keyword} statement")
            exit()
        result.tokens.append(self.advance())
        result.nodes.append(self.parse_expr())
        return result

    def parse_var(self):
        """var ::= 'var' identifier '=' expr"""
        return self._parse_binding(TokenType.VAR, VarNode, "var")

    def parse_con(self):
        """con ::= 'con' identifier '=' expr"""
        return self._parse_binding(TokenType.CON, ConNode, "con")

    def parse_if(self):
        """if ::= 'if' expr stmts ('else' stmts)?"""
        if self.token.type != TokenType.IF:
            error.highlight(self.token.position)
            print("Error: expected a `if` while parsing an if statement")
            exit()
        result = IfNode(
            [self.advance()], [self.parse_expr(), self.parse_stmts()])
        if self.token.type != TokenType.ELSE:
            return result
        result.tokens.append(self.advance())
        result.nodes.append(self.parse_stmts())
        return result

    def parse_while(self):
        """while ::= 'while' expr stmts"""
        if self.token.type != TokenType.WHILE:
            error.highlight(self.token.position)
            print("Error: expected a `while` while parsing a while statement")
            exit()
        return WhileNode(
            [self.advance()], [self.parse_expr(), self.parse_stmts()])

    def parse_decl_func(self):
        """func ::= 'func' identifier type? ('to' type)? stmts

        nodes holds [argument type, return type, body]; absent parts
        stay None.
        """
        if self.token.type != TokenType.FUNC:
            error.highlight(self.token.position)
            print("Error: expected a `func` while parsing a func declaration")
            exit()
        result = DeclFuncNode([self.advance()], [None, None, None])
        if self.token.type != TokenType.IDENTIFIER:
            error.highlight(self.token.position)
            print(
                "Error: expected an identifier while parsing a func declaration"
            )
            exit()
        result.tokens.append(self.advance())
        if self.token.type in [
                TokenType.TYPE_IDENTIFIER, TokenType.LEFT_CURLY_BRACKET
        ]:
            result.nodes[0] = self.parse_type()
        if self.token.type == TokenType.TO:
            result.tokens.append(self.advance())
            if self.token.type not in [
                    TokenType.TYPE_IDENTIFIER, TokenType.LEFT_CURLY_BRACKET
            ]:
                error.highlight(self.token.position)
                print(
                    "Error: expected a type while parsing a func declaration")
                exit()
            result.nodes[1] = self.parse_type()
        result.nodes[2] = self.parse_stmts()
        return result

    def parse_set(self):
        """set ::= expr (('=' | compound-op) expr)?

        Compound assignments desugar to `target = target <op> value`,
        reusing the operator lexeme for both nodes.
        """
        result = self.parse_expr()
        if self.token.type == TokenType.SET:
            return SetNode([self.advance()], [result, self.parse_expr()])
        # Map each compound-assignment token to the binary node it
        # desugars to (replaces an 11-way if/elif chain).
        compound = {
            TokenType.SET_PLUS: AddNode,
            TokenType.SET_MINUS: SubNode,
            TokenType.SET_ASTERISK: MulNode,
            TokenType.SET_SLASH: DivNode,
            TokenType.SET_PERCENT: ModNode,
            TokenType.SET_SHIFT_LEFT: ShlNode,
            TokenType.SET_SHIFT_RIGHT: ShrNode,
            TokenType.SET_BITWISE_OR: BitOrNode,
            TokenType.SET_BITWISE_AND: BitAndNode,
            TokenType.SET_BITWISE_XOR: BitXorNode,
        }
        node_cls = compound.get(self.token.type)
        if node_cls is not None:
            lexeme = self.advance()
            return SetNode(
                [lexeme],
                [result, node_cls([lexeme], [result, self.parse_expr()])])
        return result

    def parse_stmt(self):
        """Dispatch to the statement parser selected by the current token."""
        self.skip_newlines()
        if self.token.type == TokenType.VAR:
            return self.parse_var()
        if self.token.type == TokenType.CON:
            return self.parse_con()
        if self.token.type == TokenType.IF:
            return self.parse_if()
        if self.token.type == TokenType.WHILE:
            return self.parse_while()
        if self.token.type == TokenType.FUNC:
            return self.parse_decl_func()
        return self.parse_set()

    def parse_stmts(self):
        """stmts ::= '(' nls stmt (nl nls stmt)* nls ')' | stmt"""
        if self.token.type != TokenType.LEFT_ROUND_BRACKET:
            return self.parse_stmt()
        result = StmtsNode([self.advance()], [])
        self.skip_newlines()
        if self.token.type != TokenType.RIGHT_ROUND_BRACKET:
            result.nodes.append(self.parse_stmt())
            while self.token.type == TokenType.NEWLINE:
                self.skip_newlines()
                if self.token.type == TokenType.RIGHT_ROUND_BRACKET:
                    break
                result.nodes.append(self.parse_stmt())
        if self.token.type != TokenType.RIGHT_ROUND_BRACKET:
            error.highlight(self.token.position)
            print(
                "Error: expected a `)` or a newline while parsing statements")
            exit()
        result.tokens.append(self.advance())
        return result

    def parse_prgrm(self):
        """prgrm ::= nls (stmt nl nls)* EOF"""
        result = PrgrmNode([], [])
        self.skip_newlines()
        if self.token.type != TokenType.EOF:
            result.nodes.append(self.parse_stmt())
            while self.token.type == TokenType.NEWLINE:
                self.skip_newlines()
                if self.token.type == TokenType.EOF:
                    break
                result.nodes.append(self.parse_stmt())
        if self.token.type != TokenType.EOF:
            error.highlight(self.token.position)
            print("Error: expected EOF while parsing program")
            exit()
        return result

    def parse(self):
        """Prime the token stream and parse.

        NOTE(review): this parses a single func declaration rather than
        calling parse_prgrm() — looks intentional for the current driver,
        but confirm before relying on whole-program parsing here.
        """
        self.advance()
        return self.parse_decl_func()
def test_single_float_comes_out_as_1_token(self):
    """A lone decimal number lexes into exactly one Float token."""
    result = Lexer("""1.9\n""").tokenize()
    self.assertListEqual(result, [Token(type="Float", value=1.9)])
def test_multiple_int_comes_out_as_2_token(self):
    """Two space-separated integers lex into two Int tokens."""
    result = Lexer("""1 2\n""").tokenize()
    self.assertListEqual(
        result, [Token(type="Int", value=1), Token(type="Int", value=2)])
def test_newlines_between_tokens_are_removed(self):
    """Whitespace separating the two numbers is dropped from the output."""
    # NOTE(review): the test name mentions newlines but the source below
    # shows only a space separator — confirm the intended fixture.
    actual = Lexer("""1 3\n""").tokenize()
    expected = [Token(type="Int", value=1), Token(type="Int", value=3)]
    # assertListEqual for consistency with the other lexer tests.
    self.assertListEqual(actual, expected)
"""Driver script: lex a source file named on the command line."""
import argparse  # https://docs.python.org/3/library/argparse.html
import pdb
import sys

from parser.lexer import Lexer

print('Parsing command line arguments')
arg_parser = argparse.ArgumentParser(
    description='Compile source starting at the input file into TBD output')
arg_parser.add_argument(
    'source_file',
    help='the source file in the J&Z limited liability language')
parsed_args = arg_parser.parse_args()
print(parsed_args)

# Slurp the whole file; sources are expected to be small.
with open(parsed_args.source_file, 'r') as source_file:
    source_code = source_file.read()

tokens = Lexer(source_code).tokenize()
print(f"Look at all of our tokens: {tokens}")
def test_multiple_character_string_comes_out_as_1_token(self):
    """A quoted multi-character string lexes into one String token."""
    result = Lexer(""""abc"\n""").tokenize()
    self.assertListEqual(result, [Token(type="String", value="abc")])
def test_single_control_comes_out_as_1_token(self):
    """A lone control keyword lexes into exactly one Control token."""
    result = Lexer("""if\n""").tokenize()
    self.assertListEqual(result, [Token(type="Control", value="if")])
def test_space_within_string_comes_out_as_1_token(self):
    """A space inside quotes stays part of the single String token."""
    result = Lexer(""""abc xyz"\n""").tokenize()
    self.assertListEqual(result, [Token(type="String", value="abc xyz")])
def test_single_alphanumeric_variables(self):
    """An identifier containing digits lexes into one Variable token."""
    result = Lexer("""testVariable1\n""").tokenize()
    self.assertListEqual(result,
                         [Token(type="Variable", value="testVariable1")])
def __init__(self, source):
    """Wrap `source` in a Lexer and initialize parser state.

    Order matters: reset() presumably touches self.lexer — TODO confirm —
    so the lexer must exist first.
    """
    self.lexer = Lexer(source)
    self.reset()
from parser.lexer import Lexer from parser.combinator import TreeBuilder from parser.interpreter import Interpreter lexer = Lexer("69+((21/3)^8.2)/(3*cos(45))") list_of_tokens = lexer.obilisk_lex() if not lexer.vars: tokens = TreeBuilder(list_of_tokens) tree, _ = tokens.build_tree() inter = Interpreter(tree) print(inter.solve()) lexer = Lexer("sqrt(-3)+2") list_of_tokens = lexer.obilisk_lex() if not lexer.vars: tokens = TreeBuilder(list_of_tokens) tree, _ = tokens.build_tree() inter = Interpreter(tree) print(inter.solve()) lexer = Lexer("2-0.7j*cos(45))") list_of_tokens = lexer.obilisk_lex() if not lexer.vars: tokens = TreeBuilder(list_of_tokens) tree, _ = tokens.build_tree() inter = Interpreter(tree) print(inter.solve()) lexer = Lexer("69*(((x-1)/(x+2))^8-((x-1)/(x+2))^6)=x+3") list_of_tokens = lexer.obilisk_lex() if not lexer.vars:
def test_invalid_characters_create_errors(self):
    """Characters outside the language's alphabet raise InvalidCharacterError."""
    # Todo: consider how we'd handle a non-alphanumeric non-keyword character
    # (commented-out draft assertions removed; the assertRaises below is the
    # intended check).
    with self.assertRaises(InvalidCharacterError):
        Lexer("""&$#\n""").tokenize()