def __init__(self, lp, lex_ws, ptrees, sen=None):
    self.lex = Lexer.parse(open(lp, 'r').read())
    self.lex_ws = lex_ws
    self.ptrees = ptrees
    self.sen = sen
    self.amb_type = 'horizontal'
    for l in iter(self.ptrees.splitlines()):
        if re.match(VERTICAL_AMBIGUITY, l):
            self.amb_type = 'vertical'
            break
    amb1, amb2 = None, None
    if self.amb_type == 'vertical':
        amb1, amb2 = self.parse_vamb()
    else:
        amb1, amb2 = self.parse_hamb()
    # extract the ambiguous string from sentence
    amb1s = self.ambiguous_string(amb1)
    amb2s = self.ambiguous_string(amb2)
    assert amb1s == amb2s
    self.amb_str = amb1s
    # extract the ambiguous grammar rules from parse trees
    _cfg = self.ambiguous_cfg_subset(amb1, amb2)
    # first, minimise the lex based on the cfg
    self.sym_toks, self.toks = self.minimise_lex(_cfg)
    tp = tempfile.mktemp()
    Lexer.write(self.sym_toks, self.toks, self.lex_ws, tp)
    lex = Lexer.parse(open(tp, 'r').read())
    # convert _cfg to a CFG instance.
    self.min_cfg = self.to_CFG(_cfg, lex)
def verify_ambiguity(self, mingp, minlp, minsen, duration=None):
    print "==> verify grammar %s with minimiser %s \n" % \
        (mingp, self._sin.minp)
    self._sin.lex = Lexer.parse(open(self._sin.lp, 'r').read())
    self._sin.cfg = CFG.parse(self._sin.lex, open(self._sin.gp, "r").read())
    self._sin.parser = Accent.compile(self._sin.gp, self._sin.lp)
    minlex = Lexer.parse(open(minlp, 'r').read())
    mincfg = CFG.parse(minlex, open(mingp, 'r').read())
    seq = mincfg.get_rule('root').seqs[0]
    # check if the root rule of minimised cfg == root of original cfg
    if (len(seq) == 1) and (str(seq[0]) == self._sin.cfg.start_rulen):
        out = Accent.run(self._sin.parser, minsen)
        if Accent.was_ambiguous(out):
            print "** verified **"
    minbend = "%sm" % self._sin.backend
    if minbend in Backends.BACKENDS:
        bend = Backends.BACKENDS[minbend](self._sin, mincfg, minsen)
    else:
        bend = Backends.WGTBACKENDS[minbend](self._sin, mincfg, minsen)
    # we keep trying until we hit the subseq
    while not bend.found:
        bend.run(self._sin.t_depth, self._sin.wgt, duration)
    print "** verified **"
def run(self):
    tst = Lexer("")
    prs = Parser(tst)
    while True:
        try:
            compound = 0
            tst.flush()
            text = ""
            descend = False
            text = input('mpr> ')
            if '{' in text:
                descend = True
                compound += 1
            while compound > 0 or (text != "" and text[-1] != ';' and text[-1] != '}'):
                inpt = input('... ')
                if '{' in inpt:
                    compound += 1
                if '}' in inpt:
                    compound -= 1
                text += inpt
        except EOFError:
            break
        tst.append(text)
        try:
            self.interpret(descend, prs.compound())
        except ValueError as err:
            print(err)
        except SyntaxError as err:
            print(err)
        except TypeError as err:
            print(err)
        except KeyError as err:
            print("Variable {var} not defined!".format(var=err))
def parse(self):
    lexer = Lexer()
    self.token = lexer.nextToken()
    self.expr(lexer)
    print 'PRINT'
    if lexer.count < len(lexer.text) + 1:
        print 'Syntax error!'
def make_include(sourceText):
    """
    Parse the *_FILE assignments from the source text and print the
    corresponding #include lines.
    """
    global token
    moduleFile = ""
    waveformFile = ""
    signalFile = ""
    sequenceFile = ""
    lexer.initialize(sourceText)
    getToken()
    while True:
        if token.type == EOF:
            break
        elif found("MODULE_FILE"):
            consume("MODULE_FILE")
            consume("=")
            if found(STRING):
                moduleFile = token.cargo
            consume(STRING)
        elif found("WAVEFORM_FILE"):
            consume("WAVEFORM_FILE")
            consume("=")
            if found(STRING):
                waveformFile = token.cargo
            consume(STRING)
        elif found("SIGNAL_FILE"):
            consume("SIGNAL_FILE")
            consume("=")
            if found(STRING):
                signalFile = token.cargo
            consume(STRING)
        elif found("SEQUENCE_FILE"):
            consume("SEQUENCE_FILE")
            consume("=")
            if found(STRING):
                sequenceFile = token.cargo
            consume(STRING)
        else:
            error("unrecognized keyword: " + dq(token.cargo))
    if len(waveformFile) == 0:
        raise ParserError("missing WAVEFORM_FILE")
    if len(signalFile) == 0:
        raise ParserError("missing SIGNAL_FILE")
    if len(sequenceFile) == 0:
        raise ParserError("missing SEQUENCE_FILE")
    print("MODULE_FILE " + moduleFile)  # PHM requires this to appear in the output
    print("SIGNAL_FILE " + signalFile)  # PHM requires this to appear in the output
    print("#include " + signalFile)
    print("#include " + waveformFile)
    print("#include " + sequenceFile)
def test_bug_with_quotation(self):
    lexer = Lexer('A = "word" B;')
    token = lexer.get_next_token()
    self.assertEquals(IDENTIFIER, token.type)
    self.assertEquals('A', token.value)
    token = lexer.get_next_token()
    self.assertEquals(EQUAL, token.type)
    self.assertEquals('EQUAL', token.value)
    token = lexer.get_next_token()
    self.assertEquals(QUOTATION_MARK, token.type)
    self.assertEquals('QUOTATION_MARK', token.value)
    token = lexer.get_next_token()
    self.assertEquals(IDENTIFIER, token.type)
    self.assertEquals('word', token.value)
    token = lexer.get_next_token()
    self.assertEquals(QUOTATION_MARK, token.type)
    self.assertEquals('QUOTATION_MARK', token.value)
    token = lexer.get_next_token()
    self.assertEquals(IDENTIFIER, token.type)
    self.assertEquals('B', token.value)
    token = lexer.get_next_token()
    self.assertEquals(SEMICOLON, token.type)
    self.assertEquals('SEMICOLON', token.value)
    token = lexer.get_next_token()
    self.assertEquals(EOF, token.type)
    self.assertEquals('EOF', token.value)
def parse_waveform(sourceText):
    """
    Parse all waveform definitions in the source text.
    """
    global token
    lexer.initialize(sourceText)
    while True:
        getToken()
        if token.type == EOF:
            break
        waveform()
def main(sourceText):
    global f
    f = open(outputFilename, "w")
    writeln("Here are the tokens returned by the lexer:")
    Lexer.initialize(sourceText)
    while True:
        token = Lexer.get()
        writeln(token.show(True))
        if token.type == EOF:
            break
    f.close()
def parse(sourceText):
    """
    Top-level parse of a .seq source; returns the translated
    module/signal/main/sequence/waveform text.
    """
    global token
    global paramList
    lexer.initialize(sourceText)
    waveformText = ""
    sequenceText = ""
    mainText = ""
    moduleFile = ""
    signalFile = ""
    getToken()
    while True:
        if token.type == EOF:
            break
        elif found("WAVEFORM"):
            waveformText += waveform()
        elif found("param"):
            param()
        elif found("MAIN"):
            mainText = main()
        elif found(IDENTIFIER):
            sequenceText += sequence()
        elif found("MODULE_FILE"):
            consume("MODULE_FILE")
            moduleFile = token.cargo.strip('"')
            consume(STRING)
        elif found("SIGNAL_FILE"):
            consume("SIGNAL_FILE")
            signalFile = token.cargo.strip('"')
            consume(STRING)
        else:
            error("unrecognized token " + token.show(align=False))
            break
    if not gotMain:
        error("must define at least one MAIN in the .seq file")
    retval = ""
    retval += "modulefile " + moduleFile + "\n"
    retval += "signalfile " + signalFile + "\n\n"
    retval += mainText
    retval += sequenceText
    retval += waveformText
    for p in paramList:
        retval += p + "\n"
    return retval
def main():
    verbosity = False
    args = sys.argv
    if len(args) == 3 and sys.argv[1] in ["-v", "--verbose"]:
        verbosity = True
        args.pop(1)
    f = open(args[1], 'r')
    program = f.read()
    f.close()
    token_list = Lexer.lex_program(program)
    if verbosity:
        print("Token list:\n", [x["type"] for x in token_list])
    Parser.parse_program(token_list, verbosity)
    cst_root = Tree.Tree({"type": "Program"})
    cst_root.generate_cst(token_list)
    if verbosity:
        print("CST:")
        cst_root.print_tree(1)
        print()
    ast_root = Tree.Tree({"type": "Block"})
    ast_root.generate_ast(cst_root.children[0])
    if verbosity:
        print("AST:")
        ast_root.print_tree(1)
        print()
    symbol_table = Semantics.Scope(ast_root)
    if verbosity:
        symbol_table.print_table(0)
    code = CodeGen.ExecEnv(ast_root, symbol_table)
    print("\nCompilation successful!")
def getToken():
    """
    Gets the next token from the source text and assigns it to the
    global variable 'token'.
    """
    global token
    token = lexer.get()
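# The recursive-descent snippets around getToken() (make_include, parse,
# parse_sequence, get_params, get_subroutines) also lean on found() and
# consume() helpers that are not shown in this section. The sketch below is
# a hypothetical reconstruction of that usual pattern, assuming a token
# object with .type and .cargo attributes and a global error() reporter;
# treat it as an illustration, not the project's actual code.

def found(argTokenType):
    # In this scheme a keyword token's type is the keyword string itself,
    # so found("param") and found(IDENTIFIER) use the same comparison.
    return token.type == argTokenType


def consume(argTokenType):
    # Check that the current token has the expected type, then advance
    # by delegating to getToken() above.
    if token.type != argTokenType:
        error("expected " + str(argTokenType) + " but found " + token.cargo)
    getToken()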
def parse_sequence(sourceText):
    """
    Parse the params and MAIN of a sequence source, then the remaining
    params and sequences.
    """
    global token
    global paramList
    lexer.initialize(sourceText)
    getToken()
    param()
    main()
    while True:
        getToken()
        if token.type == EOF:
            break
        param()
        sequence()
def __init__(self, namespace, module, outFile, force=False):
    self.namespace = namespace
    self.lexer = Lexer(module)
    self.outFile = outFile
    self.moduleFile = module.__file__
    self._force = force
    self.actionTable = {}
    self._tokenChars = {}
    self._closedTokens = [self.lexer.default.name]
def test_alternatives(self):
    lexer = Lexer('A | B')
    token = lexer.get_next_token()
    self.assertEquals(IDENTIFIER, token.type)
    self.assertEquals('A', token.value)
    token = lexer.get_next_token()
    self.assertEquals(ALTERNATIVE, token.type)
    self.assertEquals('ALTERNATIVE', token.value)
    token = lexer.get_next_token()
    self.assertEquals(IDENTIFIER, token.type)
    self.assertEquals('B', token.value)
    token = lexer.get_next_token()
    self.assertEquals(EOF, token.type)
    self.assertEquals('EOF', token.value)
def get_params(sourceText):
    """
    Collect and return the names of all 'param' declarations.
    """
    global token
    global paramNames
    lexer.initialize(sourceText)
    while True:
        getToken()
        if token.type == EOF:
            break
        if found("param"):
            consume("param")
            paramNames.append(token.cargo)
            consume(IDENTIFIER)
            consume("=")
            consume(NUMBER)
    return paramNames
def main():
    if len(sys.argv) > 1:
        if sys.argv[1].split(".")[1].upper() != 'NEO':
            print("Advertencia: se recomienda usar archivos .neo con este Lexer\n")
        try:
            stream = open(sys.argv[1])
            data = stream.read()
            stream.close()
        except IOError:
            print('ERROR al abrir el archivo \"%s\"' % sys.argv[1])
            exit()
        neoLexer = Lexer()
        neoLexer.build()
        neoLexer.tokenize(data)
    else:
        print("Verifique que se ejecuto el programa correctamente:")
        print("./LexNeo <nombre del archivo>\n")
def compare(gp1, gp2, lp):
    lex = Lexer.parse(open(lp, "r").read())
    cfg1 = CFG.parse(lex, open(gp1, "r").read())
    cfg2 = CFG.parse(lex, open(gp2, "r").read())
    _cfg1 = _cfg(cfg1)
    _cfg2 = _cfg(cfg2)
    return _cfg1 == _cfg2
def write_stat(self, gp, lp, tag=''):
    """
    Write the number of rules, alts, and symbols.
    Use the tag to mark the final line.
    """
    s = "-,-,-"
    if gp is not None:
        lex = Lexer.parse(open(lp, 'r').read())
        cfg = CFG.parse(lex, open(gp, 'r').read())
        rules, alts, syms = cfg.size()
        s = "%s,%s,%s" % (rules, alts, syms)
    with open(self.statslog, "a") as logp:
        logp.write("%s%s\n" % (tag, s))
def __init__(self):
    # Create a Lexer
    self.the_Lex = Lexer()
    # Calls getData() to get all of the Tokens from input
    self.statements = self.the_Lex.getData()
    # A stack that we will use to load all of the Tokens that make
    # up a single statement
    self.the_stack = []
    # self.state_pos = 0
    # Calls the statements function
    self.Statements()
def test_one_line(self):
    lexer = Lexer('PROGRAM = RULE, {RULE};')
    token = lexer.get_next_token()
    self.assertEquals(IDENTIFIER, token.type)
    self.assertEquals('PROGRAM', token.value)
    token = lexer.get_next_token()
    self.assertEquals(EQUAL, token.type)
    self.assertEquals('EQUAL', token.value)
    token = lexer.get_next_token()
    self.assertEquals(IDENTIFIER, token.type)
    self.assertEquals('RULE', token.value)
    token = lexer.get_next_token()
    self.assertEquals(COMMA, token.type)
    self.assertEquals('COMMA', token.value)
    token = lexer.get_next_token()
    self.assertEquals(BRACKET_CURLY_OPEN, token.type)
    self.assertEquals('BRACKET_CURLY_OPEN', token.value)
    token = lexer.get_next_token()
    self.assertEquals(IDENTIFIER, token.type)
    self.assertEquals('RULE', token.value)
    token = lexer.get_next_token()
    self.assertEquals(BRACKET_CURLY_CLOSE, token.type)
    self.assertEquals('BRACKET_CURLY_CLOSE', token.value)
    token = lexer.get_next_token()
    self.assertEquals(SEMICOLON, token.type)
    self.assertEquals('SEMICOLON', token.value)
    token = lexer.get_next_token()
    self.assertEquals(EOF, token.type)
    self.assertEquals('EOF', token.value)
def eval(self, s):
    if self.debug:
        print "Input: " + s
    lexemes = Lexer.lex(s)
    if self.debug:
        print "Lexemes: " + str(lexemes)
    ast = self.parser.parse(lexemes)
    if self.debug:
        print "AST:"
        self.printAST(ast, 0)
    result = self.interpreter.eval(ast)
    if self.debug:
        print "Result: ",
        print str(result)
def get_subroutines(sourceText):
    """
    Return the names of all subroutines, i.e. IDENTIFIER '{' ... '}' blocks.
    """
    global token
    global subroutines
    lexer.initialize(sourceText)
    subroutines = []
    while True:
        getToken()
        if token.type == EOF:
            break
        if found(IDENTIFIER):
            name = token.cargo
            consume(IDENTIFIER)
            if found("{"):
                subroutines.append(name)
                consume("{")
                while not found("}"):
                    getToken()
    return subroutines
def parse_modules(sourceText):
    """
    Parse SLOT definitions and return the accumulated per-module output.
    """
    global token
    global drvOutput
    global adcOutput
    global hvlOutput
    global hvhOutput
    global sysOutput
    lexer.initialize(sourceText)
    # initialize the system output string
    sysOutput = ""
    sysOutput += "BACKPLANE_ID=0000000000000000\n"
    sysOutput += "BACKPLANE_REV=0\n"
    sysOutput += "BACKPLANE_TYPE=1\n"
    sysOutput += "BACKPLANE_VERSION=0.0.0\n"
    getToken()
    while True:
        if token.type == EOF:
            break
        elif found("SLOT"):
            slot()
        else:
            error("unrecognized token " + token.show(align=False))
            break
    retval = ""
    retval += drvOutput
    retval += adcOutput
    retval += hvlOutput
    retval += hvhOutput
    retval += dioOutput
    return retval
def main():
    # initialize variables
    tokens = []
    lexemes = []
    # global init lets us change the global variable
    global toProcess
    global _filename
    # main loop will go until the user is finished
    while True:
        # reset global variables after loop
        reset()
        # call lexer
        # returned from Lexer.main() is a deque containing lexemes and the
        # filename of the processed file
        toProcess, _filename = Lexer.main()
        # if there is content in toProcess (sent from Lexer.main()), proceed
        # to the Syntax Analyser
        if toProcess:
            # set the output filehandle so that we can print to a file
            setFileHandle()
            # Syntax Analyser
            print('\nObject Code Generator running...')
            getNext()  # get input
            rat15S()   # call Syntax Analyser
            print('\n...Object Code Generator finished!\n')
            # report to user if error or no error in syntax analysis
            print('There were no errors!') if _error else print('An error was found!')
            # report to user where the contents of the file have been saved
            print('Your syntactic analysis of {} has been saved as {} in the working directory.'.format(_filename, _filename + '.SA'))
            # print object code tables
            instr_table.print_table()
            print('\n', file=out_fh_OC)
            symbol_table.list()
            print('Your object code of {} has been saved as {} in the working directory.'.format(_filename, _filename + '.OC'))
        # ask user if they would like to run another file
        _continue = input('\nWould you like to process another file? (yes/no): ')
        if _continue == 'no' or _continue == 'quit':
            print('Goodbye!')
            sys.exit()
def to_accent(self, sen, lp):
    """
    sen contains symbolic tokens; convert to 'actual' tokens using the lex.
    """
    lex = Lexer.parse(open(lp, "r").read())
    _sen = []
    for tok in sen.split():
        if tok in lex.keys():
            _sen.append(lex[tok])
        else:
            # single char quoted tokens
            _sen.append(tok.replace("'", ""))
    if not self._sin.lex_ws:
        return " ".join(_sen)
    return "".join(_sen)
def valid(gf, lf, max_alts_allowed=None, empty_alts_ratio=None):
    """
    Generated grammar is valid if it:
    a) has no empty rule
    b) number of alternatives/rule < max_alts_allowed
    c) %age of empty alternatives < empty_alts_ratio
    d) has no unreachable rules
    e) doesn't contain a subset which takes no input
    f) is not trivially ambiguous
    """
    lex = Lexer.parse(open(lf, "r").read())
    cfg = CFG.parse(lex, open(gf, "r").read())
    # check for empty rules
    if empty_rule(cfg):
        return False
    # check if any of the rules has > N alts
    if max_alts_allowed is not None:
        if has_too_many_alts(cfg, max_alts_allowed):
            return False
    # check if we have too many empty alts
    if empty_alts_ratio is not None:
        if has_too_many_empty_alts(cfg, empty_alts_ratio):
            return False
    # Check if all the rules are reachable from the start rule.
    if len(unreachable(cfg)) > 0:
        print "unreachable: ", unreachable(cfg)
        sys.stdout.write("r")
        sys.stdout.flush()
        return False
    # Check if the grammar is unproductive
    if unproductive(cfg, lex):
        sys.stdout.write("u")
        sys.stdout.flush()
        return False
    # Check the grammar for trivial ambiguities
    if ambiguous(cfg):
        sys.stdout.write("a")
        sys.stdout.flush()
        return False
    return True
def run(self):
    currgp = self.mingp
    currlp = self.minlp
    currparse = self._sin.ambi_parse
    n = 1
    found = True
    while found:
        found = False
        lex = Lexer.parse(open(currlp, "r").read())
        cfg = CFG.parse(lex, open(currgp, "r").read())
        # work on rules with no of alts > 1
        keys = [r.name for r in cfg.rules if len(r.seqs) > 1]
        for key in keys:
            seqs = cfg.get_rule(key).seqs
            for i in range(len(seqs)):
                _cfg = self.cfg_minus_alt(cfg, key, i)
                if self.valid_cfg(_cfg):
                    # we could minimise lex first before pruning
                    _cfg_p = self.prune_cfg(_cfg, lex)
                    _gf, _lf = "%s.acc" % n, "%s.lex" % n
                    _gp = os.path.join(self._sin.td, "pruned.%s" % _gf)
                    CFG.write(_cfg_p, _gp)
                    n += 1
                    amb, _, ptrees = self._sin.find_ambiguity(_gp, currlp,
                                                              self._sin.backend,
                                                              self._sin.mint)
                    if amb:
                        ambi_parse = AmbiParse.parse(currlp, self._sin.lex_ws, ptrees)
                        __gp = os.path.join(self._sin.td, "min.%s" % _gf)
                        __lp = os.path.join(self._sin.td, "min.%s" % _lf)
                        self.write_cfg_lex(ambi_parse, __gp, __lp)
                        self.write_stat(__gp, __lp)
                        found = True
                        currparse = ambi_parse
                        currgp = __gp
                        currlp = __lp
                        break
            if found:
                break
    return currgp, currlp, currparse.amb_str
def run(self):
    currgp = self.mingp
    currlp = self.minlp
    currparse = self._sin.ambi_parse
    n = 1
    found = True
    while found:
        found = False
        lex = Lexer.parse(open(currlp, 'r').read())
        cfg = CFG.parse(lex, open(currgp, 'r').read())
        combs = self.rule_alts_combs(cfg)
        random.shuffle(combs)
        while combs:
            key, i = combs.pop()
            _cfg = self.cfg_minus_alt(cfg, key, i)
            if self.valid_cfg(_cfg):
                # we could minimise lex first before pruning
                _cfg_p = self.prune_cfg(_cfg, lex)
                _gf, _lf = "%s.acc" % n, "%s.lex" % n
                _gp = os.path.join(self._sin.td, "pruned.%s" % _gf)
                CFG.write(_cfg_p, _gp)
                n += 1
                amb, _, ptrees = self._sin.find_ambiguity(_gp, currlp,
                                                          self._sin.backend,
                                                          self._sin.mint)
                if amb:
                    ambi_parse = AmbiParse.parse(currlp, self._sin.lex_ws, ptrees)
                    __gp = os.path.join(self._sin.td, "min.%s" % _gf)
                    __lp = os.path.join(self._sin.td, "min.%s" % _lf)
                    self.write_cfg_lex(ambi_parse, __gp, __lp)
                    self.write_stat(__gp, __lp)
                    found = True
                    currparse = ambi_parse
                    currgp = __gp
                    currlp = __lp
                    break
    return currgp, currlp, currparse.amb_str
def mutate_cfg(gp, lp, type):
    lex = Lexer.parse(open(lp, "r").read())
    cfg = CFG.parse(lex, open(gp, "r").read())
    sym_toks = Utils.sym_tokens(gp)
    _cfg = cfg.clone()
    if type == 'empty':
        empty(_cfg)
    elif type == 'add':
        tok = Utils.randomTok(cfg, lex, sym_toks)
        add(_cfg, tok)
    elif type == 'mutate':
        tok = Utils.randomTok(cfg, lex, sym_toks)
        mutate(_cfg, tok)
    elif type == 'delete':
        delete(_cfg)
    elif type == 'switch':
        switch(_cfg)
    else:
        # a bare non-empty string is always truthy, so the original
        # `assert "..."` could never fire; assert False instead
        assert False, "mutation type '%s' is not supported" % type
    return _cfg
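# An illustrative call for mutate_cfg: apply one random 'add' mutation and
# write the result back out with CFG.write, as the minimiser snippets in
# this section do. The file names here are hypothetical.
mutated = mutate_cfg("grammar.acc", "grammar.lex", "add")
CFG.write(mutated, "mutated.acc")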
import sys
from Lexer import *
from TokenTypes import *

if __name__ == "__main__":
    input = sys.argv[1]
    lexer = Lexer(input)
    print("Tokenizing ", end="")
    print(input)
    while True:
        t = lexer.lex()
        if t.get_token().value == TokenTypes.EOF.value:
            break
def for_expr(self):
    res = ParseResult()
    if not self.current_tok.matches(Lexer.TT_KEYWORD, 'Untuk_kita'):
        return res.failure(
            Lexer.InvalidSyntaxError(
                self.current_tok.pos_start, self.current_tok.pos_end,
                f"Mengharapkan seperti (senja kala itu) 'Untuk_kita'"))
    res.register_advancement()
    self.advance()
    if self.current_tok.type != Lexer.TT_IDENTIFIER:
        return res.failure(
            Lexer.InvalidSyntaxError(
                self.current_tok.pos_start, self.current_tok.pos_end,
                f"Mengharapkan seperti (senja kala itu) identifier"))
    var_name = self.current_tok
    res.register_advancement()
    self.advance()
    if self.current_tok.type != Lexer.TT_EQ:
        return res.failure(
            Lexer.InvalidSyntaxError(
                self.current_tok.pos_start, self.current_tok.pos_end,
                f"Mengharapkan seperti (senja kala itu) '='"))
    res.register_advancement()
    self.advance()
    start_value = res.register(self.expr())
    if res.error:
        return res
    if not self.current_tok.matches(Lexer.TT_KEYWORD, 'Ke'):
        return res.failure(
            Lexer.InvalidSyntaxError(
                self.current_tok.pos_start, self.current_tok.pos_end,
                f"Mengharapkan seperti (senja kala itu) 'Ke'"))
    res.register_advancement()
    self.advance()
    end_value = res.register(self.expr())
    if res.error:
        return res
    if self.current_tok.matches(Lexer.TT_KEYWORD, 'Melangkah'):
        res.register_advancement()
        self.advance()
        step_value = res.register(self.expr())
        if res.error:
            return res
    else:
        step_value = None
    if not self.current_tok.matches(Lexer.TT_KEYWORD, 'Maka_cakrawala'):
        return res.failure(
            Lexer.InvalidSyntaxError(
                self.current_tok.pos_start, self.current_tok.pos_end,
                f"Mengharapkan seperti (senja kala itu) 'Maka_cakrawala'"))
    res.register_advancement()
    self.advance()
    body = res.register(self.expr())
    if res.error:
        return res
    return res.success(
        Lexer.ForNode(var_name, start_value, end_value, step_value, body))
[email protected]@[email protected]@
0
1
Where the first line is the RE and the next ones are the alphabet
'''
regular_expressions = automata.readFile("RE.txt")
with open('quintuple_NFA.json', 'w') as file:
    file.write("")
with open('quintuple_DFA.json', 'w') as file:
    file.write("")
for regular_expression in regular_expressions:
    # print(regular_expression)
    identifier = regular_expressions[regular_expression][0]
    alphabet = regular_expressions[regular_expression][1]
    REPostfix = automata.convertREToPostfix(regular_expression, alphabet)
    # print(REPostfix)
    automata.convertREToNFA(identifier, REPostfix, alphabet)
'''
nfa_example = automata_IO.nfa_json_importer('quintuple_NFA.json')
automata_IO.nfa_to_dot(nfa_example, 'graphic_NFA', './')
'''
automata.createTransitionMatrix()
automata.NFA_to_DFA()
'''
dfa_example = automata_IO.dfa_json_importer('quintuple_DFA.json')
automata_IO.dfa_to_dot(dfa_example, 'graphic_DFA', './')
'''
lexer = Lexer.Lexer()
DFAs = lexer.readFileDFAs("quintuple_DFA.json")
tokens = lexer.readFileTokens("code.txt")
lexer.evalauteTokens(DFAs, tokens)
def __init__(self, asm_file):
    self.lex = Lexer.Lex(asm_file)
    self.cmd_info()
class Parser:
    def __init__(self, file_path):
        self.ERROR = 0
        self.RIGHT = 1
        self.path = file_path
        self.lexer = Lexer(file_path)
        self.token = Token(Token_Type.ERRTOKEN, "", 0.0, None)
        self.state = self.RIGHT
        self.count = 0
        self.iters = 0
        self.origin_x = 0.0
        self.origin_y = 0.0
        self.rot_ang = 0.0
        self.scale_x = 1.0
        self.scale_y = 1.0
        self.tree = Tree()
        self.root = Node()

    def typecheck(self, _type):
        if (self.token.type != _type):
            self.state = self.ERROR

    def add_node(self, node_name, parents=None, _data=None):
        node = Node(tag=node_name, data=_data)
        self.tree.add_node(node, parent=parents)
        return node

    def getValue(self):
        fig = plt.figure()
        pic = plt.subplot()
        with open(self.path, 'r') as f:
            lines = f.readline()
            while (lines):
                lines = lines.lower()
                if (lines.find('pi') != -1):
                    lines = lines.replace('pi', '3.1415926')
                if (lines.find('origin') != -1):
                    start = lines.find('(')
                    end = lines.find(',')
                    endd = lines.find(')')
                    self.origin_x = eval(lines[start + 1:end])
                    self.origin_y = eval(lines[end + 1:endd])
                elif (lines.find('rot') != -1):
                    start = lines.find('is')
                    self.rot_ang = eval(lines[start + 2:-2])
                elif (lines.find('scale') != -1):
                    start = lines.find('(')
                    end = lines.find(',')
                    endd = lines.find(')')
                    self.scale_x = eval(lines[start + 1:end])
                    self.scale_y = eval(lines[end + 1:endd])
                elif (lines.find('for') != -1):
                    first = lines.find('from')
                    second = lines.find('to')
                    third = lines.find('step')
                    fourth = lines.find('draw')
                    start = eval(lines[first + 4:second])
                    end = eval(lines[second + 2:third])
                    steps = eval(lines[third + 4:fourth])
                    ax = []
                    ay = []
                    l_c = lines.find('(')
                    comma = lines.find(',')
                    r_c = lines.rfind(')')
                    # for iters in range (start, end, steps):
                    #     t = iters
                    #     ax.append(eval(lines[l_c + 1:comma]))
                    #     ay.append(eval(lines[comma + 1:r_c]))
                    iters = start
                    while (iters < end):
                        t = iters
                        ax.append(eval(lines[l_c + 1:comma]))
                        ay.append(eval(lines[comma + 1:r_c]))
                        iters += steps
                    ax = np.array(ax)
                    ay = np.array(ay)
                    ax = ax * self.scale_x
                    ay = ay * self.scale_y
                    temp = ax * np.cos(self.rot_ang) + ay * np.sin(self.rot_ang)
                    ay = ay * np.cos(self.rot_ang) - ax * np.sin(self.rot_ang)
                    ax = temp
                    ax += self.origin_x
                    ay += self.origin_y
                    color = ['blue', 'green', 'yellow', 'red']
                    ax = ax.tolist()
                    ay = ay.tolist()
                    self.count = self.count % 4
                    pic.scatter(ax, ay, s=2, c=color[self.count])
                    # plt.show()
                    self.count += 1
                    self.origin_x = 0
                    self.origin_y = 0
                    self.scale_x = 1
                    self.scale_y = 1
                    self.rot_ang = 0
                lines = f.readline()
        print(self.origin_x, self.origin_y)
        print(self.scale_x, self.scale_y)
        print(self.rot_ang)
        plt.show()

    def program(self):
        '''
        Program → Statement SEMICO Program | ε
        P -> S ; P
        '''
        # node = Node (tag= 'a')
        self.root = Node(tag='Program')
        self.token = self.lexer.getToken()
        self.tree.add_node(self.root)
        node = self.root
        while (self.token.type != Token_Type.NONTOKEN):
            node1 = Node(tag='Statement')
            node2 = Node(tag=';')
            node3 = Node(tag='Program')
            self.tree.add_node(node1, node)
            self.tree.add_node(node2, node)
            self.tree.add_node(node3, node)
            self.statement(node1)
            # self.token = self.lexer.getToken ()
            # print (self.token.type)
            self.typecheck(Token_Type.SEMICO)
            self.token = self.lexer.getToken()
            node = node3
            if (self.state == self.ERROR):
                raise SyntaxError('SyntaxError !')
        self.add_node('Empty', node)
        print('---------------------Object Tree----------------------')
        self.tree.show()
        self.getValue()

    def statement(self, node):
        '''
        Statement → OriginStatment | ScaleStatment | RotStatment | ForStatment
        '''
        print('--Enter Statement--')
        if (self.token.type == Token_Type.ORIGIN):
            node_temp = self.add_node('OriginStatment', node)
            self.originstatement(node_temp)
        elif (self.token.type == Token_Type.SCALE):
            node_temp = self.add_node('ScaleStatment', node)
            self.scalestatement(node_temp)
        elif (self.token.type == Token_Type.ROT):
            node_temp = self.add_node('RotStatment', node)
            self.rotstatement(node_temp)
        elif (self.token.type == Token_Type.FOR):
            node_temp = self.add_node('forstatement', node)
            self.forstatement(node_temp)
        else:
            self.state = self.ERROR
        print(self.state)
        print('--End statement--')

    def originstatement(self, node):
        '''
        OriginStatment → ORIGIN IS L_BRACKET Expression COMMA Expression R_BRACKET
        '''
        print('--Enter originstatement--')
        temp_node = Node(tag=' ')
        if (self.token.type == Token_Type.ORIGIN):
            temp_node = self.add_node('ORIGIN', node)
            self.token = self.lexer.getToken()
            if (self.token.type == Token_Type.IS):
                self.token = self.lexer.getToken()
                temp_node = self.add_node('IS', node)
                if (self.token.type == Token_Type.L_BRACKET):
                    temp_node = self.add_node('L_BRACKET', node)
                    self.token = self.lexer.getToken()
                    temp_node = self.add_node('Expression', node)
                    self.expression(temp_node)
                    # self.token = self.lexer.getToken ()
                    self.typecheck(Token_Type.COMMA)
                    temp_node = self.add_node('COMMA', node)
                    self.token = self.lexer.getToken()
                    temp_node = self.add_node('Expression', node)
                    self.expression(temp_node)
                    # self.token = self.lexer.getToken ()
                    if (self.token.type != Token_Type.R_BRACKET):
                        self.state = self.ERROR
                    temp_node = self.add_node('R_BRACKET', node)
                    self.token = self.lexer.getToken()
                else:
                    self.state = self.ERROR
            else:
                self.state = self.ERROR
        print(self.state)
        print('--End originstatement--')

    def scalestatement(self, node):
        '''
        ScaleStatment → SCALE IS L_BRACKET Expression COMMA Expression R_BRACKET
        '''
        print('--Enter scalestatement--')
        temp_node = self.add_node('SCALE', node)
        temp_node = self.add_node('IS', node)
        temp_node = self.add_node('L_BRACKET', node)
        self.token = self.lexer.getToken()
        if (self.token.type != Token_Type.IS):
            self.state = self.ERROR
        self.token = self.lexer.getToken()
        if (self.token.type != Token_Type.L_BRACKET):
            self.state = self.ERROR
        self.token = self.lexer.getToken()
        temp_node = self.add_node('Expression', node)
        self.expression(temp_node)
        temp_node = self.add_node('COMMA', node)
        temp_node = self.add_node('Expression', node)
        # self.token = self.lexer.getToken ()
        self.typecheck(Token_Type.COMMA)
        self.token = self.lexer.getToken()
        self.expression(temp_node)
        # self.token = self.lexer.getToken ()
        if (self.token.type != Token_Type.R_BRACKET):
            self.state = self.ERROR
        temp_node = self.add_node('R_BRACKET', node)
        self.token = self.lexer.getToken()
        print(self.state)
        print('--End scalestatement--')

    def rotstatement(self, node):
        '''
        RotStatment → ROT IS Expression
        '''
        print('--Enter rotstatement --')
        temp_node = self.add_node('ROT', node)
        temp_node = self.add_node('IS', node)
        temp_node = self.add_node('Expression', node)
        self.token = self.lexer.getToken()
        if (self.token.type != Token_Type.IS):
            self.state = self.ERROR
        self.token = self.lexer.getToken()
        # print (self.token.type)
        self.expression(temp_node)
        print(self.state)
        print('--End rotstatement--')

    def forstatement(self, node):
        '''
        ForStatment → FOR T FROM Expression TO Expression STEP Expression
                      DRAW L_BRACKET Expression COMMA Expression R_BRACKET
        '''
        print('--Enter forstatement--')
        temp_node = self.add_node('FOR', node)
        temp_node = self.add_node('T', node)
        temp_node = self.add_node('FROM', node)
        temp_node = self.add_node('Expression', node)
        self.token = self.lexer.getToken()
        if (self.token.type != Token_Type.T):
            self.state = self.ERROR
        self.token = self.lexer.getToken()
        if (self.token.type != Token_Type.FROM):
            self.state = self.ERROR
        self.token = self.lexer.getToken()
        self.expression(temp_node)
        temp_node = self.add_node('TO', node)
        temp_node = self.add_node('Expression', node)
        # self.token = self.lexer.getToken ()
        self.typecheck(Token_Type.TO)
        self.token = self.lexer.getToken()
        self.expression(temp_node)
        temp_node = self.add_node('STEP', node)
        temp_node = self.add_node('Expression', node)
        # self.token = self.lexer.getToken ()
        self.typecheck(Token_Type.STEP)
        self.token = self.lexer.getToken()
        self.expression(temp_node)
        temp_node = self.add_node('DRAW', node)
        temp_node = self.add_node('L_BRACKET', node)
        temp_node = self.add_node('Expression', node)
        # self.token = self.lexer.getToken ()
        self.typecheck(Token_Type.DRAW)
        self.token = self.lexer.getToken()
        self.typecheck(Token_Type.L_BRACKET)
        self.token = self.lexer.getToken()
        self.expression(temp_node)
        temp_node = self.add_node('COMMA', node)
        temp_node = self.add_node('Expression', node)
        # self.token = self.lexer.getToken ()
        self.typecheck(Token_Type.COMMA)
        self.token = self.lexer.getToken()
        self.expression(temp_node)
        temp_node = self.add_node('R_BRACKET', node)
        # self.token = self.lexer.getToken ()
        self.typecheck(Token_Type.R_BRACKET)
        self.token = self.lexer.getToken()
        print(self.state)
        print('--End forstatement--')

    def expression(self, node):
        '''
        Expression → Term { (PLUS | MINUS) Term }
        E -> T {(PLUS | MINUS) T}
        '''
        print('--Enter expression--')
        temp_node = self.add_node('Term', node)
        self.term(temp_node)
        while (self.token.type == Token_Type.PLUS
               or self.token.type == Token_Type.MINUS):
            if (self.token.type == Token_Type.PLUS):
                temp_node = self.add_node('PLUS', node, _data='+')
            else:
                temp_node = self.add_node('MINUS', node, _data='-')
            temp_node = self.add_node('Term', node)
            self.token = self.lexer.getToken()
            self.term(temp_node)
        # self.token = self.lexer.getToken ()
        print(self.state)
        print('--End expression--')

    def term(self, node):
        '''
        Term → Factor { ( MUL | DIV ) Factor }
        '''
        print('--Enter term--')
        temp_node = self.add_node('Factor', node)
        self.factor(temp_node)
        while (self.token.type == Token_Type.MUL
               or self.token.type == Token_Type.DIV):
            if (self.token.type == Token_Type.MUL):
                temp_node = self.add_node('*', node)
            else:
                temp_node = self.add_node('/', node)
            temp_node = self.add_node('Factor', node)
            self.token = self.lexer.getToken()
            self.factor(temp_node)
        # self.token = self.lexer.getToken ()
        print(self.state)
        print('--End term--')

    def factor(self, node):
        '''
        Factor → PLUS Factor | MINUS Factor | Component
        '''
        print('--Enter factor--')
        if (self.token.type == Token_Type.PLUS
                or self.token.type == Token_Type.MINUS):
            if (self.token.type == Token_Type.PLUS):
                temp_node = self.add_node('+', node)
            else:
                temp_node = self.add_node('-', node)
            temp_node = self.add_node('Factor', node)
            self.token = self.lexer.getToken()
            self.factor(temp_node)
        else:
            # print (self.token.type, self.token.value)
            temp_node = self.add_node('Component', node)
            self.component(temp_node)
        print(self.state)
        print('--End Factor--')

    def component(self, node):
        '''
        Component → Atom [POWER Component]
        '''
        print('--Enter component--')
        temp_node = self.add_node('Atom', node)
        self.atom(temp_node)
        self.token = self.lexer.getToken()
        if (self.token.type == Token_Type.POWER):
            # self.token = self.lexer.getToken ()
            self.token = self.lexer.getToken()
            temp_node = self.add_node('POWER', node)
            temp_node = self.add_node('Component', node)
            self.component(temp_node)
        print(self.state)
        # print (self.token.type)
        print('--End component--')

    def atom(self, node):
        '''
        Atom → CONST_ID | T
             | FUNC L_BRACKET Expression R_BRACKET
             | L_BRACKET Expression R_BRACKET
        '''
        print('--Enter atom--')
        if (self.token.type == Token_Type.CONST_ID):
            value = self.token.value
            print(value)
            temp_node = self.add_node('CONST_ID', node, _data=value)
        elif (self.token.type == Token_Type.T):
            temp_node = self.add_node('T', node)
        elif (self.token.type == Token_Type.FUNC):
            temp_node = self.add_node('FUNC', node)
            temp_node = self.add_node('L_BRACKET', node)
            temp_node = self.add_node('Expression', node)
            self.token = self.lexer.getToken()
            self.typecheck(Token_Type.L_BRACKET)
            self.token = self.lexer.getToken()
            self.expression(temp_node)
            self.typecheck(Token_Type.R_BRACKET)
            temp_node = self.add_node('R_BRACKET', node)
        elif (self.token.type == Token_Type.L_BRACKET):
            temp_node = self.add_node('L_BRACKET', node)
            temp_node = self.add_node('Expression', node)
            self.token = self.lexer.getToken()
            self.typecheck(Token_Type.R_BRACKET)
            temp_node = self.add_node('R_BRACKET', node)
        else:
            self.state = self.ERROR
        print(self.state)
        print('--End Atom--')

    def start(self):
        print('Begin !')
        self.program()
        print('End !')
import Lexer
from Parser import Parser
from Token import Token
from IllegalCharError import IllegalCharError

while True:
    text = input('Compiler> ')
    result, error = Lexer.run('<file> ', text)
    if error:
        print(error.as_string())
    else:
        print(result)
def test_lex_tokens():
    l = Lexer()
    l.lex("./Tok_test.txt")
    for i in range(len(l.Tok_list)):
        assert l.Tok_list[i][0] == exp_tok[i]
types = {"INT": r"[0-9]+", "string": r"[a-zA-Z]+", "VARCHAR": r"[a-zA-Z]+"} dic = {} for sig, params in util.parse_signature(sig_loc): if params == [('', )]: rules.append((sig + r"\(\)", SIGNATURE)) dic[sig] = ([], []) else: rules.append( (sig + r"\(" + ",".join([types.get("string") for _ in params]) + r"\)", SIGNATURE)) dic[sig] = ([ e.replace("int", "INT").replace("string", "VARCHAR") for _, e in params ], []) rules.extend(Lexer.get_rules()) tokens = Lexer.lex(util.parse_formula(formula_loc), rules) node = MyParser.parse(tokens) print(node.to_str()) conn = sqlite3.connect(":memory:") context = Context(conn, ["A", "B"]) with open(log_loc, 'r') as f: for line in f: parse_ts = re.compile(r"@[0-9]+").findall(line) if len(parse_ts) == 1: ts = int(parse_ts[0][1:]) else: raise RuntimeError("No timestamp found") context.set_ts(ts)
def list():
    global next_token
    global l
    print("Enter list")
    if next_token.get_token().value == TokenTypes.LPAREN.value:
        next_token = l.lex()
        list()
        if next_token.get_token().value == TokenTypes.RPAREN.value:
            next_token = l.lex()
            list()
    print("Exit list")


def seq():
    global next_token
    global l
    print("Enter seq")
    while next_token.get_token().value == TokenTypes.INT.value:
        next_token = l.lex()
    if next_token.get_token().value != TokenTypes.RPAREN.value:
        seq()
    print("Exit seq")


def main():
    global next_token
    global l
    l = Lexer(sys.argv[1])
    next_token = l.lex()
    lisp()
    if next_token.get_token().value == TokenTypes.EOF.value:
        print("PARSE SUCCEEDED")
    else:
        print("PARSE FAILED")
# -*- coding: utf-8 -*-
import sys
import Lexer
import Parser
import CodeGenerator

tl_file = sys.argv[1]
file_name = tl_file.split('.')[0]
tok_file = file_name + '.tok'
ast_file = file_name + '.ast.dot'
cfg_file = file_name + '.3A.cfg.dot'
s_file = file_name + '.s'

if Lexer.lexer(tl_file, tok_file):
    temp = Parser.parser(tok_file, ast_file)
    if temp:
        ast_tree = temp[0]
        symbol_table = temp[1]
        CodeGenerator.code_generator(ast_tree, symbol_table, cfg_file, s_file)
def atom(self):
    res = ParseResult()
    tok = self.current_tok
    if tok.type in (Lexer.TT_INT, Lexer.TT_FLOAT):
        res.register_advancement()
        self.advance()
        return res.success(Lexer.NumberNode(tok))
    elif tok.type == Lexer.TT_STRING:
        res.register_advancement()
        self.advance()
        return res.success(Lexer.StringNode(tok))
    elif tok.type == Lexer.TT_IDENTIFIER:
        res.register_advancement()
        self.advance()
        return res.success(Lexer.VarAccessNode(tok))
    elif tok.type == Lexer.TT_LPAREN:
        res.register_advancement()
        self.advance()
        expr = res.register(self.expr())
        if res.error:
            return res
        if self.current_tok.type == Lexer.TT_RPAREN:
            res.register_advancement()
            self.advance()
            return res.success(expr)
        else:
            return res.failure(
                Lexer.InvalidSyntaxError(
                    self.current_tok.pos_start, self.current_tok.pos_end,
                    "Mengharapkan seperti (senja kala itu) ')'"))
    elif tok.type == Lexer.TT_LSQUARE:
        list_expr = res.register(self.list_expr())
        if res.error:
            return res
        return res.success(list_expr)
    elif tok.matches(Lexer.TT_KEYWORD, 'Ketika_nada'):
        if_expr = res.register(self.if_expr())
        if res.error:
            return res
        return res.success(if_expr)
    elif tok.matches(Lexer.TT_KEYWORD, 'Untuk_kita'):
        for_expr = res.register(self.for_expr())
        if res.error:
            return res
        return res.success(for_expr)
    elif tok.matches(Lexer.TT_KEYWORD, 'Sedangkan'):
        while_expr = res.register(self.while_expr())
        if res.error:
            return res
        return res.success(while_expr)
    elif tok.matches(Lexer.TT_KEYWORD, 'Fungsi'):
        func_def = res.register(self.func_def())
        if res.error:
            return res
        return res.success(func_def)
    return res.failure(
        Lexer.InvalidSyntaxError(
            tok.pos_start, tok.pos_end,
            "Mengharapkan seperti (senja kala itu) int, float, identifier, "
            "'+', '-', '(', '[', 'Ketika_nada', 'Untuk_kita', 'Sedangkan', 'Fungsi'"))
    # set the perl $0 to name of the script
    vm.set_variable('0', Value(filename), 'scalar')
    vm.set_variable('ARGV', Value(argv_list), 'list')
    fp = open(filename)
    code = fp.read()
    fp.close()
else:
    # run the -e code
    vm.set_variable('ARGV', Value(argv_list), 'list')
    # set the perl $0 to <stdin>
    vm.set_variable('0', Value('-'), 'scalar')

if is_n:
    code = "while (<>) { " + code + "}"

lex = Lexer(code)
if (is_dump_toks):
    # dump tokens and exit
    lex.dump_tokens()
    sys.exit(0)

# scan, parse, to IR (AST)
p = Parser(lex)
ast = p.parse()

# walk and emit bytecode from AST (compile)
ast.emit(vm)

# run the VM
#try:
if (not is_dump):
def Unit(self, priority):
    if priority == 0:
        if self.current_token.type == LPAREN:
            self.eat(LPAREN)
            ans = self.Unit(Max_Priority)
            self.eat(RPAREN)
        elif self.current_token.type == NUM:
            ans = AST_Num(self.current_token.value)
            self.eat(NUM)
        elif self.current_token.type == STRING:
            ans = AST_String(self.current_token.value)
            self.eat(STRING)
        elif self.current_token.type in UnaryOp:
            token = self.current_token
            self.eat(token.type)
            ans = AST_UnaryOp(token, self.Unit(0))
        elif self.current_token.type == NAME:
            name = self.current_token.value
            self.eat(NAME)
            if self.current_token.type == LPAREN:
                self.eat(LPAREN)
                arglist = []
                if self.current_token.type != RPAREN:
                    arglist.append(self.Unit(Max_Priority))
                    while self.current_token.type != RPAREN:
                        self.eat(COMMA)
                        arglist.append(self.Unit(Max_Priority))
                self.eat(RPAREN)
                ans = AST_FuncCall(name, arglist)
            else:
                ans = AST_ID(name)
        elif self.current_token.type == LBRACK:
            self.eat(LBRACK)
            lst = []
            if self.current_token.type != RBRACK:
                lst.append(self.Unit(Max_Priority))
                while self.current_token.type == COMMA:
                    self.eat(COMMA)
                    lst.append(self.Unit(Max_Priority))
            self.eat(RBRACK)
            ans = AST_Array(lst)
        else:
            self.Error('invalid syntax at "%r" at pos %d' %
                       (self.lexer.get_local_text(), self.lexer.pos - 1))
        while self.current_token.type == LBRACK:
            self.eat(LBRACK)
            ind = self.Unit(Max_Priority)
            self.eat(RBRACK)
            ans = AST_BinOp(Lexer.Token(INDEX, '[]'), ans, ind)
    elif Associativity[priority] == LeftAssoc:
        ans = self.Unit(priority - 1)
        while (self.current_token.type in Prio
               and Prio[self.current_token.type] == priority):
            token = self.current_token
            self.eat(token.type)
            ans = AST_BinOp(token, ans, self.Unit(priority - 1))
    else:
        ans = self.Unit(priority - 1)
        rightest_node = ans
        first = True
        while (self.current_token.type in Prio
               and Prio[self.current_token.type] == priority):
            token = self.current_token
            self.eat(token.type)
            if first:
                ans = AST_BinOp(token, ans, self.Unit(priority - 1))
                rightest_node = ans
                first = False
            else:
                rightest_node.rson = AST_BinOp(token, rightest_node.rson,
                                               self.Unit(priority - 1))
                rightest_node = rightest_node.rson
    return ans
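# Unit() above is a precedence-climbing parser: level 0 parses atoms, and
# each higher level folds the binary operators of that priority, recursing
# one level down for operands. The tables below are a hypothetical sketch of
# the data it consults — the real token types, levels, and Max_Priority are
# defined elsewhere in that project and may differ.
LeftAssoc, RightAssoc = 'left', 'right'
MUL, DIV, PLUS, MINUS, ASSIGN = 'MUL', 'DIV', 'PLUS', 'MINUS', 'ASSIGN'
Prio = {MUL: 1, DIV: 1, PLUS: 2, MINUS: 2, ASSIGN: 3}        # token type -> priority level
Associativity = {1: LeftAssoc, 2: LeftAssoc, 3: RightAssoc}  # priority level -> associativity
Max_Priority = 3  # the level at which a full expression is parsed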
class Parser(object):
    def __init__(self, lexer):
        self.lexer = lexer
        self.tokens_list = lexer.lex()
        self.current_token = self.tokens_list[0]
        self.states = []
        self.transitions = []
        self.final = []
        self.alphabet = []
        self.initial_state = ''

    # Method that reports an error.
    def error(self, type_got):
        print('Token type {type} expected, received {type_got}!'.format(
            type=self.current_token.type, type_got=type_got))

    # Method that goes to the next token if the current one has already been processed.
    def pop_token(self, token_type):
        if self.current_token.type == token_type:
            if not self.lexer.expr_end():
                self.current_token = self.lexer.token_next()
        else:
            self.error(token_type)

    def process_statement(self):
        token = self.current_token
        if token.type == RESERVED:
            if token.value == 'alphabet:':
                self.pop_token(RESERVED)
                while self.current_token.type == LETTER_SMALL:
                    self.alphabet.append(self.current_token)
                    if self.lexer.expr_end():
                        break
                    self.pop_token(LETTER_SMALL)
            elif token.value == 'states:':
                self.pop_token(RESERVED)
                while self.current_token.type == LETTER_CAPITAL:
                    self.states.append(State(name=self.current_token.value))
                    if self.lexer.expr_end():
                        break
                    self.pop_token(LETTER_CAPITAL)
                    if self.current_token.type == COMMA:
                        self.pop_token(COMMA)
            elif token.value == 'final:':
                self.pop_token(RESERVED)
                while self.current_token.type == LETTER_CAPITAL:
                    self.final.append(State(self.current_token.value))
                    if self.lexer.expr_end():
                        break
                    self.pop_token(LETTER_CAPITAL)
                    if self.current_token.type == COMMA:
                        self.pop_token(COMMA)
            elif token.value == 'transitions:':
                self.pop_token(RESERVED)
                while not self.current_token.value == 'end.':
                    origin = self.current_token.value
                    if type(self.initial_state) == str:
                        for state in self.states:
                            if state.state_name == origin:
                                self.initial_state = state
                    self.pop_token(LETTER_CAPITAL)
                    self.pop_token(COMMA)
                    edge = self.current_token.value
                    self.pop_token(LETTER_SMALL)
                    self.pop_token(DASH)
                    self.pop_token(DASH)
                    self.pop_token(ANGLE_BRACKET)
                    destination = self.current_token.value
                    self.pop_token(LETTER_CAPITAL)
                    self.transitions.append(Transition(origin=origin, edge=edge,
                                                       destination=destination))
                    if self.lexer.expr_end():
                        break
                self.pop_token(RESERVED)
        else:
            print('Unexpected type!')

    def process_regex(self):
        token = self.current_token
        node = ""
        if token.type == LETTER_SMALL:
            # probably for assigning an alphabet letter!
            # self.pred_list.append(token.value)
            node = Letter(self.current_token)
            self.pop_token(LETTER_SMALL)
            if self.current_token.type == COMMA:
                self.pop_token(COMMA)
            if self.current_token.type == RPAR:
                self.pop_token(RPAR)
            return node
        # Logic if the current token is a star for repeating the letter.
        elif token.type == STAR:
            op = Token(type=STAR, value='*')
            self.pop_token(STAR)
            if self.current_token.type == LPAR:
                self.pop_token(LPAR)
            node = Repeat(op=op, letter=self.process_regex())
            return node
        elif token.type == UNDERSCORE:
            # Logic if the current token is an epsilon (empty-string) symbol.
            op = Token(type=UNDERSCORE, value='e')
            self.pop_token(UNDERSCORE)
            if self.current_token.type == LPAR:
                self.pop_token(LPAR)
            node = ET(symbol=self.process_regex())
            return node
        # Logic if the current token is one of the binary operators.
        elif token.type in (DOT, PIPE):
            if token.type == DOT:
                op = Token(type=DOT, value='.')
            elif token.type == PIPE:
                op = Token(type=PIPE, value='|')
            self.pop_token(token.type)
            self.pop_token(LPAR)
            node = TransitionOp(left=self.op_statement(), op=op,
                                right=self.op_statement())
            return node
        # Logic if the current token is a right parenthesis.
        elif token.type == RPAR:
            self.pop_token(RPAR)
            node = self.op_statement()
            return node
        # Logic if the current token is a comma.
        elif token.type == COMMA:
            self.pop_token(COMMA)
            node = self.op_statement()
            return node
        return node

    def new_lexer(self, _expression):
        self.lexer = Lexer(_expression)
        self.tokens_list = self.lexer.lex()
        self.current_token = self.tokens_list[0]

    def parse(self):
        node = self.process_regex()
        return node
def UploadAction(self, event=None, arg=None):
    """
    Uploading a file of rules.
    :param event: Any
    :param arg: a filename.
    :return: None
    """
    self.interpreter.deleted = False
    if self.searching:
        return
    if arg is not None:
        filename = arg
    else:
        filename = filedialog.askopenfilename()
    if filename == "":
        return
    print('Selected:', filename)
    try:
        z = open(filename, 'r')
    except FileNotFoundError:
        self.sendMessage("Error: File Not Found")
        return
    self.interpreter.setFilePath("/".join(filename.split("/")[:-1]))
    data = z.read()
    z.close()
    self.lexer.input(data)
    tokens = []
    while True:
        tok = self.lexer.token()
        if not tok:
            break  # No more input
        tokens.append(tok)
    if len(Lexer.SyntaxErrors) != 0:
        for e in Lexer.SyntaxErrors:
            self.sendMessage(e)
        Lexer.SyntaxErrors = []
        return
    try:
        def reader():
            self.interpreter.read(tokens)
            if len(self.interpreter.errorLoad) == 0:
                self.sendMessage(f"File {filename} has been uploaded.\n")
            else:
                self.viewErrorsAndMessages()
            self.searching = False
            if "on-start" in self.interpreter.predicates:
                self.queryReceived(given="on-start()")
        self.searching = True
        threading.Thread(target=reader).start()
    except Exception as e:
        ed, md = self.viewErrorsAndMessages()
        if not ed:
            self.sendMessage(f"Unknown Error: {e}")
    self.lexer = Lexer.build()
class Parser:
    # The syntax of regular expressions is as follows:
    # regex -> alt EOF
    # alt -> concat alt_tail*
    # alt_tail -> '' | '|' concat alt_tail
    # concat -> quant concat_tail
    # concat_tail -> '' | quant concat_tail
    # quant -> paren quant_tail
    # quant_tail -> '?' | '+' | '*' | ''
    # paren -> '(' alt ')' | lit
    # The implementation is a hand-coded LL parser, written roughly along the
    # lines of: https://www.cs.helsinki.fi/i/vihavain/k10/okk/content3.html
    # Inside the parser, NFAs are passed around as a pair
    # (first, out_transitions), where first is the start state and
    # out_transitions is a List() of Transition objects through which an
    # accepting state can be reached.
    # After parsing, accepting states are tracked via the nodes'
    # accepting attribute.

    ################ grammar productions #############

    def regex(self):
        (first, accept_transitions) = self.alt()
        accepting_state = Node()
        accepting_state.accepting = True
        for x in accept_transitions:
            x.attach_destination(accepting_state)
        self.assert_match("<EOF>")
        return first

    def alt(self):
        (first, transitions) = self.concat()
        branch_node = None
        # alt_tail
        while self.match('|'):
            (second, snd_transitions) = self.concat()
            if branch_node == None:
                branch_node = Node()
                branch_node.add_epsilon_transition(first)
            branch_node.add_epsilon_transition(second)
            transitions += snd_transitions
            self.append_postfix('|')
        if branch_node == None:
            return (first, transitions)
        else:
            return (branch_node, transitions)

    def concat(self):
        (first, transitions) = self.quant()
        while self.quant_matches():
            (second, second_transitions) = self.quant()
            for tr in transitions:
                tr.attach_destination(second)
            transitions = second_transitions
            self.append_postfix('#')
        return (first, transitions)

    def quant(self):
        (first, transitions) = self.paren()
        # quant_tail
        res = self.match('+', '?', '*')
        if res:
            self.append_postfix(res)
            if res == '*':
                loop_node = Node()
                for n in transitions:
                    n.attach_destination(loop_node)
                loop_node.add_epsilon_transition(first)
                transition = Transition(loop_node, None)
                return (loop_node, List(transition))
            elif res == '+':
                loop_node = Node()
                for tr in transitions:
                    tr.attach_destination(loop_node)
                loop_node.add_epsilon_transition(first)
                out_transition = Transition(loop_node, None)
                return (first, List(out_transition))
            elif res == '?':
                skip_node = Node()
                skip_node.add_epsilon_transition(first)
                return (skip_node, transitions + List(Transition(skip_node, None)))
        return (first, transitions)

    def quant_matches(self):
        return self.__token == '(' or isinstance(self.__token, CharSet)

    def paren(self):
        ret = None
        if self.match('('):
            ret = self.alt()
            self.assert_match(')')
        else:
            ret = self.lit()
        return ret

    def lit(self):
        token = self.advance()
        if isinstance(token, CharSet):
            self.append_postfix(token)
            first = Node()
            return (first, token.to_transition_list(first))
        raise ParseError('Unexpected input: ' + str(token))

    ############# the class's own methods #####

    def match(self, *lits):
        """Checks whether the next token in the input is one of those given
        as parameters. Returns it and advances in the input if so; returns
        False otherwise."""
        if self.__token in lits:
            old_token = self.advance()
            return old_token
        return False

    def assert_match(self, *lits):
        """Same as match(), but raises a fatal exception if the next token
        does not match one of those given as parameters."""
        if self.match(*lits) is False:
            raise ParseError('Unexpected `' + str(self.__token)
                             + "', expecting one of " + ", ".join(lits))

    def advance(self):
        """Returns the current token and advances in the input."""
        old_token = self.__token
        self.__token = self.__lexer.next_token()
        return old_token

    def __init__(self, str):
        self.__lexer = Lexer(str)
        self.__token = self.__lexer.next_token()
        self.postfix = ''

    def append_postfix(self, c):
        """Appends a character to the postfix by-product."""
        self.postfix += str(c)

    def parse(self):
        """Parses the expression given to the constructor and returns a
        reference to the NFA's start state."""
        return self.regex()
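# A possible driver for the regex Parser above, assuming the project's own
# Lexer, CharSet, Node, and Transition types; the pattern is illustrative.
parser = Parser("a(b|c)*")
start_state = parser.parse()  # start state of the constructed NFA
print(parser.postfix)         # postfix form accumulated as a by-product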
def __init__(self):
    self.tokens = Lexer.TPTPLexer.tokens
    self.lexer = Lexer.TPTPLexer()
    self.parser = yacc.yacc(module=self)
def new_lexer(self, _expression):
    self.lexer = Lexer(_expression)
    self.tokens_list = self.lexer.lex()
    self.current_token = self.tokens_list[0]
def genericError(structure):
    global val
    Lex.markActualError(val, structure)
    match("Error")
def illegal_operation(self, other=None):
    if not other:
        other = self
    return Lexer.RTError(self.pos_start, other.pos_end,
                         ' Engkau tak diperkenankan (Operasi illegal)',
                         self.context)
BEGIN
    BEGIN
        number := 2;
        a := number;
        b := 10 * a + 10 * number / 4;
        c := a - - b
    END;
    x := 11;
END.
"""
text = """PROGRAM Part10;
VAR
    number : INTEGER;
    a, b, c, x : INTEGER;
    y : REAL;
BEGIN
    BEGIN
        number := 2;
        a := number;
        b := 10 * a + 10 * number DIV 4;
        c := a - - b
    END;
    x := 11;
    y := 20 / 7 + 3.14;
END."""

lexer = Lexer.Lexer(text)
parser = Parser.Parser(lexer)
interpreter = Interpret.Interpreter(parser)
interpreter.interpret()
print(interpreter.GLOBAL_SCOPE)
""" if deep == 0: return [] result = [] for seq in rule.seqs: genrule = [] for sym in seq: if isinstance(sym, CFG.Term): gen = [sym.tok] else: if isinstance(sym, CFG.Sym_Term): gen = [sym.tok[3:]] # remove TK_ prefix else: if isinstance(sym, CFG.Non_Term_Ref): gen = list(be(grammar, grammar.get_rule(sym.name), deep-1)) genrule = combine(result, gen) result = result + genrule return set(result) if __name__ == "__main__": if len(sys.argv) < 4: print "Usage: " + sys.argv[0] + " grammar lex unrool-level" else: l = open(sys.argv[2], "r") g = open(sys.argv[1], "r") n = int(sys.argv[3]) lex = Lexer.parse( l.read() ) grammar = CFG.parse(lex, g.read()) r = be(grammar, grammar.rules[0], n) print r, ": ", len(r)
def __init__(self, version, time_limit, recursion_limit, imports):
    """
    Creates an empty console design using tkinter.
    :param version: the current version of the console
    :param time_limit: the time limit in searches
    :param recursion_limit: the recursion limit in the language
    :param imports: a list of possible libraries to import.
    """
    self.version = version
    self.time_limit = time_limit
    self.recursion_limit = recursion_limit
    # Build Lexer and Interpreter
    self.lexer = Lexer.build()
    self.interpreter = Interpreter.Interpreter(self.time_limit, imports)
    # For Queries
    self.asked_for_more, self.found_more = False, False
    self.requested_input, self.got_input = False, False
    # Get the width and the height of the Page
    width = GetSystemMetrics(0)
    height = GetSystemMetrics(1)
    # define TK
    self.root = Tk()
    self.root.geometry(f"1500x750+{width // 2 - 750}+{height // 2 - 750 // 2}")
    # Handle Arrow Press
    self.root.bind('<Down>', self.downPress)
    self.root.bind('<Up>', self.upPress)
    # define image for tkinter
    p1 = PhotoImage(file=sys.path[0] + '\\Images\\LCL.png')
    # Setting icon of master window
    self.root.iconphoto(False, p1)
    self.root.title("Local")
    # define the main frame
    self.mainFrame = Frame(self.root, highlightbackground="black",
                           highlightthickness=1)
    self.mainFrame.grid(row=0, column=0, padx=10, pady=10,
                        sticky=W + E + S + N)
    # configure the grid
    self.root.rowconfigure(0, weight=1)
    self.root.columnconfigure(0, weight=1)
    self.mainFrame.columnconfigure(1, weight=1)
    self.mainFrame.rowconfigure(1, weight=100)
    self.mainFrame.rowconfigure(2, weight=1)
    # set a variable for the query text and console text
    self.queryText = StringVar()
    self.consoleText = '\n' * 100 + f'Local Version {self.version} Loaded...\n\n'
    # A variable to save the generator in between the first and later results
    self.solutions = None
    self.pastQueries = ['', '']
    self.currentIndex = 1
    self.searching = False
    # Frame for console, and console definition
    self.textFrame = Frame(self.mainFrame, width=10000, height=27000)
    self.textFrame.grid(row=0, column=0, rowspan=2, columnspan=3,
                        sticky=N + S + W + E)
    self.console = Text(self.textFrame, height=1650, width=500, padx=4, pady=4,
                        wrap=WORD, font=("Courier", 12), borderwidth=4,
                        relief="groove")
    self.console.insert(END, self.consoleText)
    self.console.config(state=DISABLED)
    self.console.pack()
    self.scrolling = Scrollbar(self.textFrame)
    self.scrolling.pack(side=RIGHT, fill=Y)
    self.scrolling.config(command=self.console.yview)
    self.console.config(yscrollcommand=self.scrolling.set)
    self.console.see(END)
    # Query button and text
    self.directionsQuery = Label(self.mainFrame, text="Enter Query Here:",
                                 font=("Helvetica", 14, "bold"))
    self.directionsQuery.grid(row=3, column=0, padx=10, pady=(10, 15), sticky=W)
    self.query = Entry(self.mainFrame, font=("Courier", 14),
                       textvariable=self.queryText)
    self.query.grid(row=3, column=1, padx=10, pady=(10, 20), sticky=S + W + E)
    self.root.bind("<Return>", self.moreSolutions)
    self.sendQuery = Button(self.mainFrame, text='Send Query',
                            command=self.queryReceived, font=("Helvetica", 14))
    self.sendQuery.grid(row=3, column=2, padx=10, pady=(10, 20), sticky=W)
    # upload button
    self.upload = Button(self.mainFrame, text='Upload Rules',
                         command=self.UploadAction, height=4, width=24,
                         font=("Helvetica", 10, "bold"))
    self.upload.grid(row=0, column=0, padx=10, pady=10)
    # delete button
    self.delete = Button(self.mainFrame, text='Delete All Rules',
                         command=self.DeleteRules, height=4, width=24,
                         font=("Helvetica", 10, "bold"))
    self.delete.grid(row=0, column=1, padx=10, pady=10, sticky='W')
    # clear button
    self.clear = Button(self.mainFrame, text='Clear Console',
                        command=self.ClearConsole, height=4, width=24,
                        font=("Helvetica", 10, "bold"))
    self.clear.grid(row=0, column=2, padx=10, pady=10, sticky='E')
    self.sendMessage("", False)
import Lexer

input = 'let result = add(five, ten);'
lexer = Lexer.Lexer(input, '', 0, 0)
l = lexer.next_token()
m = lexer.next_token()
n = lexer.next_token()
print('Read the next token {} {}'.format(m.type.name, n.type.name))
def __init__(self, text):
    self.level = 0
    self.lexer = Lexer.Lexer(text)
    self.current_token = self.lexer.get_next_token()
def func_def(self):
    res = ParseResult()
    if not self.current_tok.matches(Lexer.TT_KEYWORD, 'Fungsi'):
        return res.failure(
            Lexer.InvalidSyntaxError(
                self.current_tok.pos_start, self.current_tok.pos_end,
                f"Mengharapkan seperti (senja kala itu) 'Fungsi'"))
    res.register_advancement()
    self.advance()
    if self.current_tok.type == Lexer.TT_IDENTIFIER:
        var_name_tok = self.current_tok
        res.register_advancement()
        self.advance()
        if self.current_tok.type != Lexer.TT_LPAREN:
            return res.failure(
                Lexer.InvalidSyntaxError(
                    self.current_tok.pos_start, self.current_tok.pos_end,
                    f"Mengharapkan seperti (senja kala itu) '('"))
    else:
        var_name_tok = None
        if self.current_tok.type != Lexer.TT_LPAREN:
            return res.failure(
                Lexer.InvalidSyntaxError(
                    self.current_tok.pos_start, self.current_tok.pos_end,
                    f"Mengharapkan seperti (senja kala itu) identifier or '('"))
    res.register_advancement()
    self.advance()
    arg_name_toks = []
    if self.current_tok.type == Lexer.TT_IDENTIFIER:
        arg_name_toks.append(self.current_tok)
        res.register_advancement()
        self.advance()
        while self.current_tok.type == Lexer.TT_COMMA:
            res.register_advancement()
            self.advance()
            if self.current_tok.type != Lexer.TT_IDENTIFIER:
                return res.failure(
                    Lexer.InvalidSyntaxError(
                        self.current_tok.pos_start, self.current_tok.pos_end,
                        f"Mengharapkan seperti (senja kala itu) identifier"))
            arg_name_toks.append(self.current_tok)
            res.register_advancement()
            self.advance()
        if self.current_tok.type != Lexer.TT_RPAREN:
            return res.failure(
                Lexer.InvalidSyntaxError(
                    self.current_tok.pos_start, self.current_tok.pos_end,
                    f"Mengharapkan seperti (senja kala itu) ',' or ')'"))
    else:
        if self.current_tok.type != Lexer.TT_RPAREN:
            return res.failure(
                Lexer.InvalidSyntaxError(
                    self.current_tok.pos_start, self.current_tok.pos_end,
                    f"Mengharapkan seperti (senja kala itu) identifier or ')'"))
    res.register_advancement()
    self.advance()
    if self.current_tok.type != Lexer.TT_ARROW:
        return res.failure(
            Lexer.InvalidSyntaxError(
                self.current_tok.pos_start, self.current_tok.pos_end,
                f"Mengharapkan seperti (senja kala itu) '->'"))
    res.register_advancement()
    self.advance()
    node_to_return = res.register(self.expr())
    if res.error:
        return res
    return res.success(
        Lexer.FuncDefNode(var_name_tok, arg_name_toks, node_to_return))
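# The atom/for_expr/func_def methods above thread a ParseResult through every
# step. That class itself is not shown in this section; the sketch below is an
# assumed minimal version of the protocol they rely on (register,
# register_advancement, success, failure) and may differ from the real one.
class ParseResult:
    def __init__(self):
        self.error = None
        self.node = None
        self.advance_count = 0

    def register_advancement(self):
        # The caller consumed one token itself.
        self.advance_count += 1

    def register(self, res):
        # Absorb a child result, propagating any error upwards.
        self.advance_count += res.advance_count
        if res.error:
            self.error = res.error
        return res.node

    def success(self, node):
        self.node = node
        return self

    def failure(self, error):
        self.error = error
        return self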