def update_co_occurrence():
    if len(sys.argv) != 3:
        print('No Co-Occurrence input')
    else:
        tokens_desired = Lexer(sys.argv[2])
        token_stream = Lexer(sys.argv[1])
        co_occurrence(tokens_desired, token_stream)
def testParseEdgeList(self):
    # edgeList -> ID | ID '->' edgeList
    # Leaving off the first required ID is an error.
    p = Parser(Lexer(''))
    g = Graph()
    self.assertRaises(SyntaxError, p._parseEdgeList, g)

    # Using a non-existing label should create a new vertex.
    g = Graph()
    p = Parser(Lexer('A'))
    p._parseEdgeList(g)
    self.assertEqual(g._vertices['v0'].label, 'A')

    # Using an existing label should not create a new vertex.
    g = Graph()
    g.addVertex(Vertex('u0', 'A'))
    p = Parser(Lexer('A'))
    p._parseEdgeList(g)
    self.assertEqual(g._vertices['u0'].label, 'A')

    # Leaving off the second ID is an error.
    p = Parser(Lexer('A ->'))
    g = Graph()
    self.assertRaises(SyntaxError, p._parseEdgeList, g)

    # A simple transition should create two vertices and connect them.
    g = Graph()
    p = Parser(Lexer('A -> B'))
    p._parseEdgeList(g)
    self.assertEqual(len(g._vertices), 2)
    self.assertEqual(g._vertices['v0'].label, 'A')
    self.assertEqual(g._vertices['v1'].label, 'B')
    self.assertEqual(g._edges['v0'][0].label, 'B')
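# A hypothetical recursive-descent sketch of the edgeList rule above, written to
# be consistent with the tests; the project's real Parser._parseEdgeList may
# differ. It assumes the Parser keeps a lookahead token and a _match() helper,
# and that Graph offers findVertexWithLabel()/addVertex()/addEdge()-style
# methods. All of those names are illustrative, not taken from the source.
def _parse_edge_list_sketch(self, graph):
    if self._lookahead.type != TokenTypes.ID:
        raise SyntaxError('expected an ID to start an edge list')
    label = self._lookahead.text
    vertex = graph.findVertexWithLabel(label)
    if vertex is None:
        # Unknown labels create fresh vertices named v0, v1, ...
        vertex = graph.addVertex(Vertex('v%d' % len(graph._vertices), label))
    self._match(TokenTypes.ID)
    if self._lookahead.type == TokenTypes.ARROW:
        self._match(TokenTypes.ARROW)
        # The rule is right-recursive, so the remainder of the list is parsed
        # recursively and the head vertex is connected to its result.
        target = self._parse_edge_list_sketch(graph)
        graph.addEdge(vertex, target)
    return vertex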
def testNextToken(self):
    # Test all the acceptable tokens.
    lex = Lexer('; , { } -> ==> = 123 configuration productions abc123')
    self.assertEqual(lex.nextToken().type, TokenTypes.SEMICOLON)
    self.assertEqual(lex.nextToken().type, TokenTypes.COMMA)
    self.assertEqual(lex.nextToken().type, TokenTypes.LBRACE)
    self.assertEqual(lex.nextToken().type, TokenTypes.RBRACE)
    self.assertEqual(lex.nextToken().type, TokenTypes.ARROW)
    self.assertEqual(lex.nextToken().type, TokenTypes.DOUBLEARROW)
    self.assertEqual(lex.nextToken().type, TokenTypes.EQUALS)
    self.assertEqual(lex.nextToken().type, TokenTypes.NUMBER)
    self.assertEqual(lex.nextToken().type, TokenTypes.CONFIGURATION)
    self.assertEqual(lex.nextToken().type, TokenTypes.PRODUCTIONS)
    self.assertEqual(lex.nextToken().type, TokenTypes.ID)
    self.assertEqual(lex.nextToken().type, TokenTypes.EOF)

    # Test comments
    lex = Lexer("""
        #comment
        abc #comment
        def # comment
        """)
    self.assertEqual(lex.nextToken().type, TokenTypes.ID)
    self.assertEqual(lex.nextToken().type, TokenTypes.ID)
    self.assertEqual(lex.nextToken().type, TokenTypes.EOF)
def state_act(self, stat, buff, next_state):
    if stat in (4, 5, 6, 9, 41):
        buff.add_buff(self.curr_symbs[0])
    #print("buff ", buff.get_buff(), " next state ", next_state)
    if buff.get_buff() == "-" and next_state == 4:
        list_d = ["+", "*", "-", "/", ",", "(", "[", ">", "<", ":=", "-=", "+=", "*=", "/="]
        #print("last lexem", self.Lexems_arr[-1].get_orig())
        # If the previous lexeme is not an operator or opening bracket, the "-"
        # stands alone as a minus sign rather than starting a negative number.
        if self.Lexems_arr[-1].get_orig() not in list_d:
            self.Lexems_arr.append(Lexer(self.curr_p - len(buff.get_buff()) + 1,
                                         "минус",  # "minus"
                                         buff.get_buff(),
                                         "имя"))   # "name"
            buff.clear_buff()
    if next_state == 1 or stat == 3:
        if stat == 4:
            type_s = 'число'          # "number"
        elif stat == 5:
            type_s = 'строка'         # "string"
        elif stat == 6:
            type_s = 'идентификатор'  # "identifier"
        elif stat == 9:
            type_s = 'знак'           # "sign"
        else:
            type_s = stat
        new_lex = buff.get_buff()
        if new_lex != "":
            self.Lexems_arr.append(Lexer(self.curr_p - len(new_lex) + 1, type_s, new_lex, "имя"))  # "name"
            buff.clear_buff()
def testParseConfig(self):
    # config -> ID '=' (ID | NUMBER)
    # Missing first ID raises an error.
    l = Lexer('= 123;')
    p = Parser(l)
    self.assertRaises(SyntaxError, p._parseConfig)

    # Missing '=' raises an error.
    l = Lexer('A 123;')
    p = Parser(l)
    self.assertRaises(SyntaxError, p._parseConfig)

    # Missing right-hand side raises an error.
    l = Lexer('A = ;')
    p = Parser(l)
    self.assertRaises(SyntaxError, p._parseConfig)

    # Simple ID=ID.
    l = Lexer('A = B')
    p = Parser(l)
    p._parseConfig()
    self.assertIn('A', p.config)
    self.assertEqual(p.config['A'], 'B')

    # Simple ID=NUMBER.
    l = Lexer('C = 123')
    p = Parser(l)
    p._parseConfig()
    self.assertIn('C', p.config)
    self.assertEqual(p.config['C'], '123')
def testParseConfiguration(self):
    # configuration -> 'configuration' '{' config_list '}'
    # Missing 'configuration' raises an error.
    l = Lexer('{ A = B; }')
    p = Parser(l)
    self.assertRaises(SyntaxError, p._parseConfiguration)

    # Leaving off '{' raises an error.
    l = Lexer('configuration A = B; }')
    p = Parser(l)
    self.assertRaises(SyntaxError, p._parseConfiguration)

    # Leaving off '}' raises an error.
    l = Lexer('configuration { A = B;')
    p = Parser(l)
    self.assertRaises(SyntaxError, p._parseConfiguration)

    # Leaving off the config list entirely is valid.
    l = Lexer('configuration { }')
    p = Parser(l)
    p._parseConfiguration()
    self.assertEqual(len(p.config), 0)

    # Simple valid configuration.
    l = Lexer('configuration { A = B; }')
    p = Parser(l)
    p._parseConfiguration()
    self.assertEqual(p.config['A'], 'B')
def run_ide():
    print(": Welcome to Dymond V0.0.0!")
    print(": Play around a little in this nice IDE. Type simple statements in and we will process them :)")
    print(": Type exit() to exit")
    user_in = ""
    total_user_in = ""
    while user_in != "exit()":
        user_in = input(">>> ")
        if user_in == "exit()":
            break
        try:
            test = total_user_in + user_in

            # First pass: parse and run semantic analysis over everything typed so far.
            lexer = Lexer(test)
            se_parser = Parser(lexer, "ide")
            semantic_analyzer = SemanticAnalyzer(se_parser)
            semantic_analyzer.analyze()

            # Second pass: re-lex and interpret with the analyzed scope.
            lexer = Lexer(test)
            in_parser = Parser(lexer, "ide")
            semantic_analyzer.current_scope.reset_multi_scope_vars()
            interpreter = Interpreter(in_parser, semantic_analyzer.current_scope)
            interpreter.interpret()

            # Only accumulate statements that do not perform I/O, so they are
            # not re-executed on the next iteration.
            if "print(" not in user_in and "input(" not in user_in:
                total_user_in += "\n" + user_in
        except Exception as ex:
            print(ex)
def testParseStartGraph(self):
    # start_graph -> graph ';'
    # Forgetting the ';' is an error.
    p = Parser(Lexer('A->B'))
    self.assertRaises(SyntaxError, p._parseStartGraph)

    # Valid test.
    p = Parser(Lexer('A->B;'))
    p._parseStartGraph()
    self.assertIsNotNone(p.startGraph)
def run_file(file_name):
    source = open(file_name, "r").read()

    lexer = Lexer(source)
    se_parser = Parser(lexer, "input")
    semantic_analyzer = SemanticAnalyzer(se_parser)
    semantic_analyzer.analyze()

    lexer = Lexer(source)
    in_parser = Parser(lexer, "input")
    semantic_analyzer.current_scope.reset_multi_scope_vars()
    interpreter = Interpreter(in_parser, semantic_analyzer.current_scope)
    result = interpreter.interpret()
def testParseProduction(self):
    # production -> graph '==>' graph
    # Forgetting the double arrow is an error.
    p = Parser(Lexer('A->B C->D'))
    self.assertRaises(SyntaxError, p._parseProduction)

    # Simple test.
    p = Parser(Lexer('A->B ==> C->D'))
    p._parseProduction()
    self.assertEqual(len(p.productions), 1)
    self.assertEqual(len(p.productions[0]._lhs._vertices), 2)
    self.assertEqual(len(p.productions[0]._rhs._vertices), 2)
def testParseGraph(self):
    # graph -> edge_list | edge_list ',' graph
    # Single edgeList.
    p = Parser(Lexer('A->B'))
    g = p._parseGraph()
    self.assertEqual(len(g._vertices), 2)

    # Two edgeLists.
    p = Parser(Lexer('A->B,A->C'))
    g = p._parseGraph()
    self.assertEqual(len(g._vertices), 3)
    # A will be v0, B will be v1, and C will be v2.
    self.assertEqual(g._edges['v0'][0].id, 'v1')  # A points to B
    self.assertEqual(g._edges['v0'][1].id, 'v2')  # A points to C
def __init__(self, string: str = None, lexer: Lexer = None):
    # A raw string takes precedence; otherwise an existing Lexer may be reused.
    if string is not None:
        self.__lexer = Lexer(string)
    elif lexer is not None:
        self.__lexer = lexer
    else:
        raise ValueError("String or Lexer object should be passed")
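# A minimal usage sketch for the constructor above ("Parser" is a hypothetical
# name for the class that owns it): either a raw string or an existing Lexer
# may be supplied, but at least one is required.
p1 = Parser(string="1 + 2")        # builds its own Lexer internally
p2 = Parser(lexer=Lexer("1 + 2"))  # reuses a caller-supplied Lexer
try:
    Parser()                       # neither argument -> ValueError
except ValueError as err:
    print(err)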
def test_math_symbols():
    lexer = Lexer('+ - * /')
    assert lexer.next_token() == Token(TokenTypes.ADD)
    assert lexer.next_token() == Token(TokenTypes.SUB)
    assert lexer.next_token() == Token(TokenTypes.MUL)
    assert lexer.next_token() == Token(TokenTypes.DIV)
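# For the equality comparisons above (and in test_next_token below) to work,
# Token needs value-based equality. A minimal sketch of such a class, assuming
# a (type, value) pair; the project's real Token may differ.
class TokenSketch:
    def __init__(self, type, value=None):
        self.type = type
        self.value = value

    def __eq__(self, other):
        # Two tokens are equal when both their type and value match.
        return (isinstance(other, TokenSketch)
                and self.type == other.type
                and self.value == other.value)

    def __repr__(self):
        return 'Token({!r}, {!r})'.format(self.type, self.value)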
def test_input(text: str):
    lexer = Lexer()
    tokens = lexer.tokenize(text)
    print('''
    The lexer input was:
    {}
    The Tokenized output from it was:
    {}
    '''.format(text, tokens))

    parser = Parser(tokens)
    node_tree = parser.parse()
    print('''
    The Parser then created this Node Tree:
    {}
    '''.format(node_tree))

    runner = Runner(node_tree, infoLevel)
    print('''
    The Runner ran the node tree, and came up with this result:
    {}
    '''.format(runner.run()))
def testConsumeLineEnding(self):
    lex = Lexer("\nhello")
    lex._consume()  # consume the newline
    self.assertEqual(lex.lineNum, 2)  # line number has increased
    self.assertEqual(lex.charNum, 1)  # charNum has been reset
    self.assertEqual(lex.p, 1)        # p has advanced
    self.assertEqual(lex.c, 'h')      # c is the next character
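# A minimal sketch of the position bookkeeping the test above implies
# (hypothetical; the real Lexer._consume may differ). On a newline, lineNum is
# bumped and charNum resets; otherwise charNum advances. p always moves forward
# and c is refreshed to the character at the new position, or None at EOF.
# It assumes the Lexer stores its source text in self.input.
def _consume_sketch(self):
    if self.c == '\n':
        self.lineNum += 1
        self.charNum = 1
    else:
        self.charNum += 1
    self.p += 1
    self.c = self.input[self.p] if self.p < len(self.input) else None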
def buildIncludeStatement(self) -> None:
    self.delmov()
    # verify syntax
    self.checkToks([T_STRING, T_INCLUDER])
    path = self.current_token.value

    if path not in self.includeCache:
        # \see loadRaw
        rawdata = self.loadRaw(path)
        # create tokens from the new file, and insert them in this
        # PreProcessor's token list
        lex = Lexer(path, rawdata)
        tokens = lex.getTokens()
        self.includeCache[path] = tokens
        self.includeMulti[path] = False
    elif self.includeMulti[path]:
        tokens = self.includeCache[path]
    else:
        tokens = []

    self.delmov()
    # emplace the new tokens ahead of the current position
    #self.tokens[self.tkidx:self.tkidx] = tokens[:-1]
    self.insertTokens(self.tkidx, self.tkidx, tokens[:-1])
    self.update()
def derivate():
    global derivation, lexer, grammar, file_path, LAMBDA
    lexer = lx.Lexer(file_path)
    lexer.readFile()
    tk = lexer.nextToken()
    prefix = []
    while len(derivation):
        print("-------------")
        a = derivation[0]
        print(">>>>", derivation)
        print("<<<TK: ", tk.parse())
        if a in grammar.non_terminals:
            # Expand the non-terminal using the prediction for the current token.
            new_prefix = getNewPrefix(a, tk.token_type)
            while len(new_prefix) and new_prefix[0] == LAMBDA:
                new_prefix = new_prefix[1:]
            derivation = new_prefix + derivation[1:]
            prefix = new_prefix
            print('')
        elif a == tk.token_type:
            # Match the terminal and advance to the next token.
            tk = lexer.nextToken()
            derivation = derivation[1:]
        else:
            print("-----", derivation)
            print("-----TK: ", tk.parse())
            return tk, prefix  # an expected symbol was not satisfied
    return tk, []  # the derivation finished correctly
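# A small hand-worked trace of the loop above, using an assumed grammar for
# illustration only (the real grammar comes from the `grammar` global). With
# derivation = ['E'] and the token stream "id + id":
#
#   E              non-terminal -> expand via getNewPrefix('E', 'id'): ['id', 'Ep']
#   id             matches the current token -> advance lexer: ['Ep']
#   Ep             non-terminal -> expand: ['+', 'id', 'Ep']
#   + , id         each matches and is consumed: ['Ep']
#   Ep             expands to LAMBDA, which is stripped -> derivation empty
#
# and the function returns (tk, []) to signal a successful derivation.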
def L_AND_S(rules, text):
    try:
        lex = Lexer(text)
        lex.run(show_states=False, show_spaces=False)
        #lex.show()
        table = Run(rules, lr_letter="LR", ft_letter="FR", word="", show_all=False)
        runner = Runner_cl(1, table, "$")
        for i in lex.list:
            if i[1] != "new_line" and i[1] != "Comment":
                runner.Run(i[1], i[2])
        #print(runner.pos_stack)
        is_end = runner.Run("$", "end_end")
        if not is_end:
            raise Exception("Missed the end of input", runner.current_pos, runner.pos_stack)
    except Exception as e:
        print("Does not match")
        print(e)
    else:
        print("Matches")
def __init__(self, path):
    lex = Lexer(path)
    lex.tokenize()
    self.TOKENS = lex.tokenList
    self.INDEX = 0
    self.TREE = Tree()
def main():
    memory = defaultdict(int)
    states = ['PREFIX', 'INFIX', 'POSTFIX']
    state = states[1]
    while True:
        try:
            text = input(state + ' --> ')
        except EOFError:
            break
        if not text:
            continue
        command = text.strip().upper()
        if command == 'EXIT':
            break
        if command in states:
            state = command
            continue
        lexer = Lexer(text)
        interpreter = Interpreter(lexer, state, memory)
        result = interpreter.evaluate()
        print(result)
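# A hypothetical session with the REPL above (the numeric results are assumed,
# not taken from the real Interpreter): typing a mode name switches notation,
# and EXIT (or EOF) quits.
#
#   INFIX --> 1 + 2
#   3
#   INFIX --> PREFIX
#   PREFIX --> + 1 2
#   3
#   PREFIX --> exit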
def __init__(self, from_file=False):
    self.__file = from_file
    self.lexer = Lexer()
    self.parser = yacc.yacc(module=self, optimize=True, debug=False, write_tables=False)
def main():
    #text = open('src/test/pascal_13.pas', 'r').read()     # Error: Symbol(identifier) not found 'y'
    #text = open('src/test/pascal_01_13.pas', 'r').read()  # valid program
    #text = open('src/test/pascal_02_13.pas', 'r').read()  # Duplicate id x found
    #text = open('src/test/pascal_03_13.pas', 'r').read()  # Error: Symbol(identifier) not found 'z'
    #text = open('src/test/pascal_04_13.pas', 'r').read()  # Error: Duplicate identifier 'x' found
    #text = open('src/test/factorial.pas', 'r').read()     # need to parse IF statements
    #text = open('src/test/pascal_01_14.pas', 'r').read()
    #text = open('src/test/nested_scope_01_14.pas', 'r').read()
    #text = open('src/test/nested_scope_02_14.pas', 'r').read()
    #text = open('src/test/nested_scope_03_14.pas', 'r').read()
    #text = open('src/test/test_14/nested_scope_04_14.pas', 'r').read()
    text = open('src/test/test_14/nested_function_01_14.pas', 'r').read()
    #text = open('src/test/test_14/barebones.pas', 'r').read()

    lexer = Lexer(text)
    parser = Parser(lexer)
    tree = parser.parse()

    semantic_analyzer = SemanticAnalyzer()
    try:
        semantic_analyzer.visit(tree)
    except SystemError as sys_error:
        print(sys_error)

    for token in lexer.tokens:
        print(token)
def parse(self, source_text):
    # Build the lexer and yacc parser lazily on the first call.
    if self.lexer is None:
        self.lexer = Lexer()
        self.parser = ply.yacc.yacc(module=self, debug=False, **self.yacc_args)
    return self.parser.parse(source_text, lexer=self.lexer)
def compile(self):
    try:
        self.tokens["state"] = NORMAL
        tokens = Lexer(self.code.get("0.0", END)).tokenize()
        self.tokens.delete("0.0", END)
        translated = ""
        scope = 0
        for token in tokens:
            if token.typ in ["WHILE", "GOES", "FUNC ASSIGN", "IF", "ELSE"]:
                scope += 1
            elif token.typ == "LOCAL END":
                scope -= 1
            if token.typ in FACTORS + ["ID"]:
                translated += " [" + str("PREV" if token.val is None else token.val) + "]"
            elif token.typ in ENDING:
                translated += "\n" + " " * scope
            else:
                translated += " " + token.typ
            # print(token.typ, scope)
        translated = re.sub(r"\n\s+\n", "\n", translated.replace("\n ", "\n")).strip()
        self.tokens.insert(END, translated)
        self.tokens["state"] = DISABLED
    except Exception as e:
        self.tokens["state"] = NORMAL
        self.tokens.delete("0.0", END)
        self.tokens.insert(END, str(e))
        self.tokens["state"] = DISABLED
def test_next_token():
    lexer = Lexer('1+ 3')
    assert lexer.next_token() == Token(TokenTypes.INT, 1)
    assert lexer.next_token() == Token(TokenTypes.ADD)
    assert lexer.next_token() == Token(TokenTypes.INT, 3)
    assert lexer.next_token() == Token(TokenTypes.EOF)
def rev_input(self, text):
    self.lexer = Lexer(text)
    self.current_token = self.lexer.get_next_token()
    rev_text = ''
    while self.current_token.type is not TokenType.EOF:
        word = self.current_token.value
        # print("Word is " + str(word))
        if self.current_token.type is TokenType.STRING:
            # print('It is a string')
            self.eat(TokenType.STRING)
            # print("Current token is " + str(self.current_token.value))
            if word in Library.RESERVED_METHOD_WORDS:
                # print("Reserved word")
                try:
                    self.eat(TokenType.LPAREN)
                    word += "(" + self.current_token.value + ")"
                    self.eat(TokenType.STRING)
                    self.eat(TokenType.RPAREN)
                except Exception:
                    pass
            # print("Appending " + str(word))
            rev_text = ' ' + str(word) + rev_text
        else:
            # print("Appending " + str(word))
            rev_text = ' ' + str(word) + rev_text
            self.current_token = self.lexer.get_next_token()
    # print(rev_text)
    return rev_text
def nfaSyntactic():
    nfaForm = forms.NFASyn(request.form)
    nfas = set([])
    dfaER = None
    msg = None
    if request.method == 'POST':
        regularExpressions = nfaForm.regularExpressions.data
        regularExpressions = regularExpressions.split('\r\n')
        print(regularExpressions)
        try:
            for re in regularExpressions:
                auxRE = re.split(' ')
                lex = Lexer(syntacticDFA, auxRE[0])
                syn = SyntacticNFA(lex)
                nfaAux = syn.start()
                if nfaAux is None:
                    print('Error')
                    msg = 2
                    break
                nfaAux.setToken(int(auxRE[1]))
                nfas.add(nfaAux)
            if msg != 2:
                nfaER = NFA.specialJoin(nfas)
                dfaER = nfaER.convertToDFA()
                dfaDictionary[dfaER.getId()] = dfaER
                msg = 1
        except:
            msg = 2
        NFA.resetId()
        DFA.resetId()
    return render_template('analysis/nfa.html', nfaF=nfaForm, dfaER=dfaER, msg=msg)
def __init__(self, filename):
    # init default vars
    self.lexer = Lexer(filename)
    self.parser = Parser(self.lexer, PL.default_vars, PL.default_subs, PL.default_functions)
    self.curIndent = 1
    self._internal = {"stackPointer": 0, "fvCounter": 0, "brtUsed": False}
    self.free_vars = ["r9", "r8", "r7", "r6", "r5"]
    self.output = []

    self.compile(self.parser.cur_parent)
    self.optimize()

    print(("#options=-lbrt\n\n" if self._internal["brtUsed"] else "") + "#include <libstd.inc>")
    if len(self.parser.var_table.children) > len(PL.default_vars):
        print("\nstatic:")
        print("\n".join([
            "\t%s %s" % (x.value, "int" if Token.T_INT in x else "string[%s]" % (x.type >> 32))
            for x in self.parser.var_table.children
            if x.value not in self.default_vars
        ]))
    print("code:")
    if cfg["dummy_output"]:
        print("\tcall createwin")
    print("\n".join([" ".join(x) for x in self.output + [["\tlabel", "__exit"]] if x]))
def run_imitation(text):
    print('Making Lexer')
    lexer = Lexer(text)
    print('Making Parser')
    parser = Parser(lexer)
    print('Making Imitation Compiler')
    imitation_compiler = Imitation_Compiler(parser)
    make_imitation(imitation_compiler)
def run_facility_domain(text):
    print('Making Lexer')
    lexer = Lexer(text)
    print('Making Parser')
    parser = Parser(lexer)
    print('Making Facility Domain Compiler')
    facility_compiler = Facility_Domain_Compiler(parser)
    make_facility_domain(facility_compiler)