def re2ast(s):
    """Parse the regular expression string ``s`` into an abstract syntax tree.

    A fresh lexer and parser are built on every call. The parser is expected
    to return a mapping with an 'ast' entry and a 'dig' entry, the latter
    holding 'nl' and 'el' (presumably the node list and edge list of a
    drawable graph of the tree -- confirm against the grammar actions).

    Returns the 3-tuple ``(ast, nl, el)``.
    """
    # NOTE: lex()/yacc() inspect the caller's namespace, so locals here must
    # not use PLY-reserved names (tokens, start, precedence, t_*, p_*).
    re_lexer = lex()
    re_parser = yacc()
    parsed = re_parser.parse(s, lexer=re_lexer)

    tree = parsed['ast']
    graph_nodes = parsed['dig']['nl']
    graph_edges = parsed['dig']['el']
    return (tree, graph_nodes, graph_edges)
def re2nfa(s, stno=0):
    """Build an NFA that is language-equivalent to the regular expression ``s``.

    ``s`` is the RE given as a string. ``stno`` is accepted for interface
    compatibility (default 0) but is not consulted by this function: the
    state-number generator is always reset through ResetStNum() before
    parsing.

    Returns whatever the RE parser produces for ``s`` (the parsed NFA).
    """
    # Start state numbering afresh; the grammar actions request new state
    # names as they need them while the NFA is being assembled.
    ResetStNum()

    # Build the lexer first, then the parser, then hand the lexer to the
    # parser.  (Keep local names clear of PLY-reserved ones: tokens, start,
    # precedence, t_*, p_* -- lex()/yacc() reflect on the caller's namespace.)
    nfa_lexer = lex()
    nfa_parser = yacc()

    # For debugging one could instead return dotObj_nfa(<result>, nfaname).
    return nfa_parser.parse(s, lexer=nfa_lexer)
def md2mc(src="None", fname="None"):
    """Convert a markdown machine description into a machine (a dict).

    The markdown can be supplied in three ways, shown via pseudo-examples:

    1) md2mc()
       With ``src`` left at its default "None", you are prompted for a file
       name and the markdown is read from that file.

    2) md2mc(src="<any string S other than 'File'>")
       S itself is taken as the markdown text and parsed. This is bound to
       be a multi-line string. There is a Jupyter bug: if the parser (or any
       process) consuming a multi-line input throws an exception, you get a
       strange error message:
           ERROR:root:An unexpected error occurred while tokenizing input
       Please ignore it and spend your time fixing the markdown input
       instead. Details: https://github.com/ipython/ipython/issues/6864

    3) md2mc(src="File", fname="<your file name path>")
       "File" is not legitimate markdown, so it is used as a marker meaning
       that ``fname`` is the path of a file to open and read.

    Returns:
        A dict describing the machine; the keys depend on the machine type
        reported by the parser:
          DFA: "Q", "Sigma", "Delta", "q0", "F"
          NFA: "Q", "Sigma", "Delta", "Q0", "F"
          PDA: "Q", "Sigma", "Gamma", "Delta", "q0", "z0", "F"
          TM : "Q", "Sigma", "Gamma", "Delta", "q0", "B", "F"

    Raises:
        AssertionError: if a non-NFA machine does not have exactly one
            initial state, or if the machine type is not one of
            DFA / NFA / PDA / TM.
        OSError: if the markdown file cannot be opened.
    """
    # Read the markdown text; files are closed deterministically via `with`.
    if src == "None":
        with open(input('File name ='), 'r') as md_file:
            mdstr = md_file.read()
    elif src == "File":
        with open(fname) as md_file:
            mdstr = md_file.read()
    else:
        mdstr = src

    # Build the parser, then a lexer custom-made for markdown, and parse.
    # NOTE: lex()/yacc() inspect the caller's namespace, so locals in this
    # function must not use PLY-reserved names (tokens, start, precedence,
    # t_*, p_*).
    myparser = yacc()
    mdlexer = lex()
    rslt = myparser.parse(mdstr, lexer=mdlexer)

    # The parse result is (machine type, 9-tuple of machine components).
    # Dirn is part of the tuple but is not needed for any returned structure.
    (machine_type,
     (From_s, To_s, G_in, G_out, Q0, F, Sigma, Dirn, Delta)) = rslt

    # For now the structs are made right here; later, call the right maker.
    # Every machine type except the NFA has exactly one initial state.
    if machine_type != 'NFA':
        assert len(Q0) == 1, "expected exactly one initial state"
        q0 = list(Q0)[0]

    if machine_type == 'DFA':
        return {"Q": From_s | To_s,
                "Sigma": Sigma,
                "Delta": Delta,
                "q0": q0,
                "F": F}

    if machine_type == 'NFA':
        return {"Q": From_s | To_s,
                "Sigma": Sigma - {''},   # the empty string is not a symbol
                "Delta": Delta,
                "Q0": Q0,
                "F": F}

    if machine_type == 'PDA':
        # Each element of G_out is an iterable of stack symbols; collect
        # every symbol that can be pushed into one set.
        G_out_set = set().union(*map(set, G_out))
        return {"Q": From_s | To_s,
                "Sigma": Sigma - {''},
                "Gamma": (G_in | G_out_set | {'#'} | Sigma) - {''},
                "Delta": Delta,
                "q0": q0,
                "z0": '#',   # Hash-mark is the new "z0" for a PDA!
                "F": set(F)}

    assert machine_type == 'TM', "unknown machine type"
    return {"Q": From_s | To_s,
            "Sigma": Sigma - {'', '@', '.'},
            "Gamma": (G_in | G_out | {'.'} | Sigma) - {'', '@'},
            "Delta": Delta,
            "q0": q0,
            "B": '.',   # '.' serves as the blank symbol
            "F": F}
#-- Newline rule: keep the lexer's line counter in step with the input.
#-- (The string below is the token's regular expression, not documentation.)
def t_newline(t):
    r'\n+'
    newline_count = t.value.count("\n")
    t.lexer.lineno += newline_count


#-- Lexer's error handler: report the offending character, then skip it.
def t_error(t):
    offending = t.value[0]
    print("Illegal character '%s'" % offending)
    t.lexer.skip(1)


#-- NOW BUILD THE LEXER --
lexer = lex()

#--------------------------------------------------------------------
#--- Here is the parser set-up in terms of binary operator attributes
#--------------------------------------------------------------------

#--- Module-level counter used for state-name generation in the parser
NxtStateNum = 0


def NxtStateStr():
    """Advance the global state counter and return the name "St<counter>"."""
    global NxtStateNum
    NxtStateNum = NxtStateNum + 1
    suffix = str(NxtStateNum)
    return "St" + suffix