Beispiel #1
0
def re2ast(s):
    """Parse the regular expression string `s` and return its syntax tree.

    A fresh lexer and a fresh parser are built on every call, and `s` is
    parsed with them. The parse result is indexed like a dict and the
    function returns the 3-tuple

        (result['ast'], result['dig']['nl'], result['dig']['el'])

    NOTE(review): 'nl' and 'el' are presumably the node list and edge list
    of a drawable graph of the tree -- confirm against the grammar actions.
    """
    # Build the lexer first, then the parser (same order as always).
    re_lexer = lex()
    re_parser = yacc()
    parse_result = re_parser.parse(s, lexer=re_lexer)

    tree = parse_result['ast']
    graph = parse_result['dig']
    return (tree, graph['nl'], graph['el'])
Beispiel #2
0
def re2nfa(s, stno=0):
    """Parse the regular expression string `s` and return the parser's result.

    Per the function's purpose, that result is an NFA that is language
    equivalent to the RE; the NFA itself is built by the grammar actions,
    which are not part of this function.

    Parameters:
      s    -- string holding the regular expression.
      stno -- optional state number (default 0). NOTE(review): this argument
              is accepted but never read in the body; ResetStNum() is called
              without it -- confirm whether it should be passed along.
    """
    # Restart state numbering before parsing. State names are then handed
    # out by NxtStateStr() whenever the parser needs one.
    # NOTE(review): the original comment says NxtStateStr is defined in
    # StateNameSanitizers.py -- verify.
    ResetStNum()

    #-- Build the lexer, then the parser, then feed the lexer to the parser
    nfa_lexer = lex()
    nfa_parser = yacc()

    #-- for debugging : return dotObj_nfa(<parse result>, nfaname)
    return nfa_parser.parse(s, lexer=nfa_lexer)
Beispiel #3
0
def md2mc(src="None", fname="None"):
    """Convert a markdown machine description into a machine (mc) dict.

       The markdown can be supplied in three ways, shown via
       pseudo-examples:

       1) md2mc()

          No source is given, so you are prompted for a file name and the
          markdown is read from that file.

       2) md2mc(src="<any string S other than 'None' or 'File'>")

          S itself is taken as the markdown text and parsed. This is
          bound to be a multi-line string.

          There is a Jupyter bug: if the parser (or any process)
          consuming a multi-line input throws an exception, you will get
          a strange error message:
          ERROR:root:An unexpected error occurred while tokenizing input
          Please ignore it and spend your time fixing the markdown input
          instead. For details see:
          https://github.com/ipython/ipython/issues/6864

       3) md2mc(src="File", fname="<your file name path>")

          "File" is not legal markdown, so it is used as a marker:
          when src == "File", fname is taken to be the path of a file
          that is opened and read.

       Returns a dict describing the machine. The keys depend on the
       machine type reported by the parser:

         DFA : Q, Sigma, Delta, q0, F
         NFA : Q, Sigma, Delta, Q0, F
         PDA : Q, Sigma, Gamma, Delta, q0, z0 ('#'), F
         TM  : Q, Sigma, Gamma, Delta, q0, B ('.'), F

       Raises AssertionError if a non-NFA machine does not have exactly
       one initial state, or if the machine type is not one of the four
       above.
    """
    # Obtain the markdown text. Files are read inside `with` blocks so the
    # handles are closed promptly instead of being left to the garbage
    # collector.
    if src == "None":
        with open(input('File name ='), 'r') as md_file:
            mdstr = md_file.read()
    elif src == "File":
        with open(fname) as md_file:
            mdstr = md_file.read()
    else:
        mdstr = src

    myparser = yacc()
    mdlexer = lex()   # Build lexer custom-made for markdown
    rslt = myparser.parse(mdstr, lexer=mdlexer)  # feed into parse fn

    #--
    # The parse result is (machine_type, 9-tuple of machine components).
    #--
    (machine_type,
     (From_s, To_s,
      G_in,   G_out,
      Q0,     F,
      Sigma,  Dirn, Delta)) = rslt

    #--
    #-- for now, make struct right here; later call right maker
    #--
    states = From_s | To_s

    # Every machine type except the NFA has a single initial state.
    if machine_type != 'NFA':
        assert len(Q0) == 1, "expected exactly one initial state"
        q0 = list(Q0)[0]

    if machine_type == 'DFA':
        return {"Q"    : states,
                "Sigma": Sigma,
                "Delta": Delta,
                "q0"   : q0,
                "F"    : F}

    if machine_type == 'NFA':
        # The empty string is not a member of the input alphabet.
        return {"Q"    : states,
                "Sigma": Sigma - {''},
                "Delta": Delta,
                "Q0"   : Q0,
                "F"    : F}

    if machine_type == 'PDA':
        # Each entry of G_out is turned into a set of its elements, and
        # all of those sets are merged into one.
        G_out_set = set().union(*map(set, G_out))
        return {"Q"    : states,
                "Sigma": Sigma - {''},
                "Gamma": (G_in | G_out_set | {'#'} | Sigma) - {''},
                "Delta": Delta,
                "q0"   : q0,
                "z0"   : '#',   # Hash-mark is the new "z0" for a PDA!
                "F"    : set(F)}

    assert machine_type == 'TM', "unknown machine type"
    return {"Q"    : states,
            "Sigma": Sigma - {'', '@', '.'},
            "Gamma": (G_in | G_out | {'.'} | Sigma) - {'', '@'},
            "Delta": Delta,
            "q0"   : q0,
            "B"    : '.',
            "F"    : F}
Beispiel #4
0

#-- Newline rule: keep the lexer's line counter in step with the input.
#-- The function's docstring is the token's regular expression.
def t_newline(t):
    r'\n+'
    newline_total = t.value.count("\n")
    t.lexer.lineno += newline_total


#-- Error rule: report the offending character, then step over it
#-- by skipping one character in the lexer.
def t_error(t):
    offending_char = t.value[0]
    print("Illegal character '%s'" % offending_char)
    t.lexer.skip(1)


#-- NOW BUILD THE LEXER --
#-- Module-level lexer instance, created as soon as this module is loaded.
#-- NOTE(review): lex is presumably PLY's lex(), which would pick up the
#-- t_* rules defined above -- confirm against the file's imports.
lexer = lex()

#--------------------------------------------------------------------
#--- Here is the parser set-up in terms of binary operator attributes
#--------------------------------------------------------------------

#--- Module-wide counter backing the state names handed out to the parser
NxtStateNum = 0


def NxtStateStr():
    """Advance the global state counter and return the next state name.

    The counter is incremented before it is used, so with the counter at
    its initial value of 0 the first name returned is "St1".
    """
    global NxtStateNum
    fresh_number = NxtStateNum + 1
    NxtStateNum = fresh_number
    return "St" + str(fresh_number)