def tokenize(self, sourcecode, filesource='<stdin>'):
    "Tokenize the given string of source code."
    self.errmsg = NCPTL_Error(filesource)

    # Keep track of all the comments we've encountered by storing
    # a mapping from line number to comment (including the initial
    # hash character).
    self.line2comment = {}

    # Initialize the lexer.
    lex.lex(module=self)

    # Repeatedly invoke the lexer and return all of the tokens it produces.
    self.lineno = 1
    lex.input(sourcecode)
    self.toklist = []
    while 1:
        # Acquire the next token and assign it a line number if necessary.
        token = lex.token()
        if not token:
            break
        if token.lineno < self.lineno:
            token.lineno = self.lineno

        # Hack: Disambiguate op_mult and star on the parser's behalf.
        if token.type in ["comma", "rparen"]:
            try:
                if self.toklist[-1].type == "op_mult":
                    self.toklist[-1].type = "star"
            except IndexError:
                pass

        # We now have one more valid token.
        self.toklist.append(token)
    return self.toklist
def parse(string):
    lex.lex()
    yacc.yacc()
    rules = yacc.parse(string)
    result = []
    while rules:
        current = rules.pop(0)
        result.extend(current[1])
    return result
def lex_fun(data, toks):
    lexer = lex.lex()
    lexer.input(data)

    # Tokenize
    count = 0
    while True:
        tok = lexer.token()
        if not tok:
            break  # No more input
        # if tok.type == 'NUMBER':
        #     tok.value = int(tok.value)
        if tok.type == 'EVAL':
            count = 1
        if count > 0:
            count += 1
        if count != 4:
            # pprint.pprint(tok)
            toks.append(tok)
        if count == 4:
            a = tok.value
            if a[0] == '"' or a[0] == "'":
                a = a[1:-1]
            lex_fun(a, toks)
            count = 0
def repl():
    base_st = builtin.global_st
    while True:
        try:
            line = raw_input(">>> ")
            stream = parse.parse(line)
            stream.reverse()
            p = lex.lex(stream)
            p = p[0]
            print "lex", p
            s = syntax.replace(p, base_st)
            print "syntax", s
            res = s.evaluate()
            print res
            out = open("test.c", "w")
            out.write('#include "builtin.h"\n')
            out.write("int main() {\n")
            code = s.emit()
            out.write("\treturn " + str(code) + ";\n}\n")
            out.close()
        except Exception, e:
            traceback.print_exc(file=sys.stdout)
def myTokenizer(data):
    tokens = (
        'NUMBER',
        'WORD',
        'PUNCTUATION',
    )
    t_NUMBER = r'[0-9]+'
    t_WORD = r'[a-zA-Z_]+'
    t_PUNCTUATION = r'\+|\-|\*|\!|\@|\#|\$|\%|\^|\&|\(|\)|\_|\=|\~|\`|\{|\[|\]|\}|\\|\||\:|\;|\"|\'|\<|\>|\,|\.|\/|\?'
    t_ignore = ' \t\n'

    def t_error(t):
        # print("Illegal character '%s'" % t.value[0])
        t.lexer.skip(1)

    lexer = lex.lex()
    lexer.input(data)
    # Collect results under a separate name so the 'tokens' tuple that
    # PLY reads when building the lexer is not shadowed.
    values = []
    while True:
        tok = lexer.token()
        if not tok:
            break
        values.append(str(tok.value))
    return values
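# Minimal usage sketch (not from the original source): assumes PLY is
# available and imported above as "lex" (e.g. "import ply.lex as lex");
# the input string is only illustrative.
print(myTokenizer("x = 42 + y"))
# expected: ['x', '=', '42', '+', 'y']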
def write(self, tokens):
    for t in tokens:
        if t.type == 'HEADER_NAME':
            # token was mis-parsed. Do it again, without the '<', '>'.
            ta = create_token('<', '<')
            ta.filename = t.filename
            ta.lineno = t.lineno
            self.output.append(ta)

            l = lex.lex(cls=PreprocessorLexer)
            l.input(t.value, t.filename)
            l.lineno = t.lineno
            tb = l.token()
            while tb is not None:
                if hasattr(tb, 'lexer'):
                    del tb.lexer
                self.output.append(tb)
                tb = l.token()

            tc = create_token('>', '>')
            tc.filename = t.filename
            tc.lineno = t.lineno
            self.output.append(tc)
            continue
        if hasattr(t, 'lexer'):
            del t.lexer
        self.output.append(t)
def main(infile, outbase):
    fractlexer.keep_all = True
    fractlexer.t_ignore = ""
    flex = lex.lex(fractlexer)
    flex.input(open(infile).read())

    # Tokenize
    toks = []
    nfrms = 0
    while 1:
        tok = flex.token()
        if not tok:
            break  # No more input
        # element.text = tok.value
        if tok.type == "FORM_ID":
            output_frm(toks, outbase, nfrms)
            toks = []
            nfrms += 1
            # special case for processing tutorial
            tok.value = myfrm.sub('MyFormula', tok.value)
        toks.append(processToken(tok, highlights[nfrms]))

    # print last formula
    output_frm(toks, outbase, nfrms)
def __init__(self, language='en'):
    self.language = language
    self.lock = Lock()
    try:
        modname = os.path.split(os.path.splitext(__file__)[0])[1] + "_" + self.__class__.__name__
    except:
        modname = "parser" + "_" + self.__class__.__name__
    self.debugfile = modname + ".dbg"
    self.tabmodule = modname + "_" + "parsetab"
    lex.lex(module=self, debug=False)
    self.p = yacc.yacc(module=self,
                       debug=0,
                       outputdir=outputdir,
                       debugfile=self.debugfile,
                       tabmodule=self.tabmodule,)
def runLexer():
    lexer = lex.lex(debug=False)
    lexer.input(file(sys.argv[1]).read())
    while 1:
        tok = lexer.token()
        if not tok:
            break  # No more input
        print tok
def parse(s):
    lexer = lex.lex()
    lexer.input(s)
    parser = yacc.yacc()  # debug=1
    print("Parsing...")
    root = parser.parse(lexer=lexer)  # debug=1
    return root
def parse(input):
    parser = Parser()
    parser.input = lex.lex(input)
    tok = parser.next()
    while tok:
        parse_token(tok, parser)
        tok = parser.next()
    return parser.items
def __init__(self, localeCode):
    # Use this if you want to build the parser using LALR(1) instead of SLR
    # yacc.yacc(method="LALR")
    localeparser = __import__('numbler.server.locale.parser_%s' % (str(localeCode)), {}, {}, '*')
    self.parser = yacc.yacc(tabmodule="localtab_%s" % localeCode,
                            tabmoduleparent="numbler.server.locale",
                            outputdir=resource_filename('numbler.server.locale', ''),
                            module=localeparser,
                            optimize=self.optimize)
    self.lexer = lex.lex(module=localeparser, debug=self.debug)  # optimize=self.optimize,
def __init__(self, string):
    self.tokens = lex(string)
    self.current_token = next(self.tokens)
    self.at_last = False
    self.reader_macros = {
        'e': self.rm_exact,
        'i': self.rm_inexact,
        't': self.rm_true,
        'f': self.rm_false,
    }
def buildlexer(self, **kwargs):
    # try and find a temporary workspace
    if os.environ.has_key('TMP'):
        tempDir = os.environ['TMP']
    elif os.environ.has_key('TEMP'):
        tempDir = os.environ['TEMP']
    else:
        tempDir = os.getcwd()
    os.chdir(tempDir)
    self.lexer = lex.lex(object=self, **kwargs)
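# Possible simplification (a sketch, not the original code): the standard
# library's tempfile.gettempdir() already consults TMPDIR/TEMP/TMP and falls
# back to a sensible default, so the environment probing above can be
# delegated to it.
import os
import tempfile

def buildlexer(self, **kwargs):
    os.chdir(tempfile.gettempdir())  # same TMP/TEMP lookup, one call
    self.lexer = lex.lex(object=self, **kwargs)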
def test_comment():
    test = 'a <!--b {{unclosed--> c'
    items = lex.lex(test)
    assert_tokens(items, 'a ', '<!--', 'b {{unclosed', '-->', ' c')
    assert items[0][0] == 'text'
    assert items[1][0] == 'left_comment'
    assert items[2][0] == 'comment'
    assert items[3][0] == 'right_comment'
    assert items[4][0] == 'text'
def getLexer(decimalSepType='.'):
    import lexer
    ret = None
    if decimalSepType == ',':
        lexer.t_FLOAT.__doc__ = r'((((\d+(\,\d*))|(\d*\,\d+))([eE][\+\-]{0,1}\d+)?)|(\d+[eE][\+\-]{0,1}\d+))[ ]*%?'
        lexer.t_COMMA.__doc__ = r'[ \t]*\;[ \t]*'  # eats space
        ret = lex.lex(module=lexer)
        ret.decsep = ','
    elif decimalSepType == '.':
        lexer.t_FLOAT.__doc__ = r'((((\d+(\.\d*))|(\d*\.\d+))([eE][\+\-]{0,1}\d+)?)|(\d+[eE][\+\-]{0,1}\d+))[ ]*%?'
        lexer.t_COMMA.__doc__ = r'[ \t]*\,[ \t]*'  # eats space
        ret = lex.lex(module=lexer)
        ret.decsep = '.'
    elif decimalSepType == '\xd9\xab':
        lexer.t_FLOAT.__doc__ = r'((((\d+(\xd9\xab\d*))|(\d*\xd9\xab\d+))([eE][\+\-]{0,1}\d+)?)|(\d+[eE][\+\-]{0,1}\d+))[ ]*%?'
        lexer.t_COMMA.__doc__ = r'[ \t]*\;[ \t]*'  # eats space
        ret = lex.lex(module=lexer)
        ret.decsep = '\xd9\xab'
    return ret
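# Minimal usage sketch (not from the original source): assumes the 'lexer'
# module above defines FLOAT and COMMA as function rules, which is why their
# __doc__ regexes can be swapped; the input string is only illustrative.
lx = getLexer(',')
lx.input('3,14; 2,5')
while True:
    tok = lx.token()
    if not tok:
        break
    print(tok.type, tok.value)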
def __init__(self):
    import grammar
    block.block.__init__(self, parser.pushstream(lex.lex(proarkhe.illiterate())))
    self.decladd("OMEGA", "ordinals")
    self.decladd("natural", "type")
    self.decladd("integer", "type")
    self.decladd("real", "type")
    self.decladd("Boolean", "type")
    self.decladd("true", "Boolean")
    self.decladd("false", "Boolean")
    self.visit(grammar.parentset)
    self.walk(grammar.tokenset)
def assert_line(self, s, a):
    s = s.replace('\\n', '\n')
    s = s.replace('\\r', '\r')
    tt = lex(s)
    r = []
    for t in tt:
        r.append(t.line)
    if len(r) == len(a) + 2:
        a = a[:]
        a.append(a[-1])
        a.append(a[-1])
    assert_equal(r, a)
def doLex(utf8string, cLocale):
    l = lex.lex()
    l.input(utf8string)
    fragments = []
    while True:
        val = l.token()
        if not val:
            break
        else:
            fragments.append(val.value)
    return FragHandler(fragments)
def construct_ast(program_text):
    tokens = lex(program_text, token_rules)
    # todo: this kind of post-lexing processing should be specified by the caller somehow.
    tokens = [token for token in tokens if token.klass.name != "whitespace"]
    right_derivation = parser.parse(tokens)
    parse_tree = construct_parse_tree(right_derivation, rules, tokens)
    remove_literal_tokens(parse_tree)
    for name in reducible_node_names:
        reduce_tail_recursive_nodes(parse_tree, name)
    return parse_tree
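# One way to address the "todo" above (a sketch, not the project's code):
# let the caller pass a post-lexing filter, defaulting to the current
# whitespace-stripping behaviour.
def construct_ast(program_text, token_filter=None):
    tokens = lex(program_text, token_rules)
    if token_filter is None:
        token_filter = lambda toks: [t for t in toks if t.klass.name != "whitespace"]
    tokens = token_filter(tokens)
    right_derivation = parser.parse(tokens)
    parse_tree = construct_parse_tree(right_derivation, rules, tokens)
    remove_literal_tokens(parse_tree)
    for name in reducible_node_names:
        reduce_tail_recursive_nodes(parse_tree, name)
    return parse_tree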
def create_globals(module, support, debug):
    global parser, lexer, m, spt
    if not parser:
        lexer = lex.lex()
        parser = yacc.yacc(method="LALR", debug=debug, write_tables=0)
    if module is not None:
        m = module
    else:
        m = refpolicy.Module()
    if not support:
        spt = refpolicy.SupportMacros()
    else:
        spt = support
def assert_lex(self, src, lexed):
    src = src.replace('\\n', '\n')
    src = src.replace('\\r', '\r')
    if lexed.endswith(' ...'):
        lexed = lexed[:-3] + 'Break() Eof()'
    l = lex(src)
    r = []
    for t in l:
        r.append(str(t))
    act = ' '.join(r)
    if act != lexed:
        print('Actual:  ', act)
        print('Expected:', lexed)
    assert_equal(act, lexed)
def beginParse(program):
    yacc = lexyacc.yacc()
    try:
        result = yacc.parse(program.read(), lexer=lexmelon.lex())
        aux = eval({}, result)
        if isinstance(aux, bool):
            aux = str(aux).lower()
        if isinstance(aux, NodoBin):
            if aux.tipo == 'LISTA':
                print recorrer_list(aux)
            else:
                print aux
    except SyntaxError, e:
        token = e.token
        if token:
            print 'Syntax error on line ' + str(token.lineno) \
                + ' near token ' + '"' + str(token.value) + '"'
        else:
            print 'Error at end of program'
def mk_rpn_query(query):
    """Transform a CCL query into an RPN query."""
    # need to copy or create a new lexer because it contains globals
    # PLY 1.0 lacks __copy__
    # PLY 1.3.1-1.5 have __copy__, but it's broken and returns None
    # I sent David Beazley a patch, so future PLY releases will
    # presumably work correctly.
    # Recreating the lexer each time is noticeably slower, so this solution
    # is suboptimal for PLY <= 1.5, but better than being thread-unsafe.
    # Perhaps I should have per-thread lexer instead XXX
    # with example/twisted/test.py set to parse_only, I get 277 parses/sec
    # with fixed PLY, vs. 63 parses/sec with broken PLY, on my 500 MHz PIII
    # laptop.
    copiedlexer = None
    if hasattr(lexer, '__copy__'):
        copiedlexer = lexer.__copy__()
    if copiedlexer is None:
        copiedlexer = lex.lex()
    ast = yacc.parse(query, copiedlexer)
    return ast_to_rpn(ast)
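# On newer PLY releases (2.x/3.x) the lexer object exposes clone(), which
# copies a built lexer without recompiling its master regular expressions,
# so a per-call or per-thread copy is cheap. A sketch under that assumption:
def mk_rpn_query(query):
    """Transform a CCL query into an RPN query."""
    copiedlexer = lexer.clone()  # cheap copy, no regex rebuild
    ast = yacc.parse(query, lexer=copiedlexer)
    return ast_to_rpn(ast)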
def __init__(self, options, cparser):
    self.defines = ["inline=", "__inline__=", "__extension__=",
                    "_Bool=uint8_t", "__const=const", "__asm__(x)=",
                    "__asm(x)=", "CTYPESGEN=1"]

    # On OSX, explicitly add these defines to keep from getting syntax
    # errors in the OSX standard headers.
    if sys.platform == 'darwin':
        self.defines += ["__uint16_t=uint16_t", "__uint32_t=uint32_t",
                         "__uint64_t=uint64_t"]

    self.matches = []
    self.output = []
    self.lexer = lex.lex(cls=PreprocessorLexer,
                         optimize=1,
                         lextab='lextab',
                         outputdir=os.path.dirname(__file__),
                         module=pplexer)

    self.options = options
    self.cparser = cparser  # An instance of CParser
def constr_testing(value, constr, var_name):
    global names
    lexer = lex.lex()
    parser = yacc.yacc()
    # print parser.parse('ASSERT(NOT(123 = 123))')
    # print constr
    for index, eachvar in enumerate(var_name):
        str_value = []
        for val in value[index]:
            if val != '':
                # TODO: input concrete value must be integer
                str_val = BitArray(uint=int(val), length=8)
                str_value.append('0x' + str_val.hex)
        names[eachvar] = str_value
    # print names
    return ([constr[0]], yacc.parse(constr[1]))
def __init__(self, **kw):
    self.debug = kw.get('debug', 0)
    self.parse_result = None  # undefined
    try:
        modname = os.path.split(os.path.splitext(__file__)[0])[1] \
            + "_" + self.__class__.__name__
    except:
        modname = "parser" + "_" + self.__class__.__name__
    self.debugfile = modname + ".dbg"
    self.tabmodule = modname + "_" + "parsetab"
    # print self.debugfile, self.tabmodule

    # Build the lexer and parser
    self.ctx = Context()
    ryacc.ctx = self.ctx
    self.lexer = lex.lex(module=self, debug=self.debug)
    self.parser = ryacc.yacc(module=self,
                             debug=self.debug,
                             debugfile=self.debugfile,
                             tabmodule=self.tabmodule,
                             write_tables=0,
                             optimize=1)
def __init__(self, gcc_search_path=True):
    yacc.Parser.__init__(self)
    self.lexer = lex.lex(cls=PreprocessorLexer)
    PreprocessorGrammar.get_prototype().init_parser(self)

    # Map system header name to data, overrides path search and open()
    self.system_headers = {}

    self.include_path = ['/usr/local/include', '/usr/include']
    if sys.platform == 'darwin':
        self.framework_path = ['/System/Library/Frameworks',
                               '/Library/Frameworks']
    else:
        self.framework_path = []

    if gcc_search_path:
        self.add_gcc_search_path()

    self.lexer.filename = ''

    self.defines = {}
    self.namespace = PreprocessorNamespace()
def new(): t_AND = r"\&" t_ANDAND = r"\&\&" t_ANDEQ = r"\&=" t_BACKSLASH = r"\\" t_COLON = r":" t_DIV = r"\/" t_DIVEQ = r"\/=" t_DOT = r"\." t_DOTDIV = r"\./" t_DOTEXP = r"\.\^" t_DOTMUL = r"\.\*" t_DOTMULEQ = r"\.\*=" t_EQ = r"=" t_EQEQ = r"==" t_EXP = r"\^" t_GE = r">=" t_GT = r"\>" t_HANDLE = r"\@" t_LE = r"<=" t_LT = r"\<" t_MINUS = r"\-" t_MINUSEQ = r"\-=" t_MINUSMINUS = r"\--" t_MUL = r"\*" t_MULEQ = r"\*=" t_NE = r"(~=)|(!=)" t_NEG = r"\~|!" t_OR = r"\|" t_OREQ = r"\|=" t_OROR = r"\|\|" t_PLUS = r"\+" t_PLUSEQ = r"\+=" t_PLUSPLUS = r"\+\+" states = (("matrix", "inclusive"), ("afterkeyword", "exclusive")) ws = r"(\s|(\#|%).*\n|\.\.\..*\n|\\\n)" ws1 = ws + "+" ws0 = ws + "*" ms = r"'([^']|(''))*'" os = r'"([^"\a\b\r\t\0\v\n\\]|(\\[abn0vtr\"\n\\]))*"' mos = "(%s)|(%s)" % (os, ms) id = r"[a-zA-Z_][a-zA-Z_0-9]*" def unescape(s): if s[0] == "'": return s[1:-1].replace("''", "'") else: return s[1:-1].decode('string_escape') # return s[1:-1].replace('\\"','"').replace('\\\n','').replace('\\n','\n').replace('\\\\','\\').replace('\\0','\0').replace('\\v','\v').replace('\\t','\t').replace('\\r','\r') @TOKEN(mos) def t_afterkeyword_STRING(t): t.value = unescape(t.value) t.lexer.begin("INITIAL") return t def t_afterkeyword_error(t): raise SyntaxError # A quote, immediately following any of: (1) an alphanumeric # charater, (2) right bracket, parenthesis or brace, # or (3) another TRANSPOSE, is a TRANSPOSE. Otherwise, it starts a # string. The order of the rules for TRANSPOSE (first) and STRING # (second) is important. Luckily, if the quote is separated from # the term by line continuation (...), matlab starts a string, so # the above rule still holds. def t_TRANSPOSE(t): r"(?<=\w|\]|\)|\})((\.')|')+" # <---context ---><-quotes-> # We let the parser figure out what that mix of quotes and # dot-quotes, which is kept in t.value, really means. return t @TOKEN(mos) def t_STRING(t): t.value = unescape(t.value) return t @TOKEN(r"(\.%s)?%s" % (ws0, id)) def t_IDENT(t): t.lexer.lineno += t.value.count("\n") if t.value[0] == ".": # Reserved words are not reserved when used as fields. # So return=1 is illegal, but foo.return=1 is fine. t.type = "FIELD" return t if t.value in ("endwhile", "endfunction", "endif", "endfor", "endswitch", "end_try_catch"): # octave t.type = "END_STMT" return t if t.value == "end": if t.lexer.parens > 0 or t.lexer.brackets > 0 or t.lexer.braces > 0: t.type = "END_EXPR" else: t.type = "END_STMT" else: t.type = reserved.get(t.value, "IDENT") if t.type != "IDENT" and t.lexer.lexdata[t.lexer.lexpos] == "'": t.lexer.begin("afterkeyword") return t def t_LPAREN(t): r"\(" t.lexer.parens += 1 return t def t_RPAREN(t): r"\)" t.lexer.parens -= 1 return t @TOKEN(ws0 + r"\]") def t_RBRACKET(t): # compare w t_LBRACKET t.lexer.lineno += t.value.count("\n") t.lexer.brackets -= 1 if t.lexer.brackets + t.lexer.braces == 0: t.lexer.begin("INITIAL") return t @TOKEN(r"\[" + ws0) def t_LBRACKET(t): # compare w t_SEMI t.lexer.lineno += t.value.count("\n") t.lexer.brackets += 1 if t.lexer.brackets + t.lexer.braces == 1: t.lexer.begin("matrix") return t # maybe we need a dedicated CELLARRAY state ??? 
@TOKEN(ws0 + r"\}") def t_RBRACE(t): t.lexer.lineno += t.value.count("\n") t.lexer.braces -= 1 if t.lexer.braces + t.lexer.brackets == 0: t.lexer.begin("INITIAL") return t @TOKEN(r"\{" + ws0) def t_LBRACE(t): t.lexer.lineno += t.value.count("\n") t.lexer.braces += 1 if t.lexer.brackets + t.lexer.braces == 1: t.lexer.begin("matrix") return t @TOKEN(r"," + ws0) def t_COMMA(t): # eating spaces is important inside brackets t.lexer.lineno += t.value.count("\n") if (t.lexer.brackets == 0 and t.lexer.parens == 0 and t.lexer.braces == 0): t.type = "SEMI" return t return t @TOKEN(r"\;" + ws0) def t_SEMI(t): t.lexer.lineno += t.value.count("\n") # if t.lexer.brackets or t.lexer.braces > 0: # t.type = "CONCAT" return t def t_NUMBER(t): r"(0x[0-9A-Fa-f]+)|((\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?[ij]?)" if t.value[-1] == 'i': t.value = t.value[:-1] + 'j' t.value = eval(t.value) return t def t_NEWLINE(t): r'\n+' t.lexer.lineno += len(t.value) if not t.lexer.parens and not t.lexer.braces: t.value = ";" t.type = "SEMI" return t def t_comment(t): r"(%|\#).*" pass # @TOKEN(ws+r"(?=[-+]\S)") # def t_matrix_WHITESPACE(t): # #r"\s+(?=[-+]\S)" # # Whitespace, followed by + or - followed by anything but whitespace # t.lexer.lineno += t.value.count("\n") # t.type = "COMMA" # return t @TOKEN(r"(?<=\w)" + ws1 + r"(?=\()") def t_matrix_BAR(t): # Consume whitespace which follows end of name # and is followed a left paren. This properly handles # a space between a func name and the arguments pass tend = r"(?<=[])}'\".]|\w)" tbeg = r"(?=[-+]?([[({'\"]|\w|\.\d))" @TOKEN(tend + ws1 + tbeg) def t_matrix_FOO(t): # In matrix state, consume whitespace separating two # terms and return a fake COMMA token. This allows # parsing [1 2 3] as if it was [1,2,3]. Handle # with care: [x + y] vs [x +y] # # A term T is # (a) a name or a number # (b) literal string using single or doble quote # (c) (T) or [T] or {T} or T' or +T or -T # # Terms end with # (1) an alphanumeric charater \w # (2) single quote (in octave also double-quote) # (3) right parenthesis, bracket, or brace # (4) a dot (after a number, such as 3. # # The pattern for whitespace accounts for ellipsis as a # whitespace, and for the trailing junk. # # Terms start with # (1) an alphanumeric character # (2) a single or double quote, # (3) left paren, bracket, or brace and finally # (4) a dot before a digit, such as .3 . # TODO: what about curly brackets ??? # TODO: what about dot followed by a letter, as in field # [foo .bar] t.lexer.lineno += t.value.count("\n") t.type = "COMMA" return t def t_ELLIPSIS(t): r"\.\.\..*\n" t.lexer.lineno += 1 pass def t_SPACES(t): r"(\\\n|[ \t\r])+" pass def t_error(t): column = t.lexer.lexpos - t.lexer.lexdata.rfind( "\n", 0, t.lexer.lexpos) raise IllegalCharacterError(t.lineno, column, t.value[0]) lexer = lex.lex(reflags=re.I) lexer.brackets = 0 # count open square brackets lexer.parens = 0 # count open parentheses lexer.braces = 0 # count open curly braces return lexer
def tdiCompile(text, replacementArgs=_replacementArgs(())): import lex if isinstance(replacementArgs, tuple): return tdiCompile(text, _replacementArgs(replacementArgs)) elif not isinstance(replacementArgs, _replacementArgs): raise Exception( "Second argument to tdiCompile, if suppied, must by a tupple") ### Lexical Tokens tokens = [ 'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'EQUAL', 'EQUALS', 'LPAREN', 'RPAREN', 'LBRACE', 'RBRACE', 'LBRACKET', 'RBRACKET', 'COMMA', 'BU', 'B', 'WU', 'W', 'LU', 'L', 'QU', 'Q', 'FloatNum', 'T', 'T2', 'IDENT', 'PLACEHOLDER', 'NAME', 'ARROW', 'GREATER', 'LESS', 'RAISE', 'GREATER_EQUAL', 'LESS_EQUAL', 'NOT_EQUAL', 'QUESTION', 'COLON', 'LSHIFT', 'RSHIFT', 'SEMICOLON', 'IAND', 'AND', 'NOT', 'PLUSPLUS', 'MINUSMINUS', 'SLASHSLASH', 'IOR', 'OR', 'INOT', 'EQUALSFIRST', 'TREEPATH', 'BACKQUOTE', ] ### Reserved keywords reserved = { 'if': 'IF', 'else': 'ELSE', 'public': 'IDENTTYPE', 'private': 'IDENTTYPE', 'fun': 'FUN', 'in': 'ARGTYPE', 'out': 'ARGTYPE', 'inout': 'ARGTYPE', 'optional': 'ARGTYPE', 'as_is': 'ARGTYPE', 'switch': 'SWITCH', 'case': 'CASE', 'for': 'FOR', 'while': 'WHILE', 'break': 'BREAK', 'continue': 'CONTINUE', 'not': 'NOT_S', 'and': 'AND_S', 'or': 'OR_S', 'nor': 'NOR_S', 'mod': 'MOD_S', 'eq': 'EQ_S', 'ne': 'NE_S', 'gt': 'GT_S', 'ge': 'GE_S', 'lt': 'LT_S', 'le': 'LE_S', 'default': 'DEFAULT', } tokens += list(set(reserved.values())) ### ignore comments denoted by /* ..... */ NOTE: Nested comments allowed which required the states trick states = (('nestcomment', 'exclusive'), ) def t_nestcomment_comment(t): r'(.|\n)*?(\*/|/\*)' if t.value[-2:] == '/*': t.lexer.push_state('nestcomment') else: t.lexer.pop_state() def t_COMMENT(t): r'(/\*(.|\n)*?(\*/|/\*))' if t.value[-2:] == '/*': t.lexer.push_state('nestcomment') t.lexer.push_state('nestcomment') ### integer token including hex,binary,octal and decimal integer = r'0[Xx][0-9A-Fa-f]+|0[Bb][01]+|0[0-7]+|[1-9]+[0-9]*|0' def fix_backquotes(in_str): import re def replace_backquote_string(match): mstr = match.group(0) if len(mstr) > 4: ans = mstr elif mstr[1] == '\\': ans = mstr elif mstr[1] in 'mntr': ans = eval("'" + mstr + "'") else: ans = chr(int(mstr[1:], 8)) return ans ans = re.sub(r'\\[0-7]+|\\[\\mntr]', replace_backquote_string, in_str) return ans ### string token with double quotes converted to String() instance @lex.TOKEN(r'"(?:[^"\\]|\\.)*"') def t_T(t): t.value = String( fix_backquotes(t.value).replace('\\"', '"').replace("\\'", "'").replace( '\\\\', '\\')[1:-1]) return t ### string token with single quotes converted to String() instance @lex.TOKEN(r"'(?:[^'\\]|\\.)*'") def t_T2(t): t.value = String( fix_backquotes(t.value).replace("\\'", "'").replace('\\"', '"').replace( '\\\\', '\\')[1:-1]) return t ### unsigned byte token converted to Uint8() instance @lex.TOKEN(r'(?i)(byte_unsigned|unsigned_byte)\((?P<number1>(' + integer + r'))\)|(?P<number2>(' + integer + r'))(bu|ub)') def t_BU(t): t.value = Uint8( int( t.lexer.lexmatch.group('number1') or t.lexer.lexmatch.group('number2'), 0)) return t ### unsigned word converted to Uint16() instance @lex.TOKEN(r'(?i)(word_unsigned|unsigned_word)\((?P<number1>(' + integer + r'))\)|(?P<number2>(' + integer + r'))(wu|uw)') def t_WU(t): t.value = Uint16( int( t.lexer.lexmatch.group('number1') or t.lexer.lexmatch.group('number2'), 0)) return t ### signed word converted to Int16() instance @lex.TOKEN(r'(?i)word\((?P<number1>(' + integer + r'))\)|(?P<number2>(' + integer + r'))w') def t_W(t): t.value = Int16( int( t.lexer.lexmatch.group('number1') or 
t.lexer.lexmatch.group('number2'), 0)) return t ### unsigned quadword converted to Uint64() instance @lex.TOKEN(r'(?i)(quadword_unsigned|unsigned_quadword)\((?P<number1>(' + integer + r'))\)|(?P<number2>(' + integer + r'))(uq|qu)') def t_QU(t): t.value = Uint64( int( t.lexer.lexmatch.group('number1') or t.lexer.lexmatch.group('number2'), 0)) return t ### unsigned int converted to Uint32() instance @lex.TOKEN(r'(?i)(long_unsigned|unsigned_long)\((?P<number1>(' + integer + r'))\)|(?P<number2>(' + integer + r'))(lu|ul|u)') def t_LU(t): t.value = Uint32( int( t.lexer.lexmatch.group('number1') or t.lexer.lexmatch.group('number2'), 0)) return t ### signed quadword converted to Int64() instance @lex.TOKEN(r'(?i)quadword\((?P<number1>(' + integer + r'))\)|(?P<number2>(' + integer + r'))q') def t_Q(t): t.value = Int64( int( t.lexer.lexmatch.group('number1') or t.lexer.lexmatch.group('number2'), 0)) return t ### Float instance converted to either Float32() or Float64() instance @lex.TOKEN( r'(?i)([0-9]+\.(?!\.)[0-9]*|[0-9]*\.[0-9]+|[0-9]+)(?P<exp>([dgef]))[-+]?[0-9]+|[0-9]+\.(?!\.)[0-9]*|[0-9]*\.[0-9]+' ) def t_FloatNum(t): exp = t.lexer.lexmatch.group('exp') if exp is not None: exp = exp.lower() val = t.value.lower().replace('d', 'e').replace('g', 'e').replace('f', 'e') if exp is None or exp == 'e' or exp == 'f': t.value = Float32(val) else: t.value = Float64(val) if 'inf' in repr(t.value.data()): t.value = Float32(val) return t ### signed byte converted to Int8() instance @lex.TOKEN(r'(?i)byte\((?P<number1>(' + integer + '))\)|(?P<number2>(' + integer + '))b') def t_B(t): t.value = Int8( int( t.lexer.lexmatch.group('number1') or t.lexer.lexmatch.group('number2'), 0)) return t ### signed int converted to Int32() instances. NOTE must be end of the scalar tokens to work for some reason. @lex.TOKEN(r'(?i)long\((?P<number1>(' + integer + '))\)|(?P<number2>(' + integer + '))l?') def t_L(t): t.value = Int32( int( t.lexer.lexmatch.group('number1') or t.lexer.lexmatch.group('number2'), 0)) return t ### Ident or builtin constant converted to either Ident() instance or a Builtin() instance for constants such as $PI @lex.TOKEN( r'(?i)(\$([a-z]+[a-z0-9_\$]*)|([0-9]+[a-z_\$]+[a-z0-9_\$]*))|(_[a-z0-9_\$]*)' ) def t_IDENT(t): if t.value.lower() == "$roprand": import numpy as np t.value = np.frombuffer(np.getbuffer(np.int32(2147483647)), dtype=np.float32)[0] else: try: t.value = Builtin(t.value, ()) except Exception: t.value = Ident(t.value) return t ### Placeholders @lex.TOKEN(r'\$[1-9]*[0-9]*') def t_PLACEHOLDER(t): if len(t.value) == 1: idx = replacementArgs.idx else: idx = int(t.value[1:]) if idx <= len(replacementArgs.args): t.value = makeData(replacementArgs.args[idx - 1]) else: raise Exception( '%TDI-E-TdiMISS_ARG, Missing argument is required for function' ) replacementArgs.idx = idx + 1 return t ### Tree path \[treename::]tagname[.|:]node[.|:]node... pname = r'[a-z][a-z0-9$_]*' @lex.TOKEN(r'(?i)(((\\(' + pname + r'::)?|[\.:])?' 
+ pname + r')|(\.-(\.?-)*))([\.:]' + pname + r')*') def t_TREEPATH(t): if t.value.lower() in reserved: t.type = reserved[t.value.lower()] else: import re original_value = t.value if re.match(r'[\s]*(\(|->)', t.lexer.lexdata[t.lexer.lexpos:]) is not None: skip = t.value.find(':') if skip == 0: t.lexer.lexpos = t.lexer.lexpos - len(t.value) + 1 t.type = 'COLON' t.value = ':' else: if skip > -1: t.lexer.lexpos = t.lexer.lexpos - len(t.value) + skip t.value = t.value[0:skip] t.type = 'NAME' else: try: t.value = Tree().getNode(t.value) except: if t.value[0] in '.:': t.value = '\\' + Tree().tree + '::TOP' + t.value elif t.value[0] == '\\': if t.value.find('::') == -1: t.value = '\\' + Tree().tree + '::' + t.value[1:] else: t.value = '\\' + Tree().tree + '::TOP:' + t.value t.value = TreePath(t.value.upper()) t.value.original_value = original_value return t ### Various operators t_PLUS = r'\+' t_MINUS = r'-' t_TIMES = r'\*' t_DIVIDE = r'/' t_EQUALS = r'==' t_EQUAL = r'=' t_LPAREN = r'\(' t_RPAREN = r'\)' t_LBRACE = r'{' t_RBRACE = r'}' t_LBRACKET = r'\[' t_RBRACKET = r'\]' t_COMMA = r',' t_ARROW = r'->' t_GREATER = r'>' t_GREATER_EQUAL = r'>=' t_LESS = r'<' t_LESS_EQUAL = r'<=' t_NOT_EQUAL = r'!=|<>' t_RAISE = r'\^|\*\*' t_QUESTION = r'\?' t_LSHIFT = r'<<' t_RSHIFT = r'>>' t_SEMICOLON = r';' t_IAND = r'&' t_AND = r'&&' t_NOT = r'!' t_PLUSPLUS = r'\+\+' t_MINUSMINUS = r'--' t_SLASHSLASH = r'//' t_IOR = r'\|' t_OR = r'\|\|' t_INOT = r'~' t_EQUALSFIRST = r'\+=|-=|\*=|/=|\^=|\*\*=|<==|>==|>>=|<<=|&=|&&=|!==|\|=|\|\|=|//=' t_BACKQUOTE = r'`' def t_COLON(t): r'\.\.|:' t.value = ':' return t ### Name token which begins with an alpha followed by zero or more of aphanumeric or underscore ### or a reserved word token such as if, while, switch, for ... def t_NAME(t): r'(?i)\b[a-z]+[a-z0-9_]*\b' t.type = reserved.get(t.value.lower(), 'NAME') return t # Define a rule so we can track line numbers def t_newline(t): r'\n+' t.lexer.lineno += len(t.value) # Error handling rule def t_ANY_error(t): print("Illegal character '%s'(%d) at line %d around '%s'" % (t.value[0], ord(t.value[0]), t.lexer.lineno, t.lexer.lexdata[t.lexer.lexpos - 10:t.lexer.lexpos + 10])) # t.lexer.skip(1) # A string containing ignored characters (spaces and tabs) t_ANY_ignore = ' \t\r\0' # Build the lexer lex.lex(debug=0, optimize=optimized, lextab='tdilextab') precedence = ( ('right', 'EQUAL'), ('right', 'COMMA'), ('left', 'COLON'), ('left', 'QUESTION'), ('left', 'OR', 'AND', 'OR_S', 'AND_S'), ('left', 'GREATER', 'GREATER_EQUAL', 'LESS', 'LESS_EQUAL', 'EQUALS', 'NOT_EQUAL', 'GT_S', 'GE_S', 'LT_S', 'LE_S', 'EQ_S', 'NE_S'), ('left', 'SLASHSLASH'), ('left', 'PLUS', 'MINUS', 'IOR', 'IAND'), ('left', 'TIMES', 'DIVIDE'), ('left', 'RAISE', 'MOD_S'), ('right', 'RSHIFT', 'LSHIFT', 'UNOP'), ('left', 'LBRACKET', 'LPAREN', 'IDENTTYPE'), ) def p_compilation(t): """compilation : statements\n| operand\n | operand SEMICOLON """ t[0] = t[1] if isinstance(t[0], Builtin) and len(t[0].args) == 2 and isinstance( t[0].args[0], String) and isinstance(t[0].args[1], String): t[0] = String(str(t[0].args[0]) + str(t[0].args[1])) ### operands can be arguments to operators def p_operand(t): """operand : scalar\n| operation\n| parenthisized_operand\n| ident\n| vector\n| TREEPATH""" t[0] = t[1] ### Subscripting (i.e. 
_a[32]) def p_subscript(t): """operation : operand vector""" if len(t) == 2: t[0] = t[1] else: args = [ t[1], ] if isinstance(t[2], Builtin): for arg in t[2].args: args.append(arg) else: for arg in t[2]: args.append(arg) t[0] = Builtin('subscript', tuple(args)) ### parenthisized operands such as (1+2) for specifying things like (1+2)*10 def p_parenthisized_operand(t): 'parenthisized_operand : LPAREN operand RPAREN' t[0] = t[2] ### Basic scalars supported by MDSplus def p_scalar(t): 'scalar : BU \n| B \n| WU \n| W \n| LU \n| L \n| QU \n| Q \n| FloatNum \n| T \n| T2 \n| missing' t[0] = t[1] ### Ken variable (i.e. _gub or public _gub) def p_ident(t): """ident : IDENT\n| PLACEHOLDER\n| IDENTTYPE IDENT""" if len(t) == 2: t[0] = t[1] else: t[0] = Builtin(t[1], (str(t[2]), )) ### Missing value specified by asterisk def p_missing(t): 'missing : TIMES' t[0] = makeData(None) ### Range constructor (a : b [:c]) def p_range(t): """range : range COLON operand\n| operand COLON operand""" if isinstance(t[1], list): t[1].append(t[3]) t[0] = t[1] else: t[0] = [t[1], t[3]] def p_op_range(t): """operation : range""" t[0] = Range(tuple(t[1])) ### Loop control operations (i.e. break, continue) def p_loop_control(t): 'operation : BREAK\n| CONTINUE' t[0] = Builtin(t[1], tuple()) ### Unary arithmetic operations such as ~a, -a def p_unaryop(t): """operation : NOT operand %prec UNOP\n| INOT operand %prec UNOP\n| MINUS operand %prec UNOP\n| PLUS operand %prec UNOP | NOT_S operand %prec UNOP""" ops = { '!': 'NOT', '~': 'INOT', '-': 'UNARY_MINUS', 'not': 'NOT', '+': 'UNARY_PLUS' } if t[1] == '-' and isinstance(t[2], Scalar): t[0] = makeData(-t[2].data()) elif t[1] == '+' and isinstance(t[2], Scalar): t[0] = t[2] else: t[0] = Builtin(ops[t[1].lower()], (t[2], )) ### Binary arithmetic operations such as a+b a>=b a^b a&&b def p_binop(t): """operation : operand PLUS operand | operand MINUS operand\n| operand TIMES operand\n| operand DIVIDE operand | operand RAISE operand\n| operand RSHIFT operand\n| operand LSHIFT operand | operand LESS operand\n| operand GREATER operand\n| operand LESS_EQUAL operand | operand GREATER_EQUAL operand\n| operand EQUALS operand \n| operand IAND operand | operand AND operand \n| operand OR operand \n| operand NOT_EQUAL operand | operand IOR operand\n| operand AND_S operand \n| operand OR_S operand\n| operand NOR_S operand | operand MOD_S operand | MOD_S LPAREN operand COMMA operand RPAREN | operand GT_S operand\n| operand GE_S operand\n| operand LT_S operand\n| operand LE_S operand | operand EQ_S operand\n| operand NE_S operand """ ops = { '+': 'add', '-': 'subtract', '*': 'multiply', '/': 'divide', '<': 'lt', '>': 'gt', '^': 'power', '**': 'power', '<=': 'le', '>=': 'ge', '==': 'eq', '>>': 'shift_right', '<<': 'shift_left', '&': 'iand', '&&': 'and', '!=': 'NE', '<>': 'NE', '|': 'ior', '||': 'or', 'and': 'and', 'or': 'or', 'nor': 'nor', 'mod': 'MOD', 'gt': 'gt', 'ge': 'ge', 'lt': 'lt', 'le': 'le', 'eq': 'eq', 'ne': 'ne' } if len(t) == 4: t[0] = Builtin(ops[t[2].lower()], (t[1], t[3])) else: t[0] = Builtin(ops[t[1].lower()], (t[3], t[5])) ### Concatenation operator a // b [// c] ### Jump through hoops to emulate weird tdi behavior which concatenates string types at compile time except for the ### caveat that if the number of concatenation arguments is even and all strings concatenate the first n-1 and ### then make a concat function that concats the first n-1 with the nth, other wise concat them all. 
If any of the ### items being concatenated is not a string then don't concat anything at run time. class Concat(list): def get(self): compile_time_concat = True for arg in self: if not isinstance(arg, (str, String)): compile_time_concat = False break if compile_time_concat: c = list() c.append(self[0]) if len(self) % 2 == 0: for arg in self[1:-1]: c[-1] = str(c[-1]) + str(arg) c.append(self[-1]) else: for arg in self[1:]: c[-1] = String(str(c[-1]) + str(arg)) if len(c) > 1: return Builtin('concat', tuple(c)) else: return c[0] else: return Builtin('concat', tuple(self)) def p_concat(t): 'concat : operand SLASHSLASH operand\n| concat SLASHSLASH operand\n operation : concat' if len(t) == 4: if isinstance(t[1], Concat): t[1].append(t[3]) t[0] = t[1] else: t[0] = Concat([t[1], t[3]]) else: t[0] = t[1].get() if isinstance(t[0], String): t.type = 'scalar' ### Conditional operation (i.e. a ? b : c) def p_conditional(t): 'operation : operand QUESTION operand COLON operand' t[0] = Builtin('conditional', (t[3], t[5], t[1])) ### Ident increment/decrement (i.e. _i++, _i--, ++_i, --_i) def p_inc_dec(t): """operation : ident PLUSPLUS\n| ident MINUSMINUS\n| PLUSPLUS ident\n| MINUSMINUS ident""" op = {'++': '_inc', '--': '_dec'} if isinstance(t[1], str): t[0] = Builtin('pre' + op[t[1]], (t[2], )) else: t[0] = Builtin('post' + op[t[2]], (t[1], )) ### Ken variable assignment (i.e. _i=1) def p_assignment(t): 'operation : operand EQUAL operand %prec EQUAL' t[0] = Builtin('EQUALS', (t[1], t[3])) ### Argument list for function calls (i.e. ([a[,b[,c]]]) ) def p_arglist(t): """arglist : LPAREN args RPAREN\n args :\n| args operand\n| args COMMA\n| args ARGTYPE LPAREN operand RPAREN""" if len(t) == 4: t[0] = t[2] elif len(t) == 1: t[0] = list() else: if len(t) == 6: t[2] = Builtin(t[2], (t[4], )) if isinstance(t[2], str): if len(t[1]) == 0: t[1].append(None) t[1].append(None) else: if len(t[1]) > 0 and (t[1][-1] is None or isinstance(t[1][-1], EmptyData)): t[1][-1] = t[2] else: t[1].append(t[2]) t[0] = t[1] ### Function call (i.e. 
gub(1,2,,3)) also handles build_xxx() and make_xxx() operations def p_function(t): """operation : NAME arglist\n| EQ_S arglist\n| NE_S arglist\n| LE_S arglist | LT_S arglist\n| GT_S arglist\n| GE_S arglist""" def doBuild(name, args): def build_with_units(args): args[0].units = args[1] return args[0] def build_with_error(args): args[0].error = args[1] return args[0] def build_param(args): try: args[0].help = args[1] args[0].validation = args[2] except: pass return args[0] def build_slope(args): new_args = list() if len(args) > 1: new_args.append(args[1]) else: new_args.append(None) if len(args) > 2: new_args.append(args[2]) else: new_args.append(None) new_args.append(args[0]) return Range(tuple(new_args)) def buildPath(args): if isinstance(args[0], (str, String)): name = str(args[0]) if len(name) > 1 and name[0:2] == '\\\\': name = name[1:] ans = TreePath(name) else: ans = Builtin('build_path', args) return ans def buildCall(args): ans = Call(args[1:]) ans.retType = args[0] return ans ### retain original node specifiers when building a using function def buildUsing(args_in): def restoreTreePaths(arg): if isinstance(arg, Compound): args = list() for a in arg.args: args.append(restoreTreePaths(a)) arg.args = tuple(args) ans = arg elif isinstance(arg, (TreePath, TreeNode)) and hasattr( arg, 'original_value'): ans = TreePath(arg.original_value) else: ans = arg return ans args = list() for arg in args_in: args.append(restoreTreePaths(arg)) ans = Builtin('using', tuple(args)) return ans known_builds = { 'BUILD_ACTION': Action, #BUILD_CONDITION':Condition, 'BUILD_CONGLOM': Conglom, 'BUILD_DEPENDENCY': Dependency, 'BUILD_DIM': Dimension, 'BUILD_DISPATCH': Dispatch, 'BUILD_EVENT': Event, 'BUILD_FUNCTION': Builtin, 'BUILD_METHOD': Method, 'BUILD_PARAM': build_param, 'BUILD_PROCEDURE': Procedure, 'BUILD_PROGRAM': Program, 'BUILD_RANGE': Range, 'BUILD_ROUTINE': Routine, 'BUILD_SIGNAL': Signal, 'BUILD_SLOPE': build_slope, 'BUILD_WINDOW': Window, 'BUILD_WITH_UNITS': build_with_units, 'BUILD_CALL': buildCall, 'BUILD_WITH_ERROR': build_with_error, 'BUILD_OPAQUE': Opaque, 'BUILD_PATH': buildPath, 'USING': buildUsing, } return known_builds[name.upper()](args) def doMake(name, args): for arg in args: if not isinstance( arg, (Array, Scalar, EmptyData)) and arg is not None: raise Exception('use make opcode') name = name.upper().replace('MAKE_', 'BUILD_') if 'BUILD_' in name: return doBuild(name, tuple(args)) else: raise Exception("not a make_ call") try: t[0] = doBuild(t[1], tuple(t[2])) except Exception: try: t[0] = doMake(t[1], tuple(t[2])) except Exception: try: numbers = [ 'byte', 'byte_unsigned', 'unsigned_byte', 'word', 'word_unsigned', 'unsigned_word', 'long', 'long_unsigned', 'unsigned_long', 'quadword', 'quadword_unsigned', 'unsigned_quadword', 'float', 'double', 'f_float', 'g_float', 'd_float', 'fs_float', 'ft_float' ] if t[1].lower() in numbers and (isinstance( t[2][0], Scalar) or isinstance(t[2][0], Array)): t[0] = Data.evaluate(Builtin(t[1], tuple(t[2]))) else: t[0] = Builtin(t[1], tuple(t[2])) except Exception: t[0] = Builtin('ext_function', tuple([None, t[1]] + t[2])) ### call library (i.e. 
library->gub(a,b,c)) def p_rettype(t): 'rettype : COLON NAME' rettypes = { 'bu': 2, 'wu': 3, 'lu': 4, 'qu': 5, 'b': 6, 'w': 7, 'l': 8, 'q': 9, 'f': 10, 'd': 11, 'fc': 12, 'dc': 13, 't': 14, 'dsc': 24, 'p': 51, 'f': 52, 'fs': 52, 'ft': 53, 'fsc': 54, 'ftc': 55 } if t[2].lower() in rettypes: t[0] = rettypes[t[2].lower()] def p_call(t): """operation : NAME ARROW NAME arglist\n| NAME ARROW NAME rettype arglist""" if len(t) == 5: t[0] = Call(tuple([t[1], t[3]] + t[4])) else: t[0] = Call(tuple([t[1], t[3]] + t[5]), opcode=t[4]) ### Loop and fun statements found inside braces and sometimes in parens def p_optional_semicolon(t): """optional_semicolon : SEMICOLON\n| empty""" pass class CommaList(list): def get(self): return Builtin('comma', tuple(self)) def p_statement(t): """statement : operand SEMICOLON\n| comma_list SEMICOLON\n| comma_list\n| operand\n| SEMICOLON """ if isinstance(t[1], str): pass elif isinstance(t[1], CommaList): t[0] = t[1].get() else: t[0] = t[1] def p_statements(t): """statements : statement\n| statements statement\n| statements braced_statements""" if len(t) == 2: t[0] = Builtin('statement', (t[1], )) else: if t[2] is None: t[0] = t[1] elif len(t[1].args) < 250: t[1].args = tuple(list(t[1].args) + [t[2]]) t[0] = t[1] else: t[0] = Builtin('statement', (t[1], t[2])) def p_braced_statements(t): """braced_statements : LBRACE statements RBRACE optional_semicolon\n | LBRACE RBRACE optional_semicolon""" if len(t) == 5: if len(t[2].args) == 1: t[0] = t[2].args[0] else: t[0] = t[2] else: pass ### paren statement list as in if_error(_a,(_a=1;_b++),42) def p_statement_list(t): 'operation : LPAREN statements RPAREN' if len(t[2].args) == 1: t[0] = t[2].args[0] else: t[0] = t[2] ### comma operand list as in _a=1,_b=2,3 def p_comma_list(t): """comma_list : COMMA\n| operand COMMA\n| comma_list COMMA\n| comma_list operand""" if isinstance(t[1], CommaList): if isinstance(t[2], str): if t[1].lastNone: t[1].append(None) else: t[1].append(t[2]) t[1].lastNone = False t[0] = t[1] else: t[0] = CommaList() if len(t) == 2: t[0].append(None) t[0].lastNone = True else: t[0].append(t[1]) t[0].lastNone = False ### comma operation as in (_a=1,_b=2,3) def p_comma_list_operation(t): 'operation : LPAREN comma_list RPAREN' t[0] = t[2].get() def p_empty(t): 'empty :' pass ### For statement (i.e. for (_x=1;_x<10;_x++){statements...} or for (...) statement def p_optional_comma_list(t): """optional_operand : comma_list\n| operand\n| empty""" if isinstance(t[1], CommaList): t[0] = t[1].get() else: t[0] = t[1] def p_for(t): """operation : FOR LPAREN optional_operand SEMICOLON operand SEMICOLON optional_operand RPAREN braced_statements | FOR LPAREN optional_operand SEMICOLON operand SEMICOLON optional_operand RPAREN statement""" t[0] = Builtin('for', (t[3], t[5], t[7], t[9])) ### If statement (i.e. if (_x<10) {_x=42;} else {_x=43;}) def p_if_begin(t): """if_begin : IF LPAREN operand RPAREN""" t[0] = t[3] def p_ifelse_body(t): """ifelse_body : braced_statements\n| statement""" t[0] = t[1] def p_if(t): """operation : if_begin ifelse_body\n| if_begin ifelse_body ELSE ifelse_body""" args = [t[1], t[2]] if len(t) > 3: args.append(t[4]) t[0] = Builtin('if', tuple(args)) ### While statement (i.e. while(expression){statements;} ) def p_while(t): """operation : WHILE LPAREN operand RPAREN braced_statements | WHILE LPAREN operand RPAREN statement""" t[0] = Builtin('while', (t[3], t[5])) ### FUN definition (i.e. 
public fun gub(args){statements} ) def p_fun_arg(t): """fun_arg : ARGTYPE IDENT\n| ARGTYPE ARGTYPE IDENT\n| IDENT\n| ARGTYPE LPAREN IDENT RPAREN\n| ARGTYPE ARGTYPE LPAREN IDENT RPAREN""" if len(t) == 2: t[0] = t[1] elif len(t) == 3: t[0] = Builtin(t[1], (str(t[2]), )) elif len(t) == 4: t[0] = Builtin(t[1], (Builtin(t[2], (str(t[3]), )), )) elif len(t) == 5: t[0] = Builtin(t[1], (t[3], )) else: t[0] = Builtin(t[1], (Builtin(t[2], (t[4], )), )) def p_fun_args(t): """fun_args : LPAREN\n| fun_args fun_arg\n| fun_args COMMA\n| fun_args RPAREN""" if len(t) == 2: t[0] = list() elif isinstance(t[2], str): t[0] = t[1] else: t[1].append(t[2]) t[0] = t[1] def p_fun(t): """operation : IDENTTYPE FUN NAME fun_args braced_statements | FUN IDENTTYPE NAME fun_args braced_statements | FUN NAME fun_args braced_statements""" args = list() if len(t) == 6: if t[1].lower() == 'fun': itype = t[2] else: itype = t[1] args.append(Builtin(itype, (t[3], ))) args.append(t[5]) for arg in t[4]: args.append(arg) else: args.append(t[2]) args.append(t[4]) for arg in t[3]: args.append(arg) t[0] = Builtin('fun', tuple(args)) ### Vector/Array declarations (i.e. [_a,_b,_c] or [1,2,3,]) def p_vector(t): """vector_part : LBRACKET operand | LBRACKET | vector_part COMMA operand vector : vector_part RBRACKET""" if isinstance(t[1], str): if len(t) == 2: t[0] = Builtin('vector', tuple()) t[0].isarray = True else: t[0] = Builtin('vector', (t[2], )) t[0].isarray = isinstance(t[2], Scalar) or isinstance( t[2], Array) elif t[2] == ',': args = list(t[1].args) if len(args) > 250: args = [Builtin('vector', tuple(args)), t[3]] else: args.append(t[3]) t[1].args = tuple(args) t[0] = t[1] t[0].isarray = t[1].isarray and (isinstance(t[3], Scalar) or isinstance(t[3], Array)) else: if t[1].isarray: t[0] = Data.evaluate(t[1]) else: t[0] = Builtin('vector', t[1].args) ### Switch statement (i.e. switch(_a) {case(42) {statements} case(43) {statements}} ) def p_case(t): """case : CASE LPAREN operand RPAREN braced_statements\n| CASE LPAREN operand RPAREN statement | CASE LPAREN operand RPAREN\n| CASE DEFAULT braced_statements | CASE DEFAULT statement\n| statement""" if len(t) == 4: t[0] = Builtin('default', (t[3], )) elif len(t) == 5: t[0] = Builtin('case', (None, None)) t[0].args = (t[3], ) t[0].doAppendCase = True elif len(t) == 6: t[0] = Builtin('case', (t[3], t[5])) else: t[0] = t[1] def p_cases(t): """cases : case\n| cases case""" def findCaseWithNoStatements(case, parent=None, argidx=0):
def __init__(self):
    '''Initiate logging, open a file to store tokens, build the lexer.'''
    self.logger = logging.getLogger("W2L")
    self.lexer = lex.lex(module=self, reflags=re.DOTALL)
import lex

tokens = ["a", "b", "c", "d"]

t_a = r'a'
t_b = r'b'
t_c = r'c'
t_d = r'd'

# Error handling rule
def t_error(t):
    print("There is an invalid character")
    t.lexer.skip(1)

# Build the lexer
lex.lex()

lex.input("abcx")
while True:
    tok = lex.token()
    if not tok:
        break
    print(str(tok.value))
def __init__(self, input):
    self.lexer = lex.lex()
    self.lexer.input(input)
r"\-?[0-9]+" t.value = int(t.value) return t def t_COMMENT(t): r"//.*" return t def t_CO(t): r":" return t lex.lex() #---------------------------------------------------------- commands = [] symbols = {} def p_stuff(p): """stuff : | statement stuff""" pass def p_statement_comment(p): 'statement : COMMENT'
def assert_tokens(self, inp, want):
    got = [x.tok for x in preparse(lex(inp))]
    self.assertEqual(got, want)
def cmd_lex(self, code):
    return lex(code)
def rest(self): return self.buf[self.count:] import readline if __name__ == "__main__": import proarkhe import grammar import parser import block import exp import lex while True: rep = repreader(["; ", "+ "]) e = grammar.S(parser.pushstream(lex.lex(rep))) try: parsed = e.parse() except EOFError: break except lex.lex.lexerror as lexexp: continue if not parsed: print("Cannot parse: '{}'@{} '{}'".format(rep.buf, rep.count, rep.rest())) elif rep.rest() and not rep.rest().isspace(): # for whatever reason "".isspace() is False print("Garbage after S: '{}' ({})".format(rep.buf, len(rep.buf))) else:
def emmet(string):
    return _emmet_tokens(parse(lex(string))[0])
from treeOptimizer import treeOptimizer
from codeOptimizer import codeOptimizer


def print_line():
    print('--------------------------------------------------')


if __name__ == '__main__':
    print('Compiler Start')
    print('Lex')
    print_line()
    lex_obj = lex('test/case_09.txt')
    lex_obj.test()

    print('\nSyn')
    print_line()
    syn_obj = syn(lex_obj)
    syn_obj.build()
    syn_obj.test()

    print('\nSemantics')
    print_line()
    semantics_obj = semantics(syn_obj)
    semantics_obj.check()

    print('\nIrGenerate')
    print_line()
def t_GEQ(t): r'>' return t def t_NEWLINE(t): r"\n" t.lexer.lineno += 1 pass t_ignore = '\t ' def t_error(t): print("Illegal character '" + t.value[0] + "' at line " + str(t.lexer.lineno)) t.lexer.skip(1) data = r"""""" lexer = lex.lex(debug=0) # lexer.input('') # # while True: # tok = lexer.token() # if not tok: break # # if tok.type == 'ID' or tok.type == 'DECIMAL': # # print(tok.type + "(" + tok.value + ")") # # else: # print(tok.type + "(" + tok.value + ")")
#ignored characters t_ignore = " \t" def t_newline(t): r'\n+' t.lexer.lineno += t.value.count("\n") def t_error(t): print("Illegal character '%s'" % t.value[0]) t.lexer.skip(1) lexer_instance = lex.lex() if __name__ == '__main__': sample_program = """ class Factorial { public static void main(String[] a){ System.out.println(new Fac().ComputeFac(10)); } } class Fac { public int ComputeFac(int num){ in num_aux; if (num < 1) num_aux = 1;
stmt.stmts, block.decls]]): return [node.children[0]] elif any([ parser.istoken(node, t) for t in ["[", "]", "(", ")", ";", "|", ":", ":="] ]): return [] else: return [node] import trex import reader identities = [ trex.trex(parser.pushstream(lex.lex(reader.stringreader(s)))) for s in [ '"0" ; "+" ; x:. ;^>add>', # 0 + x == x 'x:. ; "+" ; "0" ;^>add>', # x + 0 == x 'x:. ; "-" ; "0" ;^>add>', # x - 0 == x 'x:"OMEGA" ; "+" ; . ;^>add>', # OMEGA + x == OMEGA 'x:"OMEGA" ; "-" ; . ;^>add>', # OMEGA - x == OMEGA '. ; "+" ; x:"OMEGA" ;^>add>', # x + OMEGA == OMEGA '"1" ; "*" ; x:. ;^>mul>', # 1 * x == x 'x:. ; "*" ; "1" ;^>mul>', # x * 1 == x 'x:. ; "/" ; "1" ;^>mul>', # x / 1 == x 'x:"0" ; "*" ; . ;^>mul>', # 0 * x == 0 'x:"0" ; "/" ; . ;^>mul>', # 0 / x == 0 '. ; "*" ; x:"0" ;^>mul>' ] ] # x * 0 == 0
def init_parser():
    lex.lex(debug=0, optimize=1)
    yacc.yacc(debug=0, optimize=1)
def t_LGT(t):
    r'(<=)|(>=)|[<>]'
    # '<=' and '>=' are listed before the single characters so they are
    # matched whole instead of being split into '<' or '>' plus '='.
    return t

def t_ASSIGN(t):
    r':='
    return t

t_ignore = ' \t\n'

# Error handling rule
def t_error(t):
    print("Illegal character: %s" % t.value[0])
    t.lexer.skip(1)

# Build the lexer
lexer = lex.lex()

# data = """ IFFALSE t < 3 GOTO 3 """
#
# lexer.input(data)
# while True:
#     tok = lexer.token()
#     if not tok:
#         break  # No more input
#     print(tok)
def __init__(self, debug=0, optimize=0, lextab='lextab', reflags=0):
    self.lexer = lex.lex(debug=debug, optimize=optimize,
                         lextab=lextab, reflags=reflags)
    self.token_stream = None
# String literal t_SCONST = r'\"([^\\\n]|(\\.))*?\"' # Character constant 'c' or L'c' t_CCONST = r'(L)?\'([^\\\n]|(\\.))*?\'' def t_error(t): print("Illegal character %s" % repr(t.value[0])) t.skip(1) # return t lexer = lex.lex(optimize=1) if __name__ == "__main__": # # use Use LOC as PETSC_DIR [for reading/writing relevant files] # try: PETSC_DIR = sys.argv[1] htmlmapfile = sys.argv[2] except: raise RuntimeError('Insufficient arguments. Use: ' + sys.argv[0] + 'PETSC_DIR htmlmap') # get the version string for this release try: fd = open(os.path.join(PETSC_DIR, 'include', 'petscversion.h'))
    while token is not None:
        output_file.write("type:" + token.type)
        output_file.write(" value:" + str(token.value))
        output_file.write(" line:" + str(token.lineno))
        output_file.write(" position:" + str(token.lexpos))
        output_file.write("\n")
        token = lexer.token()

if __name__ == "__main__":
    if len(argv) != 3:
        print("Invalid parameters.")
        print("Usage:")
        print("  lexer.py input_file output_file")
        exit()

    input_file = open(argv[1], "r")
    text = input_file.read()
    input_file.close()

    lexer = lex.lex(module=lexer_rules)
    lexer.input(text)

    output_file = open(argv[2], "w")
    dump_tokens(lexer, output_file)
    output_file.close()
def __init__(self):
    lex.lex(module=self)
    yacc.yacc(module=self)
def t_FNAME(t): r'[a-zA-Z_][a-zA-Z0-9_]*\w*\(' s = t.value m = re_name.match(s) if m: res = m.group(1) t.value = res return t t_ignore = " \t" def t_error(t): print "Illegal character '%s'" % t.value[0] t.skip(1) lex.lex(lextab='_lextab', optimize=1) ############################################################################# # yacc def _linize_list(list): reslist = [] for item in list: reslist += item return reslist # Parsing rules precedence = ( ('left', 'ADD', 'SUB', 'COMMA', "CONCAT", "EQ", "NE", "LE", "GE", "LT", "GT", 'POWER'), ('left', 'MUL', 'DIV'),
def pascal_lex(data):
    return lex.lex(data, token_expressions)
def t_mod(t): r'%.*\n' t.lineno += 1 # Comments def t_comment(t): r' /\*(.|\n)*?\*/' t.lineno += t.value.count('\n') def t_error(t): print "Illegal character %s at %d type %s" % (repr(t.value[0]), t.lineno, t.type) t.skip(1) # Build the lexer import lex lex.lex(debug=0) # # Section: Helper classes and functions. # # Global variablers known_basics = {"int" : "pack_int", "enum" : "pack_enum", "unsigned_int" : "pack_uint", "unsigned" : "pack_uint", "hyper" : "pack_hyper", "unsigned_hyper" : "pack_uhyper", "float" : "pack_float", "double" : "pack_double", # Note: xdrlib.py does not have a
import sys
from lex import lex
from ast import parse

if __name__ == "__main__":
    text = ""
    print("Type 'exit' or 'quit' to stop")
    while True:
        text = input("> ")
        if text.lower() in ("exit", "quit"):
            break
        tokens = lex(text)
        # print(tokens)
        parse_result = parse(tokens)
        # print(parse_result)
        if not parse_result:
            sys.stderr.write("Parse Error!\n")
            sys.exit(1)
        ast = parse_result.value
        env = {}
        # print(parse_result)
        # print(ast)
        print(ast.eval(env))
def relex():
    global lexer
    mk_quals()
    lexer = lex.lex()
if type in tree.NAMED_NODE: print 'WARN: %s:%s Not specified title ' % (source, slineno) if not title or len(title) < 0: title = name return Node(type=type, value=value, name=name, title=title, source=source, spos=spos, slineno=slineno) __lexer__ = lex.lex(reflags=re.M) class MutipleFileLexer(object): def __init__(self, f='__string__', startlineno=0, indent=0): self.lexer = __lexer__.clone() self.lexer.include_lexer = None self.lexer.file = f self.lexer.source = f self.lexer.startlineno = startlineno self.lexer.indent = indent self.lexer.mflexer = self self.mflexer = self def active_lexer(self): lexer = self.lexer
redirect_to_file(t.value + " line " + str(t.lineno) + " cols " + str(find_column(contents, t)) + "-" + str(find_column(contents, t) + len(t.value) - 1) + " is " + t.type + "( truncated to " + t.value[:31] + ")") elif t.type == 'T_StringConstant': redirect_to_file("\n*** Error line " + str(t.lineno) + ".") redirect_to_file("*** Unterminated string constant: " + t.value + "\n") else: redirect_to_file("*** Error line" + str(t.lineno) + "\nUnrecognized char '%s'" % str(t.value[0]) + "\n\n") t.lexer.skip(1) lexar = lex.lex() #redirect print to outfile def redirect_to_file(text): #original = sys.stdout #sys.stdout = open(outFileName, 'a+') #print('This is your redirected text:') print(text) #sys.stdout = original #read from file fileName = str(sys.argv[1]) #outFileName = str(sys.argv[2])
def parse(code, grammar, actions, gotos): # TODOd #1: create a list of trees trees = [] input = [] code, lexeme, token = lex(code) newtoken = str(token) input.append(newtoken[6:]) stack = [] stack.append(0) while True: if not input[0]: input.pop() input.append("$") token = "$" else: token = str(input[0]) print("stack: ", end = "") print(stack, end = " ") print("input: ", end = "") print(input, end = " ") state = stack[-1] action = actions[(state, token)] print("action: ", end = "") print(action) if action is None: if state == 1: raise Exception(errorMessage(7)) elif state <= 2: raise Exception(errorMessage(8)) elif state == 10: raise Exception(errorMessage(6)) elif state == 32: raise Exception(errorMessage(11)) elif state >= 22 or state >= 40: raise Exception(errorMessage(9)) elif state > 45: raise Exception(errorMessage(10)) return None # tree building update # shift operation , fix for two digits if action[0] == 's': input.pop(0) stack.append(token) code, lexeme, token = lex(code) newtoken = str(token) input.append(newtoken[6:]) if len(action) > 2: ok = int(action[1:3]) state = ok else: state = int(action[1]) stack.append(state) # TODOd #2: create a new tree, set data to token, and append it to the list of trees tree = Tree() tree.data = token trees.append(tree) # reduce operation elif action[0] == 'r': if len(action) > 2: production = grammar[int(action[1:3])] else: production = grammar[int(action[1])] lhs = getLHS(production) rhs = getRHS(production) for i in range(len(rhs) * 2): stack.pop() state = stack[-1] stack.append(lhs) stack.append(int(gotos[(state, lhs)])) # TODOd #3: create a new tree and set data to lhs newTree = Tree() newTree.data = lhs # TODOd #4: get "len(rhs)" trees from the right of the list of trees and add each of them as child of the new tree you created, preserving the left-right order for tree in trees[-len(rhs):]: newTree.add(tree) # TODOd #5: remove "len(rhs)" trees from the right of the list of trees trees = trees[:-len(rhs)] # TODOd #6: append the new tree to the list of trees trees.append(newTree) # not a shift or reduce operation, must be an "accept" operation else: production = grammar[0] lhs = getLHS(production) rhs = getRHS(production) # TODOd #7: same as reduce but using the 1st rule of the grammar root = Tree() root.data = lhs for tree in trees: root.add(tree) # TODOd #8: return the new tree return root
import lex from Tokens import * from Colour import * cfg_file = sys.argv[1][6:] # Config file prog_name = sys.argv[2] # Code file output_file = sys.argv[3][9:] # Output HTML file enc = createColDict(cfg_file) # Get keyword-to-colour mapping H = '<!DOCTYPE html><html><head><title>'+prog_name+'</title></head><body>' # HTML document output with open(prog_name) as fp: code = fp.read() + '\n' lexer = lex.lex() # Initialize lexer lexer.input(code) actstring = [] # Actual lexemes tokenstring = [] # Token types c_line = lexer.lineno for token in lexer: if (token.lineno != c_line) : # once we get to next line H += getHTML(actstring,tokenstring,enc) # get colour-formatted HTML code for current line actstring = [] tokenstring = [] c_line = token.lineno # update current line number actstring.append(str(token.value)) tokenstring.append(str(token.type)) H += getHTML(actstring,tokenstring,enc) # get colour-formatted HTML code for current line
return True def tokenset(node, order): if order == tree.tnode.WALK_POST and (not node.start and node.children): node.start = node.children[0].start node.end = node.children[-1].end return True import block import exp if __name__ == "__main__": import sys import reader s = S(parser.pushstream(lex.lex(reader.filereader(sys.stdin)))) if not s.parse(): print("Cannot parse") elif not s.atend(): print("Garbage after S") else: print(s.pprint()) print(s.printexp()) s.block = proarkhe.proarkhe() s.visit(parentset) s.walk(tokenset) s.reform([exp.simplify_depth], [exp.simplify_arith]) s.visit(parentset) s.visit(block.blockset) s.visit(block.declsset)
from lex import lex
from sintaxe import sintaxe

f = open('exemplo.txt', 'r')
stext = f.read()
tokens = lex(stext)
print(sintaxe(tokens).pretty())