def prueba():
    lex.input('''program programa {main () {var int c; c=2;}}''')
    while True:
        tok = lexer.token()
        if not tok:
            break  # No more input
        print(tok)

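# Hedged sketch (not from the source): a minimal module-level PLY token specification of
# the kind the snippets in this collection assume exists before lex.lex()/lex.input() is
# called. The token names and rules below are illustrative assumptions only; PLY's
# module-level lex.input()/lex.token() delegate to the most recently built lexer.
import ply.lex as lex

tokens = ('ID', 'NUMBER', 'PLUS')

t_PLUS = r'\+'
t_ID = r'[A-Za-z_][A-Za-z0-9_]*'
t_ignore = ' \t'

def t_NUMBER(t):
    r'\d+'
    t.value = int(t.value)
    return t

def t_newline(t):
    r'\n+'
    t.lexer.lineno += len(t.value)

def t_error(t):
    print("Illegal character %r" % t.value[0])
    t.lexer.skip(1)

lexer = lex.lex()
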
def main():
    lex.lex()
    cmd, program_file = check_args()
    if program_file is not None and cmd == 'test':
        parser = yacc.yacc()
        with open(program_file) as f:
            input = f.read()
        progcode = parser.parse(input)
        program = Program(progcode)
        program.run_tests()
    elif program_file is not None and cmd == 'lex':
        with open(program_file) as f:
            input = f.read()
        lex.input(input)
        while True:
            tok = lex.token()
            if not tok:
                break
            print tok
    elif program_file is not None:
        parser = yacc.yacc()
        with open(program_file) as f:
            input = f.read()
        progcode = parser.parse(input)
        program = Program(progcode)
        program.call_function('main', [5])

def _group_lines(self, data):
    """Given an input string, this function splits it into lines.  Trailing
    whitespace is removed.  Any line ending with \ is grouped with the next
    line.  This function forms the lowest level of the preprocessor---grouping
    text into a line-by-line format.
    """
    lex = self.lexer.clone()
    lines = [x.rstrip() for x in data.splitlines()]
    for i in range(len(lines)):
        j = i + 1
        while lines[i].endswith('\\') and (j < len(lines)):
            lines[i] = lines[i][:-1] + lines[j]
            lines[j] = ""
            j += 1

    data = "\n".join(lines)
    lex.input(data)
    lex.lineno = 1

    current_line = []
    while True:
        tok = lex.token()
        if not tok:
            break
        current_line.append(tok)
        if tok.type in self.t_WS and '\n' in tok.value:
            yield current_line
            current_line = []

    if current_line:
        yield current_line

def print(self, code):
    lex.input(code)
    while True:
        token = lex.token()
        if not token:
            break
        print(token.type, token.value)

def run_lex(text):
    lex.lex()
    lex.input(text)
    while True:
        token = lex.token()
        if not token:
            break
        print token

def tokenize(string):
    lex.input(string)
    while True:
        tok = lex.token()
        if not tok:
            break
        yield tok

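# Hedged usage sketch (not from the source): consuming the tokenize() generator above.
# It assumes the module-level PLY lexer has already been built with lex.lex() and that a
# 'tokens' list plus t_* rules are defined elsewhere; the sample input is an assumption.
def demo_tokenize(source_text):
    for tok in tokenize(source_text):
        print(tok.type, tok.value, tok.lineno)
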
def __init__(self, input, replacements, outfile2):
    global outfile
    outfile = outfile2
    self.lexer = lex.lex()
    lex.input(open(input).read())
    self.replacements = replacements
    self.outfile = outfile2

def test(self, codigo):
    lex.input(codigo)
    while True:
        t = lex.token()
        if not t:
            break
        print(t)

def group_lines(self, input):
    lex = self.lexer.clone()
    lines = [x.rstrip() for x in input.splitlines()]
    for i in xrange(len(lines)):
        j = i + 1
        while lines[i].endswith('\\') and (j < len(lines)):
            lines[i] = lines[i][:-1] + lines[j]
            lines[j] = ""
            j += 1

    input = "\n".join(lines)
    lex.input(input)
    lex.lineno = 1

    current_line = []
    while True:
        tok = lex.token()
        if not tok:
            break
        current_line.append(tok)
        if tok.type in self.t_WS and '\n' in tok.value:
            yield current_line
            current_line = []

    if current_line:
        yield current_line

def tokenizer(code):
    line = 0
    endls = [-1] + [i for i, c in enumerate(code) if c == '\n']
    lendls = len(endls)
    tokens = []
    lex.input(code)
    while True:
        token = lex.token()
        if token is None:
            break
        tokens.append(token)

    # setting correct line and columns of tokens
    line = 0
    for token in tokens:
        while line < lendls and token.lexpos > endls[line]:
            line += 1
        token.lineno = line
        token.lexpos -= endls[line - 1]

    # setting correct line and columns of error tokens
    line = 0
    for token in errors:
        while line < lendls and token.lexpos > endls[line]:
            line += 1
        token.lineno = line
        token.lexpos -= endls[line - 1]

    return errors, tokens

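# Hedged usage sketch (not from the source): calling tokenizer() above. It relies on a
# module-level 'errors' list that the lexer's t_error handler appends to, so that name is
# an assumption here, and the lexer itself must already be built with lex.lex().
def demo_tokenizer():
    bad, good = tokenizer("a = 1\nb = a + 2\n")
    for tok in good:
        print(tok.lineno, tok.lexpos, tok.type, tok.value)
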
def group_lines(self, input):
    lex = self.lexer.clone()
    lines = [x.rstrip() for x in input.splitlines()]
    for i in xrange(len(lines)):
        j = i + 1
        while lines[i].endswith('\\') and (j < len(lines)):
            lines[i] = lines[i][:-1] + lines[j]
            lines[j] = ""
            j += 1

    input = "\n".join(lines)
    lex.input(input)
    lex.lineno = 1

    current_line = []
    while True:
        tok = lex.token()
        if not tok:
            break
        current_line.append(tok)
        if tok.type in self.t_WS and '\n' in tok.value:
            yield current_line
            current_line = []

    if current_line:
        yield current_line

def run_on_string(self, data):
    self.try_log_debug("==== Running on string\n%s...\n", data[:200])
    lex = self.lexer.clone()
    # lexer debugging
    lex.input(data)
    if 0:
        tok = lex.token()
        while tok:
            print tok
            tok = lex.token()
    lex.lineno = 1
    lex.input(data)
    parser = yacc.yacc(module=self, debug=self.debug,
                       debugfile=self.debugfile, tabmodule=self.tabmodule)
    #try:
    #    self.parser.restart()
    #except AttributeError:
    #    pass
    script = parser.parse(lexer=lex, debug=self.logger)
    #print script
    return script

def main():
    log.debug(sys.argv[1])
    sys.stderr = open(os.devnull, 'w')
    # lex.lex(debug=True)
    lex.lex()
    yacc.yacc()
    sys.stderr = sys.__stderr__
    r = open(sys.argv[1])
    code = ""
    for line in r:
        code += line.strip() + "\n"
    logging.debug(code)
    try:
        lex.input(code)
        while True:
            token = lex.token()
            if not token:
                break
            logging.debug(token)
        # ast = yacc.parse(code, debug=True)
        ast = yacc.parse(code)
        ast.execute()
    except Exception as e:
        logging.debug(e)
    r.close()

def tokens(programa):
    lex.input(programa)
    while True:
        tok = lex.token()
        if not tok:
            break
        lista.append(str(tok.value) + " -> " + str(tok.type))
    return lista

def inputfunction(s1):
    lex.input(s1)
    for tok in iter(lex.token, None):
        if (tok.type == tokens[0]):
            print("compare.py")
            break
        if (tok.type == tokens[1]):
            print("const.py")
            break
        if (tok.type == tokens[2]):
            print("sum.py")
            break
        if (tok.type == tokens[3]):
            print("difference.py")
            break
        if (tok.type == tokens[4]):
            print("quotient.py")
            break
        if (tok.type == tokens[5]):
            print("multiply.py")
            break
        if (tok.type == tokens[6]):
            print("mod.py")
            break
        if (tok.type == tokens[7]):
            print("minus.py")
            break
        if (tok.type == tokens[8]):
            print("print.py")
            break
        if (tok.type == tokens[9]):
            print("scan.py")
            break

def parse(s):
    global LATok
    print("** parsing: ", s)
    lex.input(s)
    LATok = lex.token()
    result = input()  # presumably the grammar's start-symbol parse function, not the builtin
    print("** result: ", result)

def test(self, code):
    lex.input(code)
    while True:
        tk = lex.token()
        if not tk:
            break
        print(tk)

def ingresarArchivo(nombreArchivo):
    with open(nombreArchivo, 'r') as f:
        contents = f.read()
    lex.input(contents)
    for tok in iter(lex.token, None):
        print(repr(tok.type), repr(tok.value))

def main(script):
    lexer.build()
    try:
        scriptfile = open(script, 'r+')
        scriptdata = scriptfile.read()
        lex.input(scriptdata)
        print(chr(27) + "[0;36m" + "Iniciando análise léxica" + chr(27) + "[0m")
        i = 1
        while True:
            tok = lex.token()
            if not tok:
                break
            print("\t" + str(i) + " - " + "Line: " + str(tok.lineno) + "\t" +
                  str(tok.type) + "\t--> " + str(tok.value))
            i += 1
        print(chr(27) + "[0;36m" + "Terminando análise léxica\n\n\n" + chr(27) + "[0m")
        print(chr(27) + "[0;36m" + "Iniciando a análise sintática" + chr(27) + "[0m")
        x = parserS.parser.parse(scriptdata, tracking=False)
        print(chr(27) + "[0;36m" + "Terminando a análise sintática" + chr(27) + "[0m")
    except EnvironmentError as e:
        print(e)

def test(code):
    lex.input(code)
    while True:
        t = lex.token()
        if not t:
            break
        print(t)

def main(o_in, o_out):
    lex.lex()
    with open(o_in, 'r') as f:
        lex.input(f.read())
    bytes_ = parse()
    with open(o_out, 'w') as f:
        f.write(format_bytes_to_ram(bytes_))

def refsect(t, s):
    refsect = lxml.etree.SubElement(refentry, 'refsect1')
    title = lxml.etree.SubElement(refsect, 'title')
    title.text = t.title()

    if verbose:
        print('%s has %d paragraphs' % (t, len(s.split('\n\n'))), file=sys.stderr)

    if verbose > 1:
        dump(s, 'before lexing')

        # dump out lexer token sequence
        lex.input(s)
        for tok in lexer:
            print(tok, file=sys.stderr)

    # parse the section text for makedoc markup and the few pieces of texinfo
    # markup we understand, and output an XML marked-up string
    xml = parser.parse(s, tracking=True, debug=(verbose > 2))

    dump(xml, 'after parsing')

    xml = '<refsect1>' + xml + '</refsect1>'
    refsect.extend(lxml.etree.fromstring(xml))

def test(self, code):
    lex.input(code)
    while True:
        t = lex.token()
        if not t:
            break
        print('<' + t.type + ',' + t.value + '>')

def run(self):
    """Running the parser."""
    logging.debug("running parser with filename: [" + self._filename + "]")

    if self._lexeronly:
        logging.debug("doing *ONLY* lexical analysis, skipping syntactical analysis")
        ## debug output of lexical analysis: (FIXXME: replace with yacc parsing)
        for line in fileinput.input([self._filename]):
            logging.info(" processing line: [" + line.strip() + "]")
            ## Give the lexer some input
            lex.input(line)
            # Tokenize
            while True:
                token = lex.token()
                if not token:
                    break  # No more input
                logging.debug(str(token))
    else:
        yacc.parse(open(self._filename).read())

    ## report number of errors
    if self._numerrors > 0:
        logging.critical("-> " + str(self._numerrors) + " ERRORS found while parsing " + self._filename)
    else:
        logging.info("no errors found while parsing " + self._filename)

def refsect(t, s):
    refsect = lxml.etree.SubElement(refentry, "refsect1")
    title = lxml.etree.SubElement(refsect, "title")
    title.text = t.title()

    if verbose:
        print("%s has %d paragraphs" % (t, len(s.split("\n\n"))), file=sys.stderr)

    if verbose > 1:
        dump(s, "before lexing")

        # dump out lexer token sequence
        lex.input(s)
        for tok in lexer:
            print(tok, file=sys.stderr)

    # parse the section text for makedoc markup and the few pieces of texinfo
    # markup we understand, and output an XML marked-up string
    xml = parser.parse(s, tracking=True, debug=(verbose > 2))

    dump(xml, "after parsing")

    xml = "<refsect1>" + xml + "</refsect1>"
    refsect.extend(lxml.etree.fromstring(xml))

def tgrep(deriv, expression, with_context=False, nonrecursive=False, left_to_right=False):
    '''Performs the given tgrep query on the given tree. If _with_context_ is True,
    each matched node yields a pair (node, context), and captured nodes are accessible
    by name using the dict-like context. If the user wants to keep context around,
    a copy must be made.'''
    if not expression:
        raise RuntimeError('No query expression given.')

    query = expression_cache.get(expression, None)
    if query is None:
        initialise()
        if _tgrep_debug:
            debug("Lexing %s", expression)
            lex.input(expression)
            for tok in iter(lex.token, None):
                debug("%s %s", tok.type, tok.value)
        query = yacc.parse(expression)
        expression_cache[expression] = query

    # Default traversal method is right to left
    traversal_method = (single if nonrecursive
                        else nodes if left_to_right
                        else nodes_reversed)

    context = Context()
    for node in traversal_method(deriv):
        context.clear()
        if query.is_satisfied_by(node, context):
            if _tgrep_debug:
                debug("%s matched %s", lrp_repr(node), query)
            if with_context:
                yield node, context
            else:
                yield node

def parse(s):
    lex.lexer.lineno = 0
    lex.input(s)
    global in_put
    in_put = s
    return list(iter(lex.token, None))

def lextest(data):
    lex.input(data)
    while 1:
        tok = lex.token()
        if not tok:
            break
        print tok

def lexer5525_TestMain(argv=None):
    """Lexer Test Cases"""
    # Setup and Check Args
    if argv is None:
        argv = sys.argv
    if len(argv) != 2:
        sys.stderr.write(str(argv[0]) + " requires two arguments\n")
        sys.stderr.write(__doc__ + "\n")
        return 1
    inputFilePath = str(argv[1])
    if(inputFilePath[-3:] != ".py"):
        sys.stderr.write(str(argv[0]) + " input file must be of type *.py\n")
        return 1
    inputFile = open(inputFilePath)
    source = inputFile.read()
    inputFile.close()
    lex.input(source)
    while True:
        tok = lex.token()
        if not tok:
            break
        sys.stdout.write(str(tok) + "\n")
    return 0

def tokens(expresion):
    lex.input(expresion)
    while True:
        tok = lex.token()
        if not tok:
            break
        lista.append(str(tok.value) + " -> " + str(tok.type))
    return lista

def __init__(self, input, replacements, outfile2):
    global outfile
    outfile = outfile2
    self.lexer = lex.lex()
    lex.input(open(input).read())
    self.replacements = replacements
    self.outfile = outfile2

def tokeniser_(self):
    global indent, indentLevels
    yield newToken("BLOCKSTART", 0, self.lineno)
    self.lineno = 0
    for line in self.source:
        # Give it to the lexer!
        lex.input(line)
        self.lineno += 1
        t = 0      # Every line, reset the token count
        tseen = 0  # Every line, reset the token count
        tlist = []
        while 1:
            tok = lex.token()
            try:
                toktype = tok.type
            except AttributeError:
                toktype = None
            if toktype == "BLOCKEND":  # was 'is', which tests identity rather than equality
                for i in xrange(tok.value):
                    X = newToken("BLOCKEND", (-1 * i) - 1, self.lineno)
                    tlist.append(X)
                    t += 1
                    yield newToken("BLOCKEND", (-1 * i) - 1, self.lineno)
            elif (tseen == 0 and indent != 0 and toktype not in ["BLOCKSTART", "IGNORE"]):
                "We're switching back to indent level 0 from non-zero."
                "We need to reinsert the right number of end blocks"
                currindent = indentLevels[-1]
                indentLevels = indentLevels[:-1]
                blocks = 0
                indent = 0
                while currindent > indent and len(indentLevels) > 0:
                    currindent = indentLevels[-1]
                    indentLevels = indentLevels[:-1]
                    blocks += 1
                for i in xrange(0, blocks):
                    X = newToken("BLOCKEND", -10 + blocks, self.lineno)
                    tlist.append(X)
                    t += 1
                    yield newToken("BLOCKEND", -10 + blocks, self.lineno)
                X = newToken("BLOCKEND", -10 + blocks, self.lineno)
                tlist.append(X)
                t += 1
                yield newToken("BLOCKEND", -10 + blocks, self.lineno)
                indent = 0
            tseen += 1
            if not tok:
                break  # No more input on this line
            # Don't forward IGNORE indentation levels
            if toktype != "IGNORE" and tok is not None:  # and tok.type != "SPACE"
                tok.lineno = self.lineno
                tlist.append(tok)
                t += 1
                yield tok
        if len(tlist) > 0:
            yield newToken("EOL", 0, self.lineno)
    yield newToken("BLOCKEND", 0, self.lineno)

def assertTokens(self, input, number, types, values):
    lex.input(input)
    tokens = get_tokens()
    self.assertEqual(len(tokens), number)
    for tok, type, value in zip(tokens, types, values):
        self.assertEqual(tok.type, type)
        self.assertEqual(tok.value, value)

def run_on_string(self, data):
    self.try_log_debug("==== Running on string\n%s...\n", data[:200])
    lex = self.lexer.clone()
    # lexer debugging
    lex.input(data)
    if 0:
        tok = lex.token()
        while tok:
            print tok
            tok = lex.token()
    lex.lineno = 1
    lex.input(data)
    parser = yacc.yacc(module=self, debug=self.debug,
                       debugfile=self.debugfile, tabmodule=self.tabmodule)
    #try:
    #    self.parser.restart()
    #except AttributeError:
    #    pass
    script = parser.parse(lexer=lex, debug=self.logger)
    #print script
    return script

def main():
    global errorFlag
    if (len(sys.argv) != 2):
        sys.exit("invalid arguments")
    fd = open(sys.argv[1], 'r')
    code = ""
    for line in fd:
        code = line.strip()
        try:
            lex.input(code)
            while True:
                token = lex.token()
                if not token:
                    break
                #print(token)
            ast = yacc.parse(code)
            ast.execute()
        except Exception:
            if (errorFlag == 0):
                print("SEMANTIC ERROR")
            errorFlag = 0

def _group_lines(self, data):
    """Given an input string, this function splits it into lines.  Trailing
    whitespace is removed.  Any line ending with \ is grouped with the next
    line.  This function forms the lowest level of the preprocessor---grouping
    text into a line-by-line format.
    """
    lex = self.lexer.clone()
    lines = [x.rstrip() for x in data.splitlines()]
    for i in range(len(lines)):
        j = i + 1
        while lines[i].endswith("\\") and (j < len(lines)):
            lines[i] = lines[i][:-1] + lines[j]
            lines[j] = ""
            j += 1

    data = "\n".join(lines)
    lex.input(data)
    lex.lineno = 1

    current_line = []
    while True:
        tok = lex.token()
        if not tok:
            break
        current_line.append(tok)
        if tok.type in self.t_WS and "\n" in tok.value:
            yield current_line
            current_line = []

    if current_line:
        yield current_line

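# Hedged illustration (not from the source) of the line-continuation joining performed by
# _group_lines() above, shown in isolation from the lexer. The sample text and the helper
# name are assumptions for demonstration only.
def demo_join_continuations():
    lines = [x.rstrip() for x in "#define MAX 10 \\\n    + 20\nint x;\n".splitlines()]
    for i in range(len(lines)):
        j = i + 1
        while lines[i].endswith('\\') and (j < len(lines)):
            lines[i] = lines[i][:-1] + lines[j]
            lines[j] = ""
            j += 1
    print("\n".join(lines))  # the two continued physical lines become one logical line
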
def run(self):
    """Running the parser."""
    logging.debug("running parser with filename: [" + self._filename + "]")

    if self._lexeronly:
        logging.debug("doing *ONLY* lexical analysis, skipping syntactical analysis")
        ## debug output of lexical analysis: (FIXXME: replace with yacc parsing)
        for line in fileinput.input([self._filename]):
            logging.info(" processing line: [" + line.strip() + "]")
            ## Give the lexer some input
            lex.input(line)
            # Tokenize
            while True:
                token = lex.token()
                if not token:
                    break  # No more input
                logging.debug(str(token))
    else:
        yacc.parse(open(self._filename).read())

    ## report number of errors
    if self._numerrors > 0:
        logging.critical("-> " + str(self._numerrors) + " ERRORS found while parsing " + self._filename)
    else:
        logging.info("No errors found while parsing " + self._filename)

def lexer(data):
    lex.input(data)
    while True:
        tok = lex.token()
        print(tok)
        if not tok:
            break

def __init__(self, file):
    lex = make_lex()
    lex.input(file.read())
    self.token = fix_lex_eof(lex.token)
    self._indent = [0]
    self._buffer = []
    self.current = None

def testing_MINUS():
    lex.input(test_Operations[3])
    tokens = list()
    while True:
        tok = lex.token()
        tokens.append(tok)
        if not tok:
            break

def lex(text=None):
    kwargs = {}
    if __debug__:
        kwargs = dict(debug=True, debuglog=logging.getLogger(__name__))
    lex = ply.lex.lex(reflags=re.UNICODE, **kwargs)
    if text is not None:
        lex.input(text)
    return lex

def dump(lex, buf):
    i = 0
    lex.input(buf)
    while(True):
        tok = lex.token()
        if not tok:
            break
        print "[", i, "] ", tok
        i += 1

def crearDict(self, texto):
    mod = m.modelo.getInstance()
    txt = ". " + texto
    lex.input(txt)
    for tok in iter(lex.token, None):
        if (tok.value not in self.aux.keys()):
            self.aux[tok.value] = None
            mod.anadirPersonaje(tok.value, tok.value)

def test(self, code):
    # Input for the lexer
    lex.input(code)
    while True:
        t = lex.token()
        if not t:
            break  # No more input
        print(t)

def do_lex(file):
    """Tokenize the given file and print each token's type and value."""
    with open(file, 'r+') as f:
        data = f.read()
    lex.lex()
    lex.input(data)
    for tok in iter(lex.token, None):
        print(repr(tok.type), repr(tok.value))

def obtenerPos(self, texto, nombres):
    self.nombres = nombres
    for n in nombres:
        self.resul[n] = list()
    lex.input(texto)
    for tok in iter(lex.token, None):
        print()
    return self.resul

def lex(text=None):
    kwargs = {}
    if __debug__:
        kwargs["debug"] = True
        kwargs["debuglog"] = logging.getLogger(__name__)
    lex = ply.lex.lex(reflags=re.UNICODE, **kwargs)
    if text is not None:
        lex.input(text)
    return lex

def test(expr):
    lex.input(expr)
    list_tok = []
    while True:
        tok = lex.token()
        if not tok:
            break
        list_tok.append(tok)
    return list_tok

def get_tokens_from_cin(lex):
    while True:
        inp = raw_input()
        if inp.strip() == 'quit':
            break
        else:
            inp += '\n'
            lex.input(inp)
            print_tokens(lex)

def p_error(p):
    print "Problems parsing: "
    print " ** " + str(p.lexer.lexdata)
    print " "
    print "Tokens:"
    lex.input(p.lexer.lexdata)
    for tok in iter(lex.token, None):
        print repr(tok.type), repr(tok.value)
    sys.exit(2)

def print_tokens(f):
    if isinstance(f, basestring):
        f = open(f)
    lex.input(f.read())
    while True:
        tok = lex.token()
        if not tok:
            break
        print tok

def compile(template):
    lex.lex()
    # lex.lexer.push_state('mu')
    lex.input(template)
    while 1:
        tok = lex.token()
        if not tok:
            break
        print tok
    yacc.yacc()
    return yacc.parse(template)

def test_scanner(arg=sys.argv):
    data = ' 1+2 1-2 3*4 x blah y := 5 '
    lex.input(data)
    # attempt to get that first token
    tok = lex.token()
    while tok:
        print tok
        tok = lex.token()

def do_lex():
    # Build the lexer
    lex.lex()
    lex.input(sometext)
    while 1:
        tok = lex.token()
        if not tok:
            break
        print tok

def main(in_file, out_file):
    in_data = in_file.read()
    lex.input(in_data)
    while True:
        t = lex.token()
        if not t:
            break
        token_str = get_token_str(t)
        out_file.write(token_str + '\n')
        print(token_str)

def analyse_lex(filename):
    prog = open(filename).read()
    lex.input(prog)
    while 1:
        tok = lex.token()
        if not tok:
            break
        print("line %d: %s(%s)" % (tok.lineno, tok.type, tok.value))
    return not errorOccured

def test_scanner(data):
    """
    Test the lexer to make sure we don't have any invalid tokens.

    :param data: string data from either a file or text input.
    """
    lex.input(data)
    # attempt to get that first token
    tok = lex.token()
    while tok:
        tok = lex.token()

def lexer(txt):
    l = lex.lex()
    lex.input(txt)
    toks = []
    while True:
        tok = l.token()
        if tok is not None:
            toks.append(tok)
        else:
            break
    return toks

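# Hedged usage sketch (not from the source): driving the lexer() helper above and the
# token list it returns. The sample input string and the demo name are assumptions.
def demo_lexer():
    for tok in lexer("x := 1 + 2\n"):
        print("line %d: %s(%s)" % (tok.lineno, tok.type, tok.value))
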