def testBasic(self):
    """Run every string production over its accepted and rejected samples.

    Samples for the ``string`` production are parsed through a dispatch
    processor holding a ``StringInterpreter``; the interpreted value must
    equal what ``eval`` yields for the same literal.
    """
    interpreter = dispatchprocessor.DispatchProcessor()
    interpreter.string = strings.StringInterpreter()
    for production, yestable, notable in parseTests:
        p = Parser("x := %s" % production, "x")
        is_string = production == "string"
        for data in yestable:
            if is_string:
                outcome = p.parse(data, processor=interpreter)
            else:
                outcome = p.parse(data)
            success, results, stop = outcome
            assert success and (stop == len(data)), (
                """Did not parse string %s as a %s result=%s"""
                % (repr(data), production, (success, results, stop))
            )
            assert results, (
                """Didn't get any results for string %s as a %s result=%s"""
                % (repr(data), production, (success, results, stop))
            )
            if is_string:
                # The sample doubles as a Python literal, so evaluating it
                # gives the value the interpreter is expected to produce.
                expected = eval(data, {}, {})
                assert results[0] == expected, (
                    """Got different interpreted value for data %s, we got %s, expected %s"""
                    % (repr(data), repr(results[0]), repr(expected))
                )
        for data in notable:
            success, results, stop = p.parse(data)
            assert not success, (
                """Parsed %s of %s as a %s result=%s"""
                % (repr(data), production, (success, results, stop))
            )
def parse(self, *args, **kwargs):
    """Parse input, expand ``Inline`` nodes and dump prototype stubs.

    All arguments are forwarded unchanged to ``Parser.parse``.  After the
    base parse, every ``Inline`` node that has no children yet gets the
    parse result of the file at ``root_path``/``url`` attached as children.
    Finally a Python source file with one class per entry of
    ``self.prototypes`` is written to ``/tmp/robotviewer_protos.py``.

    Returns the first three items of the ``Parser.parse`` result as a tuple.
    """
    res = Parser.parse(self, *args, **kwargs)

    # Breadth-first flattening: the list grows while it is being walked, so
    # every descendant of the top-level nodes is visited.
    nodes = [r for r in res[1] if isinstance(r, Node)]
    count = 0
    while count < len(nodes):
        nodes += nodes[count].children
        count += 1

    for e in nodes:
        if e.__class__.__name__ != "Inline":
            continue
        url = os.path.join(self.root_path, e.url)
        if e.children[:]:
            # Already expanded.
            continue
        logger.info("Parse inlined vrml {0}".format(url))
        with open(url) as inlined:
            e.children = Parser.parse(self, inlined.read())[1]
        for child in e.children:
            child._parent = e

    chunks = ["from parser import Node\n\n"]
    for name, prototype in self.prototypes.items():
        obj = prototype()
        # Public, non-callable attributes other than ``children``.
        attrs = [(key, getattr(obj, key)) for key in dir(obj)
                 if not (key.startswith("_")
                         or callable(getattr(obj, key))
                         or key == "children")]
        chunks.append("class {0}({1}):\n".format(name, "object"))
        chunks.append("    def __init__(self):\n")
        for key, value in attrs:
            chunks.append("        self.{0} = {1} #{2}\n".format(
                key, repr(value), prototype.ftype(key)))
        chunks.append("\n")
    code = "".join(chunks)

    with open("/tmp/robotviewer_protos.py", 'w') as f:
        f.write(code)
    logger.debug("internally generated foloowing classes:\n{0}".format(code))
    return res[0], res[1], res[2]
def _testSet(self, set, singleName, multiName):
    """Check a single-character and a multi-character production.

    Every member of ``set`` must parse as ``singleName`` with exactly one
    character consumed; every entry of ``notset`` (derived from
    ``fulltrans`` and ``set``) must be rejected by both productions without
    consuming anything; ``set`` as a whole must parse as ``multiName``.
    Integer members are converted with ``chr`` before parsing.
    """
    decl = """single := %s
multiple := %s""" % (singleName, multiName)
    p = Parser(decl)
    notset = translate(fulltrans, fulltrans, set)

    def as_char(item):
        return chr(item) if isinstance(item, int) else item

    for member in set:
        member = as_char(member)
        success, children, stop = p.parse(member, singleName)
        assert success and (stop == 1), (
            """Parser for %s couldn't parse %s""" % (singleName, member)
        )

    for outsider in notset:
        outsider = as_char(outsider)
        for production in (singleName, multiName):
            success, children, stop = p.parse(outsider, production)
            assert (not success) and (stop == 0), (
                """Parser for %s parsed %s""" % (production, outsider)
            )

    success, children, stop = p.parse(set, multiName)
    assert success and (stop == len(set)), (
        """Parser for %s couldn't parse full set of chars, failed at %s"""
        % (multiName, set[stop:])
    )
def testBasic(self):
    """Check each production in ``parseTests`` against its sample lists.

    Entries of ``yestable`` must parse completely and yield results;
    entries of ``notable`` must be rejected.
    """
    for production, yestable, notable in parseTests:
        p = Parser("x := %s" % production, 'x')
        for data in yestable:
            success, results, stop = p.parse(data)
            outcome = (success, results, stop)
            assert success and (stop == len(data)), (
                """Did not parse comment %s as a %s result=%s"""
                % (repr(data), production, outcome)
            )
            assert results, (
                """Didn't get any results for comment %s as a %s result=%s"""
                % (repr(data), production, outcome)
            )
        for data in notable:
            success, results, stop = p.parse(data)
            assert not success, (
                """Parsed %s of %s as a %s result=%s"""
                % (stop, repr(data), production, results)
            )
def testBasic(self):
    """Check interpreted values and rejected samples for each production.

    Each row of ``_data`` is ``(production, processor, yestable, notable)``.
    A ``yestable`` entry is ``(data, length, value)``: parsing ``data`` must
    stop at ``length`` and the first processed result must equal ``value``.
    Every ``notable`` entry must fail to parse.
    """
    for production, processor, yestable, notable in _data:
        p = Parser("x := %s" % production, 'x')
        proc = dispatchprocessor.DispatchProcessor()
        setattr(proc, production, processor())
        for data, length, value in yestable:
            success, results, stop = p.parse(data, processor=proc)
            assert stop == length, (
                """Did not parse string %s of %s as a %s result=%s"""
                % (repr(data[:length]), repr(data), production,
                   (success, results, stop))
            )
            assert results[0] == value, (
                """Didn't get expected value from processing value %s, expected %s, got %s"""
                % (data[:length], value, results[0])
            )
        for data in notable:
            success, results, stop = p.parse(data)
            # Report the span that was actually consumed; ``length`` belongs
            # to the previous loop and may be stale or undefined here.
            assert not success, (
                """Parsed %s of %s as a %s result=%s"""
                % (repr(data[:stop]), repr(data), production,
                   (success, results, stop))
            )
def testBasic(self):
    """Exercise every ``parseTests`` production on good and bad samples.

    Good samples must be consumed in full and produce results; bad samples
    must not parse.
    """
    for production, yestable, notable in parseTests:
        p = Parser("x := %s" % production, 'x')

        for data in yestable:
            success, results, stop = p.parse(data)
            summary = (success, results, stop)
            consumed_all = stop == len(data)
            assert success and consumed_all, (
                """Did not parse comment %s as a %s result=%s"""
                % (repr(data), production, summary)
            )
            assert results, (
                """Didn't get any results for comment %s as a %s result=%s"""
                % (repr(data), production, summary)
            )

        for data in notable:
            success, results, stop = p.parse(data)
            assert not success, (
                """Parsed %s of %s as a %s result=%s"""
                % (stop, repr(data), production, results)
            )
def _testSet(self, set, singleName, multiName):
    """Check a single-character and a multi-character production.

    Every member of ``set`` must parse as ``singleName`` with one character
    consumed; every entry of ``notset`` (computed with ``string.translate``
    from ``fulltrans`` and ``set``) must be rejected by both productions
    without consuming anything; ``set`` as a whole must parse as
    ``multiName``.
    """
    decl = """single := %s
multiple := %s""" % (singleName, multiName)
    p = Parser(decl)
    notset = string.translate(fulltrans, fulltrans, set)

    for member in set:
        success, children, stop = p.parse(member, singleName)
        assert success and (stop == 1), (
            """Parser for %s couldn't parse %s""" % (singleName, member)
        )

    for outsider in notset:
        for production in (singleName, multiName):
            success, children, stop = p.parse(outsider, production)
            assert (not success) and (stop == 0), (
                """Parser for %s parsed %s""" % (production, outsider)
            )

    success, children, stop = p.parse(set, multiName)
    assert success and (stop == len(set)), (
        """Parser for %s couldn't parse full set of chars, failed at %s"""
        % (multiName, set[stop:])
    )
def parse_header(data, verbose=False, *args, **kwargs):
    """Parse the data using the grammar specified in this module

    Extra positional and keyword arguments are accepted but not used.

    :param str data: delimited data to be parsed for metadata
    :param bool verbose: when true, progress messages go to ``sys.stderr``
    :return list parsed_data: structured metadata
    :raises TypeError: if the parser reports failure
    """
    def report(message):
        if verbose:
            print(message, file=sys.stderr)

    report("Creating parser object...")
    parser = Parser(amira_header_grammar)

    report("Defining dispatch processor...")
    amira_processor = AmiraDispatchProcessor()

    report("Parsing data...")
    success, parsed_data, next_item = parser.parse(
        data, production='amira', processor=amira_processor)

    if not success:
        raise TypeError("Parse: {}\nNext: {}\n".format(parsed_data, next_item))
    report("Successfully parsed data...")
    return parsed_data
def testISODate(self):
    """Check that ISO date/time strings are parsed into the expected dates.

    Each sample must parse successfully, be consumed in full, and yield the
    paired ``DateTime`` value as its first result.
    """
    make = DateTime.DateTime
    values = [
        ("2002-02-03", make(2002, 2, 3)),
        ("2002-02", make(2002, 2)),
        ("2002", make(2002)),
        ("2002-02-03T04:15", make(2002, 2, 3, 4, 15)),
        ("2002-02-03T04:15:16", make(2002, 2, 3, 4, 15, 16)),
        ("2002-02-03T04:15:16+00:00", make(2002, 2, 3, 4, 15, 16) - tzOffset),
    ]
    p = Parser("d:= ISO_date_time", "d")
    proc = iso_date.MxInterpreter()
    for to_parse, date in values:
        success, children, stop = p.parse(to_parse, processor=proc)
        assert success, (
            """Unable to parse any of the string %s with the ISO date-time parser"""
            % to_parse
        )
        assert stop == len(to_parse), (
            """Did not finish parsing string %s with the ISO date-time parser, remainder was %s, found was %s"""
            % (to_parse, to_parse[stop:], children)
        )
        first = children[0]
        assert first == date, (
            """Returned different date for string %s than expected, got %s, expected %s"""
            % (to_parse, first, date)
        )
def typographify(input):
    """Convert marked-up text to HTML using the ``typographify.def`` grammar.

    The grammar file is read from the current directory, so run from the
    parent directory:

    >>> import os
    >>> os.chdir('typographify')
    >>> print typographify(open('typographify.txt').read())
    <strong>strong</strong> <em>words</em>
    parsed 17 chars of 17

    ``plain`` tags are copied through unchanged; for ``markup`` tags the
    first and last character of the inner span are dropped and the rest is
    wrapped in the pair looked up in ``codes`` (a comment pair is used for
    unknown tags).  A ``parsed N chars of M`` summary is appended.

    https://pypi.python.org/pypi/SimpleParse/ Version 2.2
    http://www.ibm.com/developerworks/linux/library/l-simple/index.html
    https://books.google.com/books?id=GxKWdn7u4w8C&pg=PA319&lpg=PA319&dq=simpleparse+standard+input&source=bl&ots=M8x58SCzpT&sig=5DOLvoC5-TZyxxlq3_LHD68gbXY&hl=en&sa=X&ved=0ahUKEwjFjOCurKjMAhVMuYMKHaM4ATUQ6AEIMTAD#v=onepage&q=simpleparse%20standard%20input&f=false
    """
    # Close the grammar file as soon as it has been read.
    with open('typographify.def') as grammar_file:
        parser = Parser(grammar_file.read(), 'para')
    taglist = parser.parse(input)
    pieces = []
    for tag, beg, end, parts in taglist[1]:
        if tag == 'plain':
            pieces.append(input[beg:end])
        elif tag == 'markup':
            markup = parts[0]
            mtag, mbeg, mend = markup[:3]
            start, stop = codes.get(mtag, ('<!-- unknown -->', '<!-- / -->'))
            # Skip the delimiter character on each side of the span.
            pieces.append(start + input[mbeg + 1:mend - 1] + stop)
    pieces.append('parsed %s chars of %s' % (taglist[-1], len(input)))
    return ''.join(pieces)
def typographify(input):
    """Convert marked-up text to HTML using the ``typographify.def`` grammar.

    The grammar file is read from the current directory, so run from the
    parent directory:

    >>> import os
    >>> os.chdir('typographify')
    >>> print typographify(open('typographify.txt').read())
    <strong>strong</strong> <em>words</em>
    parsed 17 chars of 17

    ``plain`` tags are copied through unchanged; for ``markup`` tags the
    first and last character of the inner span are dropped and the rest is
    wrapped in the pair looked up in ``codes`` (a comment pair is used for
    unknown tags).  A ``parsed N chars of M`` summary is appended.

    https://pypi.python.org/pypi/SimpleParse/ Version 2.2
    http://www.ibm.com/developerworks/linux/library/l-simple/index.html
    https://books.google.com/books?id=GxKWdn7u4w8C&pg=PA319&lpg=PA319&dq=simpleparse+standard+input&source=bl&ots=M8x58SCzpT&sig=5DOLvoC5-TZyxxlq3_LHD68gbXY&hl=en&sa=X&ved=0ahUKEwjFjOCurKjMAhVMuYMKHaM4ATUQ6AEIMTAD#v=onepage&q=simpleparse%20standard%20input&f=false
    """
    # Close the grammar file as soon as it has been read.
    with open('typographify.def') as grammar_file:
        parser = Parser(grammar_file.read(), 'para')
    taglist = parser.parse(input)
    pieces = []
    for tag, beg, end, parts in taglist[1]:
        if tag == 'plain':
            pieces.append(input[beg:end])
        elif tag == 'markup':
            markup = parts[0]
            mtag, mbeg, mend = markup[:3]
            start, stop = codes.get(mtag, ('<!-- unknown -->', '<!-- / -->'))
            # Skip the delimiter character on each side of the span.
            pieces.append(start + input[mbeg + 1:mend - 1] + stop)
    pieces.append('parsed %s chars of %s' % (taglist[-1], len(input)))
    return ''.join(pieces)
def main():
    """Parse the input file with the grammar given in the options and print the tags.

    The grammar path, root production and input path are read from the
    options returned by ``get_parser().parse_args()``.
    """
    oparser = get_parser()
    opts, args = oparser.parse_args()
    with open(opts.grammar) as grammar_file:
        parser = Parser(grammar_file.read(), opts.root)
    with open(opts.input) as input_file:
        success, tags, next = parser.parse(input_file.read())
    # Single-argument call form prints the same on Python 2 and 3.
    print(tags)
def main():
    """Parse the input file with the grammar given in the options and print the tags.

    The grammar path, root production and input path are read from the
    options returned by ``get_parser().parse_args()``.
    """
    oparser = get_parser()
    opts, args = oparser.parse_args()
    with open(opts.grammar) as grammar_file:
        parser = Parser(grammar_file.read(), opts.root)
    with open(opts.input) as input_file:
        success, tags, next = parser.parse(input_file.read())
    # Single-argument call form prints the same on Python 2 and 3.
    print(tags)
def debugparser(self, filename):
    """Parse ``filename`` with ``self.declaration``, pretty-print the result and exit.

    This never returns: the process is terminated with exit status 0 once
    the parse result has been printed.
    """
    import pprint

    with open(filename) as source:
        text = source.read()
    debugparser = Parser(self.declaration)
    info("started debug parsing")
    pprint.pprint(debugparser.parse(text))
    info("completed debug parsing")
    # ``exit`` is only injected by the ``site`` module; raising SystemExit
    # works regardless of how the interpreter was started.
    raise SystemExit(0)
def testTZ(self):
    """Parse all known timezone names in one string and compare the values.

    The names are sorted and joined with single spaces; the interpreted
    result must equal the mapped value of each name, in the same order.
    """
    mapping = timezone_names.timezone_mapping
    # Sorted order tests that the items don't match shorter versions...
    names = sorted(mapping.keys())
    decl = Parser("""this := (timezone_name, ' '?)+""", 'this')
    proc = dispatchprocessor.DispatchProcessor()
    proc.timezone_name = timezone_names.TimeZoneNameInterpreter()
    text = ' '.join(names)
    success, result, stop = decl.parse(text, processor=proc)
    assert success, (
        """Unable to complete parsing the timezone names, stopped parsing at char %s %s"""
        % (stop, text[stop:])
    )
    expected = [mapping.get(name) for name in names]
    assert result == expected, (
        """Got different results for interpretation than expected (expected first, recieved second)\n%s\n%s"""
        % (expected, result)
    )
def __init__(self, filename):
    """Load settings from ``filename`` and expose them as attributes.

    The file content is parsed with ``declaration`` through a
    ``ConfigProcessor`` bound to this instance; every key/value pair of
    the first parse result is then set as an attribute on ``self``.

    Raises:
        Exception: if the parser reports failure.
    """
    with open(filename) as f:
        content = f.read()
    parser = Parser(declaration)
    success, tree, nextChar = parser.parse(content, processor=ConfigProcessor(self))
    if not success:
        # Same exception type as before, but now it says what went wrong.
        raise Exception(
            "could not parse %r (stopped at character %s)" % (filename, nextChar))
    # ``items`` is available on both Python 2 and Python 3 mappings.
    for k, v in tree[0].items():
        setattr(self, k, v)
def parse(d):
    """Parse ``d`` with the ``descr`` production and print the outcome.

    Prints ``fail <d>`` when the parser rejects the input, otherwise
    ``success <d> <next>`` followed by the pretty-printed children.  A
    ``SyntaxError`` raised while parsing is printed instead of propagated.
    """
    p = Parser(grammar, 'descr')
    try:
        success, children, next = p.parse(d)
        # Pre-formatted single-argument prints produce the same text as the
        # old print statements and are valid on Python 2 and 3.
        if not success:
            print('fail %s' % (d,))
        else:
            print('success %s %s' % (d, next))
            pprint.pprint(children)
    except SyntaxError as err:
        print(err)
def parse(self, txt, *args, **kwargs):
    """Parse ``txt`` and return ``(ok, value)``.

    ``(True, first_child)`` is returned when the base parser succeeds and
    consumes the whole text; ``(False, 0.0)`` when it fails, stops early or
    raises ``ParserSyntaxError``.  Extra arguments are passed through to
    ``Parser.parse``.
    """
    # Easter egg.
    if txt == "2+2":
        return True, 5

    failure = (False, 0.0)
    try:
        ok, values, consumed = Parser.parse(self, txt, *args, **kwargs)
    except ParserSyntaxError:
        return failure

    if ok and consumed == len(txt):
        return True, values[0]
    return failure
def testTZ(self):
    """Parse every timezone name in one pass and check the mapped values.

    The sorted names are joined by single spaces and parsed with a
    ``TimeZoneNameInterpreter``; the result must match the mapping's value
    for each name in order.
    """
    lookup = timezone_names.timezone_mapping.get
    names = list(timezone_names.timezone_mapping.keys())
    # tests that the items don't match shorter versions...
    names.sort()
    decl = Parser("""this := (timezone_name, ' '?)+""", 'this')
    proc = dispatchprocessor.DispatchProcessor()
    setattr(proc, "timezone_name", timezone_names.TimeZoneNameInterpreter())
    text = ' '.join(names)
    success, result, stop = decl.parse(text, processor=proc)
    assert success, (
        """Unable to complete parsing the timezone names, stopped parsing at char %s %s"""
        % (stop, text[stop:])
    )
    wanted = list(map(lookup, names))
    assert result == wanted, (
        """Got different results for interpretation than expected (expected first, recieved second)\n%s\n%s"""
        % (wanted, result)
    )
class Compiler:
    """Turn a command string into a Python code object.

    The command is parsed with ``grammar`` and the parse result is handed
    to a ``SyntaxTreeProcessor`` that produces Python source text.
    """

    def __init__(self):
        self.parser = Parser(grammar)
        self.translator = SyntaxTreeProcessor()

    def compile(self, command):
        """Return the code object for ``command``.

        All whitespace is removed from ``command`` before parsing.  The
        Python source is taken from ``result[1][0]`` of the translator
        output and compiled in ``'exec'`` mode.
        """
        # Raw string: '\s' in a plain literal is an invalid escape sequence.
        cmd = re.sub(r'\s', '', command)
        (success, children, nextchar) = self.parser.parse(cmd)
        result = self.translator((success, children, nextchar), cmd)
        python_src = result[1][0]
        return compile(python_src, '', 'exec')
def main():
    """Import an airspace file named on the command line into the database.

    Options: ``-h`` prints usage and exits; ``-n`` is accepted but has no
    effect inside this function.  Exactly one positional argument (the
    input file) is required, otherwise usage is printed and the process
    exits with status 2.  Existing airspace is deleted before parsing; on a
    syntax error a message is printed and the process exits with status 1
    without committing.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hn')
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    # Get any options
    for o, a in opts:
        if o == '-h':
            usage()
            sys.exit()

    # Get the input filename
    if len(args) != 1:
        usage()
        sys.exit(2)
    filename = args[0]

    # Initialise data base
    db = freenav.freedb.Freedb()
    db.delete_airspace()

    # Initialise parser
    parser = Parser(tnp.TNP_DECL, 'tnp_file')
    p = db.get_projection()
    proj = freenav.projection.Lambert(p['parallel1'], p['parallel2'],
                                      p['latitude'], p['longitude'])
    output_processor = AirProcessor(db, proj)
    tnp_processor = tnp.TnpProcessor(output_processor)

    # Read data and parse
    with open(filename) as airfile:
        airdata = airfile.read()
    success, parse_result, next_char = parser.parse(airdata,
                                                    processor=tnp_processor)

    # Report any syntax errors
    if not (success and next_char == len(airdata)):
        # Line number = lines fully or partly consumed, plus one.
        print("%s: Syntax error at (or near) line %d" %
              (filename, len(airdata[:next_char].splitlines()) + 1))
        sys.exit(1)

    # Create indices and tidy up
    db.commit()
    db.vacuum()
def testBasic(self):
    """Check the string productions on their good and bad samples.

    Good samples must parse in full and yield results.  For the ``string``
    production a ``StringInterpreter`` is attached and its value is
    compared with the result of evaluating the sample as a Python literal.
    """
    interpreter = dispatchprocessor.DispatchProcessor()
    interpreter.string = strings.StringInterpreter()
    for production, yestable, notable in parseTests:
        p = Parser("x := %s" % production, 'x')
        # Only the ``string`` production is run through the interpreter.
        extra = {"processor": interpreter} if production == 'string' else {}
        for data in yestable:
            success, results, stop = p.parse(data, **extra)
            summary = (success, results, stop)
            assert success and (stop == len(data)), (
                """Did not parse string %s as a %s result=%s"""
                % (repr(data), production, summary)
            )
            assert results, (
                """Didn't get any results for string %s as a %s result=%s"""
                % (repr(data), production, summary)
            )
            if extra:
                expected = eval(data, {}, {})
                assert results[0] == expected, (
                    """Got different interpreted value for data %s, we got %s, expected %s"""
                    % (repr(data), repr(results[0]), repr(expected))
                )
        for data in notable:
            success, results, stop = p.parse(data)
            assert not success, (
                """Parsed %s of %s as a %s result=%s"""
                % (repr(data), production, (success, results, stop))
            )
def parse(self, *args, **kwargs):
    """Parse input, expand ``Inline`` nodes and dump prototype stubs.

    All arguments are forwarded unchanged to ``Parser.parse``.  After the
    base parse, every ``Inline`` node that has no children yet gets the
    parse result of the file at ``root_path``/``url`` attached as children.
    Finally a Python source file with one class per entry of
    ``self.prototypes`` is written to ``/tmp/robotviewer_protos.py``.

    Returns the first three items of the ``Parser.parse`` result as a tuple.
    """
    res = Parser.parse(self, *args, **kwargs)

    # Breadth-first flattening: the list grows while it is being walked, so
    # every descendant of the top-level nodes is visited.
    nodes = [r for r in res[1] if isinstance(r, Node)]
    count = 0
    while count < len(nodes):
        nodes += nodes[count].children
        count += 1

    for e in nodes:
        if e.__class__.__name__ != "Inline":
            continue
        url = os.path.join(self.root_path, e.url)
        if e.children[:]:
            # Already expanded.
            continue
        logger.info("Parse inlined vrml {0}".format(url))
        with open(url) as inlined:
            e.children = Parser.parse(self, inlined.read())[1]
        for child in e.children:
            child._parent = e

    chunks = ["from parser import Node\n\n"]
    for name, prototype in self.prototypes.items():
        obj = prototype()
        # Public, non-callable attributes other than ``children``.
        attrs = [(key, getattr(obj, key)) for key in dir(obj)
                 if not (key.startswith("_")
                         or callable(getattr(obj, key))
                         or key == "children")]
        chunks.append("class {0}({1}):\n".format(name, "object"))
        chunks.append("    def __init__(self):\n")
        for key, value in attrs:
            chunks.append("        self.{0} = {1} #{2}\n".format(
                key, repr(value), prototype.ftype(key)))
        chunks.append("\n")
    code = "".join(chunks)

    with open("/tmp/robotviewer_protos.py", 'w') as f:
        f.write(code)
    logger.debug("internally generated foloowing classes:\n{0}".format(code))
    return res[0], res[1], res[2]
def testISODate(self):
    """Verify ISO date and time strings against their expected dates.

    Every sample must parse, be consumed completely, and produce the paired
    ``DateTime`` value as the first interpreted child.
    """
    samples = (
        ("2002-02-03", (2002, 2, 3), False),
        ("2002-02", (2002, 2), False),
        ("2002", (2002,), False),
        ("2002-02-03T04:15", (2002, 2, 3, 4, 15), False),
        ("2002-02-03T04:15:16", (2002, 2, 3, 4, 15, 16), False),
        ("2002-02-03T04:15:16+00:00", (2002, 2, 3, 4, 15, 16), True),
    )
    values = []
    for text, fields, shifted in samples:
        stamp = DateTime.DateTime(*fields)
        # The sample carrying an explicit offset is adjusted by ``tzOffset``.
        values.append((text, stamp - tzOffset if shifted else stamp))

    p = Parser("d:= ISO_date_time", "d")
    proc = iso_date.MxInterpreter()
    for to_parse, date in values:
        success, children, stop = p.parse(to_parse, processor=proc)
        assert success, (
            """Unable to parse any of the string %s with the ISO date-time parser"""
            % to_parse
        )
        assert stop == len(to_parse), (
            """Did not finish parsing string %s with the ISO date-time parser, remainder was %s, found was %s"""
            % (to_parse, to_parse[stop:], children)
        )
        assert children[0] == date, (
            """Returned different date for string %s than expected, got %s, expected %s"""
            % (to_parse, children[0], date)
        )
def convert(input, definition='compilation_unit'):
    """Convert ``input`` from the configured source syntax to the target syntax.

    The source and target names come from ``cfg``; the text is parsed with
    the source grammar starting at ``definition``.

    >>> print(convert('import com.finegamedesign.anagram.Model;', 'import_definition'))
    using /*<com>*/Finegamedesign.Anagram/*<Model>*/;

    Related to grammar unit testing specification (gUnit)
    https://theantlrguy.atlassian.net/wiki/display/ANTLR3/gUnit+-+Grammar+Unit+Testing
    """
    source = cfg['source']
    to = cfg['to']
    parser = Parser(grammars[source], definition)
    prepared = may_import(None, input, definition, to)
    parsed = parser.parse(prepared)
    # Wrap the children in one root tag that spans the parsed text.
    root = [(definition, 0, parsed[-1], parsed[1])]
    converted = _recurse_tags(root, prepared, source, to)
    converted = may_import(root, converted, definition, to)
    return may_format(definition, converted)
def convert(input, definition='compilation_unit'):
    """Translate ``input`` between the two syntaxes named in ``cfg``.

    ``cfg['source']`` selects the grammar, ``cfg['to']`` the target; parsing
    starts at the ``definition`` production.

    >>> print(convert('import com.finegamedesign.anagram.Model;', 'import_definition'))
    using /*<com>*/Finegamedesign.Anagram/*<Model>*/;

    Related to grammar unit testing specification (gUnit)
    https://theantlrguy.atlassian.net/wiki/display/ANTLR3/gUnit+-+Grammar+Unit+Testing
    """
    source, to = cfg['source'], cfg['to']
    parser = Parser(grammars[source], definition)
    text_in = may_import(None, input, definition, to)
    result = parser.parse(text_in)
    # A single synthetic root tag covers everything the parser consumed.
    root_tag = (definition, 0, result[-1], result[1])
    taglist = [root_tag]
    text_out = _recurse_tags(taglist, text_in, source, to)
    text_out = may_import(taglist, text_out, definition, to)
    text_out = may_format(definition, text_out)
    return text_out
def buildParseTree(vbtext, starttoken="line", verbose=0, returnpartial=0, returnast=0):
    """Parse some VB

    The text is first passed through the ``preProcessVBText`` plugins, then
    parsed repeatedly with ``starttoken``; after each successful parse the
    consumed prefix is dropped and parsing resumes on the remainder.

    Args:
        vbtext: source text to parse.
        starttoken: production used for every parse step.
        verbose: when true, print progress and the tree of each step.
        returnpartial: when true, a parse failure is logged and recorded as
            a ``VBFailedElement`` instead of raising.
        returnast: when true, collect the raw parse trees instead of the
            output of ``convertToElements``.

    Returns:
        The list of collected nodes.

    Raises:
        VBParserError: on a parse failure with non-blank text remaining,
            unless ``returnpartial`` is set.
    """
    parser = Parser(declaration, starttoken)
    txt = applyPlugins("preProcessVBText", vbtext)

    nodes = []
    while 1:
        success, tree, next = parser.parse(txt)
        if not success:
            # Failing on blank leftover text is the normal end of input.
            if txt.strip():
                msg = "Parsing error: %d, '%s'" % (next, txt.split("\n")[0])
                if not returnpartial:
                    raise VBParserError(msg)
                log.error(msg)
                nodes.append(VBFailedElement('parser_failure', msg))
            break
        if verbose:
            # Pre-formatted single-argument prints are valid on Python 2
            # and 3 and emit the same text as the old print statements.
            print("%s %s" % (success, next))
            pp(tree)
            print(".")
        if not returnast:
            nodes.extend(convertToElements(tree, txt))
        else:
            nodes.append(tree)
        txt = txt[next:]

    return nodes
[required false] [type java.lang.Integer] [value 1]] [[name UserName] [required false] [type java.lang.String]]]"]]"] [provider ECIResourceAdapter_Z-CCIDTTNode001(cells/CCITstCell001/nodes/Z-CCIDTTNode001|resources.xml#J2CResourceAdapter_1184069423250)] ''', ]), ] from simpleparse.parser import Parser parser = Parser(grammar) for production, tests in TESTS: print production for test in tests: success, children, nextcharacter = parser.parse(test, production=production) #print success, children, nextcharacter assert success and nextcharacter==len(test) print 'success' print success, children, nextcharacter = parser.parse(TESTS[7][1][0], production=TESTS[7][0]) #print children from simpleparse.dispatchprocessor import DispatchProcessor def getname(val, buf): tup = val[3][0] return buf[tup[1]:tup[2]] class PropertySet(list):
class ExpParser():
    """Parse and evaluate simple arithmetic expressions and function calls.

    Two parsers share one grammar: ``fun_parser`` starts at ``fun`` (a name
    followed by a parenthesised, comma-separated expression list) and
    ``exp_parser`` starts at ``exp`` (``+ - * /`` arithmetic over numbers,
    variables and bracketed sub-expressions).
    """

    def __init__(self):
        declaration = r'''
        fun := ( fun_name,'(',')' )/( fun_name,'(',exp_list,')' )
        fun_name := [a-zA-Z0-9_-]+
        exp_list := exp,(',',exp)*
        exp := pm_exp
        pm_exp := md_exp,('+'/'-',md_exp)*
        md_exp := bracket_exp,('*'/'/',bracket_exp)*
        bracket_exp := ('(',exp,')')/str_var/number
        str_var := [a-zA-Z],[a-zA-Z0-9_=\\.]*
        '''
        self.fun_parser = Parser(declaration, "fun")
        self.exp_parser = Parser(declaration, "exp")

    def parse_exp(self, str):
        """Split a function call into its name and argument texts.

        Spaces are removed first.  Returns ``(func_name, exp_list)`` where
        ``exp_list`` holds the source text of each argument, or
        ``(None, None)`` when the text does not parse as a function call.
        """
        str = str.replace(" ", "")
        success, child, nextcharacter = self.fun_parser.parse(str)
        if success != 1:
            return (None, None)
        tag, start, end, subtags = child[0]
        func_name = str[start:end]
        exp_list = []
        if len(child) > 1:
            # The second child, when present, carries the argument list.
            tag, start, end, subtags = child[1]
            exp_list = [str[exp[1]:exp[2]] for exp in subtags]
        return (func_name, exp_list)

    def get_exp_value(self, str, map):
        """Evaluate an expression, looking variables up in ``map``.

        Spaces are removed first.  Returns ``(value, message)``; ``value``
        is ``None`` and ``message`` explains why when evaluation fails.
        """
        str = str.replace(" ", "")
        success, child, nextcharacter = self.exp_parser.parse(str)
        if success == 1:
            tag, start, end, subtags = child[0]
            return self._get_value(tag, start, end, subtags, str, map)
        else:
            return (None, "parse the exp %s failed" % str)

    def _get_value(self, tag, start, end, subtags, str, map):
        """Evaluate one parse-tree node and return ``(value, message)``."""
        if tag == "number":
            # float() replaces string.atof, which only exists on Python 2.
            return (float(str[start:end]), "")
        if tag == "str_var":
            temp = str[start:end]
            if temp not in map:
                return (None, "not find %s in map" % temp)
            if map[temp] is None:
                return (None, "the value in rrd is expired.")
            return (map[temp], "")
        if tag == "pm_exp":
            return self._fold(subtags, str, map, "+-",
                              "oper %s is not + or -")
        if tag == "md_exp":
            return self._fold(subtags, str, map, "*/",
                              "oper %s is not * or /")
        # "exp" and any other wrapper tag: evaluate the first child.
        if len(subtags) > 0:
            exp = subtags[0]
            return self._get_value(exp[0], exp[1], exp[2], exp[3], str, map)
        # Always return a pair so callers can unpack the result.
        return (None, "unfind tag %s and have no subtags" % tag)

    def _fold(self, subtags, str, map, allowed, bad_op_message):
        """Combine the operand nodes left to right with their operators."""
        total = None
        for exp in subtags:
            val, msg = self._get_value(exp[0], exp[1], exp[2], exp[3],
                                       str, map)
            if val is None:
                return (val, msg)
            if total is None:
                total = val
                continue
            # The operator is the character just before the operand.
            pos = exp[1] - 1
            op = str[pos:pos + 1]
            if len(op) != 1 or op not in allowed:
                return (None, bad_op_message % op)
            if op == '+':
                total = total + val
            elif op == '-':
                total = total - val
            elif op == '*':
                total = total * val
            else:
                if val == 0.0:
                    return (None, "div value %s is 0 " % exp[0])
                total = total / val
        return (total, "")
def CorpusPTBReader(ptb_data_path):
    """Write cleaned, lower-cased corpus sentences to ``total_ptb.txt``.

    Sentences are read from the ``.mrg`` files under ``ptb_data_path``;
    those with fewer than 7 tokens are skipped.  Every span reported by the
    ``all`` production of ``grammar`` is replaced by ``NUM`` followed by the
    span's length, a fixed sequence of regular-expression removals is
    applied, the last character is dropped, and the result is written as
    one line.  The number of sentences written is printed at the end.
    """
    # Applied in this exact order; later patterns rely on earlier removals.
    removals = (
        r'\*\-NUM\d ',
        r'e_s ',
        r'n_s ',
        r'e_s',
        r'n_s',
        r'\\. ',
        r'\\.',
        r'\*. ',
        r'\*.',
        r'-. ',
        r'-.',
        r'\*{,3}.\*.. ',
        r'\*{,3}.\*. ',
        r'\*.. ',
        r'\*..',
        r'\* ',
        r'- ',
        r'-',
    )
    with open("total_ptb.txt", "w") as ptb_sent_file:
        file_pattern = r".*/.*\.mrg"
        ptb = BracketParseCorpusReader(ptb_data_path, file_pattern)
        # The grammar does not change between sentences, so build it once.
        parser = Parser(grammar, 'all')
        count = 0
        for sent in ptb.sents():
            if len(sent) < 7:
                continue
            out = ' '.join(sent).lower()
            temp_result = parser.parse(out)

            # Replace each matched span with "NUM<length of span>".
            sub_sent = []
            start_index = 0
            for num_info in temp_result[1]:
                sub_sent.append(out[start_index:num_info[1]])
                sub_sent.append("NUM" + (str(num_info[2] - num_info[1])))
                start_index = num_info[2]
            sub_sent.append(out[start_index:])
            final_out = ''.join(sub_sent)

            for pattern in removals:
                final_out = re.sub(pattern, '', final_out)
            final_out = re.sub(r'; ; ', '; ', final_out)
            final_out = final_out[:-1]

            ptb_sent_file.write(final_out)
            ptb_sent_file.write("\n")
            count += 1
    print(count)
def parseInput(input):
    """Parse ``input`` with the ``fastg`` production and return the children.

    An assertion fails if the parser does not report success.
    """
    outcome = Parser(declaration).parse(input, production="fastg")
    success, children, nextcharacter = outcome
    assert success
    return children
body := statement* statement := (ts,';',comment,'\n')/equality/nullline nullline := ts,'\n' comment := -'\n'* equality := ts, identifier,ts,'=',ts,identified,ts,'\n' identifier := [a-zA-Z], [a-zA-Z0-9_]* identified := ('"',string,'"')/number/identifier ts := [ \t]* char := -[\134"]+ number := [0-9eE+.-]+ string := (char/escapedchar)* escapedchar := '\134"' / '\134\134' ''' testdata = '''[test1] val=23 val2="23" wherefore="art thou" ; why not log = heavy_wood [test2] loose=lips ''' from simpleparse.parser import Parser import pprint parser = Parser(declaration, "file") if __name__ == "__main__": pprint.pprint(parser.parse(testdata))
def testEOFFail(self):
    """``EOF`` must not match while input remains after the literal."""
    grammar_text = """this := 'a',EOF"""
    p = Parser(grammar_text, 'this')
    matched, _children, _stop = p.parse('a ')
    assert not matched, """EOF matched before end of string"""
# Sample inputs that are fed to ``parser`` one by one below.
shouldParse = [
    "(+ 2 3)",
    "(- 2 3)",
    "(* 2 3)",
    "(quote (2 3 4))",
    "(23s (2.4s 3s 45.3))",
    "(() () (2 3 4))",
    "()",
    '''("thisand that" ())''',
    '"this"',
    '''('"this")''',
    '''("this\n\r" ' those (+ a b) (23s 0xa3 55.3) "s")''',
    r'''("this\n\r" ' those (+ a b) (23s 0xa3 55.3) "s")''',
    r'''("this\n\r" ' those (+ a b) (23s 0xa3 55.3] "s")''',
    '''("this\n\r" ' those (+ a b) (23s 0xa3 55.3\n\n] "s")''',
    '''(with-pedantry :high It's "Scheme In One Defun".)''',
]
import pprint
for item in shouldParse:
    try:
        success, children, next = parser.parse(item)
        # Pre-formatted single-argument prints emit the same text as the old
        # print statements and are valid on Python 2 and 3.
        if not success:
            print('fail %s' % (item,))
        else:
            print('success %s %s' % (item, next))
            pprint.pprint(children)
    except SyntaxError as err:
        print(err)
element_token := (optional_element / base_element), repetition? repetition := ('*'/'+') optional_element := '[',fo_group,']' >base_element< := (range/string/group/name) <fo_indicator> := '|' name := [a-zA-Z_],[a-zA-Z0-9_]* <ts> := ( ('\n', ?-name) / [ \011]+ / comment )* comment := '#',-'\n'+,'\n' range := string, ts, '...', ts, string """ from simpleparse.parser import Parser from simpleparse.common import strings parser = Parser(declaration) if __name__ == "__main__": from simpleparse.stt.TextTools import print_tags grammar = open("""py_grammar.txt""").read() success, result, next = parser.parse(grammar, 'declarationset') print('success', success, next) print_tags(grammar, result)
class ExpressionBuilder:
    """Build an expression from a script by walking its parse tree.

    The script is parsed with ``grammar`` (start production ``script``).
    Parse-tree nodes are indexed as ``tree[0]`` production name,
    ``tree[1]``/``tree[2]`` start/end offsets into the script, and
    ``tree[3]`` child nodes.  Each node is dispatched to the
    ``visit_<production>`` method of the same name.
    """

    def __init__(self):
        self.parser = Parser(grammar, "script")
        # Text of the script currently being built; set by build().
        self.buffer = None
        # Declared names -> the expression object that stands for them.
        self.symbols: Dict[str, Symbol] = {}
        # Operator spelling -> callable applied to the visited operands.
        self.operators: Dict[str, Callable] = {
            "+": Add,
            "/": Divide,
            ">": GreaterThan,
            ">=": GreaterThanOrEqual,
            "<": LessThan,
            "<=": LessThanOrEqual,
            "*": Multiply,
            "-": subtraction_helper,
        }
        # Expressions collected from ``assert`` commands.
        self.assertions: Set[Expression] = set()

    def build(self, script):
        """Parse ``script`` and return the negated existential of its assertions.

        A newline is appended before parsing.  Raises ``VNNLibParseError``
        when the parse fails or stops before the end of the text.
        NOTE(review): ``symbols`` and ``assertions`` are not reset here, so
        state from an earlier call carries over -- confirm this is intended.
        """
        self.buffer = script
        success, children, index = self.parser.parse(script + "\n")
        if not success or index != (len(script) + 1):
            raise VNNLibParseError(f"Parsing failed at index {index}")
        for c in children:
            self.visit(c)
        return ~Exists(Symbol("X"), And(*self.assertions))

    def declare_const(self, name, sort):
        """Register ``name`` in the symbol table.

        Names starting with ``X_`` become an index into ``Symbol("X")`` and
        names starting with ``Y_`` an index into ``Network("N")`` applied to
        ``Symbol("X")``; the index is the tuple of integers that follow the
        prefix, separated by underscores.  Any other name becomes a
        ``Parameter`` of type ``sort``.  Raises ``VNNLibParseError`` if the
        name is already declared.
        """
        if name in self.symbols:
            raise VNNLibParseError(
                f"Name already exists in symbol table: {name}")
        if name.startswith("X_"):
            index = tuple(int(i) for i in name.split("_")[1:])
            self.symbols[name] = Symbol("X")[index]
        elif name.startswith("Y_"):
            index = tuple(int(i) for i in name.split("_")[1:])
            self.symbols[name] = Network("N")(Symbol("X"))[index]
        else:
            self.symbols[name] = Parameter(name, type=sort)

    def visit(self, tree: ParseTree):
        """Dispatch ``tree`` to ``visit_<production>``, else ``generic_visit``."""
        production = tree[0]
        visitor = getattr(self, f"visit_{production}", self.generic_visit)
        return visitor(tree)

    def generic_visit(self, tree: ParseTree):
        """Fallback for productions without a dedicated visitor; always raises."""
        raise NotImplementedError(f"{tree[0]}")

    def visit_subtrees(self, tree: ParseTree):
        """Visit all children and return the node with visited children.

        Children whose visitor returns ``None`` are dropped from the child
        list.  The returned span is narrowed: a dropped child that begins at
        the node's start moves the start to that child's end, and the end is
        moved to the start of the first dropped child that follows the last
        kept child.  Presumably this trims surrounding whitespace nodes --
        confirm against the grammar.
        """
        subtrees = []
        end_index = tree[2]
        for t in tree[3]:
            t_ = self.visit(t)
            if t_ is not None:
                subtrees.append(t_)
                # A kept child cancels any tentative end found so far.
                end_index = tree[2]
            else:
                if end_index == tree[2]:
                    end_index = t[1]
                if t[1] == tree[1]:
                    tree = (tree[0], t[2], tree[2], tree[3])
        tree = (tree[0], tree[1], end_index, subtrees)
        return tree

    def visit_command(self, tree: ParseTree):
        """Execute a ``declare-const`` or ``assert`` command.

        The command name is the first space-separated word of the node's
        text once surrounding parentheses are stripped.  Returns the
        visited children.
        """
        tree = self.visit_subtrees(tree)
        command = self.buffer[tree[1]:tree[2]].strip("()").split(" ",
                                                                 maxsplit=1)[0]
        assert command in ["assert", "declare-const"]
        if command == "declare-const":
            assert len(tree[3]) == 2
            self.declare_const(*tree[3])
        elif command == "assert":
            assert len(tree[3]) == 1
            self.assertions.add(tree[3][0])
        return tree[3]

    def visit_identifier(self, tree: ParseTree):
        """Return the single visited child of an identifier node."""
        tree = self.visit_subtrees(tree)
        assert len(tree[3]) == 1
        return tree[3][0]

    def visit_numeral(self, tree: ParseTree):
        """Return the node's text converted to ``int`` and wrapped in ``Constant``."""
        num = int(self.buffer[tree[1]:tree[2]])
        return Constant(num)

    def visit_qual_identifier(self, tree: ParseTree):
        """Resolve a name to a declared symbol or, failing that, an operator.

        Raises ``VNNLibParseError`` for names found in neither table.
        """
        tree = self.visit_subtrees(tree)
        assert len(tree[3]) == 1  # TODO: not always true
        if tree[3][0] in self.symbols:
            return self.symbols[tree[3][0]]
        elif tree[3][0] in self.operators:
            return self.operators[tree[3][0]]
        else:
            raise VNNLibParseError(f"Unknown identifier: {tree[3][0]}")

    def visit_sort(self, tree: ParseTree):
        """Map the sort names ``Real``/``Int``/``Bool`` to ``float``/``int``/``bool``."""
        tree = self.visit_subtrees(tree)
        assert len(tree[3]) == 1  # TODO: not always true
        if tree[3][0] == "Real":
            return float
        elif tree[3][0] == "Int":
            return int
        elif tree[3][0] == "Bool":
            return bool
        else:
            raise NotImplementedError(
                f"Unimplemented sort: {self.buffer[tree[1]:tree[2]]}")

    def visit_spec_constant(self, tree: ParseTree):
        """Return the single visited child of a constant node."""
        tree = self.visit_subtrees(tree)
        assert len(tree[3]) == 1
        return tree[3][0]

    def visit_symbol(self, tree: ParseTree):
        """Return the node's source text."""
        name = self.buffer[tree[1]:tree[2]]
        return name

    def visit_term(self, tree: ParseTree):
        """Return a lone child as-is, else call the first child on the rest."""
        tree = self.visit_subtrees(tree)
        if len(tree[3]) == 1:
            return tree[3][0]
        return tree[3][0](*tree[3][1:])

    def visit_ws(self, tree: ParseTree):
        """Whitespace yields ``None`` so ``visit_subtrees`` drops it."""
        return None
def format_taglist(input, definition):
    """Return the pretty-formatted parse result of ``input``.

    The grammar is the one registered for ``cfg['source']``; parsing starts
    at the ``definition`` production.
    """
    grammar_text = grammars[cfg['source']]
    return pformat(Parser(grammar_text, definition).parse(input))
import os, sys from simpleparse.parser import Parser from iss.surveys.parserTests import test_cases from simpleparse.error import ParserSyntaxError with open('iss/surveys/grammar.def') as decl: parser = Parser(decl.read()) errors = 0 sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0) #unbuffered stdout for i in range(0, len(test_cases)): input_text, prod, should_succeed = test_cases[i] try: success, children, next_character = parser.parse(input_text, production=prod) assert (success == should_succeed) or (not should_succeed) and ( next_character < len(input_text)) assert (should_succeed and next_character == len(input_text)) or (not should_succeed) print(".", end="") except (AssertionError, ParserSyntaxError): if not should_succeed: print(".", end="") continue errors += 1 if len(sys.argv) == 1 or (not sys.argv[1] in ['-v', '--verbose']):
This is NOT second line This is NOT fifth line """ if __name__ == "__main__": import pprint, time if sys.platform == 'win32': if hasattr(time, 'perf_counter'): clock = time.perf_counter else: clock = time.clock else: if hasattr(time, 'process_time'): clock = time.process_time else: clock = time.clock pprint.pprint(p.parse(file1)) pprint.pprint(p.parse(file2)) testData = "\n" * 30000000 + file1 print('starting parse of file 1 with 1 match at end') t = clock() success, results, next = p.parse(testData, "sets") print('finished parse', clock() - t) print('number of results', len(results)) pprint.pprint(results) print() testData = file1 * (30000000 // len(file1)) print('starting parse of file 1 with ~230,000 matches (slow)') t = clock() success, results, next = p.parse(testData, "sets") print('finished parse', clock() - t) print('number of results', len(results))
body := statement* statement := (ts,semicolon_comment)/equality/nullline nullline := ts,'\n' equality := ts, identifier,ts,'=',ts,identified,ts,'\n' identifier := [a-zA-Z], [a-zA-Z0-9_]* identified := string/number/identifier ts := [ \t]* ''' from simpleparse.parser import Parser parser = Parser(declaration) testEquality = [ "s=3\n", "s = 3\n", ''' s="three\\nthere"\n''', ''' s=three\n''', ] production = "equality" if __name__ == "__main__": for testData in testEquality: success, children, nextcharacter = parser.parse(testData, production=production) assert success and nextcharacter == len( testData ), """Wasn't able to parse %s as a %s (%s chars parsed of %s), returned value was %s""" % ( repr(testData), production, nextcharacter, len(testData), (success, children, nextcharacter))
# Filter expressions fed to the "FILTER" production by the demo loop below.
testdata.append('cn=="Klaus Müller" AND (dob <= dob2) OR (dob == 2000) AND (gender != "M")')
testdata.append(' ( ( ( test == ( test ) ) ) ) ')
testdata.append('peter == "test"')
testdata.append('cn="Klaus Müller" AND (dob < dob2 OR dob = 2000) AND gender != "M"')
testdata.append('test')
testdata.append('(test)')
testdata.append('((test))')
testdata.append('test==test')
testdata.append('()')
testdata.append('(test')
testdata.append('cn="Klaus Müller" AND (dob < 1975 OR dob = 2000) AND gender != "M"')

from simpleparse.parser import Parser
import pprint

parser = Parser(declaration, "FILTER")

if __name__ == "__main__":
    # An entry counts as OK only when the parser consumed every character.
    # Each print call takes a single pre-formatted string so the output is
    # the same on Python 2 and Python 3.
    for entry in testdata:
        res = parser.parse(entry)
        print('-' * 90)
        if res[2] != len(entry):
            print("FAILED:  %s" % entry)
            # Show the parse result already obtained instead of re-parsing.
            pprint.pprint(res)
            print("%s %s" % (len(entry), res[2]))
        else:
            print("OK:  %s" % entry)
def testEOF(self):
    """EOF must match once the whole input has been consumed."""
    eof_parser = Parser("""this := 'a',EOF""", 'this')
    matched, _children, _stop = eof_parser.parse('a')
    assert matched, """EOF didn't match at end of string"""
'''make the config tuple, and adds them to config''' global config, text, lastItem, section_name if tag == 'section_name': section_name = text[start:end] elif tag == 'item': lastItem = text[start:end] elif tag == 'value': config.append((lastItem, text[start:end])) def travel(root, func): if root == None: return tag, start, end, children = root func(tag, start, end) if children != None: for item in children: travel(item, func) if __name__ =="__main__": parser = Parser( declaration, "file" ) success, resultTrees, nextChar = parser.parse(text) output = {} for section in resultTrees: config = [] travel(section, config_maker) output[section_name] = config pprint.pprint(output)
assignment := name, sep_or_comment, '=' , sep_or_comment, object_description+ object_description := nx_number / word / tuple star := '*' category := word object_format := word tuple := '(' , sep_or_comment, ( (nx_number / word), sep_or_comment)+ , ')' # --------------- Comments ---------------- comment := sep, (command_comment / ignore_comment) >standalone_comment< := comment, whitespacechar # Needed so comments adjacent to tokens are returns as token command_comment := '[' , [!&%/\@] , comment_string , ']' ignore_comment := '[', comment_string , ']' <comment_string> := (-[][]+ / nested_comment)* <nested_comment> := ('[' , comment_string, ']') ''' # --------------- The parser ---------------- parser = Parser(dec,'nexus_file') # --------------- Test function ---------------- if __name__ == "__main__": import sys #src = sys.stdin.read() src = sys.stdin.read() taglist = ( parser.parse( src)) pprint.pprint(taglist) print 'Nexus file has %d blocks or out-of-block comments.' % len(taglist[1])
class ExpParser():
    """Parse function-call strings and evaluate arithmetic expressions.

    Two parsers share one grammar: ``fun_parser`` starts at the ``fun``
    production (``name(exp, exp, ...)``) and ``exp_parser`` at ``exp``
    (``+ - * /`` over parenthesised expressions, variables and numbers).
    The parameter names ``str`` and ``map`` shadow builtins; they are kept
    so existing keyword callers continue to work.
    """

    def __init__(self):
        declaration = r'''
fun := fun_name,'(',exp_list,')'
fun_name := [a-zA-Z0-9_-]+
exp_list := exp,(',',exp)*
exp := pm_exp
pm_exp := md_exp,('+'/'-',md_exp)*
md_exp := bracket_exp,('*'/'/',bracket_exp)*
bracket_exp := ('(',exp,')')/str_var/number
str_var := [a-zA-Z],[a-zA-Z0-9_-\\.]*
'''
        self.fun_parser = Parser(declaration, "fun")
        self.exp_parser = Parser(declaration, "exp")

    def parse_exp(self, str):
        """Split ``name(arg, ...)`` into ``(name, [arg_text, ...])``.

        Spaces are removed before parsing.  Returns ``(None, None)`` when
        the parser reports failure.
        """
        str = str.replace(" ", "")
        success, child, nextcharacter = self.fun_parser.parse(str)
        if not success:
            return (None, None)
        # Per the ``fun`` production the first child covers the function
        # name and the second the argument list.
        tag, start, end, subtags = child[0]
        func_name = str[start:end]
        tag, start, end, subtags = child[1]
        exp_list = [str[exp[1]:exp[2]] for exp in subtags]
        return (func_name, exp_list)

    def get_exp_value(self, str, map):
        """Evaluate expression ``str`` with variable values taken from ``map``.

        Returns ``(value, "")`` on success and ``(None, message)`` on
        failure.  Spaces are removed before parsing.
        """
        str = str.replace(" ", "")
        success, child, nextcharacter = self.exp_parser.parse(str)
        if not success:
            return (None, "parse the exp %s failed" % str)
        tag, start, end, subtags = child[0]
        return self._get_value(tag, start, end, subtags, str, map)

    def _get_value(self, tag, start, end, subtags, str, map):
        """Evaluate one parse-tree node; always returns ``(value, message)``.

        ``value`` is ``None`` when evaluation failed and ``message`` then
        says why.
        """
        if tag == "number":
            # float() replaces string.atof, which Python 3 no longer has.
            return (float(str[start:end]), "")
        if tag == "str_var":
            temp = str[start:end]
            # ``in`` replaces dict.has_key, which Python 3 no longer has.
            if temp in map:
                return (map[temp], "")
            return (None, "not find %s in map" % temp)
        if tag == "pm_exp":
            return self._fold_operands(subtags, str, map, additive=True)
        if tag == "md_exp":
            return self._fold_operands(subtags, str, map, additive=False)
        # Every other tag ("exp", "bracket_exp", ...) is a wrapper: its
        # value is that of its first child.  A wrapper without children is
        # reported as an error tuple so callers can always unpack the result.
        if subtags:
            exp = subtags[0]
            return self._get_value(exp[0], exp[1], exp[2], exp[3], str, map)
        return (None, "unfind tag %s and have no subtags" % tag)

    def _fold_operands(self, subtags, str, map, additive):
        """Combine operand nodes left to right with the operator before each.

        ``additive`` selects ``+``/``-`` (true) or ``*``/``/`` (false).  The
        operator is the single character just before an operand's start;
        this relies on spaces having been removed from ``str``.
        """
        result = None
        for exp in subtags:
            val, msg = self._get_value(exp[0], exp[1], exp[2], exp[3], str, map)
            if val is None:
                # Propagate the first failure unchanged.
                return (val, msg)
            if result is None:
                result = val
                continue
            pos = exp[1] - 1
            op = str[pos:pos + 1]
            if additive:
                if op == '+':
                    result = result + val
                elif op == '-':
                    result = result - val
                else:
                    return (None, "oper %s is not + or -" % op)
            else:
                if op == '*':
                    result = result * val
                elif op == '/':
                    if val == 0.0:
                        return (None, "div value %s is 0 " % exp[0])
                    result = result / val
                else:
                    return (None, "oper %s is not * or /" % op)
        return (result, "")
parser = Parser(rollparse.declaration)

# Inputs that must be consumed completely by the "roll" production.
tests_success = [
    "d6",
    "5d6",
    "5d6 + d8",
    "(5d6 + d8)",
    "6 + (5d6 + d8)",
    "[5d6 + d8] + 6",
    "{3d20} + 10"
]
prod = "roll"
for test in tests_success:
    success, children, nextcharacter = parser.parse(test, production=prod)
    assert success and nextcharacter==len(test), """Wasn't able to parse %s as a %s (%s chars parsed of %s), returned value was %s"""%( repr(test), prod, nextcharacter, len(test), (success, children, nextcharacter))

# Inputs whose incomplete parses are only reported, not asserted on.
tests_fail = [
    "{5d6}+{8d8}",
    "5d",
    "3+",
    "8d8" #this one should actually work
]
for test in tests_fail:
    success, children, nextcharacter = parser.parse(test, production=prod)
    if not (success and nextcharacter==len(test)):
        # print() with one pre-formatted argument produces the same output
        # on Python 2 and Python 3; the print statement is Python 2 only.
        print("""Wasn't able to parse %s as a %s (%s chars parsed of %s), returned value was %s\n"""%( repr(test), prod, nextcharacter, len(test), (success, children, nextcharacter)))
from simpleparse.stt.TextTools import print_tags
import pprint

# Sample s-expressions handed one by one to the module-level ``parser``.
shouldParse = [
    "(+ 2 3)",
    "(- 2 3)",
    "(* 2 3)",
    "(quote (2 3 4))",
    "(23s (2.4s 3s 45.3))",
    "(() () (2 3 4))",
    "()",
    '''("thisand that" ())''',
    '"this"',
    '''('"this")''',
    '''("this\n\r" ' those (+ a b) (23s 0xa3 55.3) "s")''',
    r'''("this\n\r" ' those (+ a b) (23s 0xa3 55.3) "s")''',
    r'''("this\n\r" ' those (+ a b) (23s 0xa3 55.3] "s")''',
    '''("this\n\r" ' those (+ a b) (23s 0xa3 55.3\n\n] "s")''',
    '''(with-pedantry :high It's "Scheme In One Defun".)''',
]

for item in shouldParse:
    try:
        success, children, next = parser.parse(item)
        if success:
            # Report the index where parsing stopped, then the parse tree.
            print('success', item, next)
            pprint.pprint(children)
        else:
            print('fail', item)
    except SyntaxError as err:
        print(err)
try: fid = file(fname, 'rt') except Exception, detail: raise RuntimeError, "Unable to open layout file: %s\n %s" % ( fname, str(detail)) data = fid.read() fid.close() parser = Parser(declaration, "file") # Replace all CR's in data with nothing, to convert DOS line endings # to unix format (all LF's). data = string.replace(data, '\x0D', '') tree = parser.parse(data) # Last element of tree is number of characters parsed if not tree[0]: raise RuntimeError, "Layout file cannot be parsed" if tree[2] != len(data): raise RuntimeError, "Parse error at character %d in layout file" % tree[ 2] Rows = [] for rowspec in tree[1]: if rowspec[0] in ('nullline', 'commentline'): continue assert rowspec[0] == 'rowspec' Rows.append(parseRowSpec(rowspec[3], data))
return None class OPLParsingError(Exception): def __init__(self, *args, **kwargs): Exception.__init__(self, *args, **kwargs) if __name__ == '__main__': #parser = OplParser('../domains/test_domain.opl') #parser.read_ebnf_definition('opl_definitions.ebnf') definitions = open('opl_definitions.ebnf', 'r').read() production = 'file' parser = Parser(definitions, production) domain_str = open('../domains/empty_domain.opl', 'r').read() success, children, nextcharacter = parser.parse(domain_str) assert success and nextcharacter == len( domain_str ), """Wasn't able to parse input as a %s (%s chars parsed of %s), returned value was %s""" % ( production, nextcharacter, len(domain_str), (success, children, nextcharacter)) print """Successfully parsed input as a %s (%s chars parsed of %s), returned value was %s""" % ( production, nextcharacter, len(domain_str), (success, children, nextcharacter)) # domain tag, start, stop, sub_tags = children[0] name = domain_str[sub_tags[0][1]:sub_tags[0][2]] domain = Domain(tag, name) # tag, start, stop, sub_tags = children[0]
if tag == 'section_name': section_name = text[start:end] elif tag == 'item': lastItem = text[start:end] elif tag == 'value': config.append((lastItem, text[start:end])) def travel(root, func): if root == None: return tag, start, end, children = root func(tag, start, end) if children != None: for item in children: travel(item, func) if __name__ == "__main__": parser = Parser(declaration, "file") success, resultTrees, nextChar = parser.parse(text) output = {} for section in resultTrees: config = [] travel(section, config_maker) output[section_name] = config pprint.pprint(output)
return dispatchList(self, children, buffer) def int_w_exp(self, info, buffer): (tag, start, stop, (a, b)) = info base = dispatch(self, a, buffer) exp = dispatch(self, b, buffer) return base**exp int = numbers.IntInterpreter() float = numbers.FloatInterpreter() string = strings.StringInterpreter() string_double_quote = strings.StringInterpreter() string_single_quote = string_double_quote def true(self, tag, buffer): return True def false(self, tag, buffer): return False def null(self, tag, buffer): return None parser = Parser(declaration, "object") if __name__ == "__main__": import sys, json print( json.dumps( parser.parse(open(sys.argv[1]).read(), processor=Processor())))
seq_group := ts, element_token, (ts, element_token)+, ts element_token := (optional_element / base_element), repetition? repetition := ('*'/'+') optional_element := '[',fo_group,']' >base_element< := (range/string/group/name) <fo_indicator> := '|' name := [a-zA-Z_],[a-zA-Z0-9_]* <ts> := ( ('\n', ?-name) / [ \011]+ / comment )* comment := '#',-'\n'+,'\n' range := string, ts, '...', ts, string """ from simpleparse.parser import Parser from simpleparse.common import strings parser = Parser( declaration ) if __name__ == "__main__": from simpleparse.stt.TextTools import print_tags grammar = open("""py_grammar.txt""").read() success, result, next = parser.parse( grammar, 'declarationset') print( 'success', success, next ) print_tags( grammar, result )
body := statement* statement := (ts,';',comment,'\n')/equality/nullline nullline := ts,'\n' comment := -'\n'* equality := ts, identifier,ts,'=',ts,identified,ts,'\n' identifier := [a-zA-Z], [a-zA-Z0-9_]* identified := ('"',string,'"')/number/identifier ts := [ \t]* char := -[\134"]+ number := [0-9eE+.-]+ string := (char/escapedchar)* escapedchar := '\134"' / '\134\134' ''' testdata = '''[test1] val=23 val2="23" wherefore="art thou" ; why not log = heavy_wood [test2] loose=lips ''' from simpleparse.parser import Parser import pprint parser = Parser( declaration, "file" ) if __name__ =="__main__": pprint.pprint( parser.parse( testdata))
def testEOF(self):
    """The EOF token must match when nothing follows the literal 'a'."""
    grammar = """this := 'a',EOF"""
    outcome = Parser(grammar, 'this').parse('a')
    matched = outcome[0]
    assert matched, """EOF didn't match at end of string"""
nhx_tag := '[&&NHX:', *, ']' nhx_val := '[&&NHX:', *, ']' nhx_comment := '[', ']' gp_list := gp_item* gp_item := gp_pipe_struct, ';', '\n' gp_pipe_struct := symb_blob, '|', entrez_blob, '|', uniprot_blob symb_blob := simple_label, ':', simple_label entrez_blob := simple_label, '=', simple_label uniprot_blob := simple_label, '=', simple_label simple_label := [a-zA-Z0-9\-\_]+ ''' #newick_stanza := string, '\n', #simple_label := [a-zA-Z0-9\-\_\=]+ ## if __name__ == '__main__': pthrjs_parser = Parser(pthrjs_ebnf, "file") mpath = '/home/sjcarbon/local/src/svn/geneontology/javascript/_data/' #for fn in ['trial1.txt']: for fn in ['PTHR10004.tree']: f = open(mpath + fn, 'r') f_text = f.read() print f_text print 'result:' print pthrjs_parser.parse(f_text)
""" try: fid = file(fname, 'rt') except Exception, detail: raise RuntimeError, "Unable to open layout file: %s\n %s" % (fname, str(detail)) data = fid.read() fid.close() parser = Parser(declaration, "file") # Replace all CR's in data with nothing, to convert DOS line endings # to unix format (all LF's). data = string.replace(data, '\x0D', '') tree = parser.parse(data) # Last element of tree is number of characters parsed if not tree[0]: raise RuntimeError, "Layout file cannot be parsed" if tree[2] != len(data): raise RuntimeError, "Parse error at character %d in layout file" % tree[2] Rows = [] for rowspec in tree[1]: if rowspec[0] in ('nullline', 'commentline'): continue assert rowspec[0]=='rowspec' Rows.append(parseRowSpec(rowspec[3], data))
def testEOFFail(self):
    """The EOF token must not match while input remains after 'a'."""
    eof_parser = Parser("""this := 'a',EOF""", 'this')
    matched, _children, _stop = eof_parser.parse('a ')
    assert not matched, """EOF matched before end of string"""