def test1():
    """Smoke-test the JSON-path selector grammar on indexed selectors."""
    grammar = Grammar("""
        selector = root item*
        item = dot identifier ("[" index "]")*
        index = ~'([1-9][0-9]+)|[0-9]'
        identifier = ~'[a-zA-Z0-9_-]+'
        root = "$"
        dot = "."
        """)
    # Same two samples, same print order as before.
    for sample in ('$.a.b[1][2].c[3]', '$.a.b[10][2]'):
        print(grammar.parse(sample))
def test13(grammar: Grammar):
    """Compound &&/|| filter expressions with grouping should all parse."""
    data = '{($.user.id = 2 && $.users[0].email = "nonmatch") || $.actions[2] = "GET"}'
    print(grammar.parse(data))
    samples = (
        ' { ($.user.id = 1) && ($.users[0].email = "*****@*****.**") } ',
        '{($.user.id = 2 && $.users[0].email = "nonmatch") || $.actions[2] = "GET"}',
        '{ $.user.email = "*****@*****.**" || $.coordinates[0][1] = nonmatch && $.actions[2] = nomatch }',
        '{ ($.user.email = "*****@*****.**" || $.coordinates[0][1] = nonmatch) && $.actions[2] = nomatch }',
    )
    for sample in samples:
        print(grammar.parse(sample))
def text_to_pages(txt: str) -> Tuple[str, List[str]]:
    """
    Convert raw text of a docket to an xml-string, where the nodes
    are the pages and sections of the docket.

    i.e.

    .. code-block::

        <docket>
          <page>
            <section> </section>
          </page>
          <page>
            <section_continued> </section_continued>
          </page>
        </docket>

    Returns:
        A pair ``(xml_string, errors)``. On any parse/visit failure the
        xml is the empty ``<docket></docket>`` document and *errors*
        carries one human-readable message.
    """
    errors: List[str] = []
    grammar = Grammar(docket_sections)
    try:
        nodes = grammar.parse(txt)
        visitor = CustomVisitorFactory(
            common_terminals,
            docket_sections_nonterminals,
            docket_sections_custom_nodevisitors,
        ).create_instance()
        return visitor.visit(nodes), errors
    except Exception:
        # Dockets vary wildly; a failed parse is reported, not raised.
        # (Removed: unused `slines` debug variable and unused `e` binding.)
        logger.error("text_to_pages failed.")
        errors.append("Could not extract pages from docket text.")
        return "<docket></docket>", errors
def parse(section_text):
    """Parse a defendant-info section into xml via the section grammar."""
    # A trailing newline is required for the final line to match.
    padded = section_text + "\n"
    tree = Grammar(grammars[0]).parse(padded)
    return DefendantInfoVisitor().visit(tree)
def text_to_pages(txt: str) -> str:
    """
    Convert raw text of a docket to an xml-string, where the nodes
    are the pages and sections of the docket.

    i.e.
    <docket>
      <page>
        <section> </section>
      </page>
      <page>
        <section_continued> </section_continued>
      </page>
    </docket>

    On any failure an empty <docket></docket> document is returned.
    """
    grammar = Grammar(docket_sections)
    try:
        nodes = grammar.parse(txt)
        visitor = CustomVisitorFactory(
            common_terminals,
            docket_sections_nonterminals,
            docket_sections_custom_nodevisitors).create_instance()
        return visitor.visit(nodes)
    except Exception:
        # Best-effort: log and fall back to an empty document.
        # (Removed: unused `slines` debug variable and unused `e` binding.)
        logging.error("text_to_pages failed.")
        return "<docket></docket>"
def test_custom_visitor_factory():
    """A factory-built visitor should serialize the parse tree to xml tags."""
    text = """Hi there, partner"""
    rules = r"""
        text = greeting punctuation identifier
        greeting = hi_there?
        punctuation = comma?
        identifier = partner?
        hi_there = "Hi there"
        comma = ", "
        partner = "partner"
    """
    parsed = Grammar(rules).parse(text)
    visitor = CustomVisitorFactory(
        ["hi_there", "comma", "partner"],
        ["text", "greeting", "punctuation", "identifier"],
        dict(),
    ).create_instance()
    xml = visitor.visit(parsed)
    assert xml == "<text> <greeting> Hi there </greeting><punctuation> , </punctuation><identifier> partner </identifier> </text>"
def test2():
    """A quoted string containing escaped quotes must match text_quoted."""
    grammar = Grammar(r"""
        text = text_quoted / text_simple
        text_quoted = ~r'"([^"\\]|\\.)*"'
        text_simple = ~'[a-zA-Z0-9.*_-]+'
    """)
    print(grammar.parse('"z,@! \\" ok \\" "'))
def parse_pdf(pdf: Union[BinaryIO, str], tempdir=None) -> Tuple[Person, Case, List[str]]:
    """
    Parse a pdf of a criminal record docket.

    The 'see' references are to the DocketParse library, which also parses
    pdf dockets.

    Args:
        pdf: a binary reader or a string path to a pdf file.
        tempdir: The pdf must be written to txt with pdftotext, so we need
            a temporary directory for it.

    Returns:
        The Person to whom the docket relates, the Case to which the Docket
        relates, and a list of human-readable error messages.
        (Fix: the annotation previously said ``Tuple[Person, Case]`` but the
        function has always returned a 3-tuple including the errors list.)
    """
    errors: List[str] = []
    # pdf to raw text
    txt = get_text_from_pdf(pdf, tempdir=tempdir)
    # text to xml sections (see DocketParse.sectionize). Handles page breaks.
    pages_tree = etree.fromstring(text_to_pages(txt))
    sections_tree = sections_from_pages(pages_tree)
    # Parse individual sections with the grammars registered for them.
    # Renamed the loop variable so it is no longer shadowed by the compiled
    # Grammar object below.
    for section_name, grammar_text, terminals, nonterminals, custom_visitors in section_grammars:
        try:
            section = sections_tree.xpath(
                f"//section[@name='{section_name}']")[0]
            # remove blank lines at the ends of the section.
            section_text = "\n".join(
                ln for ln in section.text.split("\n") if ln.strip())
            grammar = Grammar(grammar_text)
            try:
                nodes = grammar.parse(section_text)
            except Exception:
                errors.append(f" Text for {section_name} failed to parse.")
                logging.error(f" Text for {section_name} failed to parse.")
                continue
            visitor = CustomVisitorFactory(
                terminals, nonterminals, custom_visitors).create_instance()
            parsed_section_xml = etree.fromstring(visitor.visit(nodes))
            # replace original unparsed section's text w/ the parsed xml.
            target = sections_tree.xpath(
                f"//section[@name='{section_name}']")[0]
            target.text = ""
            target.append(parsed_section_xml)
        except Exception:
            # not all dockets have all sections, so not being able to find a
            # section is not necessarily an error.
            # (IndexError was listed redundantly alongside Exception before.)
            logging.info(f" Could not find section {section_name}")
    # extract Person and Case information from xml.
    defendant = get_person(sections_tree)
    case = get_case(sections_tree)
    return defendant, case, errors
def test_parameters_ok(value):
    """Every parameter sample must be accepted by the composed grammar."""
    rules = (grammar.params + grammar.type_ + grammar.symbols
             + grammar.ident + grammar.ws)
    assert Grammar(rules).parse(value) is not None
def test_annotationlist_ok(value):
    """Annotation-list samples must parse under an explicit start rule."""
    rules = ("start = annotationlist\n" + grammar.annotation
             + grammar.symbols + grammar.ident + grammar.ws)
    assert Grammar(rules).parse(value) is not None
def test_enum_ok(value):
    """Enum declarations must parse under an explicit start rule."""
    rules = ("start=enum\n" + grammar.enum + grammar.ws
             + grammar.symbols + grammar.ident)
    assert Grammar(rules).parse(value) is not None
def test12(grammar: Grammar):
    """CloudWatch-style metric filter patterns should all parse."""
    samples = (
        ' { $.eventType = "UpdateTrail" } ',
        '{ $.sourceIPAddress != 123.123.* }',
        '{ $.arrayKey[0] = "value" }',
        '{ $.objectList[1].id = 2 }',
        '{ $.SomeObject IS NULL }',
        '{ $.SomeOtherObject NOT EXISTS }',
        '{ $.ThisFlag IS TRUE } ',
    )
    for sample in samples:
        print(grammar.parse(sample))
def test_param_list_ok(value):
    """Parameter-list samples must parse under an explicit start rule."""
    rules = ("start = param_list\n" + grammar.params + grammar.type_
             + grammar.symbols + grammar.ident + grammar.ws)
    assert Grammar(rules).parse(value) is not None
def parse(section_text):
    """Parse a section's text and serialize the tree to xml."""
    tree = Grammar(grammars[0]).parse(section_text)
    emitter = CustomVisitorFactory(terminals, nonterminals,
                                   dict()).create_instance()
    return emitter.visit(tree)
def parse(section_text):
    """Parse a section's text and serialize the tree to xml."""
    visitor = CustomVisitorFactory(terminals, nonterminals,
                                   dict()).create_instance()
    parse_tree = Grammar(grammars[0]).parse(section_text)
    return visitor.visit(parse_tree)
class MetaParser:
    """Callable that parses text against the meta grammar and prunes noise."""

    def __init__(self, meta_rules=meta_rules):
        # TODO: 99 automatically insert the _ and end rules, or have literals..
        self.meta_grammar = Grammar(meta_rules, 'meta')

    def __call__(self, text):
        """
        Parse a text following the meta grammar.
        @return: a cleaned tree without '_' and 'end' rules.
        """
        raw_tree = self.meta_grammar.parse(text)
        return clean_node(raw_tree,
                          to_prune=['_', 'end'],
                          txt_leaf=['_basic_word', 'regex'])
class LyricParser(NodeVisitor):
    """Parses .drmw lyric files for association with Doremi tunes"""

    def __init__(self, text):
        # Parsing happens eagerly here; convert() later walks the tree.
        NodeVisitor.__init__(self)
        # start with a new empty lyric
        self.lyric = Lyric()
        # add an empty voice to it
        self.lyric.voices.append(LyricVoice())
        # build an abstract syntax tree
        # NOTE(review): the grammar file handle is never closed — confirm
        # whether this relies on CPython refcounting.
        self.grammar = Grammar(open("lyric-grammar", "r").read())
        self.syntax = self.grammar.parse(text)

    def convert(self):
        """Convert the syntax tree to our internal representation"""
        self.visit(self.syntax)
        # remove any extra empty voices
        self.lyric.voices = [voice for voice in self.lyric.voices if voice.name != ""]
        # remove any extra empty verses
        # (drops the trailing verse of each voice, which the visitor always
        # leaves empty — see visit_verse)
        for voice in self.lyric.voices:
            voice.verses = voice.verses[:-1]
        return self.lyric

    def visit_title(self, node, vc):
        # Lyric metadata: pull the quoted value out of the node.
        self.lyric.title = get_string_val(node)

    def visit_author(self, node, vc):
        self.lyric.author = get_string_val(node)

    def visit_meter(self, node, vc):
        self.lyric.meter = get_string_val(node)

    def visit_voicespec(self, node, vc):
        # the current voice is complete, so start a new one
        self.lyric.voices.append(LyricVoice())

    def visit_voice(self, node, vc):
        self.lyric.voices[-1].name = get_node_val(node, "name")

    def visit_verse(self, node, vc):
        # the verse is complete, so start a new one
        self.lyric.voices[-1].verses.append(Verse())

    def visit_word(self, node, vc):
        # Append the raw word text (stripped) to the current verse.
        self.lyric.voices[-1].verses[-1].words.append(node.text.strip())

    def generic_visit(self, node, vc):
        # All other grammar nodes carry no lyric content.
        pass
def parse(section_text):
    """Strip page headers from a disposition section, then parse it to xml."""
    cleaned = clean_headers(section_text)
    tree = Grammar(grammars[0]).parse(cleaned)
    return DispositionVisitor().visit(tree)
def test_use_regex_library():
    """With use_regex_library=True, \\p{L} unicode classes are honoured."""
    grammar = Grammar(r'''
        unicode_word = ~"[\p{L}]*"
    ''', use_regex_library=True)
    text = 'Тест'
    # Four Cyrillic letters -> the whole string is one RegexNode match.
    expected = RegexNode(
        expr=Regex(pattern=r'[\p{L}]*', use_regex_library=True),
        full_text=text,
        start=0,
        end=4)
    eq_(grammar.parse(text=text), expected)
def test_func_body_ok(value):
    """Function bodies must parse under an explicit start rule."""
    rules = ("start=func_body\n" + grammar.annotation + grammar.function
             + grammar.params + grammar.type_ + grammar.qualifier
             + grammar.ws + grammar.symbols + grammar.ident)
    assert Grammar(rules).parse(value) is not None
def test_class_ok(value):
    """Whole class declarations must parse with the full rule set."""
    rules = (grammar.class_ + grammar.annotation + grammar.function
             + grammar.params + grammar.type_ + grammar.qualifier
             + grammar.ws + grammar.symbols + grammar.ident)
    assert Grammar(rules).parse(value) is not None
class PegParser(TemplateParser):
    """Template parser backed by a parsimonious PEG grammar.

    Each input line is uppercased and parsed into a (directive, parameter)
    pair by TigrVisitor.
    """

    class TigrVisitor(NodeVisitor):
        def visit_line(self, node, visited_children):
            """Return the statement tuple; whitespace/comment nodes are dropped."""
            s, *_ = visited_children
            return s

        def visit_statement(self, node, visited_children):
            # parameter is an optional node: a non-empty list when present.
            directive, _, parameter = visited_children
            if parameter:
                return directive.text, parameter[0].text
            else:
                return directive.text, ''

        def generic_visit(self, node, visited_children):
            """ The generic visit method. """
            return visited_children or node

    def __init__(self, drawer):
        super().__init__(drawer)
        # One single-letter directive, an optional numeric parameter, then
        # an optional '#' comment.
        self.peg_grammar = Grammar(r'''
            line = statement ws? comment? ws
            statement = directive ws? parameter?
            directive = ~"P|X|Y|D|W|N|E|S|U"
            parameter = ~"-?\d{0,}\.{0,1}\d{0,}"
            comment = ~"#.*"
            ws = ~"\s*"
        ''')
        self.peg_visitor = self.TigrVisitor()

    def do_parse_line(self, line_number, line):
        """Parse one script line into (command, data).

        Raises SkipParseException for blank/comment lines and
        ParseException (with the 1-based line number) on syntax errors.
        """
        line_uppercased = line.upper()
        if not line_uppercased:
            # skip empty line
            raise self.SkipParseException()
        if line_uppercased.startswith('#'):
            # skip comment line
            raise self.SkipParseException()
        try:
            ast = self.peg_grammar.parse(line_uppercased)
            command, data = self.peg_visitor.visit(ast)
        except Exception:
            # Any parse/visit failure is reported uniformly to the caller.
            raise self.ParseException(
                'you have a syntax error at Line {}: {}'.format(
                    line_number, line))
        else:
            return command, data
def test5(grammar: Grammar):
    """Nested, parenthesised || expressions in braces should all parse."""
    samples = (
        ' { ( a || ( a )) } ',
        ' { a || ( a ) } ',
        ' { a || a || a } ',
        ' { ( (a) ) || ( a ) } ',
        ' { ( a || ( a || ( a ) ) ) } ',
        ' { (a || (a || ((a || a) || (a || ( a || a)) )) ) || (a) } ',
        ' { ( a ) || a || ( a || ( a ) ) } ',
        ' { ( a ) || a || ( a || a) } ',
    )
    for sample in samples:
        print(grammar.parse(sample))
def test3(grammar: Grammar):
    """Whitespace-free nested || expressions should all parse."""
    samples = (
        '(a||(a))',
        'a||(a)',
        'a||a||a',
        '(a)||(a)',
        '(a||(a||(a)))',
        '(a||(a||((a||a)||(a||(a||a)))))||(a)',
        '(a)||a||(a||(a))',
        '(a)||a||(a||a)',
    )
    for sample in samples:
        print(grammar.parse(sample))
def test_visitor():
    """Assert a tree gets visited correctly."""
    grammar = Grammar(r'''
        bold_text = bold_open text bold_close
        text = ~'[a-zA-Z 0-9]*'
        bold_open = '(('
        bold_close = '))'
    ''')
    text = '((o hai))'
    # Hand-built tree mirroring what the grammar should produce.
    children = [Node(grammar['bold_open'], text, 0, 2),
                Node(grammar['text'], text, 2, 7),
                Node(grammar['bold_close'], text, 7, 9)]
    tree = Node(grammar['bold_text'], text, 0, 9, children)
    eq_(grammar.parse(text), tree)
    eq_(HtmlFormatter().visit(tree), '<b>o hai</b>')
def test_visitor():
    """Assert a tree gets visited correctly."""
    grammar = Grammar(r'''
        bold_text = bold_open text bold_close
        text = ~'[a-zA-Z 0-9]*'
        bold_open = '(('
        bold_close = '))'
    ''')
    text = '((o hai))'
    expected_tree = Node(
        grammar['bold_text'], text, 0, 9,
        [Node(grammar['bold_open'], text, 0, 2),
         Node(grammar['text'], text, 2, 7),
         Node(grammar['bold_close'], text, 7, 9)])
    eq_(grammar.parse(text), expected_tree)
    rendered = HtmlFormatter().visit(expected_tree)
    eq_(rendered, '<b>o hai</b>')
def test7(grammar: Grammar):
    """Comparison operators combined with &&/||/IS/NOT EXISTS should parse."""
    samples = (
        ' { ( a = 12 || ( b = 34 )) } ',
        ' { a = 12 || ( b = 34 ) } ',
        ' { a = 12 && b != 34 || c = 56 } ',
        ' { ( ( a = 12) ) || ( b != 34 ) } ',
        ' { ( a = 12 && ( b = 34 && ( c != 56 ) ) ) } ',
        ' { (a = 12 || (b IS TRUE && ((c = 56 && a=78) || (b = 2 && ( a =1 || c = 2)) )) ) || (b = 4) } ',
        ' { ( a = 12 ) || a != 2 || ( a IS NULL|| ( a NOT EXISTS ) ) } ',
        ' { ( a NOT EXISTS ) || a IS TRUE || ( a IS FALSE || a NOT EXISTS) } ',
    )
    for sample in samples:
        print(grammar.parse(sample))
def test():
    """Experimental precedence/grouping rules for the filter grammar.

    The commented-out rule variants inside the grammar string document
    earlier attempts at handling nested parentheses.
    """
    grammar = Grammar(
        #pattern = "{" ws text ws "=" ws text ws "}"
        """
        #pattern6 = _ "{" pattern5 "}" _
        #pattern6 = "(" pattern4 (logical pattern4)*
        #pattern5 = pattern4 (logical pattern4)*
        #pattern6 = pattern5 / pattern4
        #pattern7 = _ pattern6 _
        # (a || (b || c))
        # ((a || b) || c)
        # ((a || b || c))
        pattern7 = "(" pattern7 (logical pattern6)* ")"
        pattern6 = pattern5 / pattern4
        # (a || b || c)
        pattern5 = "(" pattern4 ")"
        # a || b || c
        pattern4 = pattern3 (logical pattern3)*
        pattern3 = _ (pattern2 / pattern1) _
        pattern2 = "(" _ (pattern2/pattern1) _ ")"
        pattern1 = _ (compare_eq / compare_ne) _
        compare_eq = text _ "=" _ text
        compare_ne = text _ "!=" _ text
        logical = "&&" / "||"
        _ = ~"[ \t]"*
        text = ~"[a-zA-Z0-9_-]+"
        item = dot identifier ("[" index "]")*
        index = ~"[0-9]|[1-9][0+9]+"
        identifier = ~"[a-zA-Z0-9_-]+"
        selector = root item*
        root = "$"
        dot = "."
        """)
    # NOTE(review): the index regex's "[0+9]" looks like a typo for "[0-9]"
    # — confirm against the intended multi-digit index rule.
    #data = ' { ( -a_bc= 123 ) || abc = 123 } '
    data = '(((-a_bc=123))||abc=12)'
    print(grammar.parse(data))
def parse(path):
    """
    Parse a pdf docket into an xml document. This xml document will be of
    the form:
    <docket>
      <page>
        <caption> </caption>
        <body>
          <section name='a'> </section>
          <section name='b'> </section>
          ...
        </body>
        <footer> </footer>
      </page>
      <page> ... </page>
    </docket>

    This xml most closely resembles the original docket. (The caveat is
    that section names are removed from the text and turned into name
    attributes of the section xml elements). But some sections extend
    across pages, and this xml schema leves these sections separated from
    each other.

    TODO: Turn this into a real .xsd schema definition.
    """
    print("Starting parse {}".format(path))
    # Each stage below is timed in microseconds for the summary log line.
    #
    start = datetime.now()
    #
    docket_text = pdf_to_text(path)
    pdf2text_time = (datetime.now()-start).microseconds
    #
    start = datetime.now()
    #
    grammar = Grammar(grammar_list[0])
    create_grammar_time = (datetime.now()-start).microseconds
    #
    visitor = DocketVisitor()
    start = datetime.now()
    #
    root = grammar.parse(docket_text)
    parse_grammar_time = (datetime.now()-start).microseconds
    #
    start = datetime.now()
    #
    results = visitor.visit(root)
    node_visitor_time = (datetime.now()-start).microseconds
    #
    # NOTE(review): .microseconds only reports the sub-second component;
    # .total_seconds() would be needed for stages longer than a second —
    # confirm intended.
    logging.info("{}, {}, {}, {}".format(pdf2text_time, create_grammar_time,
                                         parse_grammar_time,
                                         node_visitor_time))
    return results
def __init__(self, tune_fn):
    """Initialise an empty tune model and parse the tune file *tune_fn*.

    Fix: both the grammar file and the tune file were opened without ever
    being closed; context managers now release the handles promptly.
    """
    NodeVisitor.__init__(self)
    # start with an empty tune, voice, note, and list of modifiers
    self.tune = Tune()
    self.voice = Voice()
    self.note = Note()
    self.note_modifiers = []
    # at the outset, we are not in a voice's content
    self.in_content = False
    # set up the actual parser
    with open("doremi-grammar", "r") as grammar_file:
        grammar = Grammar(grammar_file.read())
    # read and parse the tune
    with codecs.open(tune_fn, "r", "utf-8") as tune_file:
        tune_text = tune_file.read()
    self.syntax = grammar.parse(tune_text)
def test11(grammar: Grammar):
    """Quoted values and dotted literals should parse; also reruns test10."""
    test10(grammar)
    samples = (
        ' { ( $.a = "12" || ( $.b = 34 )) } ',
        ' { ( $.a.bc IS NULL || ( $.b.xy = "ab" )) } ',
        ' { ( $.a[0] NOT EXISTS || ( $.b[2][1] = "34" )) } ',
        ' { ( $.a[0]<=12 || ( $.b[2][1].c = "34" )) } ',
        ' { ($.a[0] <="12" || ( $.b[10][1] =34 )) } ',
        ' { $.a.b ="12" && $.b != 3.4.5 || $.c = 56 } ',
        ' { $.a="12" && $.b != 3.4.5 || $.c = 56 } ',
    )
    for sample in samples:
        print(grammar.parse(sample))
def test10(grammar: Grammar):
    """JSON-path selectors with comparisons and IS/NOT EXISTS should parse."""
    samples = (
        ' { ( $.a = 12 || ( $.b = 34 )) } ',
        ' { ( $.a.bc IS NULL || ( $.b.xy = ab )) } ',
        ' { ( $.a[0] NOT EXISTS || ( $.b[2][1] = 34 )) } ',
        ' { ( $.a[0]<=12 || ( $.b[2][1].c = 34 )) } ',
        ' { ($.a[0] <= 12 || ( $.b[10][1] =34 )) } ',
        ' { $.a.b = 12 && $.b != 34 || $.c = 56 } ',
    )
    for sample in samples:
        print(grammar.parse(sample))
def test_custom_visitor_factory():
    """The generated visitor wraps terminals and nonterminals in xml tags."""
    text = """Hi there, partner"""
    rules = r"""
        text = greeting punctuation identifier
        greeting = hi_there?
        punctuation = comma?
        identifier = partner?
        hi_there = "Hi there"
        comma = ", "
        partner = "partner"
    """
    terminals = ["hi_there", "comma", "partner"]
    nonterminals = ["text", "greeting", "punctuation", "identifier"]
    visitor = CustomVisitorFactory(terminals, nonterminals,
                                   dict()).create_instance()
    result = visitor.visit(Grammar(rules).parse(text))
    assert result == "<text> <greeting> Hi there </greeting><punctuation> , </punctuation><identifier> partner </identifier> </text>"
def stringify_list(self, list):
    """Concatenate the string elements of *list* into one string.

    Fix: repeated ``+=`` string building is quadratic; ``str.join`` is a
    single linear pass. (The parameter name ``list`` is kept for
    backward compatibility even though it shadows the builtin.)
    """
    return "".join(list)
# End of Class

test_num = 0

# grammar = Grammar(grammars[test_num])
# root = grammar.parse(texts[0])
# print("Parsed okay.")
# visitor = CaseInfoVisitor()
# results = visitor.visit(root)
# print(results)
# for r in results:
#     print(r)
# print(root.prettily())

# Parse a sample docket and write the visited output to output2.txt.
#with open("./sample_dockets/CP-51-CR-0000001-2011.txt") as f:
with open("./sample_dockets/CP-51-CR-0005727-2011.txt") as f:
    grammar = Grammar(grammars[test_num])
    root = grammar.parse(f.read())
    visitor = DocketVisitor_2()
    print("Parse succeeded.")
    # Fix: the explicit f.close()/f2.close() calls were redundant — the
    # `with` blocks already close both files.
    with open("output2.txt", 'w+') as f2:
        f2.write(visitor.visit(root))
def test_qualifier_notok():
    """Comma-separated qualifiers are invalid: the parse must stop early."""
    rules = r"""
        start = qualifierlist
        """ + grammar.qualifier + grammar.ws
    qualifier_grammar = Grammar(rules)
    with pytest.raises(IncompleteParseError):
        qualifier_grammar.parse("public, static, final")
def test_block_line_comment_ok(value):
    """Line-comment samples must parse under an explicit start rule."""
    rules = r"""
        start = line_comment
        """ + grammar.ws
    assert Grammar(rules).parse(value) is not None
class Parser(object):
    """Parses tscript source into a nested tuple AST of (Types.*, payload).

    Rule names in the grammar are dispatched to same-named methods via
    _eval; whitespace-ish rules are pruned with the IsEmpty exception.
    """

    def __init__(self):
        super().__init__()
        # Byte offsets of every '\n' in the last script; used to map a
        # node's start offset back to a 1-based line number.
        self.line_endings = []
        self.grammar = Grammar(tscript_grammar)

    def lint(self, script):
        """Return a human-readable error string, or None if the script is clean."""
        script += '\n'  # just incase the end of the script lacks a \n otherwise the *line* will not match
        self.line_endings = [i for i, c in enumerate(script) if c == '\n']
        try:
            ast = self.grammar.parse(script)
        except IncompleteParseError as e:
            return 'Incomplete Parsing on line: {0} column: {1}'.format(
                e.line(), e.column())
        except ParseError as e:
            return 'Error Parsing on line: {0} column: {1}'.format(
                e.line(), e.column())
        try:
            self._eval(ast)
        except Exception as e:
            return 'Exception Parsing "{0}"'.format(e)
        return None

    def parse(self, script):
        """Parse *script* and return the root (Types.SCOPE, {...}) tuple.

        Raises ParserError (with line/column) on any parse failure.
        """
        script += '\n'  # just incase the end of the script lacks a \n otherwise the *line* will not match
        self.line_endings = [i for i, c in enumerate(script) if c == '\n']
        try:
            ast = self.grammar.parse(script)
        except IncompleteParseError as e:
            raise ParserError(e.line(), e.column(), 'Incomplete Parse')
        except ParseError as e:
            raise ParserError(e.line(), e.column(), 'Error Parsing')
        return (Types.SCOPE, {'_children': self._eval(ast)})

    def _eval(self, node):
        """Dispatch *node* to the handler named after its grammar rule."""
        if node.expr_name[0:3] in ('ws_', 'nl_', 'em_'):
            # ignore whitespace-style rules entirely
            raise IsEmpty()
        try:
            handler = getattr(self, node.expr_name)
        except AttributeError:
            # Unnamed/unknown rules fall through to their first child.
            handler = self.anonymous
        return handler(node)

    def anonymous(self, node):
        # Transparent node: evaluate the first child or prune if childless.
        if len(node.children) < 1:
            raise IsEmpty()
        return self._eval(node.children[0])

    def lines(self, node):
        # Collect each child line's result, skipping pruned (IsEmpty) ones.
        if not node.children:
            return []
        result = []
        for child in node.children:
            try:
                result.append(self._eval(child))
            except IsEmpty:
                pass
        return result

    def line(self, node):
        # Map the node's start offset to a 1-based source line number.
        # NOTE(review): relies on the loop variable `i` after the loop;
        # a script guaranteed to end in '\n' always has line_endings.
        for i in range(0, len(self.line_endings)):
            if self.line_endings[i] > node.start:
                break
        return (Types.LINE, self._eval(node.children[0]), i + 1)

    def expression(self, node):
        return self._eval(node.children[1])

    def value_expression(self, node):
        return self._eval(node.children[1])

    def constant_expression(self, node):
        return self._eval(node.children[1])

    def block(self, node):
        # A block is a scope: its parameter map plus its child lines.
        options = self._eval(node.children[1])
        options['_children'] = self._eval(node.children[3])
        return (Types.SCOPE, options)

    def paramater_map(self, node):
        # (Rule name spelling matches the grammar.) Returns {name: value}.
        children = node.children[0].children
        if len(children) == 0:
            return {}
        children = children[0].children
        # The last pair sits directly in `children`; earlier pairs are
        # wrapped in repetition nodes under children[0].
        groups = [children[1:]]
        for item in children[0]:
            groups.append(item.children)
        result = {}
        for item in groups:
            try:
                result[item[1].text] = self._eval(item[4])
            except IsEmpty:
                raise Exception(
                    'Paramater values are not allowed to be IsEmpty')
        return result

    def const_paramater_map(self, node):
        # Like paramater_map but every value must be a CONSTANT; the
        # constants are unwrapped to their raw Python values.
        result = self.paramater_map(node)
        for key in result.keys():
            if result[key][0] != Types.CONSTANT:
                raise Exception(
                    'Expected Constant paramater, got type "{0}"'.format(
                        result[key][0]))
            result[key] = result[key][1]
        return result

    def jump_point(self, node):
        return (Types.JUMP_POINT, node.children[1].text)

    def goto(self, node):
        return (Types.GOTO, node.children[1].text)

    def time(self, node):
        # days:hours:mins:seconds — shorter forms drop the leading fields.
        parts = [int(i) for i in node.text.split(':')]
        if len(parts) == 4:
            return (Types.CONSTANT, timedelta(days=parts[0],
                                              hours=parts[1],
                                              minutes=parts[2],
                                              seconds=parts[3]))
        elif len(parts) == 3:
            return (Types.CONSTANT, timedelta(hours=parts[0],
                                              minutes=parts[1],
                                              seconds=parts[2]))
        else:
            return (Types.CONSTANT, timedelta(minutes=parts[0],
                                              seconds=parts[1]))

    def number_float(self, node):
        return (Types.CONSTANT, float(node.text))

    def number_int(self, node):
        return (Types.CONSTANT, int(node.text))

    def boolean(self, node):
        return (Types.CONSTANT,
                True if node.text.lower() == 'true' else False)

    def text(self, node):
        # The string's body sits between its quote children.
        return (Types.CONSTANT, node.children[0].children[1].text)

    def none(self, node):
        return (Types.CONSTANT, None)

    def exists(self, node):
        return (Types.EXISTS, self._eval(node.children[2]))

    def array(self, node):
        # Elements before the last are wrapped in repetition nodes.
        children = node.children[1].children
        if len(children) == 0:
            return (Types.ARRAY, [])
        children = children[0].children
        values = []
        for item in children[0]:
            values.append(self._eval(item))
        values.append(self._eval(children[1]))
        return (Types.ARRAY, values)

    def map(self, node):
        values = self._eval(node.children[1])
        return (Types.MAP, values)

    def variable(self, node):
        # Optional "module." prefix before the variable name.
        if len(node.children[1].children) > 0:
            module = node.children[1].children[0].children[0].text
        else:
            module = None
        return (Types.VARIABLE, {
            'module': module,
            'name': node.children[2].text
        })

    def array_map_item(self, node):
        # variable[index] — only variables may be indexed.
        variable = self._eval(node.children[0])
        if variable[0] != Types.VARIABLE:
            raise Exception('Can only index variables')
        index = self._eval(node.children[2])
        return (Types.ARRAY_MAP_ITEM, {
            'module': variable[1]['module'],
            'name': variable[1]['name'],
            'index': index
        })

    def infix(self, node):
        return (Types.INFIX, {
            'operator': node.children[2].text,
            'left': self._eval(node.children[1]),
            'right': self._eval(node.children[3])
        })

    def not_(self, node):
        # we are going to abuse the INFIX function for this one:
        # unary not is encoded as ('not', operand, None).
        return (Types.INFIX, {
            'operator': 'not',
            'left': self._eval(node.children[1]),
            'right': (Types.CONSTANT, None)
        })

    def other(self, node):
        return (Types.OTHER, node.children[0].text)

    def whiledo(self, node):
        return (Types.WHILE, {
            'condition': self._eval(node.children[1]),
            'expression': self._eval(node.children[4])
        })

    def ifelse(self, node):
        # Collect if / elif* / else? branches; the final else (if any)
        # gets condition None.
        branches = []
        branches.append({
            'condition': self._eval(node.children[1]),
            'expression': self._eval(node.children[4])
        })
        for item in node.children[5].children:
            branches.append({
                'condition': self._eval(item.children[2]),
                'expression': self._eval(item.children[5])
            })
        if len(node.children[6].children) > 0:
            branches.append({
                'condition': None,
                'expression': self._eval(
                    node.children[6].children[0].children[3])
            })
        return (Types.IFELSE, branches)

    def function(self, node):
        # Optional "module." prefix, a name, then the parameter map.
        params = self._eval(node.children[4])
        if len(node.children[1].children) > 0:
            module = node.children[1].children[0].children[0].text
        else:
            module = None
        return (Types.FUNCTION, {
            'module': module,
            'name': node.children[2].text,
            'paramaters': params
        })

    def assignment(self, node):
        target = self._eval(node.children[0])
        return (Types.ASSIGNMENT, {
            'target': target,
            'value': self._eval(node.children[3])
        })
Defendant eligible for work release. Probation Max of 3.00 Years 12/20/2011 3 years All conditions previously imposed to remain. """, """ Manufacture or Deliver Shreeves-Johns, Karen 07/13/2011 Probation Max of 3.00 Years 07/13/2011 3 years Defendant is to pay imposed mandatory court costs. To submit to random drug screens. To pursue a prescribed secular course of study or vocational training. Case relisted for status of compliance on 9/22/11 courtroom 605. Shreeves-Johns, Karen 12/20/2011 Confinement Min of 11.00 Months 15.00 Days 12/20/2011 Max of 23.00 Months 11 1/2 - 23 months Defendant eligible for work release. Probation Max of 3.00 Years 12/20/2011 3 years All conditions previously imposed to remain. """, ] grammar = Grammar(grammars[0]) root = grammar.parse(texts[0]) print("parsed.") visitor = DetailsVisitor() print(visitor.visit(root))
def test14(grammar: Grammar):
    """A quoted value containing escaped double quotes must parse."""
    sample = '{ ($.user.id = 2 && $.user.id = "a \\" b") || $.a="1"}'
    print(grammar.parse(sample))