def defineParsers():
    """
    Build the pyparsing grammar for the line-oriented command language.

    Returns a ``(program, func_call)`` tuple: ``program`` matches a whole
    multi-line input, ``func_call`` matches a single call and is handed back
    separately so it can be exercised directly in tests.

    Calling this function changes class-wide ``ParserElement`` settings
    (packrat caching and the default whitespace characters).
    """
    # Packrat mode caches intermediate results to speed up parsing.
    ParserElement.enablePackrat()
    # A newline ends a statement, so only tab and space are skipped as
    # whitespace. This must be set before any element below is created.
    ParserElement.setDefaultWhitespaceChars('\t ')

    # Recursive rule: a bracketed term contains a function call, which is
    # itself made of terms, hence the forward declaration.
    func_call = Forward()

    # --- terminals -------------------------------------------------------
    symbol = Word(alphas+'_-', alphanums+'_-')
    symbol.setParseAction(action_symbol)

    # The parse action is installed on the shared ``quotedString`` object
    # itself (no copy is made), exactly as a chained call would do.
    q_symbol = quotedString
    q_symbol.setParseAction(action_q_symbol)

    open_paren = Literal("(").suppress()
    close_paren = Literal(")").suppress()
    bracket_term = open_paren - func_call + close_paren

    word = symbol | q_symbol | bracket_term

    # --- function call ---------------------------------------------------
    # Matches "foo | bar | baz" as well as a lone "foo".
    pipeline = word + ZeroOrMore("|" - word)
    pipeline.setParseAction(action_pipeline)

    # Matches "foo|bar op1 op2 op3": a pipeline followed by its operands.
    call_body = pipeline - ZeroOrMore(word)
    call_body.setParseAction(action_func_call)
    func_call << call_body

    # --- program structure -----------------------------------------------
    statement_line = func_call - LineEnd()
    line = LineEnd() | statement_line          # empty line or function call
    program = ZeroOrMore(line) + StringEnd()   # several lines form a program

    # '%' starts a comment that runs to the end of the line.
    program.ignore('%' + restOfLine)
    # Keep tabs as they are; do not expand them before parsing.
    program.parseWithTabs()

    return program, func_call
class TOMLParser(object):
    """Parse TOML text into nested dictionaries using a pyparsing grammar.

    The grammar is built once in ``__init__``. Each call to ``parse`` resets
    the per-parse state kept on the instance (``_root`` is the result
    dictionary, ``_cur`` is the dictionary of the key group currently being
    filled) and then runs the grammar, whose parse actions populate that
    state as a side effect.
    """

    # strptime format of the only datetime form the grammar accepts.
    ISO8601 = "%Y-%m-%dT%H:%M:%SZ"

    def __init__(self):
        """Build the grammar and bind the ``_parse_*`` methods as actions."""
        # Key names: printable characters except brackets, '=' and '"'.
        key_name = Word(re.sub(r"[\[\]=\"]", "", printables))
        # Key group name segments: printable characters except brackets
        # and '.', which separates the segments.
        kgrp_name = Word(re.sub(r"[\[\]\.]", "", printables))

        basic_int = Optional("-") + ("0" | Word(nums))
        types = dict(
            string=QuotedString("\"", escChar="\\"),
            integer=Combine(basic_int),
            float=Combine(basic_int + "." + Word(nums)),
            datetime=Regex(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z"),
            boolean=Keyword("true") | Keyword("false"),
            array=Forward(),
        )

        # An array body is a delimited list built from a single value type;
        # "array" itself is among the types, which allows nested arrays.
        pure_array = Or(delimitedList(type_) for type_ in types.values())
        types["array"] << Group(Suppress("[") + Optional(pure_array) + Suppress("]"))

        value = Or(type_ for type_ in types.values())
        keyvalue = key_name + Suppress("=") + value + Suppress(LineEnd())

        keygroup_namespace = kgrp_name + ZeroOrMore(Suppress(".") + kgrp_name)
        keygroup = "[" + keygroup_namespace + "]" + LineEnd()

        comments = pythonStyleComment

        self._toplevel = ZeroOrMore(keyvalue | keygroup)
        self._toplevel.ignore(comments)

        # Each value type is converted by the method named after it, so the
        # ``_parse_<type>`` method names below must match the keys above.
        for k, v in types.items():
            v.setParseAction(getattr(self, "_parse_"+k))
        keyvalue.setParseAction(self._parse_keyvalue)
        keygroup_namespace.setParseAction(self._parse_keygroup_namespace)

    def _parse_string(self, src, loc, toks):
        """Validate the escape sequences of a string token and unescape it.

        Raises ParseException when a backslash (not itself preceded by a
        backslash) is followed by a character other than one of
        ``0 t n r " \\``.
        """
        match = re.search(r"(?<!\\)(\\[^0tnr\"\\])", toks[0])
        if match:
            raise ParseException("Reserved escape sequence \"%s\"" % match.group(), loc)
        return unescape(toks[0])

    def _parse_integer(self, toks):
        """Convert an integer token to ``int``."""
        return int(toks[0])

    def _parse_float(self, toks):
        """Convert a float token to ``float``."""
        return float(toks[0])

    def _parse_boolean(self, toks):
        """Convert the keyword ``true``/``false`` to the matching ``bool``.

        The token is compared with the literal text: ``bool("false")`` would
        be True because any non-empty string is truthy.
        """
        return toks[0] == "true"

    def _parse_datetime(self, src, loc, toks):
        """Convert a datetime token to ``datetime`` using ``ISO8601``.

        Raises ParseException when the text has the right shape but is not a
        valid date/time.
        """
        try:
            return datetime.strptime(toks[0], self.ISO8601)
        except ValueError:
            # this informative error message will never make it out because
            # pyparsing catches ParseBaseException and reraises on its own.
            # oh well.
            raise ParseException("invalid datetime \"%s\"" % toks[0], loc)

    def _parse_array(self, toks):
        """Return the grouped array wrapped in a list.

        The wrapping list keeps the whole array as one single token.
        """
        return [toks[0]]

    def _parse_keyvalue(self, s, loc, toks):
        """Store a ``key = value`` pair in the current key group.

        Raises ParseException when the key is already present there.
        """
        k, v = toks.asList()
        if k in self._cur:
            raise ParseException("key %s already exists" % k, loc)
        self._cur[k] = v

    def _parse_keygroup_namespace(self, s, loc, toks):
        """Make the key group named by *toks* the current one.

        Walks from the root through one nested dictionary per name segment,
        creating the missing ones. Raises ParseException when a segment is
        already bound to something that is not a dictionary.
        """
        cur = self._root
        for subname in toks:
            subspace = cur.get(subname, {})
            if not isinstance(subspace, dict):
                raise ParseException("key %s already exists" % subname, loc)
            cur = cur.setdefault(subname, subspace)
        self._cur = cur

    def parse(self, s):
        """Parse the TOML document *s* and return it as a dictionary.

        The whole input must match the grammar (``parseAll=True``); otherwise
        the exception raised by ``parseString`` propagates to the caller.
        """
        self._root = {}
        self._cur = self._root
        self._toplevel.parseWithTabs()
        self._toplevel.parseString(s, parseAll=True)
        return self._root
def __init__(self, processor, baseiri, strict=False):
    """
    Initialise the parser state and build the LD Patch grammar.

    The three arguments are passed unchanged to ``self.reset``. The grammar
    is then assembled, each rule getting its handler method as parse action,
    and stored in ``self.grammar``.

    When *strict* is false, two extensions are enabled: a second prefix
    form (case-insensitive ``prefix`` keyword, without the trailing
    ``PERIOD``) and ``#`` comments running to the end of the line.
    """
    # pylint: disable=R0914,R0915
    self.reset(processor, baseiri, strict)

    # NOTE(review): IRIREF is defined outside this method, so this replaces
    # its parse action on the shared object -- confirm that is intended
    # when several parser instances coexist.
    IRIREF.setParseAction(self._parse_iri)

    # ---- Turtle terms ---------------------------------------------------
    PrefixedName = (PNAME_LN | PNAME_NS).setParseAction(self._parse_pname)
    Iri = IRIREF | PrefixedName
    BNode = BLANK_NODE_LABEL | ANON

    datatype_suffix = Group(Suppress("^^") + Iri)("datatype")
    RDFLiteral = (
        STRING + Optional(LANGTAG("langtag") | datatype_suffix)
    ).setParseAction(self._parse_turtleliteral)
    TtlLiteral = RDFLiteral | NUMERIC_LITERAL | BOOLEAN_LITERAL

    # Object and PredicateObjectList are used before they can be defined.
    Object = Forward()
    PredicateObjectList = Forward()

    Collection = (
        Suppress("(") + ZeroOrMore(Object) + Suppress(")")
    ).setParseAction(self._parse_collection)
    BlankNodePropertyList = (
        Suppress("[") + PredicateObjectList + Suppress("]")
    ).setParseAction(self._parse_bnpl)

    Subject = Iri | BNode | Collection | VARIABLE  # added for LD Patch
    Predicate = Iri
    Object << (  # pylint: disable=W0104
        Iri
        | BNode
        | Collection
        | BlankNodePropertyList
        | TtlLiteral
        | VARIABLE  # added for LD Patch
    )

    # ---- Turtle triples -------------------------------------------------
    Verb = (Predicate | Keyword("a")).setParseAction(self._parse_verb)
    ObjectList = Group(
        Object + ZeroOrMore(COMMA + Object)
    ).setParseAction(self._parse_as_list)
    PredicateObjectList << (  # pylint: disable=W0106
        Verb
        + ObjectList
        + ZeroOrMore(SEMICOLON + Optional(Verb + ObjectList))
    )
    Triples = (
        (Subject + PredicateObjectList)
        | (BlankNodePropertyList + Optional(PredicateObjectList))
    ).setParseAction(self._parse_tss)

    # ---- Path expressions -----------------------------------------------
    Value = Iri | TtlLiteral | VARIABLE
    InvPredicate = (
        Suppress("^") + Predicate
    ).setParseAction(self._parse_invpredicate)
    Step = Suppress("/") + (Predicate | InvPredicate | INDEX)
    Filter = Forward()
    Constraint = Filter | UNICITY_CONSTRAINT
    Path = Group(
        OneOrMore(Step | Constraint)
    ).setParseAction(self._parse_as_list)
    # The "path" component repeats the body of Path (with ZeroOrMore)
    # instead of reusing the Path rule, because it needs its own results
    # name.
    Filter << (  # pylint: disable=W0106
        Suppress("[")
        + Group(ZeroOrMore(Step | Constraint))("path")
        + Optional(Suppress("=") + Object)("value")
        + Suppress("]")
    )
    Filter.setParseAction(self._parse_filter)

    # ---- Graphs ---------------------------------------------------------
    Turtle = Triples + ZeroOrMore(PERIOD + Triples) + Optional(PERIOD)
    Graph = Suppress("{") + Optional(Turtle) + Suppress("}")

    # ---- Statements -----------------------------------------------------
    Prefix = Literal("@prefix") + PNAME_NS + IRIREF + PERIOD
    if not strict:
        SparqlPrefix = CaselessKeyword("prefix") + PNAME_NS + IRIREF
        Prefix = Prefix | SparqlPrefix
    # The action goes on whichever form of Prefix was finally selected.
    Prefix.setParseAction(self._do_prefix)

    Bind = (
        BIND_CMD + VARIABLE + Value + Optional(Path) + PERIOD
    ).setParseAction(self._do_bind)
    Add = (ADD_CMD + Graph + PERIOD).setParseAction(self._do_add)
    AddNew = (ADDNEW_CMD + Graph + PERIOD).setParseAction(self._do_add_new)
    Delete = (DELETE_CMD + Graph + PERIOD).setParseAction(self._do_delete)
    DeleteExisting = (
        DELETEEXISTING_CMD + Graph + PERIOD
    ).setParseAction(self._do_delete_existing)
    Cut = (CUT_CMD + VARIABLE + PERIOD).setParseAction(self._do_cut)
    UpdateList = (
        UPDATELIST_CMD + Subject + Predicate + SLICE + Collection + PERIOD
    ).setParseAction(self._do_updatelist)

    Statement = (
        Prefix
        | Bind
        | Add
        | AddNew
        | Delete
        | DeleteExisting
        | Cut
        | UpdateList
    )

    # ---- Whole patch ----------------------------------------------------
    Patch = ZeroOrMore(Statement)
    if not strict:
        Patch.ignore("#" + restOfLine)  # Comment
    Patch.parseWithTabs()

    self.grammar = Patch