def parse(self, f, bnode_context=None):
    """
    Parse f as an N-Triples file.

    :type f: :term:`file object`
    :param f: the N-Triples source
    :type bnode_context: `dict`, optional
    :param bnode_context: a dict mapping blank node identifiers (e.g., ``a`` in ``_:a``)
                          to `~rdflib.term.BNode` instances. An empty dict can be
                          passed in to define a distinct context for a given call to
                          `parse`.
    :return: ``self.sink``, once ``self.readline()`` has returned ``None``
    :raises ParseError: if ``f`` has no ``read`` attribute, or if a line cannot
                        be parsed (the underlying error is chained as the cause)
    """
    if not hasattr(f, "read"):
        raise ParseError("Item to parse must be a file-like object.")

    if not hasattr(f, "encoding") and not hasattr(f, "charbuffer"):
        # someone still using a bytestream here?
        f = codecs.getreader("utf-8")(f)

    self.file = f
    self.buffer = ""
    while True:
        self.line = self.readline()
        if self.line is None:
            break
        # parseline() shortens self.line as tokens are consumed, so keep the
        # untouched text around to report the complete offending line.
        original_line = self.line
        try:
            self.parseline(bnode_context=bnode_context)
        except ParseError as err:
            raise ParseError("Invalid line: {}".format(original_line)) from err
    return self.sink
def eat(self, pattern):
    """
    Consume the part of ``self.line`` that ``pattern`` matches.

    :param pattern: an object exposing ``match()`` and a ``pattern`` attribute
                    (e.g. a compiled regular expression)
    :return: the match object; ``self.line`` is cut down to the text that
             follows the match
    :raises ParseError: if ``pattern.match(self.line)`` yields nothing
    """
    found = pattern.match(self.line)
    if found:
        # Drop everything up to the end of the match; the rest is still pending.
        self.line = self.line[found.end():]
        return found
    raise ParseError("Failed to eat %s at %s" % (pattern.pattern, self.line))
def parsestring(self, s, **kwargs):
    """
    Parse s as an N-Triples string.

    :param s: the source text, given as ``str``, ``bytes`` or ``bytearray``
    :param kwargs: forwarded unchanged to ``self.parse``
    :raises ParseError: if ``s`` is none of the accepted types
    """
    if isinstance(s, (bytes, bytearray)):
        # Binary input is read back through a UTF-8 decoding stream reader.
        stream = codecs.getreader("utf-8")(BytesIO(s))
    elif isinstance(s, str):
        stream = StringIO(s)
    else:
        raise ParseError("Item to parse must be a string instance.")
    self.parse(stream, **kwargs)
def unquote(s):
    """
    Unquote an N-Triples string.

    When the module-level ``validate`` flag is falsy the escapes are decoded
    in one step. Otherwise the input is walked piece by piece and a
    ``ParseError`` is raised for anything that is not recognised.
    """
    if not validate:
        if isinstance(s, str):  # nquads
            return decodeUnicodeEscape(s)
        return s.decode("unicode-escape")

    pieces = []
    while s:
        # Run of characters that need no unescaping.
        plain = r_safe.match(s)
        if plain:
            pieces.append(plain.group(1))
            s = s[plain.end():]
            continue

        # Two-character escape, looked up in the ``quot`` table.
        simple = r_quot.match(s)
        if simple:
            pieces.append(quot[simple.group(1)])
            s = s[2:]
            continue

        # Hex escape: exactly one of the two groups carries the digits.
        numeric = r_uniquot.match(s)
        if not numeric:
            if s.startswith("\\"):
                raise ParseError("Illegal escape at: %s..." % s[:10])
            raise ParseError("Illegal literal character: %r" % s[0])

        short_hex, long_hex = numeric.groups()
        codepoint = int(short_hex or long_hex, 16)
        if codepoint > 0x10FFFF:
            raise ParseError("Disallowed codepoint: %08X" % codepoint)
        pieces.append(chr(codepoint))
        s = s[numeric.end():]
    return "".join(pieces)
def parseline(self, bnode_context=None):
    """
    Parse the statement currently held in ``self.line``.

    An empty line or one starting with ``#`` is skipped. Otherwise the
    subject, predicate and object are read in that order and handed to
    ``self.sink.triple``.

    :param bnode_context: passed through to ``self.subject`` and ``self.object``
    :raises ParseError: if text remains on the line after the tail is consumed
    """
    self.eat(r_wspace)
    if not self.line:
        return  # nothing left: the line is empty
    if self.line.startswith("#"):
        return  # the line is a comment

    subj = self.subject(bnode_context)
    self.eat(r_wspaces)
    pred = self.predicate()
    self.eat(r_wspaces)
    obj = self.object(bnode_context)
    self.eat(r_tail)

    if self.line:
        raise ParseError("Trailing garbage: {}".format(self.line))
    self.sink.triple(subj, pred, obj)
def literal(self):
    """
    Read a literal from the front of ``self.line``.

    :return: a ``Literal`` built from the unquoted lexical form plus either a
             language tag or a datatype, or ``False`` when ``self.peek('"')``
             is falsy
    :raises ParseError: if both a language tag and a datatype are present
    """
    if not self.peek('"'):
        return False

    value, language, datatype = self.eat(r_literal).groups()
    # Normalise missing/empty captures to None.
    language = language or None
    datatype = URI(uriquote(unquote(datatype))) if datatype else None
    if language and datatype:
        raise ParseError("Can't have both a language and a datatype")
    return Literal(unquote(value), language, datatype)
def object(self, bnode_context=None):
    """
    Read the object term of a statement.

    Tries ``self.uriref()``, then ``self.nodeid(bnode_context)``, then
    ``self.literal()``, stopping at the first truthy result.

    :raises ParseError: if the final result is the ``False`` sentinel
    """
    term = self.uriref()
    if not term:
        term = self.nodeid(bnode_context)
    if not term:
        term = self.literal()
    # Only the exact False sentinel means "nothing matched"; other falsy
    # values returned by the last reader are passed through untouched.
    if term is False:
        raise ParseError("Unrecognised object type")
    return term
def predicate(self):
    """
    Read the predicate term of a statement.

    :return: the truthy value produced by ``self.uriref()``
    :raises ParseError: if ``self.uriref()`` returns a falsy value
    """
    term = self.uriref()
    if term:
        return term
    raise ParseError("Predicate must be uriref")
def subject(self, bnode_context=None):
    """
    Read the subject term of a statement.

    Tries ``self.uriref()`` first and falls back to
    ``self.nodeid(bnode_context)``.

    :raises ParseError: if neither call returns a truthy value
    """
    # @@ Consider using dictionary cases
    term = self.uriref()
    if not term:
        term = self.nodeid(bnode_context)
    if term:
        return term
    raise ParseError("Subject must be uriref or nodeID")