Exemple #1
0
    def parse(self, f, bnode_context=None):
        """
        Parse f as an N-Triples file.

        :type f: :term:`file object`
        :param f: the N-Triples source
        :type bnode_context: `dict`, optional
        :param bnode_context: a dict mapping blank node identifiers (e.g., ``a`` in ``_:a``)
                              to `~rdflib.term.BNode` instances. An empty dict can be
                              passed in to define a distinct context for a given call to
                              `parse`.
        :return: ``self.sink``, once ``self.readline()`` has returned ``None``
        :raises ParseError: if ``f`` has no ``read`` attribute, or if
                            ``parseline`` rejects a line; in the latter case
                            the original error is attached as ``__cause__``
        """

        if not hasattr(f, "read"):
            raise ParseError("Item to parse must be a file-like object.")

        if not hasattr(f, "encoding") and not hasattr(f, "charbuffer"):
            # Neither text-stream marker is present, so treat the source as a
            # byte stream and decode it as UTF-8 while reading.
            f = codecs.getreader("utf-8")(f)

        self.file = f
        self.buffer = ""
        while True:
            self.line = self.readline()
            if self.line is None:
                break
            try:
                self.parseline(bnode_context=bnode_context)
            except ParseError as err:
                # Chain explicitly so the traceback reports the specific
                # failure as the direct cause of the "Invalid line" error
                # rather than as an unrelated error raised during handling.
                raise ParseError("Invalid line: {}".format(self.line)) from err
        return self.sink
Exemple #2
0
 def eat(self, pattern):
     """
     Consume the prefix of ``self.line`` that ``pattern`` matches.

     :param pattern: a compiled regular expression; it is applied with
                     ``match``, i.e. at the start of ``self.line``
     :return: the match object; ``self.line`` is left holding only the
              text that follows the match
     :raises ParseError: if ``pattern`` does not match ``self.line``
     """
     found = pattern.match(self.line)
     if found:
         # Drop the matched prefix and hand the match back to the caller.
         self.line = self.line[found.end() :]
         return found
     raise ParseError("Failed to eat %s at %s" % (pattern.pattern, self.line))
Exemple #3
0
 def parsestring(self, s, **kwargs):
     """
     Parse s as an N-Triples string.

     :param s: the N-Triples source as `str`, `bytes` or `bytearray`;
               binary input is read through a UTF-8 decoder
     :param kwargs: forwarded unchanged to `parse`
     :raises ParseError: if ``s`` is none of the accepted types
     """
     is_binary = isinstance(s, (bytes, bytearray))
     if not (is_binary or isinstance(s, str)):
         raise ParseError("Item to parse must be a string instance.")
     # Present the input as a text stream so `parse` can read it like a file.
     source = codecs.getreader("utf-8")(BytesIO(s)) if is_binary else StringIO(s)
     self.parse(source, **kwargs)
Exemple #4
0
def unquote(s):
    """
    Unquote an N-Triples string.

    When the global ``validate`` is false the input is decoded in one step:
    `str` input through ``decodeUnicodeEscape``, anything else through its
    own ``decode("unicode-escape")``. Otherwise the input is consumed from
    the front, trying ``r_safe``, then ``r_quot``, then ``r_uniquot`` at each
    position.

    :param s: the escaped text
    :return: the text with its escape sequences replaced
    :raises ParseError: in validating mode, for a code point above
                        ``0x10FFFF``, for a backslash that starts no
                        recognised escape, or for any other character that
                        none of the patterns accepts
    """
    if not validate:
        if not isinstance(s, str):
            return s.decode("unicode-escape")
        return decodeUnicodeEscape(s)  # nquads

    pieces = []
    rest = s
    while rest:
        plain = r_safe.match(rest)
        if plain:
            pieces.append(plain.group(1))
            rest = rest[plain.end():]
            continue

        simple = r_quot.match(rest)
        if simple:
            # Look the escape up in the ``quot`` table.
            pieces.append(quot[simple.group(1)])
            # NOTE(review): skips a fixed two characters, presumably the
            # backslash plus one escape character -- confirm against r_quot.
            rest = rest[2:]
            continue

        numeric = r_uniquot.match(rest)
        if not numeric:
            # Nothing matched here: report what kind of input was rejected.
            if rest.startswith("\\"):
                raise ParseError("Illegal escape at: %s..." % rest[:10])
            raise ParseError("Illegal literal character: %r" % rest[0])

        # Whichever of the two groups matched holds the hexadecimal digits.
        short_hex, long_hex = numeric.groups()
        codepoint = int(short_hex or long_hex, 16)
        if codepoint > 0x10FFFF:
            raise ParseError("Disallowed codepoint: %08X" % codepoint)
        pieces.append(chr(codepoint))
        rest = rest[numeric.end():]
    return "".join(pieces)
Exemple #5
0
    def parseline(self, bnode_context=None):
        """
        Parse the statement held in ``self.line`` and pass it to the sink.

        After what ``r_wspace`` matches is consumed, an empty line or one
        starting with ``#`` is ignored. Otherwise a subject, a predicate and
        an object are read in that order, separated by what ``r_wspaces``
        matches and followed by what ``r_tail`` matches, and the result is
        handed to ``self.sink.triple``.

        :param bnode_context: forwarded to ``subject`` and ``object``
        :raises ParseError: if text is left over after the tail has been
                            consumed
        """
        self.eat(r_wspace)
        remaining = self.line
        if not remaining or remaining.startswith("#"):
            # Nothing to parse: blank line or comment.
            return

        subj = self.subject(bnode_context)
        self.eat(r_wspaces)

        pred = self.predicate()
        self.eat(r_wspaces)

        obj = self.object(bnode_context)
        self.eat(r_tail)

        leftover = self.line
        if leftover:
            raise ParseError("Trailing garbage: {}".format(leftover))
        self.sink.triple(subj, pred, obj)
Exemple #6
0
 def literal(self):
     """
     Read a literal from the front of ``self.line``.

     :return: a `Literal` built from the unquoted text, the language and
              the datatype, or ``False`` when ``self.peek('"')`` reports
              that no literal starts here
     :raises ParseError: if the literal carries both a language and a
                         datatype
     """
     if not self.peek('"'):
         return False

     text, lang, dtype = self.eat(r_literal).groups()
     # Normalise absent or empty parts to None.
     lang = lang or None
     # The datatype is unquoted, then URI-quoted, then wrapped as a URI.
     dtype = URI(uriquote(unquote(dtype))) if dtype else None
     if lang and dtype:
         raise ParseError("Can't have both a language and a datatype")
     return Literal(unquote(text), lang, dtype)
Exemple #7
0
 def object(self, bnode_context=None):
     """
     Read the object term of a statement.

     Tries ``uriref()``, then ``nodeid(bnode_context)``, then ``literal()``,
     moving on whenever the previous attempt gives a falsy result.

     :param bnode_context: forwarded to ``nodeid``
     :return: the first truthy result, otherwise whatever ``literal()``
              returned
     :raises ParseError: if the final result is exactly ``False``
     """
     term = self.uriref()
     if not term:
         term = self.nodeid(bnode_context)
         if not term:
             term = self.literal()
     # Only the ``False`` sentinel means failure; any other value coming
     # from ``literal()``, even a falsy one, is returned to the caller.
     if term is False:
         raise ParseError("Unrecognised object type")
     return term
Exemple #8
0
 def predicate(self):
     """
     Read the predicate term of a statement.

     :return: the truthy result of ``uriref()``
     :raises ParseError: if ``uriref()`` gives a falsy result
     """
     term = self.uriref()
     if term:
         return term
     raise ParseError("Predicate must be uriref")
Exemple #9
0
 def subject(self, bnode_context=None):
     """
     Read the subject term of a statement.

     Tries ``uriref()`` first and falls back to ``nodeid(bnode_context)``
     when that gives a falsy result.

     :param bnode_context: forwarded to ``nodeid``
     :return: the first truthy result
     :raises ParseError: if both attempts give a falsy result
     """
     term = self.uriref()
     if not term:
         term = self.nodeid(bnode_context)
     if term:
         return term
     raise ParseError("Subject must be uriref or nodeID")