def parse_string(self, content):
    """Parse N-Triples text and record each triple on this object.

    ``content`` is passed unchanged to ``NTriplesParser.parsestring``.
    Every triple reported by the parser is stored as an ``(s, p, o)``
    tuple through ``self.triples.add``.
    """
    owner = self

    class TripleCollector(object):
        """Parser sink that forwards each reported triple to the owner."""

        def triple(_collector, s, p, o):
            # ``owner`` is the object whose ``parse_string`` is running;
            # the sink instance itself holds no state.
            owner.triples.add((s, p, o))

    ntriples.NTriplesParser(sink=TripleCollector()).parsestring(content)
def parse(self, uri):
    """Fetch ``uri`` and add every parsed N-Triples statement to ``self.triples``.

    The resource is opened with ``urllib.urlopen`` and the resulting
    file-like object is passed to ``NTriplesParser.parse``.  Each triple
    the parser reports is stored as an ``(s, p, o)`` tuple through
    ``self.triples.add``.

    The opened resource is closed even when parsing raises; the exception
    then propagates to the caller.
    """
    class Sink(object):
        # The first parameter is the sink instance; ``self`` below is the
        # enclosing object captured by the closure.
        def triple(sink, s, p, o):
            self.triples.add((s, p, o))

    p = ntriples.NTriplesParser(sink=Sink())
    u = urllib.urlopen(uri)
    try:
        p.parse(u)
    finally:
        # Release the connection regardless of parse errors.
        u.close()
def _import_data(src, graph_uri):
    """Imports RDF/NTriples from a single source, either local file or via URL.

    ``src`` is an already opened source object that is handed to
    ``NTriplesParser.parse``.  It is closed before this function returns,
    including when parsing or the final flush raises.

    ``graph_uri`` is forwarded to the ``HBaseSink`` constructor together
    with ``HBASE_THRIFT_PORT``.

    Returns the ``length`` attribute of the sink object returned by the
    parser.
    """
    nt_parser = ntriples.NTriplesParser(
        sink=HBaseSink(server_port=HBASE_THRIFT_PORT, graph_uri=graph_uri))
    try:
        sink = nt_parser.parse(src)
        # needed as the number of triples can be smaller than batch size (!)
        sink.wrapup()
    finally:
        # Always release the source, even if the import failed midway.
        src.close()
    return sink.length
def import_dataset(self, uri):
    """Fetch ``uri``, parse it as N-Triples and store each parsed graph.

    The resource is opened with ``urllib.urlopen`` and parsed into a
    ``GKSink``.  For every key in the ``ng`` attribute of the object
    returned by the parser, the key and its value are printed and then
    passed to ``self.put_ng``.

    The opened resource is closed even when parsing raises; the exception
    then propagates to the caller and nothing is stored.
    """
    parser = ntriples.NTriplesParser(sink=GKSink())
    u = urllib.urlopen(uri)
    try:
        s = parser.parse(u)
    finally:
        # Release the connection regardless of parse errors.
        u.close()
    for ng in s.ng:
        # Single-argument ``print(...)`` prints the same under the Python 2
        # print statement and the Python 3 print function.
        print(ng)
        print(s.ng[ng])
        self.put_ng(ng, s.ng[ng])
def ntriples_to_dict(file):
    """Parse N-Triples from ``file`` into a nested plain ``dict``.

    The result maps subject -> predicate -> list of value dicts.  Subjects
    are the unicode form of a URI, or ``u'_:<id>'`` for a blank node.
    Each value dict has ``'value'`` and ``'type'`` (``u'uri'``,
    ``u'bnode'`` or ``u'literal'``).  A literal also carries ``'lang'``
    when a language is present, otherwise ``'datatype'`` when a datatype
    is present.

    Raises ``ValueError`` for a subject or object of any other type.
    """
    class TripleDict(dict):
        """Dict that doubles as the parser sink via its ``triple`` method."""

        def triple(self, s, p, o):
            if isinstance(s, ntriples.URI):
                subject_key = unicode(s)
            elif isinstance(s, ntriples.bNode):
                subject_key = u'_:%s' % s
            else:
                raise ValueError('Unknown subject type: %s' % type(s))
            predicate_key = unicode(p)
            object_text = unicode(o)

            if isinstance(o, ntriples.URI):
                entry = {'value': object_text, 'type': u'uri'}
            elif isinstance(o, ntriples.bNode):
                entry = {'value': object_text, 'type': u'bnode'}
            elif isinstance(o, ntriples.Literal):
                # The literal's string form is split into three
                # space-separated fields: language, datatype, lexical text.
                # A missing language or datatype appears as the text 'None'.
                lang, dtype, literal = o.split(' ', 2)
                entry = {'value': literal, 'type': u'literal'}
                if lang != 'None':
                    entry['lang'] = lang
                elif dtype != 'None':
                    entry['datatype'] = dtype
            else:
                raise ValueError('Unknown object type: %s' % type(o))

            # Store only after validation, so a rejected triple leaves the
            # mapping untouched.
            by_predicate = self.setdefault(subject_key, {})
            by_predicate.setdefault(predicate_key, []).append(entry)

    # NOTE(review): ``parse`` is presumably returning the sink passed in --
    # confirm against the ntriples module in use.
    parsed = ntriples.NTriplesParser(TripleDict()).parse(file)
    return dict(parsed)