def _nq_row(triple, context):
    """Render a triple and its context as one ``s p o c .`` text line.

    The subject, predicate and context are rendered with their own ``n3()``
    methods. The object goes through ``_quoteLiteral`` when it is a
    ``Literal`` and through ``n3()`` otherwise.

    Args:
        triple: Indexable ``(subject, predicate, object)`` of RDF terms.
        context: Term identifying the graph the triple belongs to.

    Returns:
        The formatted line, terminated by ``" .\\n"``.
    """
    obj = triple[2]
    fields = [triple[0].n3(), triple[1].n3()]
    fields.append(_quoteLiteral(obj) if isinstance(obj, Literal) else obj.n3())
    fields.append(context.n3())
    return "%s %s %s %s .\n" % tuple(fields)
def _nq_row(triple, context):
    """Render a triple and its context as one ``s p o c .`` text line.

    The object is rendered with ``_quoteLiteral`` when it is a ``Literal``
    and with ``n3()`` otherwise; in both cases the result is then passed
    through ``_xmlcharref_encode``. Subject, predicate and context are
    rendered with ``n3()`` only.

    Args:
        triple: Indexable ``(subject, predicate, object)`` of RDF terms.
        context: Term identifying the graph the triple belongs to.

    Returns:
        The formatted line, terminated by ``" .\\n"``.
    """
    subject_text = triple[0].n3()
    predicate_text = triple[1].n3()

    obj = triple[2]
    raw_object = _quoteLiteral(obj) if isinstance(obj, Literal) else obj.n3()
    object_text = _xmlcharref_encode(raw_object)

    context_text = context.n3()
    return u"%s %s %s %s .\n" % (
        subject_text,
        predicate_text,
        object_text,
        context_text,
    )
def _nq_row(triple, context):
    """Build the text row for one quad: subject, predicate, object, context.

    Only the object column is passed through ``_xmlcharref_encode``. Its
    input is ``_quoteLiteral(obj)`` for ``Literal`` objects and ``obj.n3()``
    for everything else.

    Args:
        triple: Indexable ``(subject, predicate, object)`` of RDF terms.
        context: Term identifying the graph the triple belongs to.

    Returns:
        The formatted row, terminated by ``" .\\n"``.
    """
    obj = triple[2]
    columns = [triple[0].n3(), triple[1].n3()]
    if isinstance(obj, Literal):
        columns.append(_xmlcharref_encode(_quoteLiteral(obj)))
    else:
        columns.append(_xmlcharref_encode(obj.n3()))
    columns.append(context.n3())
    return u"%s %s %s %s .\n" % tuple(columns)
def n3(node: Union[URIRef, BNode, Literal]) -> str:
    """Serialize a single RDFLib node for RDF 1.1 N-Triples output.

    Literals are rendered with ``_quoteLiteral``; URIRef and BNode nodes are
    rendered with their own ``n3()`` method.

    Src: https://github.com/RDFLib/rdflib/blob/c11f7b503b50b7c3cdeec0f36261fa09b0615380/rdflib/plugins/serializers/nt.py

    Args:
        node: An RDFLib node of type URIRef, BNode, or Literal.

    Returns:
        A string containing the serialized node.
    """
    if not isinstance(node, Literal):
        return "%s" % node.n3()
    return "%s" % _quoteLiteral(node)
def serializeQuad(g):
    """
    Serialize every quad in ``g`` to text, one ``s p o c .`` line per quad.

    Replacement for graph.serialize(format='nquads'), which the original
    author reported as still broken in rdflib 4.2.2: it returned an empty
    string for the graph in TestGraphFromQuads.testSerializes.

    Raises ValueError (carrying ``str(context)``) when a context's ``n3()``
    form contains ``'['``.
    """
    lines = []
    for subj, pred, obj, ctx in g.quads((None, None, None)):
        # The context shows up as a Graph for some quads and as a plain
        # identifier for others (the original author was unsure why), so
        # reduce it to the identifier before rendering.
        if isinstance(ctx, Graph):
            ctx = ctx.identifier
        if '[' in ctx.n3():
            raise ValueError(str(ctx))

        if isinstance(obj, Literal):
            obj_text = _quoteLiteral(obj)
        else:
            obj_text = obj.n3()
        lines.append(
            "%s %s %s %s .\n" % (subj.n3(), pred.n3(), obj_text, ctx.n3())
        )
    return ''.join(lines)
def _statement_encode(self, xxx_todo_changeme, context):
    """Build the sesame ``/statements`` URL for a triple pattern.

    Each truthy element of the pattern, and a truthy ``context``, adds one
    query parameter (``subj``, ``pred``, ``obj``, ``context``). When nothing
    is added the bare ``<self.url>/statements`` URL is returned.

    Args:
        xxx_todo_changeme: ``(subject, predicate, object)`` pattern; falsy
            members are left out of the query.
        context: Context identifier, wrapped in ``<...>`` when truthy.

    Returns:
        The statements URL, with a URL-encoded query string if any
        parameter was set.
    """
    subject, predicate, obj = xxx_todo_changeme
    params = {}
    endpoint = self.url + '/statements'

    if subject:
        params['subj'] = subject.n3().encode('utf8')
    if predicate:
        params['pred'] = predicate.n3()
    if obj:
        # NOTE(review): _quoteLiteral is handed the n3() text rather than
        # the node itself; confirm that is what the helper expects.
        params['obj'] = _quoteLiteral(obj.n3())
    if context:
        # TODO FIXME what about bnodes like _:adf23123
        params['context'] = "<%s>" % context

    if not params:
        return endpoint
    return endpoint + "?" + urlencode(params)
def _statement_encode(self, xxx_todo_changeme, context):
    """Encode a triple pattern and context as a sesame statements URL.

    The URL starts as ``self.url + '/statements'``. A query string is
    appended only when at least one of subject, predicate, object or
    context is truthy.

    Args:
        xxx_todo_changeme: ``(subject, predicate, object)`` pattern; falsy
            members contribute no query parameter.
        context: Context identifier; when truthy it is sent as
            ``<context>``.

    Returns:
        The resulting URL string.
    """
    subject, predicate, obj = xxx_todo_changeme
    params = {}
    endpoint = self.url + '/statements'

    if subject:
        params['subj'] = subject.n3().encode('utf8')
    if predicate:
        params['pred'] = predicate.n3()
    if obj:
        # NOTE(review): the helper receives obj.n3() (text), not the node;
        # confirm this matches what _quoteLiteral accepts.
        params['obj'] = _quoteLiteral(obj.n3())
    if context:
        # TODO FIXME what about bnodes like _:adf23123
        params['context'] = "<%s>" % context

    if not params:
        return endpoint
    return endpoint + "?" + urlencode(params)
def load(self, file_name, if_exists=ACTION_SKIP, method=METHOD_UPLOAD, batch_size=20):
    """Load an ``.nt`` or ``.ttl`` file into the graph named by its dataset URI.

    The file is parsed locally and must declare exactly one subject of type
    ``DATASET_TYPE_URI``; that subject is used as the target graph. The graph
    is created if missing, or handled according to ``if_exists`` when it is
    already present. The content is then sent either as a single file upload
    or as batched ``self.insert`` calls.

    Args:
        file_name: Path to the file; resolved with ``os.path.abspath``.
        if_exists: Policy for an already existing dataset. ``ACTION_SKIP``
            returns without importing. ``ACTION_DELETE`` deletes and
            re-creates the graph first. Any other value imports into the
            existing graph.
        method: With ``METHOD_UPLOAD`` and fewer than 1000 parsed triples the
            file is posted to ``self.upload_endpoint``; in every other case
            triples are inserted in batches.
        batch_size: Number of triples sent per ``self.insert`` call.

    Returns:
        A ``(success, message)`` tuple. Failures are reported through this
        tuple rather than raised, for the conditions handled here.
    """
    file_path = os.path.abspath(file_name)
    exists = os.path.isfile(file_path)
    print("Trying to load {}".format(file_path))
    if not exists:
        return False, "{} file not found".format(file_path)

    _, file_extension = os.path.splitext(file_path)
    file_extension = file_extension.strip('.')
    if file_extension not in ['nt', 'ttl']:
        return False, "Invalid extension {} : only ttl and nt files are supported now".format(file_extension)

    # Parse the file; the extension doubles as the parser format name.
    g = Graph()
    g.parse(file_path, format=file_extension)
    print("Parsed. Size {}".format(len(g)))

    # Check if it contains the mandatory dataset definition
    # TODO: This should be expanded to check also if author name,
    # title, description, and licence are defined
    found = False
    dataset_uri = None
    for subject, _, _ in g.triples((None, RDF.type, DATASET_TYPE_URI)):
        if found:
            return False, "Multiple declarations of type {} . Only one expected".format(DATASET_TYPE_URI)
        found = True
        dataset_uri = subject
    if not found:
        return False, "Missing declarations of type {} . One expected".format(DATASET_TYPE_URI)

    try:
        if not self.exists(dataset_uri):
            print("Creating graph {}".format(dataset_uri))
            success, message = self.create(dataset_uri)
            if not success:
                return False, message
        else:
            print("Dataset {} exists".format(dataset_uri))
            if if_exists == ACTION_SKIP:
                return True, "Skipping import"
            if if_exists == ACTION_DELETE:
                # Emptying is done as delete followed by create.
                success, message = self.delete(dataset_uri)
                if not success:
                    return False, message
                success, message = self.create(dataset_uri)
                if not success:
                    return False, message
                print("Deleted contents of {}".format(dataset_uri))
    except Unauthorized:
        return False, "Failed to connect to the data: access not allowed"
    except urllib.error.HTTPError as err:
        return False, "Failed to connect to the data: {}".format(err)

    success, message = True, "OK"
    if method == METHOD_UPLOAD and len(g) < 1000:
        print("Uploading contents of {}".format(file_path))
        # Open the payload in a context manager so the handle is closed
        # whether the request succeeds or raises.
        with open(file_path, 'rb') as payload:
            files = {
                'file': (os.path.basename(file_path), payload, 'text/turtle'),
            }
            response = requests.post(
                "{}?graph={}".format(self.upload_endpoint, dataset_uri),
                files=files,
                auth=(self.endpoint_user, self.endpoint_pwd),
            )
        success, message = response.status_code in [200, 201], response.text
    else:
        print("Serializing iterative insertions")
        triples = []
        count_inserted = 0
        for sb, pr, obj in g:
            obj_text = _quoteLiteral(obj) if isinstance(obj, Literal) else obj.n3()
            triples.append("{} {} {}".format(sb.n3(), pr.n3(), obj_text))
            if len(triples) >= batch_size:
                success, message = self.insert(dataset_uri, triples)
                if not success:
                    return False, message
                count_inserted += 1
                triples = []
                if count_inserted % 10 == 1:
                    print("Inserted {} ".format(count_inserted * batch_size))
                    # NOTE(review): the pause is taken to belong to this
                    # progress branch (batches 1, 11, 21, ...); confirm it
                    # was not meant to run after every batch.
                    sleep(3)
        # Send whatever is left over from the last, partially filled batch.
        if len(triples) > 0:
            success, message = self.insert(dataset_uri, triples)
    return success, message