Ejemplo n.º 1
0
def _nq_row(triple, context):
    """Format one triple and its context as a single N-Quads style line.

    The object term is rendered with ``_quoteLiteral`` when it is a
    ``Literal`` and with its own ``n3()`` method otherwise; subject,
    predicate and context are always rendered with ``n3()``.

    Args:
        triple: indexable ``(subject, predicate, object)`` of terms.
        context: term identifying the graph; must provide ``n3()``.

    Returns:
        The line ``"<s> <p> <o> <c> .\\n"`` as a string.
    """
    obj = triple[2]
    use_quoting = isinstance(obj, Literal)
    subject_text = triple[0].n3()
    predicate_text = triple[1].n3()
    # Only the object column differs between literal and non-literal rows.
    if use_quoting:
        object_text = _quoteLiteral(obj)
    else:
        object_text = obj.n3()
    columns = (subject_text, predicate_text, object_text, context.n3())
    return "%s %s %s %s .\n" % columns
Ejemplo n.º 2
0
def _nq_row(triple, context):
    """Format one triple and its context as a single N-Quads style line.

    The object term is rendered with ``_quoteLiteral`` (for a ``Literal``)
    or ``n3()`` (for anything else) and the result is then passed through
    ``_xmlcharref_encode``. Subject, predicate and context are rendered
    with ``n3()`` and are not passed through ``_xmlcharref_encode``.

    Args:
        triple: indexable ``(subject, predicate, object)`` of terms.
        context: term identifying the graph; must provide ``n3()``.

    Returns:
        The line ``"<s> <p> <o> <c> .\\n"`` as a string.
    """
    obj = triple[2]
    is_literal = isinstance(obj, Literal)
    subject_text = triple[0].n3()
    predicate_text = triple[1].n3()
    raw_object = _quoteLiteral(obj) if is_literal else obj.n3()
    object_text = _xmlcharref_encode(raw_object)
    return u"%s %s %s %s .\n" % (
        subject_text, predicate_text, object_text, context.n3())
Ejemplo n.º 3
0
def _nq_row(triple, context):
    """Build the text line for one quad (triple + context).

    Subject, predicate and context are written with their ``n3()`` form.
    The object is written with ``_quoteLiteral`` if it is a ``Literal``,
    else with ``n3()``; in both cases the object text goes through
    ``_xmlcharref_encode`` before being placed in the line.

    Args:
        triple: indexable ``(subject, predicate, object)`` of terms.
        context: term identifying the graph; must provide ``n3()``.

    Returns:
        A string of the form ``"<s> <p> <o> <c> .\\n"``.
    """
    obj = triple[2]
    # Pick the renderer for the object once, then share the formatting.
    render_object = _quoteLiteral if isinstance(obj, Literal) else (
        lambda term: term.n3())
    parts = [triple[0].n3(), triple[1].n3()]
    parts.append(_xmlcharref_encode(render_object(obj)))
    parts.append(context.n3())
    return u"%s %s %s %s .\n" % tuple(parts)
Ejemplo n.º 4
0
def n3(node: Union[URIRef, BNode, Literal]) -> str:
    """Serialize a single RDFLib node to its string form.

    A ``Literal`` is rendered with ``_quoteLiteral``; any other node is
    rendered with its own ``n3()`` method. Per the original author's note
    this is intended to produce RDF 1.1 N-Triples compatible output and is
    adapted from RDFLib's NT serializer.

    Src: https://github.com/RDFLib/rdflib/blob/c11f7b503b50b7c3cdeec0f36261fa09b0615380/rdflib/plugins/serializers/nt.py

    Args:
        node: An RDFLib node (``URIRef``, ``BNode`` or ``Literal``).

    Returns:
        A string containing the serialized node.
    """
    # Literals take the dedicated quoting path; everything else falls through.
    if isinstance(node, Literal):
        return "%s" % _quoteLiteral(node)
    return "%s" % node.n3()
Ejemplo n.º 5
0
def serializeQuad(g):
    """Serialize every quad of ``g`` to a string, one statement per line.

    Replacement for ``graph.serialize(format='nquads')``. The original
    author notes that call was still broken in rdflib 4.2.2, returning an
    empty string for the graph in TestGraphFromQuads.testSerializes.

    Args:
        g: object whose ``quads((None, None, None))`` yields
            ``(subject, predicate, object, context)`` tuples.

    Returns:
        The concatenation of ``"<s> <p> <o> <c> .\\n"`` lines, or an
        empty string when there are no quads.

    Raises:
        ValueError: if the n3 form of a context contains ``'['``.
    """
    lines = []
    for subj, pred, obj, ctx in g.quads((None, None, None)):
        # The context is sometimes a Graph and sometimes already an
        # identifier (reason unknown to the original author); normalise
        # it to the identifier.
        if isinstance(ctx, Graph):
            ctx = ctx.identifier
        ctx_text = ctx.n3()
        if '[' in ctx_text:
            raise ValueError(str(ctx))
        if isinstance(obj, Literal):
            obj_text = _quoteLiteral(obj)
        else:
            obj_text = obj.n3()
        lines.append(
            "%s %s %s %s .\n" % (subj.n3(), pred.n3(), obj_text, ctx_text))
    return ''.join(lines)
Ejemplo n.º 6
0
 def _statement_encode(self, xxx_todo_changeme, context):
     """helper function to encode triples to sesame statement uri's"""
     (s, p, o) = xxx_todo_changeme
     query = {}
     url = self.url + '/statements'
     if s:
         query['subj'] = s.n3().encode('utf8')
     if p:
         query['pred'] = p.n3()
     if o:
         query['obj'] = _quoteLiteral(o.n3())
         # o.n3()
         # quote_plus(o.n3().encode("utf-8"))
     if context:
         # TODO FIXME what about bnodes like _:adf23123
         query['context'] = "<%s>" % context
     if query:
         url = url + "?" + urlencode(query)
     return url
Ejemplo n.º 7
0
 def _statement_encode(self, xxx_todo_changeme, context):
     """helper function to encode triples to sesame statement uri's"""
     (s, p, o) = xxx_todo_changeme
     query = {}
     url = self.url + '/statements'
     if s:
         query['subj'] = s.n3().encode('utf8')
     if p:
         query['pred'] = p.n3()
     if o:
         query['obj'] = _quoteLiteral(o.n3())
         # o.n3()
         # quote_plus(o.n3().encode("utf-8"))
     if context:
         # TODO FIXME what about bnodes like _:adf23123
         query['context'] = "<%s>" % context
     if query:
         url = url + "?" + urlencode(query)
     return url
Ejemplo n.º 8
0
    def load(self, file_name, if_exists=ACTION_SKIP, method=METHOD_UPLOAD, batch_size=20):
        """Load an RDF file (``.ttl`` or ``.nt``) into the dataset it declares.

        The file is parsed locally and must contain exactly one subject
        typed as ``DATASET_TYPE_URI``; that subject is used as the target
        graph. The graph is created when ``self.exists`` reports it
        missing, and the triples are then sent either as a single file
        upload or as batched calls to ``self.insert``.

        Args:
            file_name: path of the file to load; resolved to an absolute
                path before use.
            if_exists: action taken when the dataset already exists.
                ``ACTION_SKIP`` returns without importing;
                ``ACTION_DELETE`` deletes and re-creates the dataset
                first; any other value imports into the existing dataset.
            method: with ``METHOD_UPLOAD`` and fewer than 1000 parsed
                triples the file is posted in one request to
                ``self.upload_endpoint``; otherwise triples are inserted
                in batches.
            batch_size: number of triples passed to each ``self.insert``
                call in batched mode.

        Returns:
            A ``(success, message)`` tuple. ``success`` is False for a
            missing file, an unsupported extension, a missing or
            duplicated dataset declaration, a failed
            create/delete/insert, or a connection/authorisation error.
        """
        file_path = os.path.abspath(file_name)
        print("Trying to load {}".format(file_path))

        if not os.path.isfile(file_path):
            return False, "{} file not found".format(file_path)

        _, file_extension = os.path.splitext(file_path)
        file_extension = file_extension.strip('.')
        if file_extension not in ('nt', 'ttl'):
            return False, "Invalid extension {} : only ttl and nt files are supported now".format(file_extension)

        # The extension doubles as the parser format name.
        g = Graph()
        g.parse(file_path, format=file_extension)
        print("Parsed. Size {}".format(len(g)))

        # Check if it contains the mandatory dataset definition
        # TODO: This should be expanded to check also if author name,
        #      title, description, and licence are defined
        dataset_uri = None
        for subject, _, _ in g.triples((None, RDF.type, DATASET_TYPE_URI)):
            if dataset_uri is not None:
                return False, "Multiple declarations of type {} . Only one expected".format(DATASET_TYPE_URI)
            dataset_uri = subject

        if dataset_uri is None:
            return False, "Missing declarations of type {} . One expected".format(DATASET_TYPE_URI)

        try:
            if not self.exists(dataset_uri):
                print("Creating graph {}".format(dataset_uri))
                success, message = self.create(dataset_uri)
                if not success:
                    return False, message
            else:
                print("Dataset {} exists".format(dataset_uri))
                if if_exists == ACTION_SKIP:
                    return True, "Skipping import"
                if if_exists == ACTION_DELETE:
                    success, message = self.delete(dataset_uri)
                    if not success:
                        return False, message

                    success, message = self.create(dataset_uri)
                    if not success:
                        return False, message

                    print("Deleted contents of {}".format(dataset_uri))

        except Unauthorized:
            return False, "Failed to connect to the data: access not allowed"
        except urllib.error.HTTPError as err:
            return False, "Failed to connect to the data: {}".format(err)

        success, message = True, "OK"
        if method == METHOD_UPLOAD and len(g) < 1000:
            print("Uploading contents of {}".format(file_path))
            # Open the file in a context manager so the handle is closed
            # once the request finishes, even when the request raises.
            # NOTE(review): 'text/turtle' is sent for .nt files as well --
            # confirm the endpoint accepts that.
            with open(file_path, 'rb') as payload:
                files = {'file': (os.path.basename(file_path), payload, 'text/turtle')}
                response = requests.post("{}?graph={}".format(self.upload_endpoint, dataset_uri),
                                         files=files,
                                         auth=(self.endpoint_user, self.endpoint_pwd))
            success, message = response.status_code in [200, 201], response.text
        else:
            print("Serializing iterative insertions")
            triples = []
            count_inserted = 0  # number of full batches sent so far
            for sb, pr, obj in g:
                # Literals are rendered with _quoteLiteral; other terms
                # use their own n3() form.
                obj_text = _quoteLiteral(obj) if isinstance(obj, Literal) else obj.n3()
                triples.append("{} {} {}".format(sb.n3(), pr.n3(), obj_text))

                if len(triples) >= batch_size:
                    success, message = self.insert(dataset_uri, triples)
                    if not success:
                        return False, message
                    count_inserted += 1
                    triples = []

                    # After the 1st, 11th, 21st... batch: report progress
                    # and pause for 3 seconds.
                    if count_inserted % 10 == 1:
                        print("Inserted {} ".format(count_inserted*batch_size))
                        sleep(3)

            # Send the final, partially filled batch.
            if triples:
                success, message = self.insert(dataset_uri, triples)

        return success, message