def evaluate(self, bindings: InputBindings = None, bnode_suffix: Tuple[int, int] = (0, 0), as_nt: bool = False) -> Iterable[ExpansionResults]:
    """Evaluate the argument using an optional set of bindings.

    Args:
      * bindings: set of bindings used for evaluation, or None for no
        bindings.  (The previous default of ``dict()`` was a shared mutable
        default argument; ``None`` is equivalent and safe.)
      * bnode_suffix: Pair of suffixes used for creating unique blank nodes;
        pass None to leave blank nodes untouched.
      * as_nt: True to return the term serialized in n-triples (n3) form,
        False to return the rdflib term itself.

    Returns:
        The stored term, suffixed if it is a blank node, in rdflib or
        n-triples format.
    """
    term = self._value
    # isinstance, not type() equality, so BNode subclasses are handled too.
    if isinstance(term, BNode) and bnode_suffix is not None:
        # Suffix the blank node so nodes from different expansions stay unique.
        term = BNode(f"{term}_{bnode_suffix[0]}_{bnode_suffix[1]}")
    return term.n3() if as_nt else term
class rdfSubject(object): db = ConjunctiveGraph() """Default graph for access to instances of this type""" rdf_type = None """rdf:type of instances of this class""" def __init__(self, resUri=None, **kwargs): """The constructor tries hard to do return you an rdfSubject :param resUri: the "resource uri". If `None` then create an instance with a BNode resUri. Can be given as one of: * an instance of an rdfSubject * an instance of a BNode or a URIRef * an n3 uriref string like: "<urn:isbn:1234567890>" * an n3 bnode string like: "_:xyz1234" :param kwargs: is a set of values that will be set using the keys to find the appropriate descriptor""" if not resUri: # create a bnode self.resUri = BNode() if self.rdf_type: self.db.add((self.resUri, RDF.type, self.rdf_type)) elif isinstance(resUri, (BNode, URIRef)): # use the identifier passed self.resUri = resUri if self.rdf_type \ and not list(self.db.triples( (self.resUri, RDF.type, self.rdf_type))): self.db.add((self.resUri, RDF.type, self.rdf_type)) elif isinstance(resUri, rdfSubject): # use the resUri of the subject self.resUri = resUri.resUri self.db = resUri.db elif isinstance(resUri, (str, unicode)): # create one from a <uri> or if resUri[0] == "<" and resUri[-1] == ">": # _:bnode string self.resUri = URIRef(resUri[1:-1]) elif resUri.startswith("_:"): self.resUri = BNode(resUri[2:]) if self.rdf_type: self.db.add((self.resUri, RDF.type, self.rdf_type)) else: raise AttributeError("cannot construct rdfSubject from %s" % (str(resUri))) if kwargs: self._set_with_dict(kwargs) def n3(self): """n3 repr of this node""" return self.resUri.n3() @classmethod def _getdescriptor(cls, key): """__get_descriptor returns the descriptor for the key. It essentially cls.__dict__[key] with recursive calls to super""" # NOT SURE if mro is the way to do this or if we should call # super() or bases? 
for kls in cls.mro(): if key in kls.__dict__: return kls.__dict__[key] raise AttributeError("descriptor %s not found for class %s" % (key, cls)) # short term hack. Need to go to a sqlalchemy 0.4 style query method # obj.query.get_by should map to obj.get_by ..same for fetch_by @classmethod def query(cls): return cls @classmethod def get_by(cls, **kwargs): """Class Method, returns a single instance of the class by a single kwarg. the keyword must be a descriptor of the class. example: .. code-block:: python bigBlue = Company.get_by(symbol='IBM') :Note: the keyword should map to an rdf predicate that is of type owl:InverseFunctional""" if len(kwargs) != 1: raise ValueError("get_by wanted exactly 1 but got %i args\n" + "Maybe you wanted filter_by" % (len(kwargs))) key, value = kwargs.items()[0] if isinstance(value, (URIRef, BNode, Literal)): o = value else: o = Literal(value) pred = cls._getdescriptor(key).pred uri = cls.db.value(None, pred, o) if uri: return cls(uri) else: raise LookupError("%s = %s not found" % (key, value)) @classmethod def filter_by(cls, **kwargs): """Class method returns a generator over classs instances meeting the kwargs conditions. 
Each keyword must be a class descriptor filter by RDF.type == cls.rdf_type is implicit Order helps, the first keyword should be the most restrictive """ filters = [] for key, value in kwargs.items(): pred = cls._getdescriptor(key).pred # try to make the value be OK for the triple query as an object if isinstance(value, Identifier): obj = value else: obj = Literal(value) filters.append((pred, obj)) # make sure we filter by type if not (RDF.type, cls.rdf_type) in filters: filters.append((RDF.type, cls.rdf_type)) pred, obj = filters[0] log.debug("Checking %s, %s" % (pred, obj)) for sub in cls.db.subjects(pred, obj): log.debug("maybe %s" % sub) for pred, obj in filters[1:]: log.debug("Checking %s, %s" % (pred, obj)) try: cls.db.triples((sub, pred, obj)).next() except: log.warn("No %s" % sub) break else: yield cls(sub) @classmethod def ClassInstances(cls): """return a generator for instances of this rdf:type you can look in MyClass.rdf_type to see the predicate being used""" beenthere = set([]) for i in cls.db.subjects(RDF.type, cls.rdf_type): if not i in beenthere: yield cls(i) beenthere.add(i) @classmethod def GetRandom(cls): """for develoment just returns a random instance of this class""" from random import choice xii = list(cls.ClassInstances()) return choice(xii) def __hash__(self): return hash("ranD0Mi$h_" + self.n3()) def __cmp__(self, other): if other is None: return False else: return cmp(self.n3(), other.n3()) def __repr__(self): return """%s('%s')""" % (self.__class__.__name__, self.n3().encode('utf-8')) if rdflibversion.startswith('2'): def __str__(self): return str(self.resUri) def __getitem__(self, pred): log.debug("Getting with __getitem__ %s for %s" % (pred, self.n3())) val = self.db.value(self.resUri, pred) if isinstance(val, Literal): val = val.toPython() elif isinstance(val, (BNode, URIRef)): val = rdfSubject(val) return val def __delitem__(self, pred): log.debug("Deleting with __delitem__ %s for %s" % (pred, self)) for s, p, o in 
self.db.triples((self.resUri, pred, None)): self.db.remove((s, p, o)) # finally if the object in the triple was a bnode # cascade delete the thing it referenced # ?? FIXME Do we really want to cascade if it's an rdfSubject?? if isinstance(o, (BNode, rdfSubject)): rdfSubject(o)._remove(db=self.db, cascade='bnode') def _set_with_dict(self, kv): """ :param kv: a dict for each key,value pair in dict kv set self.key = value """ for key, value in kv.items(): descriptor = self.__class__._getdescriptor(key) descriptor.__set__(self, value) def _remove(self, db=None, cascade='bnode', bnodeCheck=True, objectCascade=False): """ Remove all triples where this rdfSubject is the subject of the triple :param db: limit the remove operation to this graph :param cascade: must be one of: * none -- remove none * bnode -- (default) remove all unreferenced bnodes * all -- remove all unreferenced bnode(s) AND uri(s) :param bnodeCheck: boolean * True -- (default) check bnodes and raise exception if there are still references to this node * False -- do not check. This can leave orphaned object reference in triples. Use only if you are resetting the value in the same transaction :param objectCascade: boolean * False -- (default) do nothing * True -- delete also all triples where this refSubject is the object of the triple. """ noderef = self.resUri log.debug("Called remove on %s" % self) if not db: db = self.db # we cannot delete a bnode if it is still referenced, # i.e. 
if it is the o of a s,p,o if bnodeCheck and isinstance(noderef, BNode): for s, p, o in db.triples((None, None, noderef)): raise RDFAlchemyError( "Cannot delete BNode %s because %s still references it" % (noderef.n3(), s.n3())) # determine an appropriate test for cascade decisions if cascade == 'bnode': # we cannot delete a bnode if there are still references to it def test(node): if isinstance(node, (URIRef, Literal)): return False for s, p, o in db.triples((None, None, node)): return False return True elif cascade == 'none': def f1(node): return False test = f1 elif cascade == 'all': def f2(node): if isinstance(node, Literal): return False for s, p, o in db.triples((None, None, node)): return False return True test = f2 else: raise AttributeError("unknown cascade argument") for s, p, o in db.triples((noderef, None, None)): db.remove((s, p, o)) if test(o): rdfSubject(o)._remove(db=db, cascade=cascade) if objectCascade: for s, p, o in db.triples((None, None, noderef)): db.remove((s, p, o)) def _rename(self, name, db=None): """rename a node """ if not db: db = self.db if not (isinstance(name, (BNode, URIRef))): raise AttributeError("cannot rename to %s" % name) for s, p, o in db.triples((self.resUri, None, None)): db.remove((s, p, o)) db.add((name, p, o)) for s, p, o in db.triples((None, None, self.resUri)): db.set((s, p, name)) self.resUri = name def _ppo(self, db=None): """Like pretty print... Return a 'pretty predicate,object' of self returning all predicate object pairs with qnames""" db = db or self.db for p, o in db.predicate_objects(self.resUri): print "%20s = %s" % (db.qname(p), str(o)) print " " def md5_term_hash(self): """Not sure what good this method is but it's defined for rdflib.Identifiers so it's here for now""" return self.resUri.md5_term_hash()
class rdfSubject(object):
    db = ConjunctiveGraph()
    """Default graph for access to instances of this type"""
    rdf_type = None
    """rdf:type of instances of this class"""

    def __init__(self, resUri=None, **kwargs):
        """The constructor tries hard to do return you an rdfSubject

        :param resUri: the "resource uri". If `None` then create an instance
        with a BNode resUri. Can be given as one of:

           * an instance of an rdfSubject
           * an instance of a BNode or a URIRef
           * an n3 uriref string like: "<urn:isbn:1234567890>"
           * an n3 bnode string like: "_:xyz1234"
        :param kwargs: is a set of values that will be set using the keys to
        find the appropriate descriptor"""
        if not resUri:  # create a bnode
            self.resUri = BNode()
            if self.rdf_type:
                self.db.add((self.resUri, RDF.type, self.rdf_type))
        elif isinstance(resUri, (BNode, URIRef)):  # use the identifier passed
            self.resUri = resUri
            # only add the rdf:type triple if it is not already asserted
            if self.rdf_type \
                    and not list(self.db.triples(
                        (self.resUri, RDF.type, self.rdf_type))):
                self.db.add((self.resUri, RDF.type, self.rdf_type))
        elif isinstance(resUri, rdfSubject):  # use the resUri of the subject
            self.resUri = resUri.resUri
            self.db = resUri.db
        elif isinstance(resUri, (str, unicode)):  # create one from a <uri> or
            if resUri[0] == "<" and resUri[-1] == ">":  # _:bnode string
                self.resUri = URIRef(resUri[1:-1])
            elif resUri.startswith("_:"):
                self.resUri = BNode(resUri[2:])
            # NOTE(review): a string matching neither "<...>" nor "_:..."
            # falls through here with self.resUri never assigned -- confirm.
            if self.rdf_type:
                self.db.add((self.resUri, RDF.type, self.rdf_type))
        else:
            raise AttributeError("cannot construct rdfSubject from %s" % (
                str(resUri)))
        if kwargs:
            self._set_with_dict(kwargs)

    def n3(self):
        """n3 repr of this node"""
        return self.resUri.n3()

    @classmethod
    def _getdescriptor(cls, key):
        """__get_descriptor returns the descriptor for the key.
        It essentially cls.__dict__[key] with recursive calls to super"""
        # NOT SURE if mro is the way to do this or if we should call
        # super() or bases?
        for kls in cls.mro():
            if key in kls.__dict__:
                return kls.__dict__[key]
        raise AttributeError(
            "descriptor %s not found for class %s" % (key, cls))

    # short term hack. Need to go to a sqlalchemy 0.4 style query method
    # obj.query.get_by should map to obj.get_by ..same for fetch_by
    @classmethod
    def query(cls):
        return cls

    @classmethod
    def get_by(cls, **kwargs):
        """Class Method, returns a single instance of the class
        by a single kwarg.  the keyword must be a descriptor of the
        class.
        example:

        .. code-block:: python

            bigBlue = Company.get_by(symbol='IBM')

        :Note:
            the keyword should map to an rdf predicate
            that is of type owl:InverseFunctional"""
        if len(kwargs) != 1:
            # NOTE(review): "%" binds only to the second string literal here
            # ("+" is looser than "%"), and that literal has no conversion
            # specifier, so this raises TypeError rather than the intended
            # ValueError message.  Flagging only; code left unchanged.
            raise ValueError(
                "get_by wanted exactly 1 but got %i args\n" +
                "Maybe you wanted filter_by" % (len(kwargs)))
        key, value = kwargs.items()[0]
        # rdflib terms pass through; anything else is wrapped as a Literal
        if isinstance(value, (URIRef, BNode, Literal)):
            o = value
        else:
            o = Literal(value)
        pred = cls._getdescriptor(key).pred
        uri = cls.db.value(None, pred, o)
        if uri:
            return cls(uri)
        else:
            raise LookupError("%s = %s not found" % (key, value))

    @classmethod
    def filter_by(cls, **kwargs):
        """Class method returns a generator over classs instances
        meeting the kwargs conditions.

        Each keyword must be a class descriptor

        filter by RDF.type == cls.rdf_type is implicit

        Order helps, the first keyword should be the most restrictive
        """
        filters = []
        for key, value in kwargs.items():
            pred = cls._getdescriptor(key).pred
            # try to make the value be OK for the triple query as an object
            if isinstance(value, Identifier):
                obj = value
            else:
                obj = Literal(value)
            filters.append((pred, obj))
        # make sure we filter by type
        if not (RDF.type, cls.rdf_type) in filters:
            filters.append((RDF.type, cls.rdf_type))
        # candidates come from the first filter; the rest must all match
        pred, obj = filters[0]
        log.debug("Checking %s, %s" % (pred, obj))
        for sub in cls.db.subjects(pred, obj):
            log.debug("maybe %s" % sub)
            for pred, obj in filters[1:]:
                log.debug("Checking %s, %s" % (pred, obj))
                try:
                    cls.db.triples((sub, pred, obj)).next()
                except:
                    # NOTE(review): bare except -- presumably only
                    # StopIteration is intended here; confirm.
                    log.warn("No %s" % sub)
                    break
            else:
                yield cls(sub)

    @classmethod
    def ClassInstances(cls):
        """return a generator for instances of this rdf:type
        you can look in MyClass.rdf_type to see the predicate being used"""
        # beenthere de-duplicates subjects reported by multiple contexts
        beenthere = set([])
        for i in cls.db.subjects(RDF.type, cls.rdf_type):
            if not i in beenthere:
                yield cls(i)
                beenthere.add(i)

    @classmethod
    def GetRandom(cls):
        """for develoment just returns a random instance of this class"""
        from random import choice
        xii = list(cls.ClassInstances())
        return choice(xii)

    def __hash__(self):
        return hash("ranD0Mi$h_" + self.n3())

    def __cmp__(self, other):
        # NOTE(review): False == 0 means "equal" under Py2 cmp semantics,
        # so this makes instances compare equal to None -- confirm intent.
        if other is None:
            return False
        else:
            return cmp(self.n3(), other.n3())

    def __repr__(self):
        return """%s('%s')""" % (
            self.__class__.__name__, self.n3().encode('utf-8'))

    # rdflib 2.x identifiers need an explicit __str__
    if rdflibversion.startswith('2'):
        def __str__(self):
            return str(self.resUri)

    def __getitem__(self, pred):
        # single-value lookup: Literals become Python values, resources
        # are wrapped as rdfSubject
        log.debug("Getting with __getitem__ %s for %s" % (pred, self.n3()))
        val = self.db.value(self.resUri, pred)
        if isinstance(val, Literal):
            val = val.toPython()
        elif isinstance(val, (BNode, URIRef)):
            val = rdfSubject(val)
        return val

    def __delitem__(self, pred):
        # removes every (self, pred, *) triple, cascading into BNode objects
        log.debug("Deleting with __delitem__ %s for %s" % (pred, self))
        for s, p, o in self.db.triples((self.resUri, pred, None)):
            self.db.remove((s, p, o))
            # finally if the object in the triple was a bnode
            # cascade delete the thing it referenced
            # ?? FIXME Do we really want to cascade if it's an rdfSubject??
            if isinstance(o, (BNode, rdfSubject)):
                rdfSubject(o)._remove(db=self.db, cascade='bnode')

    def _set_with_dict(self, kv):
        """
        :param kv: a dict

          for each key,value pair in dict kv
               set self.key = value
        """
        for key, value in kv.items():
            descriptor = self.__class__._getdescriptor(key)
            descriptor.__set__(self, value)

    def _remove(
            self, db=None, cascade='bnode',
            bnodeCheck=True, objectCascade=False):
        """Remove all triples where this rdfSubject is the subject of the
        triple

        :param db: limit the remove operation to this graph
        :param cascade: must be one of:

            * none --  remove none
            * bnode -- (default) remove all unreferenced bnodes
            * all -- remove all unreferenced bnode(s) AND uri(s)

        :param bnodeCheck: boolean

            * True -- (default) check bnodes and raise exception if there
              are still references to this node
            * False --  do not check.  This can leave orphaned object
              reference in triples.  Use only if you are resetting the value
              in the same transaction
        :param objectCascade: boolean
            * False -- (default) do nothing
            * True -- delete also all triples where this refSubject is
              the object of the triple.
        """
        noderef = self.resUri
        log.debug("Called remove on %s" % self)
        if not db:
            db = self.db
        # we cannot delete a bnode if it is still referenced,
        # i.e. if it is the o of a s,p,o
        if bnodeCheck and isinstance(noderef, BNode):
            for s, p, o in db.triples((None, None, noderef)):
                raise RDFAlchemyError(
                    "Cannot delete BNode %s because %s still references it" % (
                        noderef.n3(), s.n3()))
        # determine an appropriate test for cascade decisions
        if cascade == 'bnode':
            # we cannot delete a bnode if there are still references to it
            def test(node):
                if isinstance(node, (URIRef, Literal)):
                    return False
                # any remaining reference blocks the cascade
                for s, p, o in db.triples((None, None, node)):
                    return False
                return True
        elif cascade == 'none':
            def f1(node):
                return False
            test = f1
        elif cascade == 'all':
            def f2(node):
                if isinstance(node, Literal):
                    return False
                for s, p, o in db.triples((None, None, node)):
                    return False
                return True
            test = f2
        else:
            raise AttributeError("unknown cascade argument")
        for s, p, o in db.triples((noderef, None, None)):
            db.remove((s, p, o))
            if test(o):
                rdfSubject(o)._remove(db=db, cascade=cascade)
        if objectCascade:
            for s, p, o in db.triples((None, None, noderef)):
                db.remove((s, p, o))

    def _rename(self, name, db=None):
        """rename a node """
        if not db:
            db = self.db
        if not (isinstance(name, (BNode, URIRef))):
            raise AttributeError("cannot rename to %s" % name)
        # rewrite triples where self is the subject...
        for s, p, o in db.triples((self.resUri, None, None)):
            db.remove((s, p, o))
            db.add((name, p, o))
        # ...and where self is the object
        for s, p, o in db.triples((None, None, self.resUri)):
            db.set((s, p, name))
        self.resUri = name

    def _ppo(self, db=None):
        """Like pretty print...
        Return a 'pretty predicate,object' of self
        returning all predicate object pairs with qnames"""
        db = db or self.db
        for p, o in db.predicate_objects(self.resUri):
            print "%20s = %s" % (db.qname(p), str(o))
        print " "

    def md5_term_hash(self):
        """Not sure what good this method is but it's defined for
        rdflib.Identifiers so it's here for now"""
        return self.resUri.md5_term_hash()
def bnode(self, data: str = "") -> str:
    """Mint a fresh blank node and return its N3 serialization.

    ``data`` is accepted for interface compatibility but does not
    influence the generated node.
    """
    fresh_node = BNode()
    return fresh_node.n3()
def graph_from_opendatasoft(g, dataset_dict, portal_url):
    """Map an OpenDataSoft dataset dict into DCAT triples in graph ``g``.

    :param g: rdflib graph the triples are added to
    :param dataset_dict: raw OpenDataSoft dataset description (expects
        'datasetid' and 'metas'; optionally 'has_records', 'features',
        'attachments')
    :param portal_url: base URL of the portal, used to build dataset and
        download URIs
    :return: the URIRef of the dataset
    """
    # available: title, description, language, theme, keyword, license,
    # publisher, references
    # additional: created, issued, creator, contributor, accrual periodicity,
    # spatial, temporal, granularity, data quality
    identifier = dataset_dict['datasetid']
    uri = '{0}/explore/dataset/{1}'.format(portal_url.rstrip('/'), identifier)
    # dataset subject
    dataset_ref = URIRef(uri)
    for prefix, namespace in namespaces.iteritems():
        g.bind(prefix, namespace)
    g.add((dataset_ref, RDF.type, DCAT.Dataset))
    # identifier
    g.add((dataset_ref, DCT.identifier, Literal(identifier)))

    data = dataset_dict['metas']
    # Basic fields
    items = [
        ('title', DCT.title, None),
        ('description', DCT.description, None),
    ]
    _add_triples_from_dict(g, data, dataset_ref, items)

    # Lists
    items = [
        ('language', DCT.language, None),
        ('theme', DCAT.theme, None),
        ('keyword', DCAT.keyword, None),
    ]
    _add_list_triples_from_dict(g, data, dataset_ref, items)

    # publisher
    publisher_name = data.get('publisher')
    if publisher_name:
        # BNode: dataset_ref + DCT.publisher + publisher_name
        # BUG FIX: hash input must be encoded to bytes like every other
        # sha1 call in this function; an unencoded unicode publisher name
        # with non-ASCII characters would raise here.
        id_string = dataset_ref.n3() + DCT.publisher.n3() + publisher_name
        bnode_hash = hashlib.sha1(id_string.encode('utf-8'))
        publisher_details = BNode(bnode_hash.hexdigest())
        g.add((publisher_details, RDF.type, FOAF.Organization))
        g.add((dataset_ref, DCT.publisher, publisher_details))
        g.add((publisher_details, FOAF.name, Literal(publisher_name)))
    # TODO any additional publisher information available?
    # look for fields

    # Dates
    items = [
        #('metadata_processed', DCT.issued, ['metadata_created']),
        ('modified', DCT.modified,
         ['metadata_processed', 'metadata_modified']),
    ]
    _add_date_triples_from_dict(g, data, dataset_ref, items)

    # references
    references = data.get('references')
    if references and isinstance(references, basestring) \
            and bool(urlparse.urlparse(references).netloc):
        references = references.strip()
        if is_valid_uri(references):
            g.add((dataset_ref, RDFS.seeAlso, URIRef(references)))
        else:
            g.add((dataset_ref, RDFS.seeAlso, Literal(references)))

    # store licenses for distributions
    license = data.get('license')

    # distributions
    if dataset_dict.get('has_records'):
        exports = [('csv', 'text/csv'),
                   ('json', 'application/json'),
                   ('xls', 'application/vnd.ms-excel')]
        if 'geo' in dataset_dict.get('features', []):
            exports.append(('geojson', 'application/vnd.geo+json'))
            exports.append(('kml', 'application/vnd.google-earth.kml+xml'))
            # TODO shape files?
            # exports.append(('shp', 'application/octet-stream'))
        for format, mimetype in exports:
            # URL
            url = portal_url.rstrip('/') + \
                '/api/records/1.0/download?dataset=' + identifier + \
                '&format=' + format
            # BNode: dataset_ref + url (deterministic, so re-harvesting
            # produces the same node)
            id_string = dataset_ref.n3() + url
            bnode_hash = hashlib.sha1(id_string.encode('utf-8'))
            distribution = BNode(bnode_hash.hexdigest())
            g.add((dataset_ref, DCAT.distribution, distribution))
            g.add((distribution, RDF.type, DCAT.Distribution))
            if is_valid_uri(url):
                g.add((distribution, DCAT.accessURL, URIRef(url)))
            else:
                g.add((distribution, DCAT.accessURL, Literal(url)))
            # License
            if license:
                # BNode: distribution + license
                id_string = distribution.n3() + license
                bnode_hash = hashlib.sha1(id_string.encode('utf-8'))
                license_node = BNode(bnode_hash.hexdigest())
                g.add((distribution, DCT.license, license_node))
                g.add((license_node, RDF.type, DCT.LicenseDocument))
                g.add((license_node, RDFS.label, Literal(license)))
            # Format
            # BNode: distribution + format + mimetype
            id_string = distribution.n3() + format + mimetype
            bnode_hash = hashlib.sha1(id_string.encode('utf-8'))
            format_node = BNode(bnode_hash.hexdigest())
            g.add((distribution, DCT['format'], format_node))
            g.add((format_node, RDF.type, DCT.MediaTypeOrExtent))
            g.add((format_node, RDFS.label, Literal(format)))
            g.add((format_node, RDF.value, Literal(mimetype)))
            g.add((distribution, DCAT.mediaType, Literal(mimetype)))
            # Dates
            items = [
                #('issued', DCT.issued, None),
                ('data_processed', DCT.modified, None),
            ]
            _add_date_triples_from_dict(g, data, distribution, items)

    # attachments
    for attachment in dataset_dict.get('attachments', []):
        # BNode: dataset_ref + attachment id
        # BUG FIX: the original concatenated the attachment dict itself
        # onto the n3 string, which raises TypeError; use its 'id'
        # (stable per attachment) instead.
        id_string = dataset_ref.n3() + attachment.get('id', '')
        bnode_hash = hashlib.sha1(id_string.encode('utf-8'))
        distribution = BNode(bnode_hash.hexdigest())
        g.add((dataset_ref, DCAT.distribution, distribution))
        g.add((distribution, RDF.type, DCAT.Distribution))
        if license:
            # BNode: distribution + license
            id_string = distribution.n3() + license
            bnode_hash = hashlib.sha1(id_string.encode('utf-8'))
            license_node = BNode(bnode_hash.hexdigest())
            g.add((distribution, DCT.license, license_node))
            g.add((license_node, RDF.type, DCT.LicenseDocument))
            g.add((license_node, RDFS.label, Literal(license)))
        # Simple values
        items = [
            ('title', DCT.title, None),
            ('mimetype', DCT.mediaType, None),
            ('format', DCT['format'], None),
        ]
        _add_triples_from_dict(g, attachment, distribution, items)
        # URL
        if attachment.get('id'):
            url = portal_url.rstrip('/') + '/api/datasets/1.0/' + \
                identifier + '/attachments/' + attachment.get('id')
            g.add((distribution, DCT.accessURL, Literal(url)))
    return dataset_ref
def convert_socrata(g, data, portal_url):
    """Map a Socrata dataset description into DCAT triples in graph ``g``.

    Best-effort: any failure during conversion is logged and whatever
    ``dataset_ref`` was established so far (possibly None) is returned.

    :param g: rdflib graph the triples are added to
    :param data: raw Socrata dataset dict (expects 'id'; other keys optional)
    :param portal_url: base URL of the portal, used to build URIs
    :return: the dataset URIRef, or None if ``data`` is not a dict
    """
    dataset_ref = None
    # add additional info
    if isinstance(data, dict):
        try:
            identifier = data['id']
            uri = '{0}/dataset/{1}'.format(portal_url.rstrip('/'), identifier)
            dataset_ref = URIRef(uri)
            g.add((dataset_ref, RDF.type, DCAT.Dataset))
            # identifier
            g.add((dataset_ref, DCT.identifier, Literal(identifier)))
            # Basic fields
            items = [
                ('name', DCT.title, None),
                ('description', DCT.description, None),
                ('frequency', DCT.accrualPeriodicity, None),
                ('webUri', DCAT.landingPage, None),
            ]
            _add_triples_from_dict(g, data, dataset_ref, items)

            # Dates: Socrata reports either epoch integers or date strings
            if isinstance(data.get('createdAt'), int):
                # dates are integers
                created = data.get('createdAt')
                if created:
                    g.add((dataset_ref, DCT.issued, Literal(
                        datetime.datetime.utcfromtimestamp(created))))
                updated = data.get('metadataUpdatedAt')
                if not updated:
                    updated = data.get('updatedAt')
                if updated:
                    g.add((dataset_ref, DCT.modified, Literal(
                        datetime.datetime.utcfromtimestamp(updated))))
            else:
                # dates are strings
                items = [('createdAt', DCT.modified, None),
                         ('metadataUpdatedAt', DCT.modified, ['updatedAt'])]
                _add_date_triples_from_dict(g, data, dataset_ref, items)

            # kept for the distribution loop below
            license = data.get('license')

            # Lists
            items = [
                ('tags', DCAT.keyword, None),
            ]
            _add_list_triples_from_dict(g, data, dataset_ref, items)

            # owner
            if 'owner' in data and isinstance(
                    data['owner'], dict) and 'displayName' in data['owner']:
                owner = data['owner']['displayName']
                # add owner as publisher
                # BNode: dataset_ref + DCT.publisher + owner
                bnode_hash = hashlib.sha1(
                    (dataset_ref.n3() + DCT.publisher.n3() +
                     owner).encode('utf-8'))
                publisher_details = BNode(bnode_hash.hexdigest())
                g.add((publisher_details, RDF.type, FOAF.Organization))
                g.add((dataset_ref, DCT.publisher, publisher_details))
                g.add((publisher_details, FOAF.name, Literal(owner)))

            # author
            if 'tableAuthor' in data and isinstance(
                    data['tableAuthor'],
                    dict) and 'displayName' in data['tableAuthor']:
                author = data['tableAuthor']['displayName']
                # BNode: dataset_ref + VCARD.fn + author
                bnode_hash = hashlib.sha1(
                    (dataset_ref.n3() + VCARD.fn.n3() +
                     author).encode('utf-8'))
                contact_details = BNode(bnode_hash.hexdigest())
                g.add((contact_details, RDF.type, VCARD.Organization))
                g.add((dataset_ref, DCAT.contactPoint, contact_details))
                g.add((contact_details, VCARD.fn, Literal(author)))

            # publisher
            if 'attribution' in data and data['attribution']:
                publisher = data['attribution']
                publisher_details = get_valid_uri(
                    data.get('attributionLink'),
                    dataset_ref.n3() + DCT.publisher.n3() + publisher)
                g.add((publisher_details, RDF.type, FOAF.Organization))
                g.add((dataset_ref, DCT.publisher, publisher_details))
                g.add((publisher_details, FOAF.name, Literal(publisher)))

            # distributions
            distribution_endpoint = data.get('dataUri')
            if not distribution_endpoint:
                distribution_endpoint = '{0}/resource/{1}'.format(
                    portal_url.rstrip('/'), identifier)
            if distribution_endpoint:
                exports = [('csv', 'text/csv'),
                           ('json', 'application/json'),
                           ('xml', 'text/xml')]
                for format, mimetype in exports:
                    # URL
                    url = distribution_endpoint + '.' + format
                    # BNode: dataset_ref + url (deterministic node id)
                    id_string = dataset_ref.n3() + url
                    bnode_hash = hashlib.sha1(id_string.encode('utf-8'))
                    distribution = BNode(bnode_hash.hexdigest())
                    g.add((dataset_ref, DCAT.distribution, distribution))
                    g.add((distribution, RDF.type, DCAT.Distribution))
                    if is_valid_uri(url):
                        g.add((distribution, DCAT.accessURL, URIRef(url)))
                    else:
                        g.add((distribution, DCAT.accessURL, Literal(url)))
                    # License
                    if license:
                        # BNode: distribution + license
                        id_string = distribution.n3() + license
                        bnode_hash = hashlib.sha1(id_string.encode('utf-8'))
                        license_node = BNode(bnode_hash.hexdigest())
                        g.add((distribution, DCT.license, license_node))
                        g.add((license_node, RDF.type, DCT.LicenseDocument))
                        g.add((license_node, RDFS.label, Literal(license)))
                    # Format
                    # BNode: distribution + format + mimetype
                    id_string = distribution.n3() + format + mimetype
                    bnode_hash = hashlib.sha1(id_string.encode('utf-8'))
                    format_node = BNode(bnode_hash.hexdigest())
                    g.add((distribution, DCT['format'], format_node))
                    g.add((format_node, RDF.type, DCT.MediaTypeOrExtent))
                    g.add((format_node, RDFS.label, Literal(format)))
                    g.add((format_node, RDF.value, Literal(mimetype)))
                    g.add((distribution, DCAT.mediaType, Literal(mimetype)))
                    # Dates
                    items = [('dataUpdatedAt', DCT.modified, None)]
                    _add_date_triples_from_dict(g, data, distribution, items)
        except Exception as e:
            # BUG FIX: was a silent "pass", which hid every conversion
            # failure.  Keep the best-effort contract but record the error.
            import logging
            logging.getLogger(__name__).warning(
                'convert_socrata failed for dataset %s: %s',
                data.get('id'), e)
    return dataset_ref