コード例 #1
0
ファイル: argument.py プロジェクト: Callidon/pyOTTR
    def evaluate(self, bindings: InputBindings = dict(), bnode_suffix: Tuple[int, int] = (0, 0), as_nt: bool = False) -> Iterable[ExpansionResults]:
        """Evaluate the argument and return its RDF term.

        Args:
          * bindings: set of bindings used for evaluation. It is not read by this implementation, so the shared default dict is never mutated here.
          * bnode_suffix: Pair of suffixes appended to a blank node identifier to make it unique, or None to leave blank nodes untouched.
          * as_nt: True to return the term in n-triples form (the result of ``term.n3()``), False to return the rdflib term itself.

        Returns:
          The stored value, replaced by a suffixed blank node when it is a blank node and a suffix is given, either as an rdflib term or as its ``n3()`` string.
        """
        term = self._value
        # isinstance (rather than an exact type comparison) so that BNode
        # subclasses are made unique as well.
        if isinstance(term, BNode) and bnode_suffix is not None:
            term = BNode(f"{term}_{bnode_suffix[0]}_{bnode_suffix[1]}")
        return term.n3() if as_nt else term
コード例 #2
0
ファイル: rdfSubject.py プロジェクト: joke2k/RDFAlchemy
class rdfSubject(object):
    """Object view of a single RDF node whose triples live in ``db``.

    Each instance wraps a node identifier (``resUri``) and reads and writes
    triples about it through ``db``.  The keyword based methods (``get_by``,
    ``filter_by``, ``_set_with_dict``) look class attributes up through
    ``_getdescriptor`` and use their ``pred`` attribute / ``__set__`` method.
    """
    db = ConjunctiveGraph()
    """Default graph for access to instances of this type"""
    rdf_type = None
    """rdf:type of instances of this class"""

    def __init__(self, resUri=None, **kwargs):
        """The constructor tries hard to return you an rdfSubject

        :param resUri: the "resource uri". If `None` (or otherwise falsy)
        then create an instance with a BNode resUri. Can be given as one of:

           * an instance of an rdfSubject
           * an instance of a BNode or a URIRef
           * an n3 uriref string like: "<urn:isbn:1234567890>"
           * an n3 bnode string like: "_:xyz1234"
        :param kwargs: is a set of values that will be set using the keys to
        find the appropriate descriptor
        :raises AttributeError: if `resUri` is of an unsupported type or is a
        string in neither of the two n3 forms above"""

        if not resUri:  # create a bnode
            self.resUri = BNode()
            if self.rdf_type:
                self.db.add((self.resUri, RDF.type, self.rdf_type))

        # NOTE: this test must come before the plain string test below,
        # it is checked first so that identifiers are never parsed as n3.
        elif isinstance(resUri, (BNode, URIRef)):  # use the identifier passed
            self.resUri = resUri
            # only assert the type if it is not already in the graph
            if self.rdf_type \
                and not list(self.db.triples(
                    (self.resUri, RDF.type, self.rdf_type))):
                self.db.add((self.resUri, RDF.type, self.rdf_type))

        elif isinstance(resUri, rdfSubject):  # use the resUri of the subject
            self.resUri = resUri.resUri
            self.db = resUri.db

        elif isinstance(resUri, (str, unicode)):  # n3 <uri> or _:bnode string
            if resUri[0] == "<" and resUri[-1] == ">":
                self.resUri = URIRef(resUri[1:-1])
            elif resUri.startswith("_:"):
                self.resUri = BNode(resUri[2:])
            else:
                # Previously this case fell through with self.resUri unset,
                # producing an unusable instance; fail like other bad input.
                raise AttributeError("cannot construct rdfSubject from %s" %
                                     (str(resUri)))

            if self.rdf_type:
                self.db.add((self.resUri, RDF.type, self.rdf_type))

        else:
            raise AttributeError("cannot construct rdfSubject from %s" %
                                 (str(resUri)))

        if kwargs:
            self._set_with_dict(kwargs)

    def n3(self):
        """n3 repr of this node"""
        return self.resUri.n3()

    @classmethod
    def _getdescriptor(cls, key):
        """Return the descriptor stored under `key`.

        It is essentially cls.__dict__[key], searched along the class mro.

        :raises AttributeError: if no class in the mro defines `key`"""
        # NOT SURE if mro is the way to do this or if we should call
        # super() or bases?
        for kls in cls.mro():
            if key in kls.__dict__:
                return kls.__dict__[key]
        raise AttributeError("descriptor %s not found for class %s" %
                             (key, cls))

    # short term hack.  Need to go to a sqlalchemy 0.4 style query method
    # obj.query.get_by should map to obj.get_by  ..same for fetch_by
    @classmethod
    def query(cls):
        """Return the class itself (see the hack note above)."""
        return cls

    @classmethod
    def get_by(cls, **kwargs):
        """Class Method, returns a single instance of the class
        by a single kwarg.  the keyword must be a descriptor of the
        class.
        example:

        .. code-block:: python

            bigBlue = Company.get_by(symbol='IBM')

        :Note:
            the keyword should map to an rdf predicate
            that is of type owl:InverseFunctional

        :raises ValueError: if not exactly one keyword is given
        :raises LookupError: if no subject has that value"""
        if len(kwargs) != 1:
            # Adjacent literals are joined before "%" is applied.  The old
            # '"..." + "..." % n' applied "%" to the second literal only and
            # raised TypeError instead of this ValueError.
            raise ValueError("get_by wanted exactly 1 but got  %i args\n"
                             "Maybe you wanted filter_by" % (len(kwargs)))
        key, value = next(iter(kwargs.items()))
        if isinstance(value, (URIRef, BNode, Literal)):
            o = value
        else:
            o = Literal(value)
        pred = cls._getdescriptor(key).pred
        uri = cls.db.value(None, pred, o)
        if uri:
            return cls(uri)
        else:
            raise LookupError("%s = %s not found" % (key, value))

    @classmethod
    def filter_by(cls, **kwargs):
        """Class method returns a generator over class instances
        meeting the kwargs conditions.

        Each keyword must be a class descriptor

        filter by RDF.type == cls.rdf_type is implicit

        Order helps, the first keyword should be the most restrictive
        """
        filters = []
        for key, value in kwargs.items():
            pred = cls._getdescriptor(key).pred
            # try to make the value be OK for the triple query as an object
            if isinstance(value, Identifier):
                obj = value
            else:
                obj = Literal(value)
            filters.append((pred, obj))
        # make sure we filter by type
        if (RDF.type, cls.rdf_type) not in filters:
            filters.append((RDF.type, cls.rdf_type))
        # the first filter produces the candidates, the rest prune them
        first_pred, first_obj = filters[0]
        log.debug("Checking %s, %s" % (first_pred, first_obj))
        for sub in cls.db.subjects(first_pred, first_obj):
            log.debug("maybe %s" % sub)
            for pred, obj in filters[1:]:
                log.debug("Checking %s, %s" % (pred, obj))
                try:
                    # only existence matters, so fetch at most one triple
                    next(cls.db.triples((sub, pred, obj)))
                except StopIteration:
                    log.warn("No %s" % sub)
                    break
            else:
                # no filter rejected this subject
                yield cls(sub)

    @classmethod
    def ClassInstances(cls):
        """return a generator for instances of this rdf:type
        you can look in MyClass.rdf_type to see the type being used"""
        beenthere = set()
        for i in cls.db.subjects(RDF.type, cls.rdf_type):
            if i not in beenthere:
                yield cls(i)
                beenthere.add(i)

    @classmethod
    def GetRandom(cls):
        """for development just returns a random instance of this class"""
        from random import choice
        xii = list(cls.ClassInstances())
        return choice(xii)

    def __hash__(self):
        return hash("ranD0Mi$h_" + self.n3())

    def __cmp__(self, other):
        """Order instances by their n3 form; never equal to None."""
        if other is None:
            # A __cmp__ result of 0 means "equal".  The previous
            # "return False" therefore made every instance == None.
            return 1
        return cmp(self.n3(), other.n3())

    def __repr__(self):
        return """%s('%s')""" % (self.__class__.__name__,
                                 self.n3().encode('utf-8'))

    if rdflibversion.startswith('2'):

        def __str__(self):
            return str(self.resUri)

    def __getitem__(self, pred):
        """Return the value of `pred` for this node.

        Literals are converted with toPython(), BNode/URIRef values are
        wrapped in an rdfSubject, anything else is returned unchanged."""
        log.debug("Getting with __getitem__ %s for %s" % (pred, self.n3()))
        val = self.db.value(self.resUri, pred)
        if isinstance(val, Literal):
            val = val.toPython()
        elif isinstance(val, (BNode, URIRef)):
            val = rdfSubject(val)
        return val

    def __delitem__(self, pred):
        """Remove every (self, pred, *) triple, cascading into bnodes."""
        log.debug("Deleting with __delitem__ %s for %s" % (pred, self))
        for s, p, o in self.db.triples((self.resUri, pred, None)):
            self.db.remove((s, p, o))
            # finally if the object in the triple was a bnode
            # cascade delete the thing it referenced
            # ?? FIXME Do we really want to cascade if it's an rdfSubject??
            if isinstance(o, (BNode, rdfSubject)):
                rdfSubject(o)._remove(db=self.db, cascade='bnode')

    def _set_with_dict(self, kv):
        """
        :param kv: a dict

          for each key,value pair in dict kv
               set self.key = value

        """
        for key, value in kv.items():
            descriptor = self.__class__._getdescriptor(key)
            descriptor.__set__(self, value)

    def _remove(self,
                db=None,
                cascade='bnode',
                bnodeCheck=True,
                objectCascade=False):
        """
        Remove all triples where this rdfSubject is the subject of the triple

        :param db: limit the remove operation to this graph
        :param cascade: must be one of:

            * none --  remove none
            * bnode -- (default) remove all unreferenced bnodes
            * all -- remove all unreferenced bnode(s) AND uri(s)

        :param bnodeCheck: boolean

            * True -- (default) check bnodes and raise exception if there are
              still references to this node
            * False --  do not check.  This can leave orphaned object reference
              in triples.  Use only if you are resetting the value in
              the same transaction
        :param objectCascade: boolean
            * False -- (default) do nothing
            * True -- delete also all triples where this refSubject is the
            object of the triple.
        :raises RDFAlchemyError: if bnodeCheck is set and this BNode is
            still the object of some triple
        :raises AttributeError: if cascade is not one of the values above
        """
        noderef = self.resUri
        log.debug("Called remove on %s" % self)
        if not db:
            db = self.db

        # we cannot delete a bnode if it is still referenced,
        # i.e. if it is the o of a s,p,o
        if bnodeCheck and isinstance(noderef, BNode):
            for s, p, o in db.triples((None, None, noderef)):
                raise RDFAlchemyError(
                    "Cannot delete BNode %s because %s still references it" %
                    (noderef.n3(), s.n3()))

        # determine an appropriate test for cascade decisions
        if cascade == 'bnode':
            # we cannot delete a bnode if there are still references to it
            def test(node):
                if isinstance(node, (URIRef, Literal)):
                    return False
                # any triple at all means the node is still referenced
                for s, p, o in db.triples((None, None, node)):
                    return False
                return True
        elif cascade == 'none':

            def test(node):
                return False
        elif cascade == 'all':

            def test(node):
                if isinstance(node, Literal):
                    return False
                # any triple at all means the node is still referenced
                for s, p, o in db.triples((None, None, node)):
                    return False
                return True
        else:
            raise AttributeError("unknown cascade argument")

        for s, p, o in db.triples((noderef, None, None)):
            db.remove((s, p, o))
            if test(o):
                rdfSubject(o)._remove(db=db, cascade=cascade)

        if objectCascade:
            for s, p, o in db.triples((None, None, noderef)):
                db.remove((s, p, o))

    def _rename(self, name, db=None):
        """rename a node

        Moves the triples having this node as subject over to `name` and
        re-points triples having it as object, then adopts `name`.

        :raises AttributeError: if `name` is not a BNode or URIRef"""
        if not db:
            db = self.db
        if not (isinstance(name, (BNode, URIRef))):
            raise AttributeError("cannot rename to %s" % name)
        for s, p, o in db.triples((self.resUri, None, None)):
            db.remove((s, p, o))
            db.add((name, p, o))
        for s, p, o in db.triples((None, None, self.resUri)):
            db.set((s, p, name))
        self.resUri = name

    def _ppo(self, db=None):
        """Like pretty print...
        Print a 'pretty predicate,object' listing of self,
        i.e. all predicate object pairs with qnames"""
        db = db or self.db
        for p, o in db.predicate_objects(self.resUri):
            # single-argument print(...) behaves the same on Python 2 and 3
            print("%20s = %s" % (db.qname(p), str(o)))
        print(" ")

    def md5_term_hash(self):
        """Not sure what good this method is but it's defined for
        rdflib.Identifiers so it's here for now"""
        return self.resUri.md5_term_hash()
コード例 #3
0
ファイル: rdfSubject.py プロジェクト: olberger/RDFAlchemy
class rdfSubject(object):
    """Object view of a single RDF node whose triples live in ``db``.

    Each instance wraps a node identifier (``resUri``) and reads and writes
    triples about it through ``db``.  The keyword based methods (``get_by``,
    ``filter_by``, ``_set_with_dict``) look class attributes up through
    ``_getdescriptor`` and use their ``pred`` attribute / ``__set__`` method.
    """
    db = ConjunctiveGraph()
    """Default graph for access to instances of this type"""
    rdf_type = None
    """rdf:type of instances of this class"""

    def __init__(self, resUri=None, **kwargs):
        """The constructor tries hard to return you an rdfSubject

        :param resUri: the "resource uri". If `None` (or otherwise falsy)
        then create an instance with a BNode resUri. Can be given as one of:

           * an instance of an rdfSubject
           * an instance of a BNode or a URIRef
           * an n3 uriref string like: "<urn:isbn:1234567890>"
           * an n3 bnode string like: "_:xyz1234"
        :param kwargs: is a set of values that will be set using the keys to
        find the appropriate descriptor
        :raises AttributeError: if `resUri` is of an unsupported type or is a
        string in neither of the two n3 forms above"""

        if not resUri:  # create a bnode
            self.resUri = BNode()
            if self.rdf_type:
                self.db.add((self.resUri, RDF.type, self.rdf_type))

        # NOTE: this test must come before the plain string test below,
        # it is checked first so that identifiers are never parsed as n3.
        elif isinstance(resUri, (BNode, URIRef)):  # use the identifier passed
            self.resUri = resUri
            # only assert the type if it is not already in the graph
            if self.rdf_type \
                and not list(self.db.triples(
                    (self.resUri, RDF.type, self.rdf_type))):
                self.db.add((self.resUri, RDF.type, self.rdf_type))

        elif isinstance(resUri, rdfSubject):  # use the resUri of the subject
            self.resUri = resUri.resUri
            self.db = resUri.db

        elif isinstance(resUri, (str, unicode)):  # n3 <uri> or _:bnode string
            if resUri[0] == "<" and resUri[-1] == ">":
                self.resUri = URIRef(resUri[1:-1])
            elif resUri.startswith("_:"):
                self.resUri = BNode(resUri[2:])
            else:
                # Previously this case fell through with self.resUri unset,
                # producing an unusable instance; fail like other bad input.
                raise AttributeError("cannot construct rdfSubject from %s" % (
                    str(resUri)))

            if self.rdf_type:
                self.db.add((self.resUri, RDF.type, self.rdf_type))

        else:
            raise AttributeError("cannot construct rdfSubject from %s" % (
                str(resUri)))

        if kwargs:
            self._set_with_dict(kwargs)

    def n3(self):
        """n3 repr of this node"""
        return self.resUri.n3()

    @classmethod
    def _getdescriptor(cls, key):
        """Return the descriptor stored under `key`.

        It is essentially cls.__dict__[key], searched along the class mro.

        :raises AttributeError: if no class in the mro defines `key`"""
        # NOT SURE if mro is the way to do this or if we should call
        # super() or bases?
        for kls in cls.mro():
            if key in kls.__dict__:
                return kls.__dict__[key]
        raise AttributeError(
            "descriptor %s not found for class %s" % (key, cls))

    # short term hack.  Need to go to a sqlalchemy 0.4 style query method
    # obj.query.get_by should map to obj.get_by  ..same for fetch_by
    @classmethod
    def query(cls):
        """Return the class itself (see the hack note above)."""
        return cls

    @classmethod
    def get_by(cls, **kwargs):
        """Class Method, returns a single instance of the class
        by a single kwarg.  the keyword must be a descriptor of the
        class.
        example:

        .. code-block:: python

            bigBlue = Company.get_by(symbol='IBM')

        :Note:
            the keyword should map to an rdf predicate
            that is of type owl:InverseFunctional

        :raises ValueError: if not exactly one keyword is given
        :raises LookupError: if no subject has that value"""
        if len(kwargs) != 1:
            # Adjacent literals are joined before "%" is applied.  The old
            # '"..." + "..." % n' applied "%" to the second literal only and
            # raised TypeError instead of this ValueError.
            raise ValueError(
                "get_by wanted exactly 1 but got  %i args\n"
                "Maybe you wanted filter_by" % (len(kwargs)))
        key, value = next(iter(kwargs.items()))
        if isinstance(value, (URIRef, BNode, Literal)):
            o = value
        else:
            o = Literal(value)
        pred = cls._getdescriptor(key).pred
        uri = cls.db.value(None, pred, o)
        if uri:
            return cls(uri)
        else:
            raise LookupError("%s = %s not found" % (key, value))

    @classmethod
    def filter_by(cls, **kwargs):
        """Class method returns a generator over class instances
        meeting the kwargs conditions.

        Each keyword must be a class descriptor

        filter by RDF.type == cls.rdf_type is implicit

        Order helps, the first keyword should be the most restrictive
        """
        filters = []
        for key, value in kwargs.items():
            pred = cls._getdescriptor(key).pred
            # try to make the value be OK for the triple query as an object
            if isinstance(value, Identifier):
                obj = value
            else:
                obj = Literal(value)
            filters.append((pred, obj))
        # make sure we filter by type
        if (RDF.type, cls.rdf_type) not in filters:
            filters.append((RDF.type, cls.rdf_type))
        # the first filter produces the candidates, the rest prune them
        first_pred, first_obj = filters[0]
        log.debug("Checking %s, %s" % (first_pred, first_obj))
        for sub in cls.db.subjects(first_pred, first_obj):
            log.debug("maybe %s" % sub)
            for pred, obj in filters[1:]:
                log.debug("Checking %s, %s" % (pred, obj))
                try:
                    # only existence matters, so fetch at most one triple
                    next(cls.db.triples((sub, pred, obj)))
                except StopIteration:
                    log.warn("No %s" % sub)
                    break
            else:
                # no filter rejected this subject
                yield cls(sub)

    @classmethod
    def ClassInstances(cls):
        """return a generator for instances of this rdf:type
        you can look in MyClass.rdf_type to see the type being used"""
        beenthere = set()
        for i in cls.db.subjects(RDF.type, cls.rdf_type):
            if i not in beenthere:
                yield cls(i)
                beenthere.add(i)

    @classmethod
    def GetRandom(cls):
        """for development just returns a random instance of this class"""
        from random import choice
        xii = list(cls.ClassInstances())
        return choice(xii)

    def __hash__(self):
        return hash("ranD0Mi$h_" + self.n3())

    def __cmp__(self, other):
        """Order instances by their n3 form; never equal to None."""
        if other is None:
            # A __cmp__ result of 0 means "equal".  The previous
            # "return False" therefore made every instance == None.
            return 1
        return cmp(self.n3(), other.n3())

    def __repr__(self):
        return """%s('%s')""" % (
            self.__class__.__name__, self.n3().encode('utf-8'))

    if rdflibversion.startswith('2'):
        def __str__(self):
            return str(self.resUri)

    def __getitem__(self, pred):
        """Return the value of `pred` for this node.

        Literals are converted with toPython(), BNode/URIRef values are
        wrapped in an rdfSubject, anything else is returned unchanged."""
        log.debug("Getting with __getitem__ %s for %s" % (pred, self.n3()))
        val = self.db.value(self.resUri, pred)
        if isinstance(val, Literal):
            val = val.toPython()
        elif isinstance(val, (BNode, URIRef)):
            val = rdfSubject(val)
        return val

    def __delitem__(self, pred):
        """Remove every (self, pred, *) triple, cascading into bnodes."""
        log.debug("Deleting with __delitem__ %s for %s" % (pred, self))
        for s, p, o in self.db.triples((self.resUri, pred, None)):
            self.db.remove((s, p, o))
            # finally if the object in the triple was a bnode
            # cascade delete the thing it referenced
            # ?? FIXME Do we really want to cascade if it's an rdfSubject??
            if isinstance(o, (BNode, rdfSubject)):
                rdfSubject(o)._remove(db=self.db, cascade='bnode')

    def _set_with_dict(self, kv):
        """
        :param kv: a dict

          for each key,value pair in dict kv
               set self.key = value

        """
        for key, value in kv.items():
            descriptor = self.__class__._getdescriptor(key)
            descriptor.__set__(self, value)

    def _remove(
            self, db=None, cascade='bnode',
            bnodeCheck=True, objectCascade=False):
        """
        Remove all triples where this rdfSubject is the subject of the triple

        :param db: limit the remove operation to this graph
        :param cascade: must be one of:

            * none --  remove none
            * bnode -- (default) remove all unreferenced bnodes
            * all -- remove all unreferenced bnode(s) AND uri(s)

        :param bnodeCheck: boolean

            * True -- (default) check bnodes and raise exception if there are
              still references to this node
            * False --  do not check.  This can leave orphaned object reference
              in triples.  Use only if you are resetting the value in
              the same transaction
        :param objectCascade: boolean
            * False -- (default) do nothing
            * True -- delete also all triples where this refSubject is the
            object of the triple.
        :raises RDFAlchemyError: if bnodeCheck is set and this BNode is
            still the object of some triple
        :raises AttributeError: if cascade is not one of the values above
        """
        noderef = self.resUri
        log.debug("Called remove on %s" % self)
        if not db:
            db = self.db

        # we cannot delete a bnode if it is still referenced,
        # i.e. if it is the o of a s,p,o
        if bnodeCheck and isinstance(noderef, BNode):
            for s, p, o in db.triples((None, None, noderef)):
                raise RDFAlchemyError(
                    "Cannot delete BNode %s because %s still references it" % (
                        noderef.n3(), s.n3()))

        # determine an appropriate test for cascade decisions
        if cascade == 'bnode':
            # we cannot delete a bnode if there are still references to it
            def test(node):
                if isinstance(node, (URIRef, Literal)):
                    return False
                # any triple at all means the node is still referenced
                for s, p, o in db.triples((None, None, node)):
                    return False
                return True
        elif cascade == 'none':

            def test(node):
                return False
        elif cascade == 'all':

            def test(node):
                if isinstance(node, Literal):
                    return False
                # any triple at all means the node is still referenced
                for s, p, o in db.triples((None, None, node)):
                    return False
                return True
        else:
            raise AttributeError("unknown cascade argument")

        for s, p, o in db.triples((noderef, None, None)):
            db.remove((s, p, o))
            if test(o):
                rdfSubject(o)._remove(db=db, cascade=cascade)

        if objectCascade:
            for s, p, o in db.triples((None, None, noderef)):
                db.remove((s, p, o))

    def _rename(self, name, db=None):
        """rename a node

        Moves the triples having this node as subject over to `name` and
        re-points triples having it as object, then adopts `name`.

        :raises AttributeError: if `name` is not a BNode or URIRef"""
        if not db:
            db = self.db
        if not (isinstance(name, (BNode, URIRef))):
            raise AttributeError("cannot rename to %s" % name)
        for s, p, o in db.triples((self.resUri, None, None)):
            db.remove((s, p, o))
            db.add((name, p, o))
        for s, p, o in db.triples((None, None, self.resUri)):
            db.set((s, p, name))
        self.resUri = name

    def _ppo(self, db=None):
        """Like pretty print...
        Print a 'pretty predicate,object' listing of self,
        i.e. all predicate object pairs with qnames"""
        db = db or self.db
        for p, o in db.predicate_objects(self.resUri):
            # single-argument print(...) behaves the same on Python 2 and 3
            print("%20s = %s" % (db.qname(p), str(o)))
        print(" ")

    def md5_term_hash(self):
        """Not sure what good this method is but it's defined for
        rdflib.Identifiers so it's here for now"""
        return self.resUri.md5_term_hash()
コード例 #4
0
 def bnode(self, data: str = "") -> str:
     """Return the n3 form of a newly created blank node.

     ``data`` is accepted but not used.
     """
     return BNode().n3()
コード例 #5
0
def graph_from_opendatasoft(g, dataset_dict, portal_url):
    """Add a DCAT description of one OpenDataSoft dataset to graph ``g``.

    Reads ``datasetid``, ``metas``, ``has_records``, ``features`` and
    ``attachments`` from ``dataset_dict`` and adds triples for the dataset,
    its publisher, its export distributions and its attachments.

    :param g: graph receiving the triples (``bind`` and ``add`` are used)
    :param dataset_dict: dataset record; ``datasetid`` and ``metas`` are
        required, each attachment is read as a dict
    :param portal_url: base URL of the portal, a trailing "/" is ignored
    :return: the URIRef of the dataset
    """
    # available: title, description, language, theme, keyword, license, publisher, references
    # additional: created, issued, creator, contributor, accrual periodicity, spatial, temporal, granularity, data quality

    def hashed_bnode(id_string):
        # Blank node labelled with the SHA-1 of id_string, so the same
        # input always produces the same node.
        return BNode(hashlib.sha1(id_string.encode('utf-8')).hexdigest())

    def add_license(distribution_node, license_label):
        # BNode: distribution + license
        license_node = hashed_bnode(distribution_node.n3() + license_label)
        g.add((distribution_node, DCT.license, license_node))
        g.add((license_node, RDF.type, DCT.LicenseDocument))
        g.add((license_node, RDFS.label, Literal(license_label)))

    identifier = dataset_dict['datasetid']
    base_url = portal_url.rstrip('/')
    uri = '{0}/explore/dataset/{1}'.format(base_url, identifier)

    # dataset subject
    dataset_ref = URIRef(uri)
    for prefix, namespace in namespaces.iteritems():
        g.bind(prefix, namespace)

    g.add((dataset_ref, RDF.type, DCAT.Dataset))

    # identifier
    g.add((dataset_ref, DCT.identifier, Literal(identifier)))
    data = dataset_dict['metas']
    # Basic fields
    items = [
        ('title', DCT.title, None),
        ('description', DCT.description, None),
    ]
    _add_triples_from_dict(g, data, dataset_ref, items)

    #  Lists
    items = [
        ('language', DCT.language, None),
        ('theme', DCAT.theme, None),
        ('keyword', DCAT.keyword, None),
    ]
    _add_list_triples_from_dict(g, data, dataset_ref, items)

    # publisher
    publisher_name = data.get('publisher')
    if publisher_name:
        # BNode: dataset_ref + DCT.publisher + publisher_name
        # (encoded to UTF-8 before hashing, like every other hash here)
        publisher_details = hashed_bnode(
            dataset_ref.n3() + DCT.publisher.n3() + publisher_name)

        g.add((publisher_details, RDF.type, FOAF.Organization))
        g.add((dataset_ref, DCT.publisher, publisher_details))
        g.add((publisher_details, FOAF.name, Literal(publisher_name)))
        # TODO any additional publisher information available? look for fields

    # Dates
    items = [
        #('metadata_processed', DCT.issued, ['metadata_created']),
        ('modified', DCT.modified, ['metadata_processed', 'metadata_modified']),
    ]
    _add_date_triples_from_dict(g, data, dataset_ref, items)

    # references
    references = data.get('references')
    if references and isinstance(references, basestring) and bool(urlparse.urlparse(references).netloc):
        references = references.strip()
        if is_valid_uri(references):
            g.add((dataset_ref, RDFS.seeAlso, URIRef(references)))
        else:
            g.add((dataset_ref, RDFS.seeAlso, Literal(references)))

    # store licenses for distributions
    license_label = data.get('license')

    # distributions
    if dataset_dict.get('has_records'):
        exports = [('csv', 'text/csv'), ('json', 'application/json'), ('xls', 'application/vnd.ms-excel')]
        if 'geo' in dataset_dict.get('features', []):
            exports.append(('geojson', 'application/vnd.geo+json'))
            exports.append(('kml', 'application/vnd.google-earth.kml+xml'))
            # TODO shape files?
            # exports.append(('shp', 'application/octet-stream'))
        for file_format, mimetype in exports:
            # URL
            url = base_url + '/api/records/1.0/download?dataset=' + identifier + '&format=' + file_format

            # BNode: dataset_ref + url
            distribution = hashed_bnode(dataset_ref.n3() + url)

            g.add((dataset_ref, DCAT.distribution, distribution))
            g.add((distribution, RDF.type, DCAT.Distribution))

            if is_valid_uri(url):
                g.add((distribution, DCAT.accessURL, URIRef(url)))
            else:
                g.add((distribution, DCAT.accessURL, Literal(url)))

            # License
            if license_label:
                add_license(distribution, license_label)

            # Format
            # BNode: distribution + format + mimetype
            format_node = hashed_bnode(distribution.n3() + file_format + mimetype)

            g.add((distribution, DCT['format'], format_node))
            g.add((format_node, RDF.type, DCT.MediaTypeOrExtent))
            g.add((format_node, RDFS.label, Literal(file_format)))
            g.add((format_node, RDF.value, Literal(mimetype)))
            g.add((distribution, DCAT.mediaType, Literal(mimetype)))

            # Dates
            items = [
                #('issued', DCT.issued, None),
                ('data_processed', DCT.modified, None),
            ]
            _add_date_triples_from_dict(g, data, distribution, items)

    # attachments
    for attachment in dataset_dict.get('attachments', []):
        # URL (only known when the attachment carries an id)
        attachment_id = attachment.get('id')
        url = None
        if attachment_id:
            url = base_url + '/api/datasets/1.0/' + identifier + '/attachments/' + attachment_id

        # BNode: dataset_ref + url.  The attachment is a dict, so it cannot
        # be concatenated directly (that raised TypeError); without an id,
        # fall back to a stable rendering of its contents.
        attachment_key = url or repr(sorted(attachment.items()))
        distribution = hashed_bnode(dataset_ref.n3() + attachment_key)

        g.add((dataset_ref, DCAT.distribution, distribution))
        g.add((distribution, RDF.type, DCAT.Distribution))
        if license_label:
            add_license(distribution, license_label)

        #  Simple values
        items = [
            ('title', DCT.title, None),
            # mediaType / accessURL are DCAT terms, as used for the
            # export distributions above
            ('mimetype', DCAT.mediaType, None),
            ('format', DCT['format'], None),
        ]
        _add_triples_from_dict(g, attachment, distribution, items)

        if url:
            if is_valid_uri(url):
                g.add((distribution, DCAT.accessURL, URIRef(url)))
            else:
                g.add((distribution, DCAT.accessURL, Literal(url)))
    return dataset_ref
コード例 #6
0
def convert_socrata(g, data, portal_url):
    """Add a DCAT description of one Socrata dataset to graph ``g``.

    Conversion is best effort: if anything fails part-way, the error is
    logged and whatever was added to ``g`` so far is kept.

    :param g: graph receiving the triples
    :param data: dataset record; anything that is not a dict is ignored
    :param portal_url: base URL of the portal, a trailing "/" is ignored
    :return: the URIRef of the dataset, or None if ``data`` is not a dict
        or the failure happened before the URIRef was created
    """
    import logging

    def hashed_bnode(id_string):
        # Blank node labelled with the SHA-1 of id_string, so the same
        # input always produces the same node.
        return BNode(hashlib.sha1(id_string.encode('utf-8')).hexdigest())

    dataset_ref = None
    if not isinstance(data, dict):
        return dataset_ref

    try:
        identifier = data['id']
        base_url = portal_url.rstrip('/')
        uri = '{0}/dataset/{1}'.format(base_url, identifier)
        dataset_ref = URIRef(uri)
        g.add((dataset_ref, RDF.type, DCAT.Dataset))

        # identifier
        g.add((dataset_ref, DCT.identifier, Literal(identifier)))
        # Basic fields
        items = [
            ('name', DCT.title, None),
            ('description', DCT.description, None),
            ('frequency', DCT.accrualPeriodicity, None),
            ('webUri', DCAT.landingPage, None),
        ]
        _add_triples_from_dict(g, data, dataset_ref, items)

        # Dates
        if isinstance(data.get('createdAt'), int):
            # dates are integers
            created = data.get('createdAt')
            if created:
                g.add(
                    (dataset_ref, DCT.issued,
                     Literal(datetime.datetime.utcfromtimestamp(created))))
            updated = data.get('metadataUpdatedAt')
            if not updated:
                updated = data.get('updatedAt')
            if updated:
                g.add(
                    (dataset_ref, DCT.modified,
                     Literal(datetime.datetime.utcfromtimestamp(updated))))
        else:
            # dates are strings; createdAt is the issue date, exactly as in
            # the integer branch above (it used to be added as DCT.modified)
            items = [('createdAt', DCT.issued, None),
                     ('metadataUpdatedAt', DCT.modified, ['updatedAt'])]
            _add_date_triples_from_dict(g, data, dataset_ref, items)

        license_label = data.get('license')

        #  Lists
        items = [
            ('tags', DCAT.keyword, None),
        ]
        _add_list_triples_from_dict(g, data, dataset_ref, items)

        # owner
        owner_info = data.get('owner')
        if isinstance(owner_info, dict) and 'displayName' in owner_info:
            owner = owner_info['displayName']
            # add owner as publisher
            # BNode: dataset_ref + DCT.publisher + owner
            publisher_details = hashed_bnode(
                dataset_ref.n3() + DCT.publisher.n3() + owner)

            g.add((publisher_details, RDF.type, FOAF.Organization))
            g.add((dataset_ref, DCT.publisher, publisher_details))
            g.add((publisher_details, FOAF.name, Literal(owner)))
        # author
        author_info = data.get('tableAuthor')
        if isinstance(author_info, dict) and 'displayName' in author_info:
            author = author_info['displayName']
            # BNode: dataset_ref + VCARD.fn + author
            contact_details = hashed_bnode(
                dataset_ref.n3() + VCARD.fn.n3() + author)

            g.add((contact_details, RDF.type, VCARD.Organization))
            g.add((dataset_ref, DCAT.contactPoint, contact_details))
            g.add((contact_details, VCARD.fn, Literal(author)))
        # publisher
        if 'attribution' in data and data['attribution']:
            publisher = data['attribution']

            publisher_details = get_valid_uri(
                data.get('attributionLink'),
                dataset_ref.n3() + DCT.publisher.n3() + publisher)

            g.add((publisher_details, RDF.type, FOAF.Organization))
            g.add((dataset_ref, DCT.publisher, publisher_details))
            g.add((publisher_details, FOAF.name, Literal(publisher)))

        # distributions
        # the fallback is never empty, so an endpoint is always available
        distribution_endpoint = data.get('dataUri')
        if not distribution_endpoint:
            distribution_endpoint = '{0}/resource/{1}'.format(
                base_url, identifier)

        exports = [('csv', 'text/csv'), ('json', 'application/json'),
                   ('xml', 'text/xml')]
        for file_format, mimetype in exports:
            # URL
            url = distribution_endpoint + '.' + file_format

            # BNode: dataset_ref + url
            distribution = hashed_bnode(dataset_ref.n3() + url)

            g.add((dataset_ref, DCAT.distribution, distribution))
            g.add((distribution, RDF.type, DCAT.Distribution))

            if is_valid_uri(url):
                g.add((distribution, DCAT.accessURL, URIRef(url)))
            else:
                g.add((distribution, DCAT.accessURL, Literal(url)))

            # License
            if license_label:
                # BNode: distribution + license
                license_node = hashed_bnode(distribution.n3() + license_label)

                g.add((distribution, DCT.license, license_node))
                g.add((license_node, RDF.type, DCT.LicenseDocument))
                g.add((license_node, RDFS.label, Literal(license_label)))

            # Format
            # BNode: distribution + format + mimetype
            format_node = hashed_bnode(
                distribution.n3() + file_format + mimetype)

            g.add((distribution, DCT['format'], format_node))
            g.add((format_node, RDF.type, DCT.MediaTypeOrExtent))
            g.add((format_node, RDFS.label, Literal(file_format)))
            g.add((format_node, RDF.value, Literal(mimetype)))
            g.add((distribution, DCAT.mediaType, Literal(mimetype)))

            # Dates
            items = [('dataUpdatedAt', DCT.modified, None)]
            _add_date_triples_from_dict(g, data, distribution, items)
    except Exception:
        # Still best effort (the caller gets whatever was built), but the
        # failure is recorded instead of being silently discarded.
        logging.getLogger(__name__).exception(
            "convert_socrata: conversion failed for portal %s", portal_url)
    return dataset_ref