Ejemplo n.º 1
0
def add_metadata_for_subject(rdf_graph, subject_uri, namespaces, nidm_obj):
    """
    Cycles through triples for a particular subject and adds them to the nidm_obj

    Triples whose predicate is prov:qualifiedAssociation are handled
    specially: every prov:wasAssociatedWith agent of the subject is added as
    a person (and its own metadata is added recursively), then every
    prov:hadRole found on the association nodes is added as a qualified
    association.  Any other triple is added as a plain attribute, either as
    a qualified name (object is a URL in a known namespace) or as a literal.

    :param rdf_graph: RDF graph object
    :param subject_uri: URI of subject to query for additional metadata
    :param namespaces: Namespaces in NIDM document (iterable of objects with
        a ``uri`` attribute, usable as the namespace of a pm.QualifiedName)
    :param nidm_obj: NIDM object to add metadata
    :return: None

    """
    # Cycle through remaining metadata and add attributes
    for predicate, objects in rdf_graph.predicate_objects(subject=subject_uri):
        if predicate == URIRef(Constants.PROV['qualifiedAssociation']):
            # NOTE(review): this branch runs once per qualifiedAssociation
            # triple but re-queries *all* agents and *all* association nodes
            # of the subject each time, so with several such triples the
            # same people/roles are added repeatedly -- confirm intended.

            # Reset for every association so the role step below never
            # reads an unbound name when the subject has no agent.
            person = None

            # get associated prov:Agent uris, add person information to graph
            for agent in rdf_graph.objects(
                    subject=subject_uri,
                    predicate=Constants.PROV['wasAssociatedWith']):
                # add person to graph and also add all metadata
                person = nidm_obj.add_person(uuid=agent)
                add_metadata_for_subject(rdf_graph=rdf_graph,
                                         subject_uri=agent,
                                         namespaces=namespaces,
                                         nidm_obj=person)

            if person is None:
                # No prov:wasAssociatedWith agent: there is nobody to attach
                # a role to (previously this raised UnboundLocalError).
                continue

            # get role information; roles are attached to the last agent
            # added above
            for bnode in rdf_graph.objects(
                    subject=subject_uri,
                    predicate=Constants.PROV['qualifiedAssociation']):
                # temporary resource so the association node can be queried
                association = Resource(rdf_graph, bnode)
                for r_obj in association.objects(
                        predicate=Constants.PROV['hadRole']):
                    # split role URI into namespace + local term so it can
                    # be expressed as a qualified name
                    obj_nm, obj_term = split_uri(r_obj.identifier)
                    for uris in namespaces:
                        if uris.uri == URIRef(obj_nm):
                            nidm_obj.add_qualified_association(
                                person=person,
                                role=pm.QualifiedName(uris, obj_term))

        elif validators.url(objects):
            # object is a URL: store it as a qualified name when its
            # namespace is one of the document namespaces (otherwise the
            # triple is not added at all)
            obj_nm, obj_term = split_uri(objects)
            for uris in namespaces:
                if uris.uri == URIRef(obj_nm):
                    nidm_obj.add_attributes(
                        {predicate: pm.QualifiedName(uris, obj_term)})
        else:
            # anything else is stored as a typed literal
            nidm_obj.add_attributes(
                {predicate: get_RDFliteral_type(objects)})
Ejemplo n.º 2
0
    def project_uris_by_title(self, user_graph, user_uri):
        """Group the projects aggregated by a user under their titles.

        Queries ``user_graph`` for every project that ``user_uri``
        ``ore:aggregates`` together with its optional ``dc:title``.  For
        each project the ``dc:title`` values stored in the project's own
        graph are preferred; the title from the user graph is used only
        when the project graph has none.

        Returns a ``defaultdict(list)`` mapping each title (converted with
        ``unicode``) to the list of project URIs carrying that title.
        """
        sparql = """
                SELECT DISTINCT ?project ?title WHERE {
                    ?user ore:aggregates ?project .
                    OPTIONAL {?project dc:title ?title .}
                }
            """
        grouped = defaultdict(list)
        bind_namespaces(user_graph)

        rows = user_graph.query(sparql,
                                initNs=ns,
                                initBindings={'user': URIRef(user_uri)})
        for row in rows:
            project = row[0]

            # Open the project's own named graph and read its titles.
            graph_id = uris.uri('semantic_store_projects', uri=project)
            own_graph = Graph(store=rdfstore(), identifier=graph_id)
            resource = Resource(own_graph, URIRef(project))
            own_titles = list(resource.objects(predicate=NS.dc['title']))

            if not own_titles and row[1]:
                # Fallback: only the user graph knows a title for it.
                grouped[unicode(row[1])].append(project)
                continue

            # Preferred: every title recorded in the project graph.
            for own_title in own_titles:
                grouped[unicode(own_title)].append(project)

        return grouped
Ejemplo n.º 3
0
    def find_manifestation(self, cellarid, celexid):
        """Select the preferred language version and manifestation of a work.

        Loads the tree notice graph for the given cellar id through
        ``self.get_treenotice_graph``, collects every expression whose
        language appears in ``self.config.languages`` and then walks those
        languages in configured order.  The first language that has an item
        of a known manifestation type (preference order fmx4, xhtml, html,
        pdf, pdfa1a) wins.

        :param cellarid: identifier inserted into the cellar resource URL
        :param celexid: identifier handed to ``get_treenotice_graph`` and
            ``dump_graph`` and used as prefix in log messages
        :return: tuple ``(lang, manifestationtype, mimetype, item_uri)``;
            all four are ``None`` when ``get_treenotice_graph`` returns
            ``None``, when no expression is in a configured language, or
            when no language has a usable manifestation
        """
        cellarurl = "http://publications.europa.eu/resource/cellar/%s?language=%s" % (cellarid, self.languages[0])
        graph = self.get_treenotice_graph(cellarurl, celexid)
        if graph is None:
            return None, None, None, None

        # find the root URI -- it might be on the form
        # "http://publications.europa.eu/resource/celex/%s", but can
        # also take other forms (at least for legislation)
        # At the same time, find all expressions of this work (ie language versions).
        CDM = Namespace("http://publications.europa.eu/ontology/cdm#")
        CMR = Namespace("http://publications.europa.eu/ontology/cdm/cmr#")
        root = None
        candidateexpressions = {}
        for expression, work in graph.subject_objects(CDM.expression_belongs_to_work):
            # all expressions in one notice are expected to share one work
            assert root is None or work == root
            root = work
            expression = Resource(graph, expression)
            lang = expression.value(CDM.expression_uses_language)
            if lang is None:
                # an expression without a language triple can never match
                # a configured language; skip it instead of crashing
                continue
            # language is the lowercased last path segment of the URI
            lang = str(lang.identifier).rsplit("/", 1)[1].lower()
            if lang in self.config.languages:
                candidateexpressions[lang] = expression

        if not candidateexpressions:
            self.log.warning("%s: Found no suitable languages" % celexid)
            self.dump_graph(celexid, graph)
            return None, None, None, None

        for lang in self.config.languages:
            if lang not in candidateexpressions:
                continue
            expression = candidateexpressions[lang]
            # manifestation type -> item resource for this language
            candidateitem = {}
            for manifestation in expression.objects(CDM.expression_manifested_by_manifestation):
                manifestationtype = str(manifestation.value(CDM.type))
                # there might be multiple equivalent
                # manifestations, eg
                # ...celex/62001CJ0101.SWE.fmx4,
                # ...ecli/ECLI%3AEU%3AC%3A2003%3A596.SWE.fmx4 and
                # ...cellar/bcc476ae-43f8-4668-8404-09fad89c202a.0011.01. Try
                # to find out if that is the case, and get the "root" manifestation
                rootmanifestations = list(manifestation.subjects(OWL.sameAs))
                if rootmanifestations:
                    manifestation = rootmanifestations[0]
                items = list(manifestation.subjects(CDM.item_belongs_to_manifestation))
                if len(items) == 1:
                    candidateitem[manifestationtype] = items[0]
                elif len(items) == 2:
                    # NOTE: for at least 32016L0680, there can be
                    # two items of the fmx4 manifestation, where
                    # one (DOC_1) is bad (eg only a reference to
                    # the pdf file) and the other (DOC_2) is
                    # good. The heuristic for choosing the good
                    # one: if the owl:sameAs property ends in .xml
                    # but not .doc.xml...
                    for item in items:
                        # this picks a random object if there are
                        # two or more owl:sameAs triples, but the
                        # heuristic seems to work with all
                        # owl:sameAs objects
                        sameas = item.value(OWL.sameAs)
                        if sameas is None:
                            # no owl:sameAs to apply the heuristic to
                            continue
                        sameas = str(sameas.identifier)
                        if sameas.endswith(".xml") and not sameas.endswith(".doc.xml"):
                            candidateitem[manifestationtype] = item
                            break

            # walk the manifestation types in order of preference
            for t in ("fmx4", "xhtml", "html", "pdf", "pdfa1a"):
                if t in candidateitem:
                    item = candidateitem[t]
                    mimetype = str(item.value(CMR.manifestationMimeType))
                    self.log.info("%s: Has manifestation %s (%s) in language %s" % (celexid, t,mimetype, lang))
                    # we might need this even outside of
                    # debugging (eg when downloading
                    # eurlexcaselaw, the main document lacks
                    # keywords, classifications, instruments
                    # cited etc.
                    self.dump_graph(celexid, graph)
                    return lang, t, mimetype, str(item.identifier)

            # Only reached when nothing was returned for this language,
            # i.e. it had no item of any preferred manifestation type.
            # (This warning used to sit in an unreachable branch.)
            self.log.warning("%s: Language %s had no suitable manifestations" %
                             (celexid, lang))

        self.log.warning("%s: No language (tried %s) had any suitable manifestations" % (celexid, ", ".join(candidateexpressions.keys())))
        self.dump_graph(celexid, graph)
        return None, None, None, None