def add_metadata_for_subject(rdf_graph, subject_uri, namespaces, nidm_obj):
    """
    Cycles through triples for a particular subject and adds them to the nidm_obj

    For a prov:qualifiedAssociation triple, every agent reachable from the
    subject through prov:wasAssociatedWith is added as a person (its own
    metadata is copied by a recursive call), and every prov:hadRole found on
    the subject's association nodes is recorded with
    nidm_obj.add_qualified_association. Roles are attached to the last agent
    that was added. If the subject has no prov:wasAssociatedWith agent, no
    role is recorded.

    Any other triple is copied with nidm_obj.add_attributes: an object that
    validators.url accepts is turned into a qualified name for each entry of
    ``namespaces`` whose uri matches (nothing is added when none matches);
    every other object is passed through get_RDFliteral_type.

    :param rdf_graph: RDF graph object
    :param subject_uri: URI of subject to query for additional metadata
    :param namespaces: Namespaces in NIDM document
    :param nidm_obj: NIDM object to add metadata
    :return: None
    """
    # Cycle through remaining metadata and add attributes
    for predicate, obj in rdf_graph.predicate_objects(subject=subject_uri):
        if predicate == URIRef(Constants.PROV['qualifiedAssociation']):
            # Need the associated prov:Agent URIs: add each person to the
            # graph and copy that person's own metadata as well.
            person = None
            for agent in rdf_graph.objects(
                    subject=subject_uri,
                    predicate=Constants.PROV['wasAssociatedWith']):
                person = nidm_obj.add_person(uuid=agent)
                add_metadata_for_subject(rdf_graph=rdf_graph,
                                         subject_uri=agent,
                                         namespaces=namespaces,
                                         nidm_obj=person)

            if person is None:
                # No agent was found, so there is nobody to attach a role
                # to; without this guard the role loop below would fail on
                # an unbound ``person``.
                continue

            # Get role information from the association nodes
            for bnode in rdf_graph.objects(
                    subject=subject_uri,
                    predicate=Constants.PROV['qualifiedAssociation']):
                # Temporary resource so the node's own triples can be queried
                association = Resource(rdf_graph, bnode)
                for role in association.objects(
                        predicate=Constants.PROV['hadRole']):
                    # Create qualified names for the role
                    role_ns, role_term = split_uri(role.identifier)
                    for namespace in namespaces:
                        if namespace.uri == URIRef(role_ns):
                            # Create qualified association in graph
                            nidm_obj.add_qualified_association(
                                person=person,
                                role=pm.QualifiedName(namespace, role_term))
        elif validators.url(obj):
            # Create qualified names for URL-valued objects
            obj_ns, obj_term = split_uri(obj)
            for namespace in namespaces:
                if namespace.uri == URIRef(obj_ns):
                    nidm_obj.add_attributes(
                        {predicate: pm.QualifiedName(namespace, obj_term)})
        else:
            nidm_obj.add_attributes(
                {predicate: get_RDFliteral_type(obj)})
def project_uris_by_title(self, user_graph, user_uri):
    """Group the projects aggregated by a user under their titles.

    Calls bind_namespaces on ``user_graph``, then queries it for every
    project that ``user_uri`` aggregates (ore:aggregates) together with an
    optional dc:title. For each result row a graph identified by
    uris.uri('semantic_store_projects', uri=<project>) is opened on
    rdfstore() and its dc:title values for the project are read.

    Titles from the project graph are preferred; the title from the user
    graph is used only when the project graph has none.

    :param user_graph: graph to query for the user's projects
    :param user_uri: URI of the user, bound to ?user in the query
    :return: defaultdict(list) mapping unicode title -> list of project URIs
    """
    sparql = """ SELECT DISTINCT ?project ?title WHERE { ?user ore:aggregates ?project . OPTIONAL {?project dc:title ?title .} } """

    titled_projects = defaultdict(list)
    bind_namespaces(user_graph)

    results = user_graph.query(sparql,
                               initNs=ns,
                               initBindings={'user': URIRef(user_uri)})
    for row in results:
        project = row[0]
        graph_id = uris.uri('semantic_store_projects', uri=project)
        resource = Resource(Graph(store=rdfstore(), identifier=graph_id),
                            URIRef(project))
        graph_titles = list(resource.objects(predicate=NS.dc['title']))

        if graph_titles:
            # Use the project graph's title triples (preferred)
            for graph_title in graph_titles:
                titled_projects[unicode(graph_title)].append(project)
        elif row[1]:
            # The project graph doesn't have a title triple, but the user
            # graph does, so use that
            titled_projects[unicode(row[1])].append(project)

    return titled_projects
def find_manifestation(self, cellarid, celexid):
    """Pick the preferred item (language + format) for a document.

    Fetches the tree notice graph for ``cellarid`` through
    self.get_treenotice_graph, collects the expressions (language versions)
    whose language code is listed in self.config.languages and then, trying
    those languages in configured order, returns the first item whose
    manifestation type is one of fmx4, xhtml, html, pdf or pdfa1a (tried in
    that order). The graph is passed to self.dump_graph on every path that
    got as far as having a graph.

    NOTE(review): the URL is built from self.languages[0] while the
    filtering uses self.config.languages -- confirm both are intended.

    :param cellarid: identifier interpolated into the cellar resource URL
    :param celexid: identifier passed to get_treenotice_graph and
        dump_graph, and used as prefix of all log messages
    :return: tuple (lang, manifestationtype, mimetype, item_uri), or four
        Nones when no graph, no suitable language or no suitable
        manifestation was found
    """
    cellarurl = "http://publications.europa.eu/resource/cellar/%s?language=%s" % (cellarid, self.languages[0])
    graph = self.get_treenotice_graph(cellarurl, celexid)
    if graph is None:
        return None, None, None, None

    # find the root URI -- it might be on the form
    # "http://publications.europa.eu/resource/celex/%s", but can
    # also take other forms (at least for legislation)
    # At the same time, find all expressions of this work (ie language versions).
    CDM = Namespace("http://publications.europa.eu/ontology/cdm#")
    CMR = Namespace("http://publications.europa.eu/ontology/cdm/cmr#")
    root = None
    candidateexpressions = {}
    for expression, work in graph.subject_objects(CDM.expression_belongs_to_work):
        # every expression is expected to belong to the same work
        assert root is None or work == root
        root = work
        expression = Resource(graph, expression)
        language = expression.value(CDM.expression_uses_language)
        if language is None:
            # an expression without a language cannot be matched against
            # the configured languages, so skip it instead of failing
            continue
        # the language code is the last path segment of the language URI
        lang = str(language.identifier).rsplit("/", 1)[-1].lower()
        if lang in self.config.languages:
            candidateexpressions[lang] = expression

    if not candidateexpressions:
        self.log.warning("%s: Found no suitable languages" % celexid)
        self.dump_graph(celexid, graph)
        return None, None, None, None

    for lang in self.config.languages:
        if lang not in candidateexpressions:
            continue
        expression = candidateexpressions[lang]
        candidateitem = {}
        # we'd like to order the manifestations in some preference
        # order -- fmx4 > xhtml > html > pdf
        for manifestation in expression.objects(CDM.expression_manifested_by_manifestation):
            manifestationtype = str(manifestation.value(CDM.type))
            # there might be multiple equivalent manifestations, eg
            # ...celex/62001CJ0101.SWE.fmx4,
            # ...ecli/ECLI%3AEU%3AC%3A2003%3A596.SWE.fmx4 and
            # ...cellar/bcc476ae-43f8-4668-8404-09fad89c202a.0011.01.
            # Try to find out if that is the case, and get the "root"
            # manifestation
            rootmanifestations = list(manifestation.subjects(OWL.sameAs))
            if rootmanifestations:
                manifestation = rootmanifestations[0]
            items = list(manifestation.subjects(CDM.item_belongs_to_manifestation))
            if len(items) == 1:
                candidateitem[manifestationtype] = items[0]
            elif len(items) == 2:
                # NOTE: for at least 32016L0680, there can be two items
                # of the fmx4 manifestation, where one (DOC_1) is bad
                # (eg only a reference to the pdf file) and the other
                # (DOC_2) is good. The heuristic for choosing the good
                # one: if the owl:sameAs property ends in .xml but not
                # .doc.xml...
                for item in items:
                    # this picks a random object if there are two or
                    # more owl:sameAs triples, but the heuristic seems
                    # to work with all owl:sameAs objects
                    alias = item.value(OWL.sameAs)
                    if alias is None:
                        # nothing to apply the heuristic to
                        continue
                    sameas = str(alias.identifier)
                    if sameas.endswith(".xml") and not sameas.endswith(".doc.xml"):
                        candidateitem[manifestationtype] = item
                        break

        for t in ("fmx4", "xhtml", "html", "pdf", "pdfa1a"):
            if t in candidateitem:
                item = candidateitem[t]
                mimetype = str(item.value(CMR.manifestationMimeType))
                self.log.info("%s: Has manifestation %s (%s) in language %s" % (celexid, t,mimetype, lang))
                # we might need this even outside of debugging (eg when
                # downloading eurlexcaselaw, the main document lacks
                # keywords, classifications, instruments cited etc.)
                self.dump_graph(celexid, graph)
                return lang, t, mimetype, str(item.identifier)

        if candidateitem:
            # items were found for this language, but none of a
            # preferred type
            self.log.warning("%s: Language %s had no suitable manifestations" % (celexid, lang))

    self.log.warning("%s: No language (tried %s) had any suitable manifestations" % (celexid, ", ".join(candidateexpressions.keys())))
    self.dump_graph(celexid, graph)
    return None, None, None, None