def eval_get_goldstandard(self, basefile):
    """Collect the gold standard relevance judgments for ``basefile``.

    Loads ``/../res/eut/goldstandard.n3`` (resolved against this module's
    directory) and, for every article yielded by
    ``self._articles(basefile)``, gathers the objects of the triples that
    have the article as subject and ``ir:isRelevantFor`` as predicate.
    Each such object is also looked up in the triple store; a warning is
    logged when the lookup comes back empty, but the object is kept in
    the result either way.

    Returns a dict mapping each article to the sorted list of those
    objects, converted to strings.
    """
    judgments = Graph()
    n3_path = util.relpath(
        os.path.dirname(__file__) + "/../res/eut/goldstandard.n3")
    judgments.load(n3_path, format="n3")

    relevance_pred = util.ns['ir'] + 'isRelevantFor'
    triplestore = TripleStore(self.config.storetype,
                              self.config.storelocation,
                              self.config.storerepository)
    # Selects party, casenum and celexnum for a single case URI. The rows
    # themselves are not used below; only whether any came back.
    case_query = (
        "PREFIX eurlex:<http://lagen.nu/eurlex#> "
        "SELECT ?party ?casenum ?celexnum WHERE { "
        "<%s> eurlex:party ?party ; "
        "eurlex:casenum ?casenum ; "
        "eurlex:celexnum ?celexnum . }"
    )

    self.log.debug(
        "Loading gold standard relevance judgments for %s" % basefile)

    gold = {}
    for article in self._articles(basefile):
        # ``relevant`` and ``gold[article]`` are the same list object.
        relevant = gold[article] = []
        for case in judgments.objects(URIRef(article),
                                      URIRef(relevance_pred)):
            relevant.append(str(case))
            # Make sure the case exists and is the case we're looking for
            rows = triplestore.select(case_query % str(case),
                                      format="python")
            if not rows:
                self.log.warning("Can't find %s in triple store!" % case)
        self.log.debug(" Gold standard for %s: %s relevant docs" %
                       (article, len(relevant)))
        relevant.sort()
    return gold
def eval_get_goldstandard(self, basefile):
    """Return the gold standard relevance judgments for ``basefile``.

    The judgments are read from ``/../res/eut/goldstandard.n3``, resolved
    against the directory of this module. For each article produced by
    ``self._articles(basefile)`` the result holds the objects of all
    ``(article, ir:isRelevantFor, object)`` triples, as strings, in sorted
    order. Every object is additionally queried for in the triple store,
    and a warning is logged for each one the query finds nothing for;
    such objects still appear in the result.

    Returns a dict keyed by article.
    """
    graph = Graph()
    source = util.relpath(
        os.path.dirname(__file__) + "/../res/eut/goldstandard.n3")
    graph.load(source, format="n3")

    pred_uri = util.ns['ir'] + 'isRelevantFor'
    store = TripleStore(self.config.storetype,
                        self.config.storelocation,
                        self.config.storerepository)
    # Query template, filled in with one case URI at a time.
    lookup = (
        "PREFIX eurlex:<http://lagen.nu/eurlex#> "
        "SELECT ?party ?casenum ?celexnum WHERE { "
        "<%s> eurlex:party ?party ; "
        "eurlex:casenum ?casenum ; "
        "eurlex:celexnum ?celexnum . }"
    )

    self.log.debug(
        "Loading gold standard relevance judgments for %s" % basefile)

    result = {}
    for article in self._articles(basefile):
        result[article] = []
        for obj in graph.objects(URIRef(article), URIRef(pred_uri)):
            result[article].append(str(obj))
            # Make sure the case exists and is the case we're looking for
            hits = store.select(lookup % str(obj), format="python")
            if hits:
                continue
            self.log.warning("Can't find %s in triple store!" % obj)
        self.log.debug(" Gold standard for %s: %s relevant docs" %
                       (article, len(result[article])))
        result[article].sort()
    return result
def _sameas(self):
    """Load ``/../res/eut/sameas.n3`` and return it as a graph.

    The path is resolved against the directory of this module and the
    file is read in N3 format. A new ``Graph`` is built on every call.
    """
    n3_path = util.relpath(
        os.path.dirname(__file__) + "/../res/eut/sameas.n3")
    graph = Graph()
    graph.load(n3_path, format="n3")
    return graph
def construct_annotations(self, uri):
    """Query the triple store about a keyword and assemble the results as XML.

    Three SPARQL SELECT queries are run through ``self._store_select``:

    1. ``dct:description`` values of resources whose ``rdfs:label`` is the
       keyword (tagged ``@sv``).
    2. Resources in the ``<urn:x-local:sfs>`` graph whose ``dct:subject``
       is the keyword URI, together with the ``dct:title`` of a resource
       reached by following ``dct:isPartOf`` upwards.
    3. Resources in the ``<urn:x-local:dv>`` and ``<urn:x-local:arn>``
       graphs that have the keyword as ``dct:subject``.

    The rows are then turned into an element tree rooted at ``rdf:RDF``.

    NOTE(review): this method looks unfinished and will raise
    ``NameError`` unless the names below are bound at module level (not
    visible from here):

    * ``basefile`` is read on the second statement, but the only
      parameter is ``uri``, which the body never reads.
    * ``graph`` is returned but never assigned; the tree built in
      ``root_node`` is not returned or stored anywhere.
    * ``log`` is used bare rather than as ``self.log`` -- presumably a
      module-level logger; confirm.
    """
    start = time()
    # NOTE(review): ``basefile`` is not defined in this method.
    keyword = basefile.split("/", 1)[1]
    # note: infile is e.g. parsed/K/Konsument.xht2, but outfile is generated/Konsument.html
    # NOTE(review): neither infile nor outfile is used again in this method.
    infile = util.relpath(self._xmlFileName(basefile))
    outfile = util.relpath(self._htmlFileName(keyword))
    # Use SPARQL queries to create a rdf graph (to be used by the
    # xslt transform) containing enough information about all
    # cases using this term, as well as the wiki authored
    # dct:description for this term.

    # For proper SPARQL escaping, we need to change å to \u00E5
    # etc (there probably is a neater way of doing this).
    # Every character with a code point above 127 becomes a literal
    # backslash-u escape with four uppercase hex digits; all other
    # characters are copied through unchanged.
    esckeyword = ''
    for c in keyword:
        if ord(c) > 127:
            esckeyword += '\\u%04X' % ord(c)
        else:
            esckeyword += c
    escuri = keyword_to_uri(esckeyword)

    # Query 1: descriptions attached to resources labelled with the keyword.
    sq = """ PREFIX dct:<http://purl.org/dc/terms/> PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#> PREFIX rinfo:<http://rinfo.lagrummet.se/taxo/2007/09/rinfo/pub#> SELECT ?desc WHERE { ?uri dct:description ?desc . ?uri rdfs:label "%s"@sv } """ % esckeyword
    wikidesc = self._store_select(sq)
    log.debug('%s: Selected %s descriptions (%.3f sec)', basefile, len(wikidesc), time() - start)

    # Query 2: four UNION branches, each following dct:isPartOf from ?uri
    # up to the ?baseuri that carries the dct:title.
    # NOTE(review): in the third and fourth branches ?y is bound only by
    # "?x dct:isPartOf ?y" and the chain continues from ?x again
    # ("?x dct:isPartOf ?z"). Possibly "?y dct:isPartOf ?z" was intended,
    # which would make each branch one level deeper than the previous
    # one -- confirm before changing.
    sq = """ PREFIX dct:<http://purl.org/dc/terms/> PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#> PREFIX rinfo:<http://rinfo.lagrummet.se/taxo/2007/09/rinfo/pub#> SELECT DISTINCT ?uri ?label WHERE { GRAPH <urn:x-local:sfs> { { ?uri dct:subject <%s> . ?baseuri dct:title ?label . ?uri dct:isPartOf ?x . ?x dct:isPartOf ?baseuri } UNION { ?uri dct:subject <%s> . ?baseuri dct:title ?label . ?uri dct:isPartOf ?x . ?x dct:isPartOf ?y . ?y dct:isPartOf ?baseuri } UNION { ?uri dct:subject <%s> . ?baseuri dct:title ?label . ?uri dct:isPartOf ?x . ?x dct:isPartOf ?y . ?x dct:isPartOf ?z . ?z dct:isPartOf ?baseuri } UNION { ?uri dct:subject <%s> . ?baseuri dct:title ?label . ?uri dct:isPartOf ?x . ?x dct:isPartOf ?y . ?x dct:isPartOf ?z . ?z dct:isPartOf ?w . 
?w dct:isPartOf ?baseuri } } } """ % (escuri, escuri, escuri, escuri)
    legaldefinitioner = self._store_select(sq)
    log.debug('%s: Selected %d legal definitions (%.3f sec)', basefile, len(legaldefinitioner), time() - start)

    # Query 3: resources in the dv graph (subject given either as the
    # keyword URI or as a @sv literal) and in the arn graph (subject given
    # as a @sv literal, identifier taken from rinfoex:arendenummer).
    sq = """ PREFIX dct:<http://purl.org/dc/terms/> PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#> PREFIX rinfo:<http://rinfo.lagrummet.se/taxo/2007/09/rinfo/pub#> PREFIX rinfoex:<http://lagen.nu/terms#> SELECT ?uri ?id ?desc WHERE { { GRAPH <urn:x-local:dv> { { ?uri dct:description ?desc . ?uri dct:identifier ?id . ?uri dct:subject <%s> } UNION { ?uri dct:description ?desc . ?uri dct:identifier ?id . ?uri dct:subject "%s"@sv } } } UNION { GRAPH <urn:x-local:arn> { ?uri dct:description ?desc . ?uri rinfoex:arendenummer ?id . ?uri dct:subject "%s"@sv } } } """ % (escuri, esckeyword, esckeyword)
    # Maybe we should handle <urn:x-local:arn> triples here as well?
    # NOTE(review): the query above already has a UNION branch for
    # <urn:x-local:arn>, so the question above may be stale -- confirm.
    rattsfall = self._store_select(sq)
    log.debug('%s: Selected %d legal cases (%.3f sec)', basefile, len(rattsfall), time() - start)

    # Build the tree by hand. Element and attribute names are written
    # with literal "prefix:name" strings, and every prefix in util.ns is
    # both put into etree's private _namespace_map and set as an
    # "xmlns:<prefix>" attribute on the root element.
    root_node = etree.Element("rdf:RDF")
    for prefix in util.ns:
        etree._namespace_map[util.ns[prefix]] = prefix
        root_node.set("xmlns:" + prefix, util.ns[prefix])

    # The main description is about the unescaped keyword's URI.
    main_node = etree.SubElement(root_node, "rdf:Description")
    main_node.set("rdf:about", keyword_to_uri(keyword))

    # One dct:description child per row from query 1. The description
    # text is wrapped in an xht2:div, any default XHTML2 namespace
    # declaration inside it is stripped, and the result is parsed so that
    # it is attached as an element subtree rather than as text.
    for d in wikidesc:
        desc_node = etree.SubElement(main_node, "dct:description")
        xhtmlstr = "<xht2:div xmlns:xht2='%s'>%s</xht2:div>" % (
            util.ns['xht2'], d['desc'])
        xhtmlstr = xhtmlstr.replace(
            ' xmlns="http://www.w3.org/2002/06/xhtml2/"', '')
        desc_node.append(etree.fromstring(xhtmlstr.encode('utf-8')))

    # One dct:subject child per row from query 3, holding a nested
    # rdf:Description with the row's identifier and description.
    for r in rattsfall:
        subject_node = etree.SubElement(main_node, "dct:subject")
        rattsfall_node = etree.SubElement(subject_node, "rdf:Description")
        rattsfall_node.set("rdf:about", r['uri'])
        id_node = etree.SubElement(rattsfall_node, "dct:identifier")
        id_node.text = r['id']
        desc_node = etree.SubElement(rattsfall_node, "dct:description")
        desc_node.text = r['desc']

    # One rinfoex:isDefinedBy child per row from query 2. The label text
    # comes from self.sfsmgr.display_title, not from the row's ?label
    # column (the commented-out line below shows that alternative).
    for l in legaldefinitioner:
        subject_node = etree.SubElement(main_node, "rinfoex:isDefinedBy")
        rattsfall_node = etree.SubElement(subject_node, "rdf:Description")
        rattsfall_node.set("rdf:about", l['uri'])
        id_node = etree.SubElement(rattsfall_node, "rdfs:label")
        # id_node.text = "%s %s" % (l['uri'].split("#")[1], l['label'])
        id_node.text = self.sfsmgr.display_title(l['uri'])

    # FIXME: construct graph
    # NOTE(review): ``graph`` is never assigned in this method, and
    # root_node is discarded.
    return graph