Ejemplo n.º 1
0
    def eval_get_goldstandard(self, basefile):
        """Load the gold standard relevance judgments for ``basefile``.

        Reads ``res/eut/goldstandard.n3`` (located relative to this module)
        and, for every article yielded by ``self._articles(basefile)``,
        collects the objects of the ``util.ns['ir'] + 'isRelevantFor'``
        predicate. Each object is also looked up in the triple store; a
        warning is logged when that lookup returns nothing, but the object
        is kept in the result either way.

        :param basefile: identifier handed on to ``self._articles``
        :returns: dict mapping each article to a sorted list of the
                  relevant objects, converted to strings
        """
        goldstandard = Graph()
        goldstandard_rdf = util.relpath(
            os.path.dirname(__file__) + "/../res/eut/goldstandard.n3")
        # Graph.load() was deprecated and later removed from rdflib;
        # parse() takes the same source/format arguments and exists in
        # every rdflib version.
        goldstandard.parse(goldstandard_rdf, format="n3")

        # The predicate is the same for every article, so build it once.
        pred = URIRef(util.ns['ir'] + 'isRelevantFor')
        store = TripleStore(self.config.storetype,
                            self.config.storelocation,
                            self.config.storerepository)
        sq_templ = """PREFIX eurlex:<http://lagen.nu/eurlex#>
                      SELECT ?party ?casenum ?celexnum WHERE {
                          <%s> eurlex:party ?party ;
                               eurlex:casenum ?casenum ;
                               eurlex:celexnum ?celexnum .
                      }"""

        self.log.debug(
            "Loading gold standard relevance judgments for %s", basefile)
        res = {}
        for article in self._articles(basefile):
            relevant = []
            for o in goldstandard.objects(URIRef(article), pred):
                case_uri = str(o)
                relevant.append(case_uri)
                # Make sure the case exists and is the case we're looking
                # for; only the presence of rows matters here.
                if not store.select(sq_templ % case_uri, format="python"):
                    self.log.warning("Can't find %s in triple store!", o)
            self.log.debug("    Gold standard for %s: %s relevant docs",
                           article, len(relevant))
            relevant.sort()
            res[article] = relevant
        return res
Ejemplo n.º 2
0
    def eval_get_goldstandard(self, basefile):
        """Load the gold standard relevance judgments for ``basefile``.

        Reads ``res/eut/goldstandard.n3`` (located relative to this module)
        and, for every article yielded by ``self._articles(basefile)``,
        collects the objects of the ``util.ns['ir'] + 'isRelevantFor'``
        predicate. Each object is also looked up in the triple store; a
        warning is logged when that lookup returns nothing, but the object
        is kept in the result either way.

        :param basefile: identifier handed on to ``self._articles``
        :returns: dict mapping each article to a sorted list of the
                  relevant objects, converted to strings
        """
        goldstandard = Graph()
        goldstandard_rdf = util.relpath(
            os.path.dirname(__file__) + "/../res/eut/goldstandard.n3")
        # Graph.load() was deprecated and later removed from rdflib;
        # parse() takes the same source/format arguments and exists in
        # every rdflib version.
        goldstandard.parse(goldstandard_rdf, format="n3")

        # The predicate is the same for every article, so build it once.
        pred = URIRef(util.ns['ir'] + 'isRelevantFor')
        store = TripleStore(self.config.storetype, self.config.storelocation,
                            self.config.storerepository)
        sq_templ = """PREFIX eurlex:<http://lagen.nu/eurlex#>
                      SELECT ?party ?casenum ?celexnum WHERE {
                          <%s> eurlex:party ?party ;
                               eurlex:casenum ?casenum ;
                               eurlex:celexnum ?celexnum .
                      }"""

        self.log.debug("Loading gold standard relevance judgments for %s",
                       basefile)
        res = {}
        for article in self._articles(basefile):
            relevant = []
            for o in goldstandard.objects(URIRef(article), pred):
                case_uri = str(o)
                relevant.append(case_uri)
                # Make sure the case exists and is the case we're looking
                # for; only the presence of rows matters here.
                if not store.select(sq_templ % case_uri, format="python"):
                    self.log.warning("Can't find %s in triple store!", o)
            self.log.debug("    Gold standard for %s: %s relevant docs",
                           article, len(relevant))
            relevant.sort()
            res[article] = relevant
        return res
Ejemplo n.º 3
0
 def _sameas(self):
     """Return a new graph populated from ``res/eut/sameas.n3``.

     The file is located relative to the directory of this module and is
     read as N3.
     """
     sameas = Graph()
     sameas_rdf = util.relpath(
         os.path.dirname(__file__) + "/../res/eut/sameas.n3")
     # Graph.load() was deprecated and later removed from rdflib; parse()
     # takes the same source/format arguments and exists in every version.
     sameas.parse(sameas_rdf, format="n3")
     return sameas
Ejemplo n.º 4
0
    def construct_annotations(self, uri):
        """Collect triple store data about a keyword into an RDF/XML tree.

        Issues three SPARQL selects through ``self._store_select`` --
        wiki-authored descriptions, legal definitions from
        ``<urn:x-local:sfs>`` and legal cases from ``<urn:x-local:dv>`` /
        ``<urn:x-local:arn>`` -- and assembles the rows into an ``rdf:RDF``
        element tree (``root_node``).

        NOTE(review): this method looks unfinished. ``basefile`` and
        ``graph`` are used but never assigned inside it, the ``uri``
        parameter is never read, and the element tree that gets built is
        not what is returned. Confirm the intended behaviour before
        relying on it.

        :param uri: not read anywhere in the body as written
        :returns: ``graph``, which this method never assigns (see note)
        """
        start = time()
        # Everything after the first "/" of basefile is taken as the keyword.
        # NOTE(review): basefile is neither a parameter nor assigned here.
        keyword = basefile.split("/", 1)[1]
        # note: infile is e.g. parsed/K/Konsument.xht2, but outfile is generated/Konsument.html
        # NOTE(review): infile and outfile are not used further down.
        infile = util.relpath(self._xmlFileName(basefile))
        outfile = util.relpath(self._htmlFileName(keyword))

        # Use SPARQL queries to create a rdf graph (to be used by the
        # xslt transform) containing enough information about all
        # cases using this term, as well as the wiki authored
        # dct:description for this term.

        # For proper SPARQL escaping, we need to change å to \u00E5
        # etc (there probably is a neater way of doing this).
        esckeyword = ''
        for c in keyword:
            if ord(c) > 127:
                esckeyword += '\\u%04X' % ord(c)
            else:
                esckeyword += c

        # The URI is derived from the escaped keyword so that it can be
        # interpolated into the queries below.
        escuri = keyword_to_uri(esckeyword)

        # Query 1: dct:description of any resource whose rdfs:label is the
        # keyword as an @sv-tagged literal.
        sq = """
PREFIX dct:<http://purl.org/dc/terms/>
PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>
PREFIX rinfo:<http://rinfo.lagrummet.se/taxo/2007/09/rinfo/pub#>

SELECT ?desc
WHERE { ?uri dct:description ?desc . ?uri rdfs:label "%s"@sv }
""" % esckeyword
        wikidesc = self._store_select(sq)
        # NOTE(review): bare `log`, presumably a module-level logger --
        # confirm it exists. The elapsed time is measured from the start of
        # the method, so it is cumulative across the three queries.
        log.debug('%s: Selected %s descriptions (%.3f sec)',
                  basefile, len(wikidesc), time() - start)

        # Query 2: resources in <urn:x-local:sfs> that have the keyword URI
        # as dct:subject, joined to the dct:title of an ancestor reached via
        # dct:isPartOf; each UNION branch covers one nesting depth.
        # NOTE(review): in the last two branches ?y is bound by
        # `?x dct:isPartOf ?y` but nothing chains from it (the next pattern
        # is `?x dct:isPartOf ?z`); `?y dct:isPartOf ?z` may have been
        # intended -- confirm.
        sq = """
PREFIX dct:<http://purl.org/dc/terms/>
PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>
PREFIX rinfo:<http://rinfo.lagrummet.se/taxo/2007/09/rinfo/pub#>

SELECT DISTINCT ?uri ?label
WHERE {
    GRAPH <urn:x-local:sfs> {
       { ?uri dct:subject <%s> .
         ?baseuri dct:title ?label .
         ?uri dct:isPartOf ?x . ?x dct:isPartOf ?baseuri
       }
       UNION {
         ?uri dct:subject <%s> .
         ?baseuri dct:title ?label .
         ?uri dct:isPartOf ?x . ?x dct:isPartOf ?y . ?y dct:isPartOf ?baseuri
       }
       UNION {
         ?uri dct:subject <%s> .
         ?baseuri dct:title ?label .
         ?uri dct:isPartOf ?x . ?x dct:isPartOf ?y . ?x dct:isPartOf ?z . ?z dct:isPartOf ?baseuri
       }
       UNION {
         ?uri dct:subject <%s> .
         ?baseuri dct:title ?label .
         ?uri dct:isPartOf ?x . ?x dct:isPartOf ?y . ?x dct:isPartOf ?z . ?z dct:isPartOf ?w . ?w dct:isPartOf ?baseuri
       }
    }
}

""" % (escuri, escuri, escuri, escuri)
        legaldefinitioner = self._store_select(sq)
        log.debug('%s: Selected %d legal definitions (%.3f sec)',
                  basefile, len(legaldefinitioner), time() - start)

        # Query 3: rows from <urn:x-local:dv> whose dct:subject is either
        # the keyword URI or the @sv-tagged keyword literal, plus rows from
        # <urn:x-local:arn> matched on the literal only (there the id comes
        # from rinfoex:arendenummer instead of dct:identifier).
        sq = """
PREFIX dct:<http://purl.org/dc/terms/>
PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>
PREFIX rinfo:<http://rinfo.lagrummet.se/taxo/2007/09/rinfo/pub#>
PREFIX rinfoex:<http://lagen.nu/terms#>

SELECT ?uri ?id ?desc
WHERE {
    {
        GRAPH <urn:x-local:dv> {
            {
                ?uri dct:description ?desc .
                ?uri dct:identifier ?id .
                ?uri dct:subject <%s>
            }
            UNION {
                ?uri dct:description ?desc .
                ?uri dct:identifier ?id .
                ?uri dct:subject "%s"@sv
            }
        }
    } UNION {
        GRAPH <urn:x-local:arn> {
                ?uri dct:description ?desc .
                ?uri rinfoex:arendenummer ?id .
                ?uri dct:subject "%s"@sv
        }
    }
}
""" % (escuri, esckeyword, esckeyword)

        # NOTE(review): an earlier comment here asked whether
        # <urn:x-local:arn> triples should be handled as well; the query
        # above already has a GRAPH <urn:x-local:arn> branch, so that
        # question looks stale.

        rattsfall = self._store_select(sq)
        log.debug('%s: Selected %d legal cases (%.3f sec)',
                  basefile, len(rattsfall), time() - start)

        # Build the rdf:RDF root. Every prefix in util.ns is registered in
        # etree's private namespace map and also written as an explicit
        # xmlns: attribute on the root element.
        root_node = etree.Element("rdf:RDF")
        for prefix in util.ns:
            etree._namespace_map[util.ns[prefix]] = prefix
            root_node.set("xmlns:" + prefix, util.ns[prefix])

        # The main description is about the URI of the unescaped keyword.
        main_node = etree.SubElement(root_node, "rdf:Description")
        main_node.set("rdf:about", keyword_to_uri(keyword))

        # Each description is wrapped in an xht2:div, stripped of the
        # default xhtml2 namespace declaration, and parsed as XML so it is
        # attached as element content rather than as text.
        for d in wikidesc:
            desc_node = etree.SubElement(main_node, "dct:description")
            xhtmlstr = "<xht2:div xmlns:xht2='%s'>%s</xht2:div>" % (
                util.ns['xht2'], d['desc'])
            xhtmlstr = xhtmlstr.replace(
                ' xmlns="http://www.w3.org/2002/06/xhtml2/"', '')
            desc_node.append(etree.fromstring(xhtmlstr.encode('utf-8')))

        # One dct:subject child per legal case, carrying its identifier
        # and description as text.
        for r in rattsfall:
            subject_node = etree.SubElement(main_node, "dct:subject")
            rattsfall_node = etree.SubElement(subject_node, "rdf:Description")
            rattsfall_node.set("rdf:about", r['uri'])
            id_node = etree.SubElement(rattsfall_node, "dct:identifier")
            id_node.text = r['id']
            desc_node = etree.SubElement(rattsfall_node, "dct:description")
            desc_node.text = r['desc']

        # One rinfoex:isDefinedBy child per legal definition (the local
        # names rattsfall_node/id_node are reused from the loop above).
        for l in legaldefinitioner:
            subject_node = etree.SubElement(main_node, "rinfoex:isDefinedBy")
            rattsfall_node = etree.SubElement(subject_node, "rdf:Description")
            rattsfall_node.set("rdf:about", l['uri'])
            id_node = etree.SubElement(rattsfall_node, "rdfs:label")
            # The ?label selected by the query is not used; the label text
            # comes from self.sfsmgr.display_title instead.
            id_node.text = self.sfsmgr.display_title(l['uri'])

        # FIXME: construct graph
        # NOTE(review): `graph` is never assigned in this method, and
        # root_node built above is not returned.
        return graph
Ejemplo n.º 5
0
 def _sameas(self):
     """Return a new graph populated from ``res/eut/sameas.n3``.

     The file is located relative to the directory of this module and is
     read as N3.
     """
     sameas = Graph()
     sameas_rdf = util.relpath(
         os.path.dirname(__file__) + "/../res/eut/sameas.n3")
     # Graph.load() was deprecated and later removed from rdflib; parse()
     # takes the same source/format arguments and exists in every version.
     sameas.parse(sameas_rdf, format="n3")
     return sameas