Code example #1
File: parse-bench.py Project: zigit/ferenda
    def __init__(self, alias):
        # setup
        self.alias = alias
        parsetype = alias.split("/")[1]
        self.parser = LegalRef({
            'SFS': LegalRef.LAGRUM,
            'Short': LegalRef.KORTLAGRUM,
            'DV': LegalRef.RATTSFALL,
            'Regpubl': LegalRef.FORARBETEN,
            'EGLag': LegalRef.EULAGSTIFTNING,
            'ECJ': LegalRef.EURATTSFALL
        }[parsetype])

        # this particular test method is set up to use lagen.nu style
        # URIs because the canonical URIs are significantly different.
        dirname = os.path.dirname(__file__)
        basedir = dirname + "/../"
        space = basedir + "lagen/nu/res/uri/swedishlegalsource.space.ttl"
        slugs = basedir + "lagen/nu/res/uri/swedishlegalsource.slugs.ttl"
        extra = [
            basedir + "lagen/nu/res/extra/swedishlegalsource.ttl",
            basedir + "lagen/nu/res/extra/sfs.ttl"
        ]
        cfg = Graph().parse(space, format="turtle").parse(slugs,
                                                          format="turtle")
        self.metadata = Graph()
        for ttl in extra:
            self.metadata.parse(ttl, format="turtle")
        COIN = Namespace("http://purl.org/court/def/2009/coin#")
        # select correct URI for the URISpace definition by
        # finding a single coin:URISpace object
        spaceuri = cfg.value(predicate=RDF.type, object=COIN.URISpace)
        self.minter = URIMinter(cfg, spaceuri)
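
A minimal usage sketch of a parser set up this way (the citation string is made up, and the bare parse() call is an assumption based on examples #4 and #7 below; none of this is part of parse-bench.py):

from ferenda.sources.legal.se.legalref import LegalRef

# Hypothetical LAGRUM parse; reference nodes are expected to carry a URI.
parser = LegalRef(LegalRef.LAGRUM)
for node in parser.parse("5 § personuppgiftslagen (1998:204)"):
    if hasattr(node, "uri"):
        print(node.uri)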
Code example #2
File: wsgiapp.py Project: zigit/ferenda
 def __init__(self, repos, inifile=None, **kwargs):
     super(WSGIApp, self).__init__(repos, inifile, **kwargs)
     sfsrepo = [repo for repo in repos if repo.alias == "sfs"][0]
     self.parser = SwedishCitationParser(
         LegalRef(LegalRef.RATTSFALL, LegalRef.LAGRUM, LegalRef.KORTLAGRUM,
                  LegalRef.FORARBETEN, LegalRef.MYNDIGHETSBESLUT),
         sfsrepo.minter,
         sfsrepo.commondata,
         allow_relative=True)
     graph = Graph().parse(sfsrepo.resourceloader.filename("extra/sfs.ttl"),
                           format="turtle")
     self.lagforkortningar = [
         str(o) for s, o in graph.subject_objects(DCTERMS.alternate)
     ]
     self.paragraflag = []
     for s, o in graph.subject_objects(DCTERMS.alternate):
         basefile = sfsrepo.basefile_from_uri(str(s))
         distilledpath = sfsrepo.store.distilled_path(basefile)
         firstpara_uri = str(s) + "#P1"
         needle = '<rpubl:Paragraf rdf:about="%s">' % firstpara_uri
         if (os.path.exists(distilledpath) and
                 needle in util.readfile(distilledpath)):
             self.paragraflag.append(str(o).lower())
     self.lagnamn = [str(o) for s, o in graph.subject_objects(RDFS.label)]
     self.lagforkortningar_regex = "|".join(
         sorted(self.lagforkortningar, key=len, reverse=True))
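
The length-descending sort matters once lagforkortningar_regex is compiled: Python's re module tries alternatives left to right, so longer abbreviations must come first or a shorter prefix would win. A minimal, self-contained sketch with made-up abbreviations (the real list is read from extra/sfs.ttl above):

import re

lagforkortningar = ["TF", "TL", "TLF"]  # hypothetical stand-in data
regex = "|".join(sorted(lagforkortningar, key=len, reverse=True))
# regex == "TLF|TF|TL": "TLF" matches in full instead of stopping at "TL"
print(re.search(regex, "se TLF 3 kap.").group(0))  # -> "TLF"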
Code example #3
File: mediawiki.py Project: zigit/ferenda
 def parser(self):
     p = LegalRef(LegalRef.LAGRUM, LegalRef.KORTLAGRUM, LegalRef.FORARBETEN,
                  LegalRef.RATTSFALL)
     # self.commondata needs to include extra/sfs.ttl somehow. This is
     # probably not the best way to do it.
     with self.resourceloader.open("extra/sfs.ttl") as fp:
         self.commondata.parse(data=fp.read(), format="turtle")
     # actually, to mint URIs for rattsfall we need the
     # skos:altLabel for the rpubl:Rattsfallspublikation -- so we
     # need everything
     with self.resourceloader.open("extra/swedishlegalsource.ttl") as fp:
         self.commondata.parse(data=fp.read(), format="turtle")
     return SwedishCitationParser(p,
                                  self.minter,
                                  self.commondata,
                                  allow_relative=True)
Code example #4
File: caselaw.py Project: zigit/ferenda
    def parse_document_from_soup(self, soup, doc):
        # Process text and create DOM
        self.parser = LegalRef(LegalRef.EGRATTSFALL)

        textdiv = soup.find("div", "texte")
        if textdiv:
            for node in textdiv.childGenerator():
                if node.string:
                    # Here we should start analyzing for things like
                    # "C-197/09". Note that the Eurlex data does not use
                    # the ordinary hyphen like above, but rather
                    # 'NON-BREAKING HYPHEN' (U+2011) - LegalRef will mangle
                    # this to an ordinary hyphen.
                    subnodes = self.parser.parse(
                        node.string, predicate="dcterms:references")
                    doc.body.append(Paragraph(subnodes))
        else:
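            # celexnum (the document's CELEX number) is assumed to be set
            # earlier in the full method; it is not defined in this excerpt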
            self.log.warning("%s: No fulltext available!" % celexnum)
            doc.body.append(Paragraph(["(No fulltext available)"]))
Code example #5
File: sfslegacy.py Project: zigit/ferenda
 def forarbete_parser(self):
     return SwedishCitationParser(LegalRef(LegalRef.FORARBETEN),
                                  self.minter, self.commondata)
Code example #6
File: sfslegacy.py Project: zigit/ferenda
 def lagrum_parser(self):
     return SwedishCitationParser(LegalRef(LegalRef.LAGRUM,
                                           LegalRef.EULAGSTIFTNING),
                                  self.minter,
                                  self.commondata,
                                  allow_relative=True)
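
Both factory methods return ready-to-use SwedishCitationParser instances. A sketch of how such a parser is typically applied, modeled on the parse_recursive call in example #7 (repo and body are assumptions, not shown in sfslegacy.py; whether these factories are plain methods or cached properties is also not visible in the excerpt):

# repo: the repo instance defining lagrum_parser above;
# body: a parsed document body element, as in example #7.
citparser = repo.lagrum_parser()
citparser.parse_recursive(body, predicate=None)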
Code example #7
class LNMediaWiki(MediaWiki):
    namespaces = SwedishLegalSource.namespaces

    from ferenda.sources.legal.se.legalref import LegalRef

    p = LegalRef(LegalRef.LAGRUM, LegalRef.KORTLAGRUM, LegalRef.FORARBETEN,
                 LegalRef.RATTSFALL)

    keyword_class = LNKeyword

    lang = "sv"

    def __init__(self, config=None, **kwargs):
        super(LNMediaWiki, self).__init__(config, **kwargs)
        if self.config._parent and hasattr(self.config._parent, "sfs"):
            self.sfsrepo = SFS(self.config._parent.sfs)
        else:
            self.sfsrepo = SFS()

    def get_wikisettings(self):
        settings = LNSettings(lang=self.lang)
        # NOTE: The settings object (the make_url method) only needs
        # access to the canonical_uri method.
        settings.make_sfs_url = self.sfsrepo.canonical_uri
        settings.make_keyword_url = self.keywordrepo.canonical_uri
        return settings

    def get_wikisemantics(self, parser, settings):
        return LNSemantics(parser, settings)

    def canonical_uri(self, basefile):
        if basefile.startswith("SFS/") or basefile.startswith("SFS:"):
            # "SFS/1998:204" -> "1998:204"
            return self.sfsrepo.canonical_uri(basefile[4:])
        else:
            return super(LNMediaWiki, self).canonical_uri(basefile)

    def postprocess(self, doc, xhtmltree):
        # In SFS mode: create a div for the root content, then find all
        # headers and wrap everything under each header in its own div
        if doc.basefile.startswith("SFS/") or doc.basefile.startswith("SFS:"):
            self.postprocess_commentary(doc, xhtmltree)
            toplevel_property = False
        else:
            toplevel_property = True
        body = super(LNMediaWiki,
                     self).postprocess(doc,
                                       xhtmltree,
                                       toplevel_property=toplevel_property)
        citparser = SwedishCitationParser(self.p, self.config.url)
        citparser.parse_recursive(body, predicate=None)
        return body

    def postprocess_commentary(self, doc, xhtmltree):
        uri = doc.uri
        body = xhtmltree.getchildren()[0]
        newbody = etree.Element("body")

        curruri = uri
        currdiv = etree.SubElement(newbody, "div")
        currdiv.set("about", curruri)
        currdiv.set("property", "dcterms:description")
        currdiv.set("datatype", "rdf:XMLLiteral")
        containerdiv = etree.SubElement(currdiv, "div")
        for child in body.getchildren():
            if child.tag in ("h1", "h2", "h3", "h4", "h5", "h6"):
                # remove that <span> element that Semantics._h_el adds for us
                assert child[0].tag == "span", \
                    "Header subelement was %s not span" % child[0].tag
                child.text = child[0].text
                child.remove(child[0])
                if child.text:
                    if isinstance(child.text, bytes):
                        txt = child.text.decode("utf-8")
                    else:
                        txt = child.text
                    nodes = self.p.parse(txt, curruri)
                    curruri = nodes[0].uri
                # body.remove(child)
                newbody.append(child)
                currdiv = etree.SubElement(newbody, "div")
                currdiv.set("about", curruri)
                currdiv.set("property", "dcterms:description")
                currdiv.set("datatype", "rdf:XMLLiteral")
                # create a containerdiv under currdiv to hold the
                # non-header content that follows (see the else branch)
                containerdiv = etree.SubElement(currdiv, "div")
            else:
                # body.remove(child)
                currdiv[0].append(child)
        xhtmltree.remove(body)
        xhtmltree.append(newbody)
Code example #8
File: integrationLegalRef.py Project: zigit/ferenda
 def parametric_test(self, datafile):
     p = LegalRef(LegalRef.LAGRUM)
     return self._test_parser(datafile, p)
Code example #9
File: integrationLegalRef.py Project: zigit/ferenda
 def parametric_test(self, datafile):
     p = LegalRef(LegalRef.MYNDIGHETSBESLUT)
     # p.verbose = True
     return self._test_parser(datafile, p)
Code example #10
File: integrationLegalRef.py Project: zigit/ferenda
 def parametric_test(self, datafile):
     p = LegalRef(LegalRef.EGRATTSFALL)
     return self._test_parser(datafile, p)
Code example #11
File: integrationLegalRef.py Project: zigit/ferenda
 def parametric_test(self, datafile):
     p = LegalRef(LegalRef.EULAGSTIFTNING)
     return self._test_parser(datafile, p)
Code example #12
File: integrationLegalRef.py Project: zigit/ferenda
 def parametric_test(self, datafile):
     p = LegalRef(LegalRef.FORARBETEN)
     return self._test_parser(datafile, p)
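
Examples #8 through #12 follow one data-driven pattern: instantiate LegalRef restricted to a single reference type and hand it to the shared _test_parser helper (not shown in these excerpts) together with a data file of citations. What they ultimately exercise can be reproduced directly; a hypothetical call for the FORARBETEN case, assuming as in example #1's sketch that parse() accepts a bare string (the input is made up):

from ferenda.sources.legal.se.legalref import LegalRef

p = LegalRef(LegalRef.FORARBETEN)
for node in p.parse("prop. 1997/98:44"):
    print(getattr(node, "uri", node))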