Example #1
0
# -*- coding: utf-8 -*-
from __future__ import (absolute_import, division, print_function,
                        unicode_literals)
from builtins import *

import sys
import os

from ferenda.compat import unittest
from ferenda.testutil import FerendaTestCase, testparser, file_parametrize
from ferenda.sources.tech import RFC


# FIXME: This test should be re-worked as a normal RepoTester test
class Parse(unittest.TestCase, FerendaTestCase):
    """Parametrized parser tests that use the parser from ``RFC.get_parser()``."""

    def parametric_test(self, filename):
        """Hand *filename* and a fresh RFC parser to ``testparser``."""
        testparser(self, RFC.get_parser(), filename)


# Register tests on Parse for the ".txt" fixtures under test/files/rfc
# (presumably one generated test per file -- confirm against
# ferenda.testutil.file_parametrize).
file_parametrize(Parse, "test/files/rfc", ".txt")
Example #2
0
            if result != elements.serialize(b).strip():
                # re-run the parse but with debugging on
                print("============DEBUG OUTPUT================")
                p.debug = True
                tr=TextReader(filename,encoding="utf-8",linesep=TextReader.UNIX)
                b = p.parse(tr.getiterator(tr.readparagraph))
                print("===============RESULT===================")
                print(elements.serialize(b))
                self.fail("========See output above=======")
            else:
                self.assertEqual(result, elements.serialize(b).strip())
        else:
            print("\nResult:\n"+elements.serialize(b))
            self.fail()

    def test_no_recognizer(self):
        # Running this fixture must raise FSMStateError.
        # NOTE(review): the ".tx" suffix looks deliberate (it keeps the file
        # out of the ".txt" parametrization at module level) -- confirm.
        fixture = "test/files/fsmparser/no-recognizer.tx"
        self.assertRaises(FSMStateError, self.run_test_file, fixture)

    def test_no_transition(self):
        # Running this fixture must raise FSMStateError.
        # NOTE(review): the ".tx" suffix looks deliberate (it keeps the file
        # out of the ".txt" parametrization at module level) -- confirm.
        fixture = "test/files/fsmparser/no-transition.tx"
        self.assertRaises(FSMStateError, self.run_test_file, fixture)

    def test_debug(self):
        # The module holding print() is named "__builtin__" on Python 2 and
        # "builtins" otherwise; choose the right one so print can be patched.
        if six.PY2:
            module_name = "__builtin__"
        else:
            module_name = "builtins"
        with patch(module_name + ".print") as fake_print:
            self.run_test_file("test/files/fsmparser/basic.txt", debug=True)
        # Running with debug=True must have called print() at least once.
        self.assertTrue(fake_print.called)

# Register tests on Parse for the ".txt" fixtures under test/files/fsmparser
# (presumably one generated test per file -- confirm against
# ferenda.testutil.file_parametrize).
file_parametrize(Parse,"test/files/fsmparser",".txt")
Example #3
0
        
        cp = CitationParser(self.parser)
        nodes = cp.parse_string(testdata)
        got = []
        for node in nodes:
            if isinstance(node, str):
                got.append(node.strip())
            else:
                (text, result) = node
                got.append(util.parseresults_as_xml(result).strip())
        
        wantfile = os.path.splitext(filename)[0] + ".result"
        if os.path.exists(wantfile):
            with open(wantfile) as fp:
                want = [x.strip() for x in fp.read().split("\n\n")]
        else:
            print("\nparse_string() returns:")
            print("\n\n".join(compare))
            self.fail("%s not found" % wantfile)
        self.maxDiff = 4096
        self.assertListEqual(want,got)

class URL(ParametricBase):
    """Citation tests that set ``parser`` to ``ferenda.citationpatterns.url``."""
    parser = ferenda.citationpatterns.url

class EULaw(ParametricBase):
    """Citation tests that set ``parser`` to ``ferenda.citationpatterns.eulaw``."""
    parser = ferenda.citationpatterns.eulaw

# Register tests on URL for the ".txt" fixtures under test/files/citation/url.
file_parametrize(URL, "test/files/citation/url", ".txt")
# NOTE(review): the eulaw fixtures are currently not registered, and the
# disabled call below names URL rather than EULaw -- confirm the intended
# class before re-enabling it.
# file_parametrize(URL, "test/files/citation/eulaw", ".txt")
Example #4
0
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import sys, os

from ferenda.compat import unittest

from ferenda.manager import setup_logger

# Call ferenda's logger setup with the "CRITICAL" level before the test
# helpers below are imported (presumably to keep log output out of the
# test run -- confirm against ferenda.manager.setup_logger).
setup_logger("CRITICAL")

from ferenda.testutil import FerendaTestCase, testparser, file_parametrize
from ferenda.sources.tech import RFC

# FIXME: This test should be re-worked as a normal RepoTester test
class Parse(unittest.TestCase, FerendaTestCase):
    """Parametrized parser tests that use the parser from ``RFC.get_parser()``."""

    def parametric_test(self, filename):
        """Run ``testparser`` with a fresh RFC parser against *filename*."""
        rfc_parser = RFC.get_parser()
        testparser(self, rfc_parser, filename)


# Register tests on Parse for the ".txt" fixtures under test/files/rfc
# (presumably one generated test per file -- confirm against
# ferenda.testutil.file_parametrize).
file_parametrize(Parse, "test/files/rfc", ".txt")
Example #5
0
        got = []
        for node in nodes:
            if isinstance(node, str):
                got.append(node.strip())
            else:
                (text, result) = node
                got.append(util.parseresults_as_xml(result).strip())

        wantfile = os.path.splitext(filename)[0] + ".result"
        if os.path.exists(wantfile):
            with open(wantfile) as fp:
                want = [x.strip() for x in fp.read().split("\n\n")]
        else:
            print("\nparse_string() returns:")
            print("\n\n".join(compare))
            self.fail("%s not found" % wantfile)
        self.maxDiff = 4096
        self.assertListEqual(want, got)


class URL(ParametricBase):
    """Citation tests that set ``parser`` to ``ferenda.citationpatterns.url``."""
    parser = ferenda.citationpatterns.url


class EULaw(ParametricBase):
    """Citation tests that set ``parser`` to ``ferenda.citationpatterns.eulaw``."""
    parser = ferenda.citationpatterns.eulaw


# Register tests on URL for the ".txt" fixtures under test/files/citation/url.
file_parametrize(URL, "test/files/citation/url", ".txt")
# NOTE(review): the eulaw fixtures are currently not registered, and the
# disabled call below names URL rather than EULaw -- confirm the intended
# class before re-enabling it.
# file_parametrize(URL, "test/files/citation/eulaw", ".txt")
@unittest.skipIf("SKIP_SIMPLEPARSE_TESTS" in os.environ,
                 "Skipping SimpleParser dependent tests")
class EUCaselaw(TestLegalRef):
    """Reference-parsing tests using a ``LegalRef`` built with ``EGRATTSFALL``.

    The whole class is skipped when the SKIP_SIMPLEPARSE_TESTS environment
    variable is set (to any value).
    """

    def parametric_test(self, datafile):
        """Run ``_test_parser`` on *datafile* and return its result."""
        return self._test_parser(datafile, LegalRef(LegalRef.EGRATTSFALL))

# Some tests simply do not work right now. Because the test data and the
# wanted result live in the same file, marking individual tests as
# expectedFailure is tricky, so the broken ones are listed by name instead.
def make_closure(brokentests):
    """Return a predicate telling whether a test name is in *brokentests*.

    The container is captured by reference and consulted on every call,
    so later changes to it are visible to the returned function.
    """
    def broken(testname):
        listed = testname in brokentests
        return listed

    return broken

# Register the fixture-driven tests for each reference-parser test class.
# Where a fourth argument is given, it is a predicate from make_closure()
# naming the fixtures that are known not to work (presumably
# file_parametrize treats those as expected failures -- confirm against
# ferenda.testutil).
file_parametrize(Lagrum,"test/files/legalref/SFS",".txt",
                 make_closure(['sfs-tricky-bokstavslista.txt',
                               'sfs-tricky-eller.txt',
                               'sfs-tricky-eller-paragrafer-stycke.txt',
                               'sfs-tricky-overgangsbestammelse.txt',
                               'sfs-tricky-uppdelat-lagnamn.txt',
                               'sfs-tricky-vvfs.txt']))
file_parametrize(KortLagrum, "test/files/legalref/Short",".txt")
file_parametrize(Forarbeten, "test/files/legalref/Regpubl",".txt")
file_parametrize(Rattsfall, "test/files/legalref/DV",".txt")
file_parametrize(EULaw, "test/files/legalref/EGLag",".txt")
file_parametrize(EUCaselaw, "test/files/legalref/ECJ",".txt",
                 make_closure(['civilservicetrib.txt',
                               'simple.txt']))
Example #7
0
            if not any([list(rg.predicates(subject, x)) for x in subjects]):
                rootnode = subject
                break
        coined_uri = coinstruct_from_graph(resourcegraph, rootnode, self.minter)
        self.assertEqual(uri, coined_uri)


def tests_from_atom(cls, atomfile, base):
    """Register one ``coin_test``-based test on *cls* per entry of an Atom feed.

    For every ``<entry>`` in *atomfile*, the entry's ``<id>`` text is used as
    the expected URI and its ``<content>`` element is parsed into an rdflib
    graph. The pair is handed to ``parametrize`` together with
    ``cls.coin_test`` under a generated test name.

    :param cls: test class that provides ``coin_test`` and receives the tests
    :param atomfile: path (or other source accepted by ``lxml.etree.parse``)
                     of the Atom feed
    :param base: URI prefix removed from each entry id when building the
                 test name
    """
    atom_ns = "{http://www.w3.org/2005/Atom}"
    rdf_root_tag = "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}RDF"

    feed = lxml.etree.parse(atomfile).getroot()
    for entry in feed.findall(atom_ns + "entry"):
        uri = entry.find(atom_ns + "id").text
        content = entry.find(atom_ns + "content")
        # Retag <content> as rdf:RDF so that the serialized element is a
        # self-contained RDF/XML document.
        content.tag = rdf_root_tag
        # State the format explicitly: the payload is RDF/XML, and newer
        # rdflib versions no longer assume XML when no format is given.
        resource_graph = rdflib.Graph().parse(
            data=lxml.etree.tostring(content), format="xml")
        # Build an identifier-friendly test name from the URI.
        name = "test_" + uri.replace(base, "").replace("/", "_").replace(":", "_")
        parametrize(cls, cls.coin_test, name, (uri, resource_graph))

class DefaultCoinstruct(Coinstruct):
    """Coinstruct tests that use everything inherited from ``Coinstruct`` unchanged."""

class CustomCoinstruct(Coinstruct):
    """Coinstruct tests that override the feed, space and slugs file paths."""
    # Atom feed whose entries are turned into tests by tests_from_atom().
    atomfile = "test/files/legaluri/lagen.nu.atom"
    # NOTE(review): presumably the URI-space and slug definitions that the
    # base class uses to build its minter -- confirm against Coinstruct.
    spacefile = "lagen/nu/res/uri/swedishlegalsource.space.ttl"
    slugsfile = "lagen/nu/res/uri/swedishlegalsource.slugs.ttl"
    

# Register the file-driven Construct tests, then the Atom-driven tests for
# both Coinstruct variants. The last argument of tests_from_atom is the URI
# prefix removed from each entry id when the test name is built.
# NOTE(review): Construct is not defined in the visible part of this
# module -- confirm it is in scope here.
file_parametrize(Construct,"test/files/legaluri",".py")
tests_from_atom(CustomCoinstruct, CustomCoinstruct.atomfile, 
               "https://lagen.nu/")
tests_from_atom(DefaultCoinstruct, DefaultCoinstruct.atomfile,
                "http://rinfo.lagrummet.se/publ/")
Example #8
0
        # official rpubl URIs are minted.
        #
        # self.repo.config.localizeuri = True
        # self.repo.config.url = "http://example.org/"
        # self.repo.config.urlpath = ''
        # a few of the subclasses have specialized rules. make sure we
        # instantiate the correct class
        repo = os.path.basename(filename).split("-")[0]
        basefile = os.path.splitext(os.path.basename(filename))[0].replace(
            "-", "/", 1).replace("-", ":")
        repoclass = self.aliases[repo]
        self.repo = repoclass(
            datadir=self.datadir,
            storelocation=self.datadir + "/ferenda.sqlite",
            indexlocation=self.datadir + "/whoosh",
        )
        doc = self.repo.make_document(basefile)
        text = self.repo.sanitize_text(util.readfile(filename), basefile)
        reader = TextReader(string=text, encoding='utf-8')
        self.repo.parse_metadata_from_textreader(reader, doc)
        wantfile = filename.replace(".txt", ".n3")
        if os.path.exists(wantfile):
            self.assertEqualGraphs(wantfile, doc.meta, exact=False)
        else:
            self.fail(
                "Expected a %s with the following content:\n\n%s" %
                (wantfile, doc.meta.serialize(format="n3").decode("utf-8")))


# Register tests on Parse for the ".txt" fixtures under test/files/myndfskr
# (presumably one generated test per file -- confirm against
# ferenda.testutil.file_parametrize).
file_parametrize(Parse, "test/files/myndfskr", ".txt")
Example #9
0
                         indexlocation=self.datadir + "/whoosh",)
        return repo, basefile

    def parametric_test(self, filename):
        """Check the metadata extracted from *filename* against its ``.n3`` file.

        The repository instance and basefile come from
        ``self.parse_filename``, which picks the repository class to use.
        The file's text is passed through the repo's ``sanitize_text``,
        ``extract_metadata``, ``sanitize_metadata``, ``polish_metadata`` and
        ``infer_metadata`` steps. The resulting ``resource.graph`` is then
        compared (``exact=False``) with the sibling ``.n3`` file.

        If the ``.n3`` file does not exist the test fails, and the failure
        message contains the extracted graph serialized as N3 so that it
        can be reviewed and saved as the expected result.
        """
        # URIs are minted with the repo's default configuration (per the
        # original author's note: the official rpubl URIs). The repo config
        # options localizeuri, url and urlpath adjust the constructed URIs.
        repo, basefile = self.parse_filename(filename)
        # NOTE(review): the returned document is not used below; the call is
        # kept in case make_document has side effects -- confirm and remove.
        repo.make_document(basefile)
        text = repo.sanitize_text(util.readfile(filename), basefile)
        reader = TextReader(string=text, encoding='utf-8')
        props = repo.extract_metadata(reader, basefile)
        props = repo.sanitize_metadata(props, basefile)
        resource = repo.polish_metadata(props)
        repo.infer_metadata(resource, basefile)

        # Swap only the extension; str.replace would also rewrite any other
        # ".txt" occurring earlier in the path.
        wantfile = os.path.splitext(filename)[0] + ".n3"
        if os.path.exists(wantfile):
            self.assertEqualGraphs(wantfile, resource.graph, exact=False)
        else:
            # Show the graph that would have been compared (resource.graph),
            # not the metadata of the unpopulated document.
            serialized = resource.graph.serialize(format="n3")
            # Older rdflib returns bytes here, newer versions return str.
            if isinstance(serialized, bytes):
                serialized = serialized.decode("utf-8")
            self.fail("Expected a %s with the following content:\n\n%s" %
                      (wantfile, serialized))


# Register tests on Parse for the ".txt" fixtures under test/files/myndfskr
# (presumably one generated test per file -- confirm against
# ferenda.testutil.file_parametrize).
file_parametrize(Parse, "test/files/myndfskr", ".txt")
Example #10
0
        for subpart in part:
            if not isinstance(subpart, str):
                self._remove_uri_for_testcases(subpart)
            elif hasattr(subpart, 'uri') and not isinstance(subpart, LinkSubject):
                del subpart.uri
            
                
            
from ferenda.testutil import file_parametrize

# Fixture file names whose tests are known to be broken.
brokentests = [
    "definition-no-definition.txt",
    "definition-paranthesis-lista.txt",
    "definition-paranthesis-multiple.txt",
    "definition-strecksatslista-andrastycke.txt",
    "extra-overgangsbestammelse-med-rubriker.txt",
    "regression-10kap-ellagen.txt",
    "tricky-felformatterad-tabell.txt",
    "tricky-lang-rubrik.txt",
    "tricky-lista-inte-rubrik.txt",
    "tricky-lista-not-rubriker-2.txt",
    "tricky-lopande-rubriknumrering.txt",
    "tricky-okand-aldre-lag.txt",
    "tricky-paragraf-inledande-tomrad.txt",
    "tricky-tabell-overgangsbest.txt",
    "tricky-tabell-sju-kolumner.txt",
]


def broken(testname):
    """Return True if *testname* appears in the module-level ``brokentests``."""
    is_listed = testname in brokentests
    return is_listed
# Register tests on Parse for the ".txt" fixtures under test/files/sfs/parse,
# passing broken() so the known-broken fixtures can be singled out
# (presumably as expected failures -- confirm against ferenda.testutil).
file_parametrize(Parse,"test/files/sfs/parse",".txt", broken)
Example #11
0
    
    def parametric_test(self, filename):
        """Format the parse result stored in *filename* and compare the URI.

        *filename* holds JSON that is wrapped in a ``FakeParseResult`` named
        after the first item of ``self.get_formatter()``. The URI produced by
        ``URIFormatter`` must equal the stripped contents of the sibling
        ``.txt`` file; if that file is missing, the URI is printed and the
        test fails.
        """
        with open(filename) as fp:
            parse_data = json.load(fp)

        fake_result = FakeParseResult(parse_data, name=self.get_formatter()[0])
        formatter = URIFormatter(self.get_formatter())
        uri = formatter.format(fake_result)

        resultfile = os.path.splitext(filename)[0] + ".txt"
        if not os.path.exists(resultfile):
            # Show the computed URI so it can be saved as the wanted result;
            # self.fail() raises, so nothing below runs in this case.
            print("format() returns: %s" % uri)
            self.fail("%s not found" % resultfile)

        with open(resultfile) as fp:
            result = fp.read().strip()
        self.assertEqual(uri, result)

class URL(ParametricBase):
    """URI-format tests for the ``url`` formatter."""

    def get_formatter(self):
        """Return the pair of the name ``"url"`` and ``ferenda.uriformats.url``."""
        formatter_name = "url"
        return (formatter_name, ferenda.uriformats.url)

class EULaw(ParametricBase):
    """URI-format tests for the ``eulaw`` formatter."""

    def get_formatter(self):
        """Return the pair of the name ``"eulaw"`` and ``ferenda.uriformats.eulaw``."""
        formatter_name = "eulaw"
        return (formatter_name, ferenda.uriformats.eulaw)

# Register tests on URL for the ".json" fixtures under
# test/files/uriformat/url.
file_parametrize(URL,"test/files/uriformat/url", ".json")
# NOTE(review): the EULaw fixtures are currently not registered -- confirm
# why before re-enabling the call below.
# file_parametrize(EULaw,"test/files/uriformat/eulaw", ".json")
Example #12
0
        # p.verbose = True
        return self._test_parser(datafile, p)


# Some tests simply do not work right now. Because the test data and the
# wanted result live in the same file, marking individual tests as
# expectedFailure is tricky, so the broken ones are listed by name instead.
def make_closure(brokentests):
    """Build a predicate that reports membership of a test name in *brokentests*.

    *brokentests* is captured by reference, so the returned function checks
    the container as it is at call time.
    """
    def broken(testname):
        if testname in brokentests:
            return True
        return False

    return broken


# Register the fixture-driven tests for each reference-parser test class.
# Where a fourth argument is given, it is a predicate from make_closure()
# naming the fixtures that are known not to work (presumably
# file_parametrize treats those as expected failures -- confirm against
# ferenda.testutil).
file_parametrize(
    Lagrum, "test/files/legalref/SFS", ".txt",
    make_closure([
        'sfs-tricky-bokstavslista.txt', 'sfs-tricky-eller.txt',
        'sfs-tricky-eller-paragrafer-stycke.txt',
        'sfs-tricky-overgangsbestammelse.txt',
        'sfs-tricky-uppdelat-lagnamn.txt', 'sfs-tricky-vvfs.txt'
    ]))
file_parametrize(KortLagrum, "test/files/legalref/Short", ".txt")
file_parametrize(EnklaLagrum, "test/files/legalref/Simple", ".txt")
file_parametrize(Forarbeten, "test/files/legalref/Regpubl", ".txt")
file_parametrize(Rattsfall, "test/files/legalref/DV", ".txt")
file_parametrize(EULaw, "test/files/legalref/EGLag", ".txt")
file_parametrize(EUCaselaw, "test/files/legalref/ECJ", ".txt",
                 make_closure(['civilservicetrib.txt', 'simple.txt']))
file_parametrize(Avg, "test/files/legalref/Avg", ".txt")
Example #13
0
from ferenda.sources.legal.se.legaluri import construct,parse
from ferenda.testutil import file_parametrize

class Construct(unittest.TestCase):
    """File-driven tests for ``construct``: a ``.py`` fixture in, a URI out."""

    def parametric_test(self, filename):
        """Evaluate the fixture in *filename* and compare the constructed URI.

        *filename* holds a Python expression that is evaluated and passed to
        ``construct``. The result must equal the stripped contents of the
        sibling ``.txt`` file.
        """
        # Function-scope import: it is not visible here whether the module
        # imports os at the top.
        import os

        with open(filename) as fp:
            testdata = fp.read()
        # Swap only the extension; str.replace would also rewrite any other
        # ".py" occurring earlier in the path.
        answerfile = os.path.splitext(filename)[0] + ".txt"
        with open(answerfile) as fp:
            testanswer = fp.read().strip()

        # All test case writers are honorable, noble and thorough
        # persons, but just in case, let's make eval somewhat safer.
        # The module globals are passed as the local namespace, so fixture
        # expressions may refer to module-level names.
        # FIXME: use ast.literal_eval instead (only possible if the fixtures
        # contain nothing but literals -- verify before switching).
        testdata = testdata.strip().replace("\r\n", " ")
        d = eval(testdata, {"__builtins__": None}, globals())
        uri = construct(d)
        self.assertEqual(uri, testanswer)

class Parse(unittest.TestCase):
    """File-driven tests for ``parse``: a URI in, the fixture's parts out."""

    def parametric_test(self, filename):
        """Parse the URI stored in *filename* and compare it with the fixture.

        *filename* holds the URI. The sibling ``.py`` file holds a Python
        expression for the expected parts, which is evaluated and compared
        with the value returned by ``parse``.
        """
        # Function-scope import: it is not visible here whether the module
        # imports os at the top.
        import os

        with open(filename) as fp:
            uri = fp.read().strip()
        # Swap only the extension; str.replace would also rewrite any other
        # ".txt" occurring earlier in the path.
        partsfile = os.path.splitext(filename)[0] + ".py"
        with open(partsfile) as fp:
            # Collapse all whitespace runs so the expression is on one line.
            parts_repr = " ".join(fp.read().split())
        # Builtins are disabled for the eval; the module globals are passed
        # as the local namespace, so the fixture may use module-level names.
        # NOTE(review): ast.literal_eval would be safer if the fixtures
        # contain nothing but literals -- verify before switching.
        parts = eval(parts_repr, {"__builtins__": None}, globals())
        self.assertEqual(parse(uri), parts)

# Both classes are driven by the same directory: Construct from the ".py"
# fixtures and Parse from the ".txt" fixtures.
file_parametrize(Construct,"test/files/legaluri",".py")
file_parametrize(Parse,"test/files/legaluri",".txt")
Example #14
0
            if not isinstance(subpart, str):
                self._remove_uri_for_testcases(subpart)
            elif hasattr(subpart, 'uri') and not isinstance(subpart, LinkSubject):
                del subpart.uri

            
from ferenda.testutil import file_parametrize

# Fixture file names whose tests are known to be broken.
brokentests = [
    "definition-no-definition.txt",
    "definition-paranthesis-lista.txt",
    "definition-paranthesis-multiple.txt",
    "definition-strecksatslista-andrastycke.txt",
    "extra-overgangsbestammelse-med-rubriker.txt",
    "regression-10kap-ellagen.txt",
    "tricky-felformatterad-tabell.txt",
    "tricky-lang-rubrik.txt",
    "tricky-lista-inte-rubrik.txt",
    "tricky-lista-not-rubriker-2.txt",
    "tricky-lopande-rubriknumrering.txt",
    "tricky-okand-aldre-lag.txt",
    "tricky-paragraf-inledande-tomrad.txt",
    "tricky-tabell-overgangsbest.txt",
    "tricky-tabell-sju-kolumner.txt",
]


def broken(testname):
    """Return True if *testname* appears in the module-level ``brokentests``."""
    is_listed = testname in brokentests
    return is_listed
    
# Register tests on Parse for the ".txt" fixtures under test/files/sfs/parse,
# passing broken() so the known-broken fixtures can be singled out
# (presumably as expected failures -- confirm against ferenda.testutil).
file_parametrize(Parse, "test/files/sfs/parse", ".txt", broken)
Example #15
0
                # re-run the parse but with debugging on
                print("============DEBUG OUTPUT================")
                p.debug = True
                tr = TextReader(filename,
                                encoding="utf-8",
                                linesep=TextReader.UNIX)
                b = p.parse(tr.getiterator(tr.readparagraph))
                print("===============RESULT===================")
                print(elements.serialize(b))
                self.fail("========See output above=======")
            else:
                self.assertEqual(result, elements.serialize(b).strip())
        else:
            print("\nResult:\n" + elements.serialize(b))
            self.fail()

    def test_no_recognizer(self):
        # Running this fixture must raise FSMStateError.
        # NOTE(review): the ".tx" suffix looks deliberate (it keeps the file
        # out of the ".txt" parametrization at module level) -- confirm.
        fixture = "test/files/fsmparser/no-recognizer.tx"
        self.assertRaises(FSMStateError, self.run_test_file, fixture)

    def test_no_transition(self):
        # Running this fixture must raise FSMStateError.
        # NOTE(review): the ".tx" suffix looks deliberate (it keeps the file
        # out of the ".txt" parametrization at module level) -- confirm.
        fixture = "test/files/fsmparser/no-transition.tx"
        self.assertRaises(FSMStateError, self.run_test_file, fixture)

    def test_debug(self):
        # Replace the builtin print() with a mock while the fixture is run
        # in debug mode, then verify that something was printed.
        with patch("builtins.print") as fake_print:
            self.run_test_file("test/files/fsmparser/basic.txt", debug=True)
        self.assertTrue(fake_print.called)

# Register tests on Parse for the ".txt" fixtures under test/files/fsmparser
# (presumably one generated test per file -- confirm against
# ferenda.testutil.file_parametrize).
file_parametrize(Parse, "test/files/fsmparser", ".txt")