Exemple #1
0
 def test_more(self):
     # Exercise toc_pagesets + toc_select_for_pages together on a
     # three-document dataset faceted by title and issue date.
     from ferenda import DocumentRepository
     repo = DocumentRepository()
     rows = [
         {'uri': 'http://ex.org/1',
          'dcterms_title': 'Abc',
          'dcterms_issued': '2009-04-02'},
         {'uri': 'http://ex.org/2',
          'dcterms_title': 'Abcd',
          'dcterms_issued': '2010-06-30'},
         {'uri': 'http://ex.org/3',
          'dcterms_title': 'Dfg',
          'dcterms_issued': '2010-08-01'},
     ]
     from rdflib.namespace import DCTERMS
     facets = [Facet(DCTERMS.title), Facet(DCTERMS.issued)]
     pagesets = repo.toc_pagesets(rows, facets)
     link1 = Link('Abc', uri='http://ex.org/1')
     link2 = Link('Abcd', uri='http://ex.org/2')
     link3 = Link('Dfg', uri='http://ex.org/3')
     # titles bucket on first letter, dates on year
     expected = {
         ('dcterms_title', 'a'): [[link1], [link2]],
         ('dcterms_title', 'd'): [[link3]],
         ('dcterms_issued', '2009'): [[link1]],
         ('dcterms_issued', '2010'): [[link2], [link3]],
     }
     got = repo.toc_select_for_pages(rows, pagesets, facets)
     self.assertEqual(expected, got)
Exemple #2
0
 def remove_rootsegments(item, rootsegments):
     # Strip the leading root-segment prefix (segments joined by "»")
     # from the text of the Link in item[0], returning either a list
     # with a new shortened Link or [] when nothing remains.
     full_text = str(item[0])
     prefix = "»".join(rootsegments)
     assert full_text.startswith(
         prefix), "Tried to remove prefix %s from %s" % (prefix, full_text)
     cut = len(prefix)
     if cut:
         cut += 1  # removes extra "»", but not if rootsegments is empty
     remainder = full_text[cut:]
     return [Link(remainder, uri=item[0].uri)] if remainder else []
Exemple #3
0
    def toc_generate_page_body(self, documentlist, nav):
        """Build the body of one TOC page: a DL where change acts are
        grouped under the base/consolidated act they belong to.

        :param documentlist: iterable of dict-like rows describing documents
        :param nav: pre-built navigation element included first in the body
        :returns: Body([nav, dl])
        """
        # move documentlist into an ordereddict keyed on uri,
        # concatenating rpubl_konsolideringsunderlag as we go
        documents = OrderedDict()
        # make sure all rpubl:KonsolideradGrundforfattning comes first in the list
        for row in documentlist:
            row = dict(row)  # copy: the list-wrapping below must not mutate input
            if row['rdf_type'] == str(RPUBL.KonsolideradGrundforfattning):
                if row['uri'] not in documents:
                    documents[row['uri']] = row
                    # transform single value to a list, so we can
                    # append more if other rows are about the same
                    # rpubl:KonsolideradGrundforfattning
                    row['rpubl_konsolideringsunderlag'] = [
                        row['rpubl_konsolideringsunderlag']
                    ]
                else:
                    documents[
                        row['uri']]['rpubl_konsolideringsunderlag'].append(
                            row['rpubl_konsolideringsunderlag'])
        # then the rest
        for row in documentlist:
            if row['rdf_type'] != str(RPUBL.KonsolideradGrundforfattning):
                documents[row['uri']] = row

        # now that we have all documents, check if some of them change
        # some other of them
        for uri in list(documents):
            row = documents[uri]
            if 'rpubl_andrar' in row:
                if row['rpubl_andrar'] not in documents:
                    self.log.warning(
                        "%(uri)s: changes %(rpubl_andrar)s, but that doc doesn't exist"
                        % row)
                    continue
                if 'andras_av' not in documents[row['rpubl_andrar']]:
                    documents[row['rpubl_andrar']]['andras_av'] = []
                documents[row['rpubl_andrar']]['andras_av'].insert(0, uri)
                # the changing act is rendered under its base act, so
                # demote it past everything still unprocessed
                documents.move_to_end(uri)

        dl = html.DL(role='main')
        for uri in list(documents):
            if uri not in documents:
                continue  # we must have removed it earlier in the loop
            row = documents[uri]
            label = row.get('dcterms_title',
                            row.get('dcterms_identifier', '(Titel saknas)'))
            # NOTE(review): the direct indexing below raises KeyError when
            # 'dcterms_identifier' is absent, even though the get() above
            # tolerates its absence -- confirm rows always carry it.
            if row['dcterms_identifier'] not in label:
                label = "%s: %s" % (row['dcterms_identifier'], label)
            # in most cases we want to link this thing, but not if
            # this is the base act of a non-consolidated act (we link
            # to it in the DD element below instead)
            if (row['rdf_type'] == str(RPUBL.KonsolideradGrundforfattning)
                    or 'andras_av' not in row):
                label = Link(label, uri=uri)
            dl.append(html.DT([label]))
            # groups of base+change acts may be present whether we have
            # consolidated acts or not, and they might be grouped a
            # little differently, but we need to do the same things
            # with them.
            relevant_docs = []
            if row['rdf_type'] == str(RPUBL.KonsolideradGrundforfattning):
                relevant_docs = row['rpubl_konsolideringsunderlag']
            elif 'andras_av' in row:
                relevant_docs = [uri] + row['andras_av']
            if relevant_docs:
                fs = []
                for f in relevant_docs:
                    if f in documents:
                        fs.append(
                            Link(documents[f]['dcterms_identifier'],
                                 uri=documents[f]['uri']))
                        fs.append(", ")
                        del documents[f]  # consumed here; skip at top level later
                # fs[:-1] drops the trailing ", " separator
                if fs:
                    dl.append(
                        html.DD(
                            ["Grund- och ändringsförfattningar: ", *fs[:-1]]))
        return Body([nav, dl])
Exemple #4
0
 def toc_item(self, binding, row):
     # The binding argument identifies which pageset is being built;
     # inspect it if documents should be presented differently per pageset.
     from ferenda.elements import Link
     identifier_prefix = row['identifier'] + ": "
     return [identifier_prefix, Link(row['title'], uri=row['uri'])]
Exemple #5
0
class TOC(RepoTester):
    """Tests for ferenda's TOC (table of contents) generation.

    NOTE(review): this snippet has been scraped and credential-redacted.
    Several string literals below contain "http://*****:*****@" placeholders
    and at least two statements were fused/truncated by the redaction (see
    the NOTE comments inside ``test_toc`` and ``test_generate_first_page``);
    restore them from the original test suite before running.
    """

    # Class-level fixtures, loaded once at class-definition time.
    # NOTE(review): the open() file handles are never closed; the real
    # source should use context managers here.
    results1 = json.load(open("test/files/datasets/results1.json"))
    results2 = json.load(open("test/files/datasets/results2.json"))
    results2data = Graph().parse(open("test/files/datasets/results2data.ttl"),
                                 format="turtle")
    # Expected pagesets for results1: one set keyed on first title letter,
    # one keyed on publication year.
    pagesets = [
        TocPageset('Sorted by title', [
            TocPage('a', 'Documents starting with "a"', 'dcterms_title', 'a'),
            TocPage('d', 'Documents starting with "d"', 'dcterms_title', 'd'),
            TocPage('h', 'Documents starting with "h"', 'dcterms_title', 'h'),
            TocPage('l', 'Documents starting with "l"', 'dcterms_title', 'l')
        ], DCTERMS.title),
        TocPageset('Sorted by publication year', [
            TocPage('1791', 'Documents published in 1791', 'dcterms_issued',
                    '1791'),
            TocPage('1859', 'Documents published in 1859', 'dcterms_issued',
                    '1859'),
            TocPage('1937', 'Documents published in 1937', 'dcterms_issued',
                    '1937'),
            TocPage('1939', 'Documents published in 1939', 'dcterms_issued',
                    '1939'),
            TocPage('1943', 'Documents published in 1943', 'dcterms_issued',
                    '1943'),
            TocPage('1954', 'Documents published in 1954', 'dcterms_issued',
                    '1954')
        ], DCTERMS.issued)
    ]

    # Expected pagesets for results2 (keyed on publisher).
    pagesets2 = [
        TocPageset('Sorted by publisher', [
            TocPage('Analytical Biochemistry',
                    'Documents published in Analytical Biochemistry',
                    'dcterms_publisher', 'analytical'),
            TocPage('Journal of Biological Chemistry',
                    'Documents published in Journal of Biological Chemistry',
                    'dcterms_publisher', 'biochem'),
            TocPage('Nature', 'Documents published in Nature',
                    'dcterms_publisher', 'nature'),
        ], DCTERMS.publisher)
    ]

    # Expected per-page document lists, keyed on (binding, value) tuples
    # that match the TocPage definitions above.
    documentlists = {
        ('dcterms_issued', '1791'): [[
            Link("Dream of the Red Chamber",
                 uri='http://example.org/books/Dream_of_the_Red_Chamber')
        ]],
        ('dcterms_issued', '1859'): [[
            Link("A Tale of Two Cities",
                 uri='http://example.org/books/A_Tale_of_Two_Cities')
        ]],
        ('dcterms_issued', '1937'):
        [[Link("The Hobbit", uri='http://example.org/books/The_Hobbit')]],
        ('dcterms_issued', '1939'): [[
            Link("And Then There Were None",
                 uri='http://example.org/books/And_Then_There_Were_None')
        ]],
        ('dcterms_issued', '1943'): [[
            Link("The Little Prince",
                 uri='http://example.org/books/The_Little_Prince')
        ]],
        ('dcterms_issued', '1954'): [[
            Link("The Lord of the Rings",
                 uri='http://example.org/books/The_Lord_of_the_Rings')
        ]],
        ('dcterms_title', 'a'):
        [[
            Link("And Then There Were None",
                 uri='http://example.org/books/And_Then_There_Were_None')
        ],
         [
             Link("A Tale of Two Cities",
                  uri='http://example.org/books/A_Tale_of_Two_Cities')
         ]],
        ('dcterms_title', 'd'): [[
            Link("Dream of the Red Chamber",
                 uri='http://example.org/books/Dream_of_the_Red_Chamber')
        ]],
        ('dcterms_title', 'h'):
        [[Link("The Hobbit", uri='http://example.org/books/The_Hobbit')]],
        ('dcterms_title', 'l'):
        [[
            Link("The Little Prince",
                 uri='http://example.org/books/The_Little_Prince')
        ],
         [
             Link("The Lord of the Rings",
                   uri='http://example.org/books/The_Lord_of_the_Rings')
         ]]
    }

    # NOTE(review): elsewhere in this file Facet takes the predicate as the
    # first positional argument (Facet(DCTERMS.title)); passing title/issued
    # as rdftype= looks suspicious -- confirm against ferenda's Facet API.
    facets = [
        Facet(rdftype=RDF.type),
        Facet(rdftype=DCTERMS.title),
        Facet(rdftype=DCTERMS.issued)
    ]

    def setUp(self):
        # Copy the canned resources.xml into the per-test data directory.
        super(TOC, self).setUp()
        resources = self.datadir + os.sep + "rsrc" + os.sep + "resources.xml"
        util.ensure_dir(resources)
        shutil.copy2(
            "%s/files/base/rsrc/resources.xml" % os.path.dirname(__file__),
            resources)

    def tearDown(self):
        # make sure self.repo is always newly initialized, not reused
        super(TOC, self).tearDown()
        if hasattr(TOC, 'repo'):
            delattr(TOC, 'repo')

    def test_toc(self):
        # tests the main TOC method, not the helper methods (they are
        # tested separately)
        self.repo.facets = MagicMock()
        self.repo.facet_select = MagicMock()
        self.repo.facet_query = MagicMock()
        self.repo.faceted_data = MagicMock()
        self.repo.log = Mock()
        self.repo.toc_pagesets = Mock()
        self.repo.toc_select_for_pages = Mock()
        self.repo.toc_generate_pages = Mock()
        self.repo.toc_generate_first_page = Mock()
        with patch('json.dump'):
            self.repo.toc()

        # assert facet_query was properly called, error and info msg
        # was printed
        # NOTE(review): redaction mangled this statement -- it fuses the
        # end of one assertion with a later css-link findall; ``css``,
        # ``t`` etc. below come from code lost in the redaction.
        self.assertEqual("http://*****:*****@rel='stylesheet']")
        self.assertEqual(len(css), 3)  # bootstrap, ferenda and sfs (?!)

        self.assertRegex(css[2].get('href'), '^../../../rsrc/css')

        # 2.2 JS links, relativized correctly?
        js = t.findall("body/script")
        self.assertEqual(len(js),
                         5)  # jquery, bootstrap, hammer, typeahead, ferenda
        self.assertRegex(js[4].get('src'), '^../../../rsrc/js')
        # 2.3 <nav id="toc"> correct (c.f 1.2)
        navlinks = t.findall(".//nav[@id='toc']//li/a")
        self.assertEqual(len(navlinks), 9)

        # NOTE(review): the next line is syntactically broken by redaction.
        self.assertEqual(navlinks[0].get("href"),
                         'http://*****:*****@class='main-container']/article (c.f 1.3)
        docs = t.findall(".//ul[@role='main']/li/a")
        self.assertEqual(len(docs), 2)
        # "And..." should go before "A Tale..."
        self.assertEqual(docs[0].text, 'And Then There Were None')
        self.assertEqual(docs[0].attrib['href'],
                         'http://example.org/books/And_Then_There_Were_None')

        # 2.5 site name correct
        header = t.find(".//div[@class='navbar-header']/a")
        self.assertEqual(header.text, 'testsite')

        # 2.6 main article header correct?
        header = t.find(".//article/h1")
        self.assertEqual(header.text, 'Documents starting with "a"')

    def test_generate_page_staticsite(self):
        self.repo.config.staticsite = True
        self.repo.config.removeinvalidlinks = False
        path = self.repo.toc_generate_page(
            'dcterms_title', 'a', self.documentlists[('dcterms_title', 'a')],
            self.pagesets)
        t = etree.parse(path)

        # TOC link should be relativized
        navlinks = t.findall(".//nav[@id='toc']//li/a")
        self.assertEqual('d.html', navlinks[0].get("href"))
        self.assertEqual('../dcterms_issued/1791.html',
                         navlinks[3].get("href"))

        header = t.find(".//div[@class='navbar-header']/a")
        # from /base/toc/title/a.html -> /index.html = 3 levels up
        self.assertEqual('../../../index.html', header.get("href"))

        headernavlinks = t.findall(".//ul[@class='nav navbar-nav']/li/a")
        self.assertEqual('../index.html', headernavlinks[0].get("href"))

        # docs (which in this case use non-base-repo-contained URIs) should be unaffected
        docs = t.findall(".//ul[@role='main']/li/a")
        self.assertEqual('http://example.org/books/And_Then_There_Were_None',
                         docs[0].get("href"))
        self.repo.config.removeinvalidlinks = True

    def test_generate_pages(self):
        # 4 title pages + 6 year pages = 10 generated files
        paths = self.repo.toc_generate_pages(self.documentlists, self.pagesets)
        self.assertEqual(len(paths), 10)
        #print("=============%s====================" % paths[0])
        #with open(paths[0]) as fp:
        #    print(fp.read())
        for path in paths:
            self.assertTrue(os.path.exists(path))

    def test_generate_first_page(self):
        path = self.repo.toc_generate_first_page(self.documentlists,
                                                 self.pagesets)
        self.assertEqual(path, self.p("base/toc/index.html"))
        self.assertTrue(os.path.exists(path))
        tree = etree.parse(path)
        # check content of path, particularly that css/js refs
        # and pageset links are correct. Also, that the selected
        # indexpage is indeed the first (eg. title/a)
        # (NOTE: the first page in the first pageset (by title/a) isn't linked. The second one (by title/d) is).
        # NOTE(review): the expected URL below was redacted to "*****".
        self.assertEqual(
            "http://*****:*****@id='toc']").findall(".//a")[0].get("href"))
        self.assertEqual(
            "https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css",
            tree.find(".//link").get("href"))

        self.assertEqual('Documents starting with "a"',
                         tree.find(".//article/h1").text)

    def test_more(self):
        # end-to-end check of toc_pagesets + toc_select_for_pages on a
        # small ad-hoc dataset
        from ferenda import DocumentRepository
        d = DocumentRepository()
        rows = [{
            'uri': 'http://ex.org/1',
            'dcterms_title': 'Abc',
            'dcterms_issued': '2009-04-02'
        }, {
            'uri': 'http://ex.org/2',
            'dcterms_title': 'Abcd',
            'dcterms_issued': '2010-06-30'
        }, {
            'uri': 'http://ex.org/3',
            'dcterms_title': 'Dfg',
            'dcterms_issued': '2010-08-01'
        }]
        from rdflib.namespace import DCTERMS
        facets = [Facet(DCTERMS.title), Facet(DCTERMS.issued)]
        pagesets = d.toc_pagesets(rows, facets)
        expected = {
            ('dcterms_title', 'a'): [[Link('Abc', uri='http://ex.org/1')],
                                     [Link('Abcd', uri='http://ex.org/2')]],
            ('dcterms_title', 'd'): [[Link('Dfg', uri='http://ex.org/3')]],
            ('dcterms_issued', '2009'): [[Link('Abc', uri='http://ex.org/1')]],
            ('dcterms_issued', '2010'): [[Link('Abcd', uri='http://ex.org/2')],
                                         [Link('Dfg', uri='http://ex.org/3')]]
        }
        got = d.toc_select_for_pages(rows, pagesets, facets)
        self.assertEqual(expected, got)
Exemple #6
0
 def test_link(self):
     # str() yields only the link text; repr() also shows the uri.
     link = Link("Link text", uri="http://example.org/")
     self.assertEqual("Link text", str(link))
     self.assertEqual("Link('Link text', uri=http://example.org/)", repr(link))
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
# begin
from ferenda import CitationParser, URIFormatter, citationpatterns, uriformats
from ferenda.elements import Link

# Recognize URLs in free text and turn each recognized citation
# into a Link element; everything else is kept as plain text.
citparser = CitationParser()
citparser.add_grammar(citationpatterns.url)
formatter = URIFormatter(("url", uriformats.url))

res = []
text = "An example: http://example.org/. That is all."

# parse_string yields plain strings for unmatched text and
# (text, match) tuples for recognized citations.
for node in citparser.parse_string(text):
    if isinstance(node, str):
        # non-linked text, add and continue
        res.append(node)
    if isinstance(node, tuple):
        # NOTE(review): this rebinds the module-level ``text`` variable.
        (text, match) = node
        uri = formatter.format(match)
        if uri:
            # NOTE(review): ferenda's documented idiom is
            # Link(text, uri=uri, ...); here the URI is passed as the
            # link text and ``text`` as a second positional child --
            # confirm this argument order is intentional.
            res.append(Link(uri, text, rel="dcterms:references"))
# end
return_value = True
Exemple #8
0
 def makeitem(self, text):
     # Build a one-element list with a Link to the lagen.nu concept
     # page for *text*: "»" (nested-concept separator) maps to "//"
     # in the URI, and spaces become underscores.
     slug = text.replace("»", "//").replace(" ", "_")
     return [Link(text, uri="https://lagen.nu/begrepp/" + slug)]
Exemple #9
0
 def toc_item(self, binding, row):
     # Render one TOC entry: "<identifier>: " followed by a titled link.
     prefix = row['dcterms_identifier'] + ": "
     return [prefix, Link(row['dcterms_title'], uri=row['uri'])]
Exemple #10
0
 def toc_item(self, binding, row):
     # Render one TOC entry; the local import keeps ferenda.elements
     # out of module scope.
     from ferenda.elements import Link
     title_link = Link(row['dcterms_title'], uri=row['uri'])
     return [row['dcterms_identifier'] + ": ", title_link]