Exemple #1
0
 def toc_pagesets(self, data, facets):
     """Return the TOC pagesets, one per document type, paged by year.

     ``facets[0]`` provides both the pageset id (``identificator``) and
     its label (``selector``) from each row's ``rdf_type``;
     ``facets[1]`` provides the page value from ``rpubl_arsutgava``.
     A row whose second-facet selection raises ``KeyError`` is logged
     and contributes no page (its pageset is still registered).

     The result is ordered prop, sou, ds, dir; a pageset whose
     predicate is not one of those four raises ``KeyError``.
     """
     # FIXME: Main structure of this (create a two-level hierarchy
     # based on two different facets) mirrors the dv.py
     # toc_pagesets and could possibly be abstracted.
     by_id = {}
     seen_pairs = set()
     for row in data:
         psid = facets[0].identificator(row, 'rdf_type', None)
         # Re-registered for every row; the last row seen for a given
         # id decides the label.
         by_id[psid] = TocPageset(
             label=facets[0].selector(row, 'rdf_type', None),
             predicate=psid,  # ??
             pages=[])
         try:
             seen_pairs.add(
                 (psid, facets[1].selector(row, 'rpubl_arsutgava', None)))
         except KeyError as e:
             self.log.error("Unable to select from %r: %s" % (row, e))

     # Descending (pagesetid, value) order puts the highest value
     # first within each pageset.
     for psid, year in sorted(seen_pairs, reverse=True):
         owner = by_id[psid]
         page = TocPage(linktext=year,
                        title="%s från %s" % (owner.label, year),
                        binding=psid,
                        value=year)
         owner.pages.append(page)

     rank = {'prop': 1, 'sou': 2, 'ds': 3, 'dir': 4}
     return sorted(by_id.values(), key=lambda ps: rank[ps.predicate])
Exemple #2
0
 def toc_pagesets(self, data, facets):
     """Return the TOC pagesets, one per publisher, paged by issue value.

     ``facets[0]`` provides the pageset id and label from each row's
     ``dcterms_publisher`` (resolved against ``self.commondata``);
     ``facets[1]`` provides the page value from ``dcterms_issued``.
     A row for which any of these lookups raises ``KeyError`` or
     ``ValueError`` is logged by its ``uri`` and otherwise skipped.
     If only the second-facet lookup fails, the pageset has already
     been registered by then.

     The pagesets are returned in their natural sort order.
     """
     # FIXME: Main structure of this (create a two-level hierarchy
     # based on two different facets) mirrors the dv.py
     # toc_pagesets and could possibly be abstracted.
     by_publisher = {}
     seen_pairs = set()
     for row in data:
         # should use a SKOS.altLabel?
         try:
             psid = facets[0].identificator(row, 'dcterms_publisher',
                                            self.commondata)
             by_publisher[psid] = TocPageset(
                 label=facets[0].selector(row, 'dcterms_publisher',
                                          self.commondata),
                 predicate=psid,  # ??
                 pages=[])
             seen_pairs.add(
                 (psid, facets[1].selector(row, 'dcterms_issued', None)))
         except (KeyError, ValueError) as e:
             self.log.error("toc_pagesets: Couldn't process row %s: %s" %
                            (row.get("uri"), e))

     # Descending (pagesetid, value) order puts the highest value
     # first within each pageset.
     for psid, issued in sorted(seen_pairs, reverse=True):
         owner = by_publisher[psid]
         page = TocPage(linktext=issued,
                        title="%s från %s" % (owner.label, issued),
                        binding=psid,
                        value=issued)
         owner.pages.append(page)
     return sorted(by_publisher.values())
Exemple #3
0
    def toc_pagesets(self, data, facets):
        """Return one pageset per rattsfallspublikation, paged by year.

        Each row must carry ``rpubl_rattsfallspublikation`` (the pageset
        id) and ``rpubl_arsutgava`` (the page value). The pageset label
        comes from ``self._rattsfallspublikation_label`` when it has an
        entry for the publication, otherwise from
        ``Facet.resourcelabel``. Pagesets are returned in the order
        given by ``self._rattsfallspublikation_order``; a label missing
        from that list trips an assertion.
        """
        # our primary facet is RPUBL.rattsfallspublikation, but we
        # need to create one pageset for each value thereof.
        by_publication = {}
        seen_pairs = set()
        facet = facets[0]  # should be the RPUBL.rattsfallspublikation one
        for row in data:
            publication = row['rpubl_rattsfallspublikation']
            if publication not in by_publication:
                # Prefer the court label from our own mapping and fall
                # back to the skos:prefLabel of the publikation.
                label = self._rattsfallspublikation_label.get(
                    publication,
                    Facet.resourcelabel(row, 'rpubl_rattsfallspublikation',
                                        self.commondata))
                by_publication[publication] = TocPageset(
                    label=label,
                    predicate=publication,
                    pages=[])
            seen_pairs.add((publication, row['rpubl_arsutgava']))

        # Descending (pagesetid, value) order puts the highest value
        # first within each pageset.
        for publication, year in sorted(seen_pairs, reverse=True):
            owner = by_publication[publication]
            page = TocPage(
                linktext=year,
                title="Rättsfall från %s under %s" % (owner.label, year),
                binding=util.uri_leaf(publication),
                value=year)
            owner.pages.append(page)

        # make sure pagesets are returned in the preferred, arbitrary
        # order specified by _rattsfallspublikation_order
        for candidate in by_publication.values():
            assert candidate.label in self._rattsfallspublikation_order, "%s not in _rattsfallspublikation_order" % candidate.label
        return sorted(
            by_publication.values(),
            key=lambda ps: self._rattsfallspublikation_order.index(ps.label))
Exemple #4
0
    def toc_pagesets(self, data, facets):
        """Return one pageset per författningssamling, paged by year.

        ``facets[0]`` provides the pageset id (``identificator``) and an
        alternate label (``selector``) from each row's
        ``rpubl_forfattningssamling``; ``facets[1]`` provides the page
        value from ``rpubl_arsutgava``. All lookups are resolved against
        ``self.commondata``.

        The alternate label may have the form ``"MAIN|ALT"``, in which
        case both parts are recorded. Each pageset label is built as
        ``"<prefLabel> (<altlabel>, <altlabel>, ...)"``, and the
        pagesets are returned sorted by that label.
        """
        # FIXME: Main structure of this (create a two-level hierarchy
        # based on two different facets) mirrors the dv.py
        # toc_pagesets and could possibly be abstracted.
        pagesetdict = {}
        labelsets = {}
        selector_values = set()
        for row in data:
            pagesetid = facets[0].identificator(row,
                                                'rpubl_forfattningssamling',
                                                self.commondata)
            altlabel = facets[0].selector(row, 'rpubl_forfattningssamling',
                                          self.commondata)
            if "|" in altlabel:
                # Split on the first "|" only, so the unpacking always
                # receives exactly two parts.
                mainaltlabel, altaltlabel = altlabel.split("|", 1)
            else:
                mainaltlabel = altaltlabel = altlabel

            # this makes sure that each value in labelsets is an array
            # with the main preflabel and altlabel first (eg ["Statens
            # Jordbruksverks författningssamling", "SJVFS"]), and
            # alternate altlabels (eg DFS) later (in an arbitrary
            # order).
            if pagesetid not in labelsets:
                preflabel = self.commondata.value(
                    URIRef(row['rpubl_forfattningssamling']),
                    SKOS.prefLabel)
                labelsets[pagesetid] = [preflabel, mainaltlabel]
            if altaltlabel not in labelsets[pagesetid]:
                labelsets[pagesetid].append(altaltlabel)

            selected = facets[1].selector(row, 'rpubl_arsutgava',
                                          self.commondata)
            selector_values.add((pagesetid, selected))

        # Descending (pagesetid, value) order puts the highest value
        # first within each pageset.
        for (pagesetid, value) in sorted(selector_values, reverse=True):
            if pagesetid not in pagesetdict:
                # generate eg "Skatteverkets författningssamling (SKVFS, RSFS)"
                labels = labelsets[pagesetid]
                # pop() consumes the preflabel; this is safe because each
                # pagesetid reaches this branch only once.
                preflabel = labels.pop(0)
                pslabel = "%s (%s)" % (preflabel, ", ".join(labels))
                pagesetdict[pagesetid] = TocPageset(label=pslabel,
                                                    predicate=pagesetid,  # ??
                                                    pages=[])
            pageset = pagesetdict[pagesetid]
            pageset.pages.append(TocPage(linktext=value,
                                         title="%s från %s" % (pageset.label, value),
                                         binding=pagesetid,
                                         value=value))
        return sorted(pagesetdict.values(), key=attrgetter('label'))
Exemple #5
0
class TOC(RepoTester):
    """Tests for the table-of-contents (TOC) generation of a repository."""

    # Fixture data, loaded once when the class body is executed. The
    # files are read inside ``with`` blocks so that the handles are
    # closed instead of being left to the garbage collector.
    with open("test/files/datasets/results1.json") as _fp:
        results1 = json.load(_fp)
    with open("test/files/datasets/results2.json") as _fp:
        results2 = json.load(_fp)
    with open("test/files/datasets/results2data.ttl") as _fp:
        results2data = Graph().parse(_fp, format="turtle")
    # Keep the temporary file handle name out of the class namespace.
    del _fp

    # Two pagesets: four pages keyed on title initial and six pages
    # keyed on publication year (ten pages in total).
    pagesets = [
        TocPageset('Sorted by title', [
            TocPage('a', 'Documents starting with "a"', 'dcterms_title', 'a'),
            TocPage('d', 'Documents starting with "d"', 'dcterms_title', 'd'),
            TocPage('h', 'Documents starting with "h"', 'dcterms_title', 'h'),
            TocPage('l', 'Documents starting with "l"', 'dcterms_title', 'l')
        ], DCTERMS.title),
        TocPageset('Sorted by publication year', [
            TocPage('1791', 'Documents published in 1791', 'dcterms_issued',
                    '1791'),
            TocPage('1859', 'Documents published in 1859', 'dcterms_issued',
                    '1859'),
            TocPage('1937', 'Documents published in 1937', 'dcterms_issued',
                    '1937'),
            TocPage('1939', 'Documents published in 1939', 'dcterms_issued',
                    '1939'),
            TocPage('1943', 'Documents published in 1943', 'dcterms_issued',
                    '1943'),
            TocPage('1954', 'Documents published in 1954', 'dcterms_issued',
                    '1954')
        ], DCTERMS.issued)
    ]

    # A single pageset keyed on publisher.
    pagesets2 = [
        TocPageset('Sorted by publisher', [
            TocPage('Analytical Biochemistry',
                    'Documents published in Analytical Biochemistry',
                    'dcterms_publisher', 'analytical'),
            TocPage('Journal of Biological Chemistry',
                    'Documents published in Journal of Biological Chemistry',
                    'dcterms_publisher', 'biochem'),
            TocPage('Nature', 'Documents published in Nature',
                    'dcterms_publisher', 'nature'),
        ], DCTERMS.publisher)
    ]

    # Maps (binding, value) to the entries of that page; each entry is
    # a list holding one Link.
    documentlists = {
        ('dcterms_issued', '1791'): [[
            Link("Dream of the Red Chamber",
                 uri='http://example.org/books/Dream_of_the_Red_Chamber')
        ]],
        ('dcterms_issued', '1859'): [[
            Link("A Tale of Two Cities",
                 uri='http://example.org/books/A_Tale_of_Two_Cities')
        ]],
        ('dcterms_issued', '1937'):
        [[Link("The Hobbit", uri='http://example.org/books/The_Hobbit')]],
        ('dcterms_issued', '1939'): [[
            Link("And Then There Were None",
                 uri='http://example.org/books/And_Then_There_Were_None')
        ]],
        ('dcterms_issued', '1943'): [[
            Link("The Little Prince",
                 uri='http://example.org/books/The_Little_Prince')
        ]],
        ('dcterms_issued', '1954'): [[
            Link("The Lord of the Rings",
                 uri='http://example.org/books/The_Lord_of_the_Rings')
        ]],
        ('dcterms_title', 'a'):
        [[
            Link("And Then There Were None",
                 uri='http://example.org/books/And_Then_There_Were_None')
        ],
         [
             Link("A Tale of Two Cities",
                  uri='http://example.org/books/A_Tale_of_Two_Cities')
         ]],
        ('dcterms_title', 'd'): [[
            Link("Dream of the Red Chamber",
                 uri='http://example.org/books/Dream_of_the_Red_Chamber')
        ]],
        ('dcterms_title', 'h'):
        [[Link("The Hobbit", uri='http://example.org/books/The_Hobbit')]],
        ('dcterms_title', 'l'):
        [[
            Link("The Little Prince",
                 uri='http://example.org/books/The_Little_Prince')
        ],
         [
             Link("The Lord of the Rings",
                  uri='http://example.org/books/The_Lord_of_the_Rings')
         ]]
    }

    facets = [
        Facet(rdftype=RDF.type),
        Facet(rdftype=DCTERMS.title),
        Facet(rdftype=DCTERMS.issued)
    ]

    def setUp(self):
        # Run the base-class setup, then seed the test data directory
        # with the stock resources.xml that ships next to this module.
        super(TOC, self).setUp()
        target = os.sep.join([self.datadir, "rsrc", "resources.xml"])
        util.ensure_dir(target)
        source = "%s/files/base/rsrc/resources.xml" % os.path.dirname(__file__)
        shutil.copy2(source, target)

    def tearDown(self):
        super(TOC, self).tearDown()
        # Drop any repo stored on the class so that self.repo is always
        # newly initialized, never reused between tests.
        if not hasattr(TOC, 'repo'):
            return
        del TOC.repo

    def test_toc(self):
        # tests the main TOC method, not the helper methods (they are
        # tested separately)
        # Every collaborator that toc() might call is replaced with a
        # mock, and json.dump is patched out for the duration of the call.
        self.repo.facets = MagicMock()
        self.repo.facet_select = MagicMock()
        self.repo.facet_query = MagicMock()
        self.repo.faceted_data = MagicMock()
        self.repo.log = Mock()
        self.repo.toc_pagesets = Mock()
        self.repo.toc_select_for_pages = Mock()
        self.repo.toc_generate_pages = Mock()
        self.repo.toc_generate_first_page = Mock()
        with patch('json.dump'):
            self.repo.toc()

        # assert facet_query was properly called, error and info msg
        # was printed
        # NOTE(review): the statement below looks truncated/redacted in
        # this copy of the file: assertEqual receives a single argument,
        # and the names ``css`` and ``t`` used further down are never
        # assigned in this method. Presumably the end of this test and
        # the start of a page-generation test were lost between the URL
        # and the XPath fragment -- restore from the upstream source
        # before relying on anything below.
        self.assertEqual("http://*****:*****@rel='stylesheet']")
        self.assertEqual(len(css), 3)  # bootstrap, ferenda and sfs (?!)

        self.assertRegex(css[2].get('href'), '^../../../rsrc/css')

        # 2.2 JS links, relativized correctly?
        js = t.findall("body/script")
        self.assertEqual(len(js),
                         5)  # jquery, bootstrap, hammer, typeahead, ferenda
        self.assertRegex(js[4].get('src'), '^../../../rsrc/js')
        # 2.3 <nav id="toc"> correct (c.f 1.2)
        navlinks = t.findall(".//nav[@id='toc']//li/a")
        self.assertEqual(len(navlinks), 9)

        # NOTE(review): the next statement is also damaged: its string
        # literal is cut off in the same way as above and does not parse
        # as written. The expected href value cannot be recovered from
        # this copy.
        self.assertEqual(navlinks[0].get("href"),
                         'http://*****:*****@class='main-container']/article (c.f 1.3)
        docs = t.findall(".//ul[@role='main']/li/a")
        self.assertEqual(len(docs), 2)
        # "And..." should go before "A Tale..."
        self.assertEqual(docs[0].text, 'And Then There Were None')
        self.assertEqual(docs[0].attrib['href'],
                         'http://example.org/books/And_Then_There_Were_None')

        # 2.5 site name correct
        header = t.find(".//div[@class='navbar-header']/a")
        self.assertEqual(header.text, 'testsite')

        # 2.6 main article header correct?
        header = t.find(".//article/h1")
        self.assertEqual(header.text, 'Documents starting with "a"')
    def test_generate_page_staticsite(self):
        # Generate the title/"a" page in static-site mode and check that
        # site-internal links become relative while external document
        # links are left alone.
        self.repo.config.staticsite = True
        self.repo.config.removeinvalidlinks = False
        page_key = ('dcterms_title', 'a')
        binding, value = page_key
        generated = self.repo.toc_generate_page(
            binding, value, self.documentlists[page_key], self.pagesets)
        tree = etree.parse(generated)

        # TOC links are relative: a sibling page within the same
        # pageset, and a page in the other pageset one directory over.
        toc_links = tree.findall(".//nav[@id='toc']//li/a")
        self.assertEqual('d.html', toc_links[0].get("href"))
        self.assertEqual('../dcterms_issued/1791.html',
                         toc_links[3].get("href"))

        # from /base/toc/title/a.html -> /index.html = 3 levels up
        site_link = tree.find(".//div[@class='navbar-header']/a")
        self.assertEqual('../../../index.html', site_link.get("href"))

        navbar_links = tree.findall(".//ul[@class='nav navbar-nav']/li/a")
        self.assertEqual('../index.html', navbar_links[0].get("href"))

        # Document links point outside the base repo here, so they
        # should be unaffected.
        doc_links = tree.findall(".//ul[@role='main']/li/a")
        self.assertEqual('http://example.org/books/And_Then_There_Were_None',
                         doc_links[0].get("href"))
        # Switch removeinvalidlinks back on now that the checks passed.
        self.repo.config.removeinvalidlinks = True

    def test_generate_pages(self):
        # The fixture pagesets hold 4 title pages + 6 year pages, so ten
        # files are expected, and each returned path must exist on disk.
        generated = self.repo.toc_generate_pages(self.documentlists,
                                                 self.pagesets)
        self.assertEqual(len(generated), 10)
        for page_path in generated:
            self.assertTrue(os.path.exists(page_path))

    def test_generate_first_page(self):
        # Generates the TOC index page and checks both its location and
        # its content.
        path = self.repo.toc_generate_first_page(self.documentlists,
                                                 self.pagesets)
        self.assertEqual(path, self.p("base/toc/index.html"))
        self.assertTrue(os.path.exists(path))
        tree = etree.parse(path)
        # check content of path, particularly that css/js refs
        # and pageset links are correct. Also, that the selected
        # indexpage is indeed the first (eg. title/a)
        # (NOTE: the first page in the first pageset (by title/a) isn't linked. The second one (by title/d) is).
        # NOTE(review): the statement below looks truncated/redacted in
        # this copy of the file: the expected URL runs straight into an
        # XPath fragment and the parentheses do not balance, so it does
        # not parse as written. Presumably it compared an expected href
        # with the first link under the TOC nav element -- restore from
        # the upstream source.
        self.assertEqual(
            "http://*****:*****@id='toc']").findall(".//a")[0].get("href"))
        # The first <link> element should be the bootstrap stylesheet.
        self.assertEqual(
            "https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css",
            tree.find(".//link").get("href"))

        self.assertEqual('Documents starting with "a"',
                         tree.find(".//article/h1").text)

    def test_more(self):
        # Runs the default toc_pagesets / toc_select_for_pages pair on
        # three hand-made rows and checks the resulting page -> documents
        # mapping for both the title facet and the issued facet.
        from ferenda import DocumentRepository
        from rdflib.namespace import DCTERMS
        repo = DocumentRepository()
        rows = [
            {'uri': uri, 'dcterms_title': title, 'dcterms_issued': issued}
            for uri, title, issued in (
                ('http://ex.org/1', 'Abc', '2009-04-02'),
                ('http://ex.org/2', 'Abcd', '2010-06-30'),
                ('http://ex.org/3', 'Dfg', '2010-08-01'),
            )
        ]
        facets = [Facet(DCTERMS.title), Facet(DCTERMS.issued)]
        pagesets = repo.toc_pagesets(rows, facets)

        # One single-Link entry per document, reused across the pages
        # that the document is expected to appear on.
        abc = [Link('Abc', uri='http://ex.org/1')]
        abcd = [Link('Abcd', uri='http://ex.org/2')]
        dfg = [Link('Dfg', uri='http://ex.org/3')]
        expected = {
            ('dcterms_title', 'a'): [abc, abcd],
            ('dcterms_title', 'd'): [dfg],
            ('dcterms_issued', '2009'): [abc],
            ('dcterms_issued', '2010'): [abcd, dfg],
        }
        got = repo.toc_select_for_pages(rows, pagesets, facets)
        self.assertEqual(expected, got)