def toc_pagesets(self, data, facets):
    """Build a two-level TOC: one pageset per document type, one page per year.

    :param data: iterable of row mappings describing documents.
    :param facets: sequence where ``facets[0]`` yields the pageset id and
        label (via ``identificator``/``selector`` on ``'rdf_type'``) and
        ``facets[1]`` yields the page value (via ``selector`` on
        ``'rpubl_arsutgava'``).
    :returns: list of ``TocPageset`` objects ordered prop, sou, ds, dir.
    :raises KeyError: if a pageset id is not one of those four values.
    """
    # FIXME: Main structure of this (create a two-level hierarchy
    # based on two different facets) mirrors the dv.py
    # toc_pagesets and could possibly be abstracted.
    pagesets_by_id = {}
    seen_pairs = set()
    for row in data:
        type_facet = facets[0]
        pageset_id = type_facet.identificator(row, 'rdf_type', None)
        # The pageset is (re)created for every row; pages are attached
        # only after all rows have been seen, so nothing is lost.
        pagesets_by_id[pageset_id] = TocPageset(
            label=type_facet.selector(row, 'rdf_type', None),
            predicate=pageset_id,  # ??
            pages=[])
        try:
            year = facets[1].selector(row, 'rpubl_arsutgava', None)
            seen_pairs.add((pageset_id, year))
        except KeyError as e:
            self.log.error("Unable to select from %r: %s" % (row, e))

    # Descending order, so the most recent value comes first in each pageset.
    for pageset_id, year in sorted(seen_pairs, reverse=True):
        target = pagesets_by_id[pageset_id]
        page = TocPage(linktext=year,
                       title="%s från %s" % (target.label, year),
                       binding=pageset_id,
                       value=year)
        target.pages.append(page)

    rank = {'prop': 1, 'sou': 2, 'ds': 3, 'dir': 4}
    ordered = list(pagesets_by_id.values())
    ordered.sort(key=lambda ps: rank[ps.predicate])
    return ordered
def toc_pagesets(self, data, facets):
    """Build a two-level TOC: one pageset per publisher, one page per issued value.

    :param data: iterable of row mappings describing documents.
    :param facets: sequence where ``facets[0]`` yields the pageset id and
        label (via ``identificator``/``selector`` on ``'dcterms_publisher'``)
        and ``facets[1]`` yields the page value (via ``selector`` on
        ``'dcterms_issued'``).
    :returns: list of ``TocPageset`` objects sorted by their label.

    Rows that raise ``KeyError`` or ``ValueError`` while being processed
    are logged through ``self.log.error`` and otherwise ignored.
    """
    # FIXME: Main structure of this (create a two-level hierarchy
    # based on two different facets) mirrors the dv.py
    # toc_pagesets and could possibly be abstracted.
    pagesetdict = {}
    selector_values = set()
    for row in data:
        # should use a SKOS.altLabel?
        try:
            pagesetid = facets[0].identificator(row, 'dcterms_publisher',
                                                self.commondata)
            label = facets[0].selector(row, 'dcterms_publisher',
                                       self.commondata)
            pagesetdict[pagesetid] = TocPageset(
                label=label,
                predicate=pagesetid,  # ??
                pages=[])
            selected = facets[1].selector(row, 'dcterms_issued', None)
            selector_values.add((pagesetid, selected))
        except (KeyError, ValueError) as e:
            self.log.error("toc_pagesets: Couldn't process row %s: %s" %
                           (row.get("uri"), e))

    # Descending order, so the most recent value comes first in each pageset.
    for (pagesetid, value) in sorted(selector_values, reverse=True):
        pageset = pagesetdict[pagesetid]
        pageset.pages.append(
            TocPage(linktext=value,
                    title="%s från %s" % (pageset.label, value),
                    binding=pagesetid,
                    value=value))

    # Sort on an explicit key: a bare sorted() would require the pageset
    # objects themselves to be orderable, and fails with TypeError on
    # Python 3 as soon as there are two of them if they are not.
    return sorted(pagesetdict.values(), key=lambda ps: ps.label)
def toc_pagesets(self, data, facets):
    """Build one pageset per rattsfallspublikation value, one page per year.

    :param data: iterable of row mappings; each must carry the keys
        ``'rpubl_rattsfallspublikation'`` and ``'rpubl_arsutgava'``.
    :param facets: sequence of facets; only ``facets[0]`` is touched.
    :returns: list of ``TocPageset`` objects in the order given by
        ``self._rattsfallspublikation_order``.
    :raises AssertionError: if a pageset label is missing from
        ``self._rattsfallspublikation_order``.
    """
    # our primary facet is RPUBL.rattsfallspublikation, but we
    # need to create one pageset for each value thereof.
    by_publication = {}
    seen_pairs = set()
    # should be the RPUBL.rattsfallspublikation one; not used below, but
    # the lookup is kept so a missing facet still fails here.
    _primary_facet = facets[0]

    for row in data:
        publication = row['rpubl_rattsfallspublikation']
        if publication not in by_publication:
            # Get the preferred court label from our own mapping,
            # fall back to the skos:prefLabel of the publikation.
            # The fallback is computed even when the mapping has an entry.
            known_labels = self._rattsfallspublikation_label
            fallback = Facet.resourcelabel(row,
                                           'rpubl_rattsfallspublikation',
                                           self.commondata)
            by_publication[publication] = TocPageset(
                label=known_labels.get(publication, fallback),
                predicate=publication,
                pages=[])
        seen_pairs.add((publication, row['rpubl_arsutgava']))

    # Descending order, so the most recent year comes first in each pageset.
    for publication, year in sorted(seen_pairs, reverse=True):
        target = by_publication[publication]
        page = TocPage(linktext=year,
                       title="Rättsfall från %s under %s" % (target.label, year),
                       binding=util.uri_leaf(publication),
                       value=year)
        target.pages.append(page)

    # make sure pagesets are returned in the preferred, arbitrary order
    # specified by _rattsfallspublikation_order
    for candidate in by_publication.values():
        assert candidate.label in self._rattsfallspublikation_order, "%s not in _rattsfallspublikation_order" % candidate.label
    ordered = list(by_publication.values())
    ordered.sort(key=lambda ps: self._rattsfallspublikation_order.index(ps.label))
    return ordered
def toc_pagesets(self, data, facets):
    """Build one pageset per forfattningssamling, one page per year.

    :param data: iterable of row mappings; each must carry the key
        ``'rpubl_forfattningssamling'``.
    :param facets: sequence where ``facets[0]`` yields the pageset id and
        alternate label (via ``identificator``/``selector`` on
        ``'rpubl_forfattningssamling'``) and ``facets[1]`` yields the page
        value (via ``selector`` on ``'rpubl_arsutgava'``).
    :returns: list of ``TocPageset`` objects sorted by label, where each
        label has the form ``"<prefLabel> (<altLabel>, <altLabel>...)"``.

    The selector for ``facets[0]`` may return two labels joined by ``"|"``
    (main and alternate); both are collected for the pageset label.
    """
    # FIXME: Main structure of this (create a two-level hierarchy
    # based on two different facets) mirrors the dv.py
    # toc_pagesets and could possibly be abstracted.
    pagesetdict = {}
    labelsets = {}
    selector_values = set()
    for row in data:
        pagesetid = facets[0].identificator(row, 'rpubl_forfattningssamling',
                                            self.commondata)
        altlabel = facets[0].selector(row, 'rpubl_forfattningssamling',
                                      self.commondata)
        if "|" in altlabel:
            # split() must actually be called; maxsplit=1 guarantees
            # exactly two parts even if the label contains further "|".
            mainaltlabel, altaltlabel = altlabel.split("|", 1)
        else:
            mainaltlabel = altaltlabel = altlabel
        # this makes sure that each value in labelsets is an array
        # with the main preflabel and altlabel first (eg ["Statens
        # Jordbruksverks författningssamling", "SJVFS"]), and
        # alternate altlabels (eg DFS) later (in an arbitrary
        # order).
        if pagesetid not in labelsets:
            preflabel = self.commondata.value(
                URIRef(row['rpubl_forfattningssamling']), SKOS.prefLabel)
            labelsets[pagesetid] = [preflabel, mainaltlabel]
        if altaltlabel not in labelsets[pagesetid]:
            labelsets[pagesetid].append(altaltlabel)
        selected = facets[1].selector(row, 'rpubl_arsutgava', self.commondata)
        selector_values.add((pagesetid, selected))

    # Descending order, so the most recent year comes first in each pageset.
    for (pagesetid, value) in sorted(selector_values, reverse=True):
        if pagesetid not in pagesetdict:
            # generate eg "Skatteverkets författningssamling (SKVFS, RSFS)"
            labels = labelsets[pagesetid]
            pslabel = "%s (%s)" % (labels[0], ", ".join(labels[1:]))
            pagesetdict[pagesetid] = TocPageset(label=pslabel,
                                                predicate=pagesetid,  # ??
                                                pages=[])
        pageset = pagesetdict[pagesetid]
        pageset.pages.append(TocPage(linktext=value,
                                     title="%s från %s" % (pageset.label, value),
                                     binding=pagesetid,
                                     value=value))
    return sorted(pagesetdict.values(), key=attrgetter('label'))
class TOC(RepoTester):
    """Tests for the table-of-contents generation methods of ``self.repo``.

    Covers ``toc``, ``toc_generate_page``, ``toc_generate_pages``,
    ``toc_generate_first_page``, ``toc_pagesets`` and
    ``toc_select_for_pages`` using the canned fixtures defined below.
    """
    # NOTE(review): in this copy of the file, three string literals below
    # look mangled by a redaction pass ("http://*****:*****@..."). The
    # affected statements are reproduced as found and flagged where they
    # occur; restore them from version control before relying on them.

    # Fixture data, loaded once when the class body is executed. The paths
    # are relative, and the files are opened without being explicitly closed.
    results1 = json.load(open("test/files/datasets/results1.json"))
    results2 = json.load(open("test/files/datasets/results2.json"))
    results2data = Graph().parse(open("test/files/datasets/results2data.ttl"), format="turtle")
    # Two pagesets: four pages by title initial and six by publication year.
    pagesets = [
        TocPageset('Sorted by title', [
            TocPage('a', 'Documents starting with "a"', 'dcterms_title', 'a'),
            TocPage('d', 'Documents starting with "d"', 'dcterms_title', 'd'),
            TocPage('h', 'Documents starting with "h"', 'dcterms_title', 'h'),
            TocPage('l', 'Documents starting with "l"', 'dcterms_title', 'l')
        ], DCTERMS.title),
        TocPageset('Sorted by publication year', [
            TocPage('1791', 'Documents published in 1791', 'dcterms_issued', '1791'),
            TocPage('1859', 'Documents published in 1859', 'dcterms_issued', '1859'),
            TocPage('1937', 'Documents published in 1937', 'dcterms_issued', '1937'),
            TocPage('1939', 'Documents published in 1939', 'dcterms_issued', '1939'),
            TocPage('1943', 'Documents published in 1943', 'dcterms_issued', '1943'),
            TocPage('1954', 'Documents published in 1954', 'dcterms_issued', '1954')
        ], DCTERMS.issued)
    ]
    # A single pageset keyed on publisher; not referenced by the tests
    # visible in this class.
    pagesets2 = [
        TocPageset('Sorted by publisher', [
            TocPage('Analytical Biochemistry', 'Documents published in Analytical Biochemistry', 'dcterms_publisher', 'analytical'),
            TocPage('Journal of Biological Chemistry', 'Documents published in Journal of Biological Chemistry', 'dcterms_publisher', 'biochem'),
            TocPage('Nature', 'Documents published in Nature', 'dcterms_publisher', 'nature'),
        ], DCTERMS.publisher)
    ]
    # Documents for each page. The keys mirror the last two arguments of the
    # TocPage entries above (presumably binding and value -- confirm against
    # the TocPage signature).
    documentlists = {
        ('dcterms_issued', '1791'): [[
            Link("Dream of the Red Chamber", uri='http://example.org/books/Dream_of_the_Red_Chamber')
        ]],
        ('dcterms_issued', '1859'): [[
            Link("A Tale of Two Cities", uri='http://example.org/books/A_Tale_of_Two_Cities')
        ]],
        ('dcterms_issued', '1937'): [[Link("The Hobbit", uri='http://example.org/books/The_Hobbit')]],
        ('dcterms_issued', '1939'): [[
            Link("And Then There Were None", uri='http://example.org/books/And_Then_There_Were_None')
        ]],
        ('dcterms_issued', '1943'): [[
            Link("The Little Prince", uri='http://example.org/books/The_Little_Prince')
        ]],
        ('dcterms_issued', '1954'): [[
            Link("The Lord of the Rings", uri='http://example.org/books/The_Lord_of_the_Rings')
        ]],
        ('dcterms_title', 'a'): [[
            Link("And Then There Were None", uri='http://example.org/books/And_Then_There_Were_None')
        ], [
            Link("A Tale of Two Cities", uri='http://example.org/books/A_Tale_of_Two_Cities')
        ]],
        ('dcterms_title', 'd'): [[
            Link("Dream of the Red Chamber", uri='http://example.org/books/Dream_of_the_Red_Chamber')
        ]],
        ('dcterms_title', 'h'): [[Link("The Hobbit", uri='http://example.org/books/The_Hobbit')]],
        ('dcterms_title', 'l'): [[
            Link("The Little Prince", uri='http://example.org/books/The_Little_Prince')
        ], [
            Link("The Lord of the Rings", uri='http://example.org/books/The_Lord_of_the_Rings')
        ]]
    }
    facets = [
        Facet(rdftype=RDF.type), Facet(rdftype=DCTERMS.title), Facet(rdftype=DCTERMS.issued)
    ]

    def setUp(self):
        # Copy the canned resources.xml into <datadir>/rsrc/ so that it
        # exists before each test runs.
        super(TOC, self).setUp()
        resources = self.datadir + os.sep + "rsrc" + os.sep + "resources.xml"
        util.ensure_dir(resources)
        shutil.copy2(
            "%s/files/base/rsrc/resources.xml" % os.path.dirname(__file__), resources)

    def tearDown(self):
        # make sure self.repo is always newly initialized, not reused
        super(TOC, self).tearDown()
        if hasattr(TOC, 'repo'):
            delattr(TOC, 'repo')

    def test_toc(self):
        # tests the main TOC method, not the helper methods (they are
        # tested separately)
        # Every collaborator of toc() is replaced with a mock, so only the
        # orchestration inside toc() itself is exercised.
        self.repo.facets = MagicMock()
        self.repo.facet_select = MagicMock()
        self.repo.facet_query = MagicMock()
        self.repo.faceted_data = MagicMock()
        self.repo.log = Mock()
        self.repo.toc_pagesets = Mock()
        self.repo.toc_select_for_pages = Mock()
        self.repo.toc_generate_pages = Mock()
        self.repo.toc_generate_first_page = Mock()
        with patch('json.dump'):
            self.repo.toc()
        # assert facet_query was properly called, error and info msg
        # was printed
        # NOTE(review): the literal below looks mangled. The rest of this
        # test and the header of a following test (which would have defined
        # `t` and `css`) appear to be missing, so the statements from here
        # on reference names never assigned in this method.
        self.assertEqual("http://*****:*****@rel='stylesheet']")
        self.assertEqual(len(css), 3)  # bootstrap, ferenda and sfs (?!)
        self.assertRegex(css[2].get('href'), '^../../../rsrc/css')
        # 2.2 JS links, relativized correctly?
        js = t.findall("body/script")
        self.assertEqual(len(js), 5)  # jquery, bootstrap, hammer, typeahead, ferenda
        self.assertRegex(js[4].get('src'), '^../../../rsrc/js')
        # 2.3 <nav id="toc"> correct (c.f 1.2)
        navlinks = t.findall(".//nav[@id='toc']//li/a")
        self.assertEqual(len(navlinks), 9)
        # NOTE(review): the next statement is mangled too. Its literal runs
        # into what looks like the remains of a "2.4 ..." comment, and it is
        # not valid Python as it stands.
        self.assertEqual(navlinks[0].get("href"), 'http://*****:*****@class='main-container']/article (c.f 1.3)
        docs = t.findall(".//ul[@role='main']/li/a")
        self.assertEqual(len(docs), 2)
        # "And..." should go before "A Tale..."
        self.assertEqual(docs[0].text, 'And Then There Were None')
        self.assertEqual(docs[0].attrib['href'], 'http://example.org/books/And_Then_There_Were_None')
        # 2.5 site name correct
        header = t.find(".//div[@class='navbar-header']/a")
        self.assertEqual(header.text, 'testsite')
        # 2.6 main article header correct?
        header = t.find(".//article/h1")
        self.assertEqual(header.text, 'Documents starting with "a"')

    def test_generate_page_staticsite(self):
        # With config.staticsite enabled, the links asserted on below are
        # expected to be relative file paths.
        self.repo.config.staticsite = True
        self.repo.config.removeinvalidlinks = False
        path = self.repo.toc_generate_page(
            'dcterms_title', 'a', self.documentlists[('dcterms_title', 'a')], self.pagesets)
        t = etree.parse(path)
        # TOC link should be relativized
        navlinks = t.findall(".//nav[@id='toc']//li/a")
        self.assertEqual('d.html', navlinks[0].get("href"))
        self.assertEqual('../dcterms_issued/1791.html', navlinks[3].get("href"))
        header = t.find(".//div[@class='navbar-header']/a")
        # from /base/toc/title/a.html -> /index.html = 3 levels up
        self.assertEqual('../../../index.html', header.get("href"))
        headernavlinks = t.findall(".//ul[@class='nav navbar-nav']/li/a")
        self.assertEqual('../index.html', headernavlinks[0].get("href"))
        # docs (which in this case use non-base-repo-contained URIs, should be unaffected
        docs = t.findall(".//ul[@role='main']/li/a")
        self.assertEqual('http://example.org/books/And_Then_There_Were_None', docs[0].get("href"))
        # Re-enable the setting; this line is skipped if an assertion above
        # fails.
        self.repo.config.removeinvalidlinks = True

    def test_generate_pages(self):
        # The fixtures define 4 title pages + 6 year pages, hence 10 paths.
        paths = self.repo.toc_generate_pages(self.documentlists, self.pagesets)
        self.assertEqual(len(paths), 10)
        #print("=============%s====================" % paths[0])
        #with open(paths[0]) as fp:
        #    print(fp.read())
        for path in paths:
            self.assertTrue(os.path.exists(path))

    def test_generate_first_page(self):
        # The generated index page should live at base/toc/index.html and
        # show the content of the first page of the first pageset.
        path = self.repo.toc_generate_first_page(self.documentlists, self.pagesets)
        self.assertEqual(path, self.p("base/toc/index.html"))
        self.assertTrue(os.path.exists(path))
        tree = etree.parse(path)
        # check content of path, particularly that css/js refs
        # and pageset links are correct. Also, that the selected
        # indexpage is indeed the first (eg. title/a)
        # (NOTE: the first page in the first pageset (by title/a) isn't linked. The second one (by title/d) is).
        # NOTE(review): the statement below is mangled. The expected URL and
        # the start of the tree lookup are missing, leaving unbalanced
        # parentheses.
        self.assertEqual(
            "http://*****:*****@id='toc']").findall(".//a")[0].get("href"))
        self.assertEqual(
            "https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css",
            tree.find(".//link").get("href"))
        self.assertEqual('Documents starting with "a"', tree.find(".//article/h1").text)

    def test_more(self):
        # Runs toc_pagesets and toc_select_for_pages on a fresh
        # DocumentRepository with three hand-written rows, and checks how
        # the documents are grouped per page.
        from ferenda import DocumentRepository
        d = DocumentRepository()
        rows = [{
            'uri': 'http://ex.org/1',
            'dcterms_title': 'Abc',
            'dcterms_issued': '2009-04-02'
        }, {
            'uri': 'http://ex.org/2',
            'dcterms_title': 'Abcd',
            'dcterms_issued': '2010-06-30'
        }, {
            'uri': 'http://ex.org/3',
            'dcterms_title': 'Dfg',
            'dcterms_issued': '2010-08-01'
        }]
        from rdflib.namespace import DCTERMS
        facets = [Facet(DCTERMS.title), Facet(DCTERMS.issued)]
        pagesets = d.toc_pagesets(rows, facets)
        # Expected grouping: lowercase title initial, and the year part of
        # the issued date.
        expected = {
            ('dcterms_title', 'a'): [[Link('Abc', uri='http://ex.org/1')], [Link('Abcd', uri='http://ex.org/2')]],
            ('dcterms_title', 'd'): [[Link('Dfg', uri='http://ex.org/3')]],
            ('dcterms_issued', '2009'): [[Link('Abc', uri='http://ex.org/1')]],
            ('dcterms_issued', '2010'): [[Link('Abcd', uri='http://ex.org/2')], [Link('Dfg', uri='http://ex.org/3')]]
        }
        got = d.toc_select_for_pages(rows,
                                     pagesets, facets)
        self.assertEqual(expected, got)