def test_more(self):
    """End-to-end check: toc_pagesets + toc_select_for_pages on a
    minimal three-document dataset, verifying grouping by first
    title letter and by issue year."""
    from ferenda import DocumentRepository
    repo = DocumentRepository()
    rows = [
        {'uri': 'http://ex.org/1', 'dcterms_title': 'Abc',
         'dcterms_issued': '2009-04-02'},
        {'uri': 'http://ex.org/2', 'dcterms_title': 'Abcd',
         'dcterms_issued': '2010-06-30'},
        {'uri': 'http://ex.org/3', 'dcterms_title': 'Dfg',
         'dcterms_issued': '2010-08-01'},
    ]
    from rdflib.namespace import DCTERMS
    facets = [Facet(DCTERMS.title), Facet(DCTERMS.issued)]
    pagesets = repo.toc_pagesets(rows, facets)
    doc1 = Link('Abc', uri='http://ex.org/1')
    doc2 = Link('Abcd', uri='http://ex.org/2')
    doc3 = Link('Dfg', uri='http://ex.org/3')
    expected = {
        ('dcterms_title', 'a'): [[doc1], [doc2]],
        ('dcterms_title', 'd'): [[doc3]],
        ('dcterms_issued', '2009'): [[doc1]],
        ('dcterms_issued', '2010'): [[doc2], [doc3]],
    }
    got = repo.toc_select_for_pages(rows, pagesets, facets)
    self.assertEqual(expected, got)
def remove_rootsegments(item, rootsegments):
    """Strip the »-joined *rootsegments* prefix from the link text of
    item[0] and return the remainder wrapped in a single Link (keeping
    item[0]'s uri), or an empty list if nothing remains after the
    prefix. Asserts that the link text actually starts with the prefix.
    """
    full_text = str(item[0])
    joined = "»".join(rootsegments)
    assert full_text.startswith(
        joined), "Tried to remove prefix %s from %s" % (joined, full_text)
    # Also skip the "»" separator that follows the prefix -- but only
    # when there is a non-empty prefix to begin with.
    cut = len(joined) + 1 if joined else 0
    remainder = full_text[cut:]
    if not remainder:
        return []
    return [Link(remainder, uri=item[0].uri)]
def toc_generate_page_body(self, documentlist, nav):
    """Build the TOC page body for a list of act rows.

    Rows describing consolidated base acts
    (rpubl:KonsolideradGrundforfattning) are processed first and their
    rpubl_konsolideringsunderlag values collected into lists; other
    rows follow. Acts that change another act (rpubl_andrar) are
    recorded on the changed act ('andras_av') and moved to the end of
    the ordering. Returns a Body containing *nav* and a <dl> with one
    <dt> per act (group), plus a <dd> listing the base- and change-acts
    belonging to it.
    """
    # move documentlist into a ordereddict keyed on url,
    # concatenating rpubl_konsolideringsunderlag as we go
    documents = OrderedDict()
    # make sure all rpubl:KonsolideradGrundforfattning comes first in the list
    for row in documentlist:
        row = dict(row)
        if row['rdf_type'] == str(RPUBL.KonsolideradGrundforfattning):
            if row['uri'] not in documents:
                documents[row['uri']] = row
                # transform single value to a list, so we can
                # append more if other rows are about the same
                # rpubl:KonsolideradGrundforfattning
                row['rpubl_konsolideringsunderlag'] = [
                    row['rpubl_konsolideringsunderlag']
                ]
            else:
                documents[
                    row['uri']]['rpubl_konsolideringsunderlag'].append(
                        row['rpubl_konsolideringsunderlag'])
    # then the rest
    for row in documentlist:
        if row['rdf_type'] != str(RPUBL.KonsolideradGrundforfattning):
            documents[row['uri']] = row
    # now that we have all documents, check if some of them change
    # some other of them
    for uri in list(documents):
        row = documents[uri]
        if 'rpubl_andrar' in row:
            if row['rpubl_andrar'] not in documents:
                self.log.warning(
                    "%(uri)s: changes %(rpubl_andrar)s, but that doc doesn't exist" % row)
                continue
            if 'andras_av' not in documents[row['rpubl_andrar']]:
                documents[row['rpubl_andrar']]['andras_av'] = []
            # prepend, so the most recently seen change act comes first
            documents[row['rpubl_andrar']]['andras_av'].insert(0, uri)
            # change acts are listed after the acts they change
            documents.move_to_end(uri)
    dl = html.DL(role='main')
    for uri in list(documents):
        if uri not in documents:
            continue  # we must have removed it earlier in the loop
        row = documents[uri]
        label = row.get('dcterms_title',
                        row.get('dcterms_identifier', '(Titel saknas)'))
        if row['dcterms_identifier'] not in label:
            label = "%s: %s" % (row['dcterms_identifier'], label)
        # in most cases we want to link this thing, but not if
        # this is the base act of a non-consolidated act (we link
        # to it in the DD element below instead)
        if (row['rdf_type'] == str(RPUBL.KonsolideradGrundforfattning) or
                'andras_av' not in row):
            label = Link(label, uri=uri)
        dl.append(html.DT([label]))
        # groups of base+change acts may be present whether we have
        # consolidated acts or not, and they might be grouped a
        # little differently, but we need to do the same things
        # with them.
        relevant_docs = []
        if row['rdf_type'] == str(RPUBL.KonsolideradGrundforfattning):
            relevant_docs = row['rpubl_konsolideringsunderlag']
        elif 'andras_av' in row:
            relevant_docs = [uri] + row['andras_av']
        if relevant_docs:
            fs = []
            for f in relevant_docs:
                if f in documents:
                    fs.append(
                        Link(documents[f]['dcterms_identifier'],
                             uri=documents[f]['uri']))
                    fs.append(", ")
                    # consume the doc so it doesn't get its own DT later
                    del documents[f]
            if fs:
                # fs[:-1] drops the trailing ", " separator
                dl.append(
                    html.DD(
                        ["Grund- och ändringsförfattningar: ", *fs[:-1]]))
    return Body([nav, dl])
def toc_item(self, binding, row):
    """Render one TOC row as its identifier followed by a linked title."""
    # note: look at binding to determine which pageset is being
    # constructed in case you want to present documents in
    # different ways depending on that.
    from ferenda.elements import Link
    identifier_text = row['identifier'] + ": "
    title_link = Link(row['title'], uri=row['uri'])
    return [identifier_text, title_link]
class TOC(RepoTester):
    """Tests for DocumentRepository's TOC generation: pageset
    computation, per-page document selection, page generation and the
    static first page.

    NOTE(review): several spans inside this class were destroyed by a
    credential-scrubbing pass (the ``http://*****:*****@...`` remnants
    below). The affected statements are kept verbatim and flagged;
    restore the original code from version control.
    """

    # Canned query-result fixtures.
    results1 = json.load(open("test/files/datasets/results1.json"))
    results2 = json.load(open("test/files/datasets/results2.json"))
    # RDF data accompanying results2.
    results2data = Graph().parse(open("test/files/datasets/results2data.ttl"),
                                 format="turtle")

    # Expected pagesets for the book dataset: keyed on first title
    # letter and on publication year.
    pagesets = [
        TocPageset('Sorted by title', [
            TocPage('a', 'Documents starting with "a"', 'dcterms_title', 'a'),
            TocPage('d', 'Documents starting with "d"', 'dcterms_title', 'd'),
            TocPage('h', 'Documents starting with "h"', 'dcterms_title', 'h'),
            TocPage('l', 'Documents starting with "l"', 'dcterms_title', 'l')
        ], DCTERMS.title),
        TocPageset('Sorted by publication year', [
            TocPage('1791', 'Documents published in 1791', 'dcterms_issued', '1791'),
            TocPage('1859', 'Documents published in 1859', 'dcterms_issued', '1859'),
            TocPage('1937', 'Documents published in 1937', 'dcterms_issued', '1937'),
            TocPage('1939', 'Documents published in 1939', 'dcterms_issued', '1939'),
            TocPage('1943', 'Documents published in 1943', 'dcterms_issued', '1943'),
            TocPage('1954', 'Documents published in 1954', 'dcterms_issued', '1954')
        ], DCTERMS.issued)
    ]

    # Expected pageset for the publisher-keyed dataset (results2).
    pagesets2 = [
        TocPageset('Sorted by publisher', [
            TocPage('Analytical Biochemistry',
                    'Documents published in Analytical Biochemistry',
                    'dcterms_publisher', 'analytical'),
            TocPage('Journal of Biological Chemistry',
                    'Documents published in Journal of Biological Chemistry',
                    'dcterms_publisher', 'biochem'),
            TocPage('Nature', 'Documents published in Nature',
                    'dcterms_publisher', 'nature'),
        ], DCTERMS.publisher)
    ]

    # Expected per-page document lists, keyed on (binding, page value).
    documentlists = {
        ('dcterms_issued', '1791'): [[
            Link("Dream of the Red Chamber",
                 uri='http://example.org/books/Dream_of_the_Red_Chamber')
        ]],
        ('dcterms_issued', '1859'): [[
            Link("A Tale of Two Cities",
                 uri='http://example.org/books/A_Tale_of_Two_Cities')
        ]],
        ('dcterms_issued', '1937'):
        [[Link("The Hobbit", uri='http://example.org/books/The_Hobbit')]],
        ('dcterms_issued', '1939'): [[
            Link("And Then There Were None",
                 uri='http://example.org/books/And_Then_There_Were_None')
        ]],
        ('dcterms_issued', '1943'): [[
            Link("The Little Prince",
                 uri='http://example.org/books/The_Little_Prince')
        ]],
        ('dcterms_issued', '1954'): [[
            Link("The Lord of the Rings",
                 uri='http://example.org/books/The_Lord_of_the_Rings')
        ]],
        ('dcterms_title', 'a'): [[
            Link("And Then There Were None",
                 uri='http://example.org/books/And_Then_There_Were_None')
        ], [
            Link("A Tale of Two Cities",
                 uri='http://example.org/books/A_Tale_of_Two_Cities')
        ]],
        ('dcterms_title', 'd'): [[
            Link("Dream of the Red Chamber",
                 uri='http://example.org/books/Dream_of_the_Red_Chamber')
        ]],
        ('dcterms_title', 'h'):
        [[Link("The Hobbit", uri='http://example.org/books/The_Hobbit')]],
        ('dcterms_title', 'l'): [[
            Link("The Little Prince",
                 uri='http://example.org/books/The_Little_Prince')
        ], [
            Link("The Lord of the Rings",
                 uri='http://example.org/books/The_Lord_of_the_Rings')
        ]]
    }

    # NOTE(review): Facet(rdftype=DCTERMS.title) / Facet(rdftype=DCTERMS.issued)
    # looks suspicious -- rdftype usually names an RDF class, not a property.
    # Confirm against the Facet API before relying on this.
    facets = [
        Facet(rdftype=RDF.type),
        Facet(rdftype=DCTERMS.title),
        Facet(rdftype=DCTERMS.issued)
    ]

    def setUp(self):
        # Copy the canned resources.xml into the test data dir so that
        # generated pages can resolve their css/js resource references.
        super(TOC, self).setUp()
        resources = self.datadir + os.sep + "rsrc" + os.sep + "resources.xml"
        util.ensure_dir(resources)
        shutil.copy2(
            "%s/files/base/rsrc/resources.xml" % os.path.dirname(__file__),
            resources)

    def tearDown(self):
        # make sure self.repo is always newly initialized, not reused
        super(TOC, self).tearDown()
        if hasattr(TOC, 'repo'):
            delattr(TOC, 'repo')

    def test_toc(self):
        # tests the main TOC method, not the helper methods (they are
        # tested separately)
        self.repo.facets = MagicMock()
        self.repo.facet_select = MagicMock()
        self.repo.facet_query = MagicMock()
        self.repo.faceted_data = MagicMock()
        self.repo.log = Mock()
        self.repo.toc_pagesets = Mock()
        self.repo.toc_select_for_pages = Mock()
        self.repo.toc_generate_pages = Mock()
        self.repo.toc_generate_first_page = Mock()
        with patch('json.dump'):
            self.repo.toc()
        # assert facet_query was properly called, error and info msg
        # was printed
        # NOTE(review): scrubbed/garbled span begins here. The redaction
        # appears to have swallowed the rest of test_toc AND the start of
        # the page-generation test that the css/js assertions below belong
        # to (t, css etc. are otherwise undefined). Kept verbatim -- restore
        # the missing code from version control.
        self.assertEqual("http://*****:*****@rel='stylesheet']")
        self.assertEqual(len(css), 3)  # bootstrap, ferenda and sfs (?!)
        self.assertRegex(css[2].get('href'), '^../../../rsrc/css')
        # 2.2 JS links, relativized correctly?
        js = t.findall("body/script")
        self.assertEqual(len(js), 5)  # jquery, bootstrap, hammer, typeahead, ferenda
        self.assertRegex(js[4].get('src'), '^../../../rsrc/js')
        # 2.3 <nav id="toc"> correct (c.f 1.2)
        navlinks = t.findall(".//nav[@id='toc']//li/a")
        self.assertEqual(len(navlinks), 9)
        # NOTE(review): another scrubbed span -- the expected href value and
        # the following "# 2.4 ... div[@class='main-container']/article
        # (c.f 1.3)" comment were fused into one unparseable line. Kept
        # verbatim.
        self.assertEqual(navlinks[0].get("href"), 'http://*****:*****@class='main-container']/article (c.f 1.3)
        docs = t.findall(".//ul[@role='main']/li/a")
        self.assertEqual(len(docs), 2)
        # "And..." should go before "A Tale..."
        self.assertEqual(docs[0].text, 'And Then There Were None')
        self.assertEqual(docs[0].attrib['href'],
                         'http://example.org/books/And_Then_There_Were_None')
        # 2.5 site name correct
        header = t.find(".//div[@class='navbar-header']/a")
        self.assertEqual(header.text, 'testsite')
        # 2.6 main article header correct?
        header = t.find(".//article/h1")
        self.assertEqual(header.text, 'Documents starting with "a"')

    def test_generate_page_staticsite(self):
        # In staticsite mode, links within the repo must be relativized so
        # that generated pages work when served straight off the filesystem.
        self.repo.config.staticsite = True
        self.repo.config.removeinvalidlinks = False
        path = self.repo.toc_generate_page(
            'dcterms_title', 'a', self.documentlists[('dcterms_title', 'a')],
            self.pagesets)
        t = etree.parse(path)
        # TOC link should be relativized
        navlinks = t.findall(".//nav[@id='toc']//li/a")
        self.assertEqual('d.html', navlinks[0].get("href"))
        self.assertEqual('../dcterms_issued/1791.html',
                         navlinks[3].get("href"))
        header = t.find(".//div[@class='navbar-header']/a")
        # from /base/toc/title/a.html -> /index.html = 3 levels up
        self.assertEqual('../../../index.html', header.get("href"))
        headernavlinks = t.findall(".//ul[@class='nav navbar-nav']/li/a")
        self.assertEqual('../index.html', headernavlinks[0].get("href"))
        # docs (which in this case use non-base-repo-contained URIs, should be unaffected
        docs = t.findall(".//ul[@role='main']/li/a")
        self.assertEqual('http://example.org/books/And_Then_There_Were_None',
                         docs[0].get("href"))
        self.repo.config.removeinvalidlinks = True

    def test_generate_pages(self):
        # One HTML page per TocPage should be written to disk.
        paths = self.repo.toc_generate_pages(self.documentlists,
                                             self.pagesets)
        self.assertEqual(len(paths), 10)
        #print("=============%s====================" % paths[0])
        #with open(paths[0]) as fp:
        #    print(fp.read())
        for path in paths:
            self.assertTrue(os.path.exists(path))

    def test_generate_first_page(self):
        path = self.repo.toc_generate_first_page(self.documentlists,
                                                 self.pagesets)
        self.assertEqual(path, self.p("base/toc/index.html"))
        self.assertTrue(os.path.exists(path))
        tree = etree.parse(path)
        # check content of path, particularly that css/js refs
        # and pageset links are correct. Also, that the selected
        # indexpage is indeed the first (eg. title/a)
        # (NOTE: the first page in the first pageset (by title/a) isn't linked. The second one (by title/d) is).
        # NOTE(review): scrubbed span -- the expected URL and part of the
        # tree.find() expression were destroyed. Kept verbatim.
        self.assertEqual(
            "http://*****:*****@id='toc']").findall(".//a")[0].get("href"))
        self.assertEqual(
            "https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css",
            tree.find(".//link").get("href"))
        self.assertEqual('Documents starting with "a"',
                         tree.find(".//article/h1").text)

    def test_more(self):
        # End-to-end check of toc_pagesets + toc_select_for_pages on a
        # minimal three-document dataset.
        from ferenda import DocumentRepository
        d = DocumentRepository()
        rows = [{
            'uri': 'http://ex.org/1',
            'dcterms_title': 'Abc',
            'dcterms_issued': '2009-04-02'
        }, {
            'uri': 'http://ex.org/2',
            'dcterms_title': 'Abcd',
            'dcterms_issued': '2010-06-30'
        }, {
            'uri': 'http://ex.org/3',
            'dcterms_title': 'Dfg',
            'dcterms_issued': '2010-08-01'
        }]
        from rdflib.namespace import DCTERMS
        facets = [Facet(DCTERMS.title), Facet(DCTERMS.issued)]
        pagesets = d.toc_pagesets(rows, facets)
        expected = {
            ('dcterms_title', 'a'): [[Link('Abc', uri='http://ex.org/1')],
                                     [Link('Abcd', uri='http://ex.org/2')]],
            ('dcterms_title', 'd'): [[Link('Dfg', uri='http://ex.org/3')]],
            ('dcterms_issued', '2009'): [[Link('Abc', uri='http://ex.org/1')]],
            ('dcterms_issued', '2010'): [[Link('Abcd', uri='http://ex.org/2')],
                                         [Link('Dfg', uri='http://ex.org/3')]]
        }
        got = d.toc_select_for_pages(rows, pagesets, facets)
        self.assertEqual(expected, got)
def test_link(self):
    """A Link stringifies to its text and has a stable repr."""
    link = Link("Link text", uri="http://example.org/")
    self.assertEqual("Link text", str(link))
    self.assertEqual("Link('Link text', uri=http://example.org/)",
                     repr(link))
# -*- coding: utf-8 -*-
"""Usage example: find URL citations in a piece of text and turn each
match into a Link element, leaving non-matching text untouched."""
from __future__ import unicode_literals
# begin
from ferenda import CitationParser, URIFormatter, citationpatterns, uriformats
from ferenda.elements import Link

citparser = CitationParser()
citparser.add_grammar(citationpatterns.url)
formatter = URIFormatter(("url", uriformats.url))

res = []
text = "An example: http://example.org/. That is all."

# parse_string yields plain strings (non-linked text) interleaved with
# (text, match) tuples for each recognized citation.
for node in citparser.parse_string(text):
    if isinstance(node, str):
        # non-linked text, add and continue
        res.append(node)
    elif isinstance(node, tuple):
        (text, match) = node
        uri = formatter.format(match)
        if uri:
            # BUG FIX: Link takes the human-readable text as its first
            # positional argument and the URI via the 'uri' keyword
            # (cf. Link('Abc', uri='http://ex.org/1') elsewhere in this
            # file); the original passed them reversed as Link(uri, text).
            res.append(Link(text, uri=uri, rel="dcterms:references"))
# end
return_value = True
def makeitem(self, text):
    """Wrap *text* in a Link pointing at its lagen.nu concept URI
    ('»' becomes '//', spaces become underscores)."""
    slug = text.replace("»", "//").replace(" ", "_")
    return [Link(text, uri="https://lagen.nu/begrepp/" + slug)]
def toc_item(self, binding, row):
    """Render one TOC row as 'identifier: ' followed by the linked title."""
    title_link = Link(row['dcterms_title'], uri=row['uri'])
    return [row['dcterms_identifier'] + ": ", title_link]
def toc_item(self, binding, row):
    """Render one TOC row as 'identifier: ' followed by the linked title."""
    from ferenda.elements import Link
    identifier_text = row['dcterms_identifier'] + ": "
    return [identifier_text, Link(row['dcterms_title'], uri=row['uri'])]