def facets(self):
    return [Facet(RDF.type),
            Facet(DCTERMS.title),
            # Facet(DCTERMS.publisher), -- is always w3c
            Facet(DCTERMS.identifier),
            Facet(DCTERMS.issued)]
def facets(self):
    from ferenda import Facet
    return [Facet(self.ns['dcterms'].title),
            Facet(self.ns['dcterms'].issued),
            Facet(self.ns['dcterms'].subject),
            Facet(self.ns['dcterms'].identifier)]
def facets(self):
    labels = {'dir': 'Kommittédirektiv',
              'sou': 'SOU',
              'ds': 'Ds',
              'prop': 'Propositioner'}
    # rdf:type rpubl:Kommittedirektiv => "Kommittédirektiv"
    # rdf:type rpubl:Utredningsbetankande, rpubl:utrSerie .*sou => "SOU"
    # rdf:type rpubl:Utredningsbetankande, rpubl:utrSerie .*ds => "Ds"
    # rdf:type rpubl:Proposition => "Propositioner"
    def select(row, binding, extra):
        return labels[ident(row, binding, extra)]

    # This is a selector that can CLEARLY not run on arbitrary rows
    def ident(row, binding, extra):
        rdftype = row[binding]
        if rdftype == str(self.ns['rpubl'].Utredningsbetankande):
            if row['rpubl_utrSerie']:
                leaf = util.uri_leaf(row['rpubl_utrSerie'])
                if leaf.startswith("ds"):
                    return "ds"
                elif leaf.startswith("sou"):
                    return "sou"
                else:
                    assert leaf in ("sou", "ds"), \
                        "leaf was %s, unsure whether this is a SOU or a Ds." % leaf
            else:
                self.log.error(
                    "Row for %s is rpubl:Utredning but lacks rpubl:utrSerie" %
                    row['uri'])
        elif rdftype == str(self.ns['rpubl'].Kommittedirektiv):
            return "dir"
        elif rdftype == str(self.ns['rpubl'].Proposition):
            return "prop"
        else:
            pass
            # self.log.error("Row for %s has unrecognized type %s" % (row['uri'], row['rdf_type']))

    return [Facet(RDF.type,
                  selector=select,
                  pagetitle="Alla %(selected)s",
                  identificator=ident),
            Facet(RPUBL.arsutgava,
                  indexingtype=fulltextindex.Label(),
                  selector_descending=True)]
def news_feedsets(self, data, facets):
    # Start by mangling the keyfunc in the existing facets so that
    # feeds will be sorted by 'rpubl_avgorandedatum' (aliased by
    # feed_item to 'published') -- this'll probably not end
    # well...
    for facet in facets:
        facet.key = lambda row, binding, resource_graph: row[self.news_sortkey]
        facet.key_descending = True

    # works pretty much the same as toc_pagesets, but returns ONE
    # feedset (not several) that has one feed per publisher
    feeds = {}
    facet = facets[0]  # should be the RPUBL.rattsfallspublikation one
    for row in data:
        feedid = row['rpubl_rattsfallspublikation']
        if feedid not in feeds:
            slug = Facet.term(row, 'rpubl_rattsfallspublikation')
            title = "Rättsfall från %s" % self.slug_to_title[slug]
            feeds[feedid] = Feed(slug=slug,
                                 title=title,
                                 binding='rpubl_rattsfallspublikation',
                                 value=feedid)
    feeds = sorted(feeds.values(), key=attrgetter('value'))
    return [Feedset(label="Rättsfallspublikation",
                    predicate=facet.rdftype,
                    feeds=feeds),
            Feedset(label="All",
                    feeds=[Feed(slug="main",
                                title="Samtliga rättsfall",
                                binding=None,
                                value=None)])]
def toc_pagesets(self, data, facets):
    # our primary facet is RPUBL.rattsfallspublikation, but we
    # need to create one pageset for each value thereof.
    pagesetdict = {}
    selector_values = {}
    facet = facets[0]  # should be the RPUBL.rattsfallspublikation one
    for row in data:
        pagesetid = row['rpubl_rattsfallspublikation']
        if pagesetid not in pagesetdict:
            # Get the preferred court label from our own mapping,
            # fall back to the skos:prefLabel of the publikation
            label = self._rattsfallspublikation_label.get(
                row['rpubl_rattsfallspublikation'],
                Facet.resourcelabel(row, 'rpubl_rattsfallspublikation',
                                    self.commondata))
            pagesetdict[pagesetid] = TocPageset(label=label,
                                                predicate=pagesetid,
                                                pages=[])
        selected = row['rpubl_arsutgava']
        selector_values[(pagesetid, selected)] = True
    for (pagesetid, value) in sorted(list(selector_values.keys()), reverse=True):
        pageset = pagesetdict[pagesetid]
        pageset.pages.append(TocPage(linktext=value,
                                     title="Rättsfall från %s under %s" %
                                           (pageset.label, value),
                                     binding=util.uri_leaf(pagesetid),
                                     value=value))
    # make sure pagesets are returned in the preferred, arbitrary
    # order specified by _rattsfallspublikation_order
    for x in pagesetdict.values():
        assert x.label in self._rattsfallspublikation_order, \
            "%s not in _rattsfallspublikation_order" % x.label
    return sorted(list(pagesetdict.values()),
                  key=lambda x: self._rattsfallspublikation_order.index(x.label))
def test_pageset_resourcelabel(self):
    facets = [Facet(DCTERMS.publisher,
                    pagetitle="Documents published in %(selected)s")]
    self.repo.commondata = self.results2data
    got = self.repo.toc_pagesets(self.results2, facets)
    want = self.pagesets2
    self.assertEqual(want, got)
def facets(self):
    resourcecache = {}

    def resourcename(row, binding, resource_graph):
        k = (row[binding], resource_graph.identifier)
        if k not in resourcecache:
            uri = URIRef(row[binding])
            resourcecache[k] = str(resource_graph.value(uri, FOAF.name))
        return resourcecache[k]

    return [Facet(DCTERMS.publisher,
                  selector=resourcename,
                  identificator=Facet.resourcelabel),
            Facet(DCTERMS.issued)]
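# A minimal, self-contained sketch (not part of the repo code; the graph
# contents and URIs below are made up) of what the resourcename selector
# above does: look up foaf:name for the row's publisher URI in the resource
# graph. The real selector also caches by (value, graph identifier).
from rdflib import Graph, Literal, Namespace, URIRef

FOAF = Namespace("http://xmlns.com/foaf/0.1/")
g = Graph()
g.add((URIRef("http://example.org/publisher/acme"),
       FOAF.name, Literal("Acme Publishing")))
row = {"dcterms_publisher": "http://example.org/publisher/acme"}
# Mirrors resourcename(row, "dcterms_publisher", g), minus the cache:
print(str(g.value(URIRef(row["dcterms_publisher"]), FOAF.name)))  # Acme Publishing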
def facets(self):
    def select_rfcnum(row, binding, resource_graph):
        # "RFC 6998" -> "6900"
        return row[binding][4:-2] + "00"
    from ferenda import Facet
    return [Facet(self.ns['dcterms'].title),
            Facet(self.ns['dcterms'].issued),
            Facet(self.ns['dcterms'].subject,
                  selector=Facet.defaultselector,
                  identificator=Facet.defaultselector,
                  key=Facet.defaultselector),
            Facet(self.ns['dcterms'].identifier,
                  use_for_toc=True,
                  selector=select_rfcnum,
                  pagetitle="RFC %(selected)s00-%(selected)s99")]
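# Sketch (the sample identifiers are hypothetical) of how select_rfcnum above
# buckets dcterms:identifier values into hundred-blocks for the TOC:
# "RFC 6998"[4:-2] == "69", so the row lands on the "6900" page, rendered as
# "RFC 6900-6999" via the pagetitle template.
for identifier in ("RFC 6998", "RFC 7230", "RFC 2616"):
    print(identifier, "->", identifier[4:-2] + "00")
# RFC 6998 -> 6900
# RFC 7230 -> 7200
# RFC 2616 -> 2600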
def facets(self):
    return [Facet(RDF.type),
            Facet(DCTERMS.title),
            Facet(DCTERMS.publisher),
            Facet(DCTERMS.identifier),
            Facet(DCTERMS.issued),
            Facet(DCTERMS.publisher),
            Facet(DCTERMS.abstract),
            Facet(DC.subject)]
def facets(self):
    # note that RDF.type is not one of the facets
    return [Facet(DC.publisher),
            # Having a dcterms_issued of type string conflicts
            # with other repos having dcterms_issued of type date
            # (range queries against the field do not work). Disable for now.
            # Facet(DCTERMS.issued, indexingtype=fulltextindex.Label()),
            Facet(DCTERMS.rightsHolder,
                  indexingtype=fulltextindex.Resource(),
                  multiple_values=True),
            Facet(DCTERMS.title, toplevel_only=True),
            Facet(DCTERMS.identifier,
                  selector=self.my_id_selector,
                  key=self.lexicalkey,
                  label="IDs having %(selected) characters"),
            Facet(DC.creator, toplevel_only=False)]
def facets(self):
    EX = self.ns['ex']
    return [Facet(RDF.type),
            Facet(DCTERMS.title),
            Facet(DCTERMS.publisher, multiple_values=True),
            Facet(DCTERMS.identifier),
            Facet(DCTERMS.issued),
            Facet(EX.secret, indexingtype=Boolean()),
            Facet(DCTERMS.references),
            Facet(DC.subject)]
def test_year(self):
    self.assertEqual('2014', Facet.year({'dcterms_issued': '2014-06-05T12:00:00'}))
    self.assertEqual('2014', Facet.year({'dcterms_issued': '2014-06-05'}))
    self.assertEqual('2014', Facet.year({'dcterms_issued': '2014-06'}))
    with self.assertRaises(Exception):
        Facet.year({'dcterms_issued': 'This is clearly an invalid date'})
    with self.assertRaises(Exception):
        Facet.year({'dcterms_issued': '2014-14-99'})
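# Sketch only -- not ferenda's implementation of Facet.year. One way to get
# the behaviour the test above pins down (accept ISO-style date/datetime
# strings, raise on anything invalid) is to validate against a few formats
# and return the year part:
from datetime import datetime

def year_of(value):
    for fmt in ("%Y-%m-%dT%H:%M:%S", "%Y-%m-%d", "%Y-%m"):
        try:
            return str(datetime.strptime(value, fmt).year)
        except ValueError:
            continue
    raise ValueError("%s is not a valid date" % value)

print(year_of("2014-06-05"))  # 2014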
def facets(self):
    # maybe if each entry in the list could be a tuple or a single
    # element. If it's a tuple, then the first elements' selector
    # values (eg organizations) become top level facets, the
    # second elements' selector values become subsections
    # underneath, and possibly more levels.
    def altlabel(row, binding, resource_graph):
        uri = URIRef(row[binding])
        if resource_graph.value(uri, SKOS.altLabel):
            return str(resource_graph.value(uri, SKOS.altLabel))
        else:
            return row[binding]

    def mainfs(row, binding, resource_graph):
        uri = URIRef(row[binding])
        mainuri = resource_graph.value(uri, DCTERMS.isReplacedBy)
        if mainuri:
            uri = mainuri
        return util.uri_leaf(uri)

    return [Facet(RPUBL.forfattningssamling,
                  # selector=altlabel,
                  selector=mainfs,
                  identificator=mainfs,
                  use_for_toc=True,
                  label="Ordnade efter författningssamling",
                  pagetitle="Föreskrifter i %(selected)s"),
            Facet(RPUBL.arsutgava,
                  indexingtype=fulltextindex.Label(),
                  selector_descending=True,
                  use_for_toc=False),
            Facet(RPUBL.konsolideringsunderlag,
                  indexingtype=fulltextindex.Identifier(),
                  use_for_toc=False,
                  use_for_feed=False,
                  multiple_values=True),
            Facet(RPUBL.andrar,
                  indexingtype=fulltextindex.Identifier(),
                  use_for_toc=False,
                  use_for_feed=False,
                  multiple_values=True),
            Facet(RDF.type, use_for_toc=False, use_for_feed=False),
            Facet(DCTERMS.title, use_for_toc=False),
            Facet(DCTERMS.publisher, use_for_toc=False,
                  pagetitle="Författningar utgivna av %(selected)s"),
            Facet(DCTERMS.identifier)] + self.standardfacets
def facets(self):
    return [Facet(RDF.type),           # fulltextindex.URI
            Facet(DCTERMS.title),      # fulltextindex.Text(boost=4)
            Facet(DCTERMS.identifier), # fulltextindex.Label(boost=16)
            Facet(DCTERMS.issued),     # fulltextindex.Datetime()
            Facet(DCTERMS.issued,
                  indexingtype=fulltextindex.Boolean(),
                  selector=self.is_april_fools,
                  dimension_type="value",
                  dimension_label="aprilfools"),
            Facet(DCTERMS.publisher),  # fulltextindex.Resource()
            Facet(DC.subject),         # fulltextindex.Keywords()
            Facet(SCHEMA.free)         # fulltextindex.Boolean()
            ]
def facets(self):
    def kwselector(row, binding, resource_graph):
        bucket = row[binding][0]
        if bucket.isalpha():
            return bucket.upper()
        else:
            return "#"

    return [Facet(DCTERMS.title,
                  label="Ordnade efter titel",
                  pagetitle='Begrepp som b\xf6rjar p\xe5 "%(selected)s"',
                  selector=kwselector)]
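# Illustration with made-up terms: kwselector above buckets keywords by their
# uppercased first letter and lumps everything non-alphabetic under "#".
for term in ("avtal", "Ägare", "3G"):
    print(term, "->", term[0].upper() if term[0].isalpha() else "#")
# avtal -> A
# Ägare -> Ä
# 3G -> #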
def facets(self):
    # The facets of a repo control indexing, particularly the
    # synthesized 'label', 'creator' and 'issued'. By only
    # defining a facet for 'label' we avoid having to define
    # issued and creator for static pages. Or maybe we should try
    # to do that?
    return [Facet(RDFS.label,
                  use_for_toc=False,
                  use_for_feed=False,
                  toplevel_only=False,
                  dimension_label="label",
                  dimension_type="value",
                  multiple_values=False,
                  indexingtype=fulltextindex.Label(boost=16))]
def facets(self):
    def select_rfcnum(row, binding, resource_graph):
        # "RFC 6998" -> "6900"
        return row[binding][4:-2] + "00"

    return [Facet(self.ns['rdf'].type),
            Facet(self.ns['dcterms'].identifier,
                  label="Sorted by RFC #",
                  pagetitle="RFC %(selected)s00-%(selected)s99",
                  selector=select_rfcnum,
                  use_for_toc=True),
            Facet(self.ns['dcterms'].title),
            Facet(self.ns['dcterms'].publisher,
                  label="Sorted by stream",
                  pagetitle="The %(selected)s stream"),
            Facet(self.ns['dcterms'].issued),
            Facet(self.ns['dcterms'].subject,
                  # should be rfc:category not dcterms:status?
                  label="Sorted by status",
                  pagetitle="Status: %(selected)s")]
def facets(self):
    return [Facet(DCTERMS.issued)]
def facets(self):
    return super(Direktiv, self).facets() + [Facet(DCTERMS.title,
                                                   toplevel_only=False)]
class Feedsets(RepoTester):
    results2 = json.load(
        open("test/files/datasets/results2-plus-entries.json"),
        object_hook=util.make_json_date_object_hook('published', 'updated'))
    results2data = rdflib.Graph().parse(
        open("test/files/datasets/results2data.ttl"), format="turtle")

    facets = [Facet(rdftype=RDF.type),
              Facet(rdftype=DCTERMS.publisher),
              Facet(rdftype=DCTERMS.issued)]

    feedsets = [
        Feedset(label="Sorted by type",
                predicate=RDF.type,
                feeds=[Feed(title="All Book documents",
                            slug="type/book",
                            binding="rdf_type",
                            value="Book")]),
        Feedset(label="Sorted by publisher",
                predicate=DCTERMS.publisher,
                feeds=[Feed(title="Documents published by Analytical Biochemistry",
                            slug="publisher/analytical",
                            binding="dcterms_publisher",
                            value="analytical"),
                       Feed(title="Documents published by Journal of Biological Chemistry",
                            slug="publisher/biochem",
                            binding="dcterms_publisher",
                            value="biochem"),
                       Feed(title="Documents published by Nature",
                            slug="publisher/nature",
                            binding="dcterms_publisher",
                            value="nature")]),
        Feedset(label="All",
                predicate=None,
                feeds=[Feed(title="All documents",  # "... in base" ?
                            slug="main",
                            binding=None,
                            value=None)])]

    def setUp(self):
        super(Feedsets, self).setUp()
        self.repo.news_facet_entries = Mock(return_value=self.results2)
        self.repo.commondata = self.results2data

    def test_feedsets(self):
        got = self.repo.news_feedsets(self.results2, self.facets)
        want = self.feedsets

        # make sure 3 feedsets were created and their labels
        self.assertEqual(3, len(got))
        self.assertEqual("Sorted by type", got[0].label)
        self.assertEqual("Sorted by publisher", got[1].label)
        self.assertEqual("All", got[2].label)

        # make sure the title of the only feed in the first feedset
        # turned out OK
        self.assertEqual("All Book documents", got[0].feeds[0].title)

        # make sure the publisher feedset has the correct things
        self.assertEqual(3, len(got[1].feeds))  # 3 different journals
        self.assertEqual("publisher/analytical", got[1].feeds[0].slug)
        self.assertEqual("Documents published by Analytical Biochemistry",
                         got[1].feeds[0].title)

        # this test incorporates all of the above
        self.assertEqual(want, got)

    def test_select_for_feeds(self):
        got = self.repo.news_select_for_feeds(self.results2, self.feedsets,
                                              self.facets)
        # last feedset (main) should have one single feed and it
        # should contain all entries.
        self.assertEqual(len(got[-1].feeds), 1)
        self.assertEqual(len(got[-1].feeds[0].entries), 4)
        self.assertEqual("http://example.org/articles/pm14907713",
                         got[-1].feeds[0].entries[0]['uri'])
        self.assertEqual("http://example.org/articles/pm942051",
                         got[-1].feeds[0].entries[3]['uri'])
def stats_slice(self, data, facet, resource_graph):
    binding = resource_graph.qname(facet.rdftype).replace(":", "_")
    if facet.dimension_label:
        dimension_label = facet.dimension_label
    elif self.config.legacyapi:
        dimension_label = util.uri_leaf(str(facet.rdftype))
    else:
        dimension_label = binding

    dimension_type = facet.dimension_type
    if (self.config.legacyapi and dimension_type == "value"):
        # legacyapi doesn't support the value type, we must
        # convert it into ref, and convert all string values to
        # fake resource ref URIs
        dimension_type = "ref"
        transformer = lambda x: (
            "http://example.org/fake-resource/%s" % x).replace(" ", "_")
    elif self.config.legacyapi and dimension_type == "term":
        # legacyapi expects "Standard" over "bibo:Standard", which is what
        # Facet.qname returns
        transformer = lambda x: x.split(":")[1]
    else:
        transformer = lambda x: x

    observations = Counter()
    # one file per uri+observation seen -- avoid
    # double-counting
    observed = {}
    for row in data:
        observation = None
        try:
            # maybe if facet.dimension_type == "ref", selector
            # should always be Facet.defaultselector? NOTE:
            # we look at facet.dimension_type, not
            # dimension_type, as the latter may be altered if
            # legacyapi == True
            if facet.dimension_type == "ref":
                observation = transformer(
                    Facet.defaultselector(row, binding))
            else:
                observation = transformer(
                    facet.selector(row, binding, resource_graph))
        except Exception as e:
            # most of the time, we should swallow this
            # exception since it's a selector that relies on
            # information that is just not present in the rows
            # from some repos. I think.
            if hasattr(facet.selector, 'im_self'):
                # try to find the location of the selector
                # function for easier debugging
                fname = "%s.%s.%s" % (facet.selector.__module__,
                                      facet.selector.im_self.__name__,
                                      facet.selector.__name__)
            else:
                # probably a lambda function
                fname = facet.selector.__name__
            # FIXME: do we need the repo name here to provide useful
            # messages?
            # self.log.warning("facet %s (%s) fails for row %s : %s %s" %
            #                  (binding, fname, row['uri'],
            #                   e.__class__.__name__, str(e)))
            pass
        if observation is not None:
            k = (dimension_type, observation)
            if (row['uri'], observation) not in observed:
                observed[(row['uri'], observation)] = True
                observations[k] += 1
    return dimension_label, observations
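# Sketch of the double-counting guard in stats_slice above, run on hypothetical
# rows: each (uri, observation) pair is counted at most once even if the same
# document appears in several rows, and the Counter is keyed on
# (dimension_type, observation).
from collections import Counter

rows = [{"uri": "http://ex.org/1", "dcterms_publisher": "nature"},
        {"uri": "http://ex.org/1", "dcterms_publisher": "nature"},  # duplicate row
        {"uri": "http://ex.org/2", "dcterms_publisher": "nature"}]
observations = Counter()
observed = {}
for row in rows:
    observation = row["dcterms_publisher"]
    if (row["uri"], observation) not in observed:
        observed[(row["uri"], observation)] = True
        observations[("ref", observation)] += 1
print(observations)  # Counter({('ref', 'nature'): 2})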
class TOC(RepoTester):
    results1 = json.load(open("test/files/datasets/results1.json"))
    results2 = json.load(open("test/files/datasets/results2.json"))
    results2data = Graph().parse(open("test/files/datasets/results2data.ttl"),
                                 format="turtle")

    pagesets = [TocPageset('Sorted by title',
                           [TocPage('a', 'Documents starting with "a"',
                                    'dcterms_title', 'a'),
                            TocPage('d', 'Documents starting with "d"',
                                    'dcterms_title', 'd'),
                            TocPage('h', 'Documents starting with "h"',
                                    'dcterms_title', 'h'),
                            TocPage('l', 'Documents starting with "l"',
                                    'dcterms_title', 'l')],
                           DCTERMS.title),
                TocPageset('Sorted by publication year',
                           [TocPage('1791', 'Documents published in 1791',
                                    'dcterms_issued', '1791'),
                            TocPage('1859', 'Documents published in 1859',
                                    'dcterms_issued', '1859'),
                            TocPage('1937', 'Documents published in 1937',
                                    'dcterms_issued', '1937'),
                            TocPage('1939', 'Documents published in 1939',
                                    'dcterms_issued', '1939'),
                            TocPage('1943', 'Documents published in 1943',
                                    'dcterms_issued', '1943'),
                            TocPage('1954', 'Documents published in 1954',
                                    'dcterms_issued', '1954')],
                           DCTERMS.issued)]

    pagesets2 = [TocPageset('Sorted by publisher',
                            [TocPage('Analytical Biochemistry',
                                     'Documents published in Analytical Biochemistry',
                                     'dcterms_publisher', 'analytical'),
                             TocPage('Journal of Biological Chemistry',
                                     'Documents published in Journal of Biological Chemistry',
                                     'dcterms_publisher', 'biochem'),
                             TocPage('Nature',
                                     'Documents published in Nature',
                                     'dcterms_publisher', 'nature')],
                            DCTERMS.publisher)]

    documentlists = {
        ('dcterms_issued', '1791'): [[Link("Dream of the Red Chamber",
                                           uri='http://example.org/books/Dream_of_the_Red_Chamber')]],
        ('dcterms_issued', '1859'): [[Link("A Tale of Two Cities",
                                           uri='http://example.org/books/A_Tale_of_Two_Cities')]],
        ('dcterms_issued', '1937'): [[Link("The Hobbit",
                                           uri='http://example.org/books/The_Hobbit')]],
        ('dcterms_issued', '1939'): [[Link("And Then There Were None",
                                           uri='http://example.org/books/And_Then_There_Were_None')]],
        ('dcterms_issued', '1943'): [[Link("The Little Prince",
                                           uri='http://example.org/books/The_Little_Prince')]],
        ('dcterms_issued', '1954'): [[Link("The Lord of the Rings",
                                           uri='http://example.org/books/The_Lord_of_the_Rings')]],
        ('dcterms_title', 'a'): [[Link("And Then There Were None",
                                       uri='http://example.org/books/And_Then_There_Were_None')],
                                 [Link("A Tale of Two Cities",
                                       uri='http://example.org/books/A_Tale_of_Two_Cities')]],
        ('dcterms_title', 'd'): [[Link("Dream of the Red Chamber",
                                       uri='http://example.org/books/Dream_of_the_Red_Chamber')]],
        ('dcterms_title', 'h'): [[Link("The Hobbit",
                                       uri='http://example.org/books/The_Hobbit')]],
        ('dcterms_title', 'l'): [[Link("The Little Prince",
                                       uri='http://example.org/books/The_Little_Prince')],
                                 [Link("The Lord of the Rings",
                                       uri='http://example.org/books/The_Lord_of_the_Rings')]]
    }

    facets = [Facet(rdftype=RDF.type),
              Facet(rdftype=DCTERMS.title),
              Facet(rdftype=DCTERMS.issued)]

    def setUp(self):
        super(TOC, self).setUp()
        resources = self.datadir + os.sep + "rsrc" + os.sep + "resources.xml"
        util.ensure_dir(resources)
        shutil.copy2("%s/files/base/rsrc/resources.xml" % os.path.dirname(__file__),
                     resources)

    def tearDown(self):
        # make sure self.repo is always newly initialized, not reused
        super(TOC, self).tearDown()
        if hasattr(TOC, 'repo'):
            delattr(TOC, 'repo')

    def test_toc(self):
        # tests the main TOC method, not the helper methods (they are
        # tested separately)
        self.repo.facets = MagicMock()
        self.repo.facet_select = MagicMock()
        self.repo.facet_query = MagicMock()
        self.repo.faceted_data = MagicMock()
        self.repo.log = Mock()
        self.repo.toc_pagesets = Mock()
        self.repo.toc_select_for_pages = Mock()
        self.repo.toc_generate_pages = Mock()
        self.repo.toc_generate_first_page = Mock()
        with patch('json.dump'):
            self.repo.toc()
        # assert facet_query was properly called, error and info msg
        # was printed
        self.assertEqual("http://*****:*****@rel='stylesheet']")
        self.assertEqual(len(css), 3)  # bootstrap, ferenda and sfs (?!)
        self.assertRegex(css[2].get('href'), '^../../../rsrc/css')

        # 2.2 JS links, relativized correctly?
        js = t.findall("body/script")
        self.assertEqual(len(js), 5)  # jquery, bootstrap, hammer, typeahead, ferenda
        self.assertRegex(js[4].get('src'), '^../../../rsrc/js')

        # 2.3 <nav id="toc"> correct (c.f 1.2)
        navlinks = t.findall(".//nav[@id='toc']//li/a")
        self.assertEqual(len(navlinks), 9)
        self.assertEqual(navlinks[0].get("href"), 'http://*****:*****@class='main-container']/article (c.f 1.3)
        docs = t.findall(".//ul[@role='main']/li/a")
        self.assertEqual(len(docs), 2)
        # "And..." should go before "A Tale..."
        self.assertEqual(docs[0].text, 'And Then There Were None')
        self.assertEqual(docs[0].attrib['href'],
                         'http://example.org/books/And_Then_There_Were_None')

        # 2.5 site name correct
        header = t.find(".//div[@class='navbar-header']/a")
        self.assertEqual(header.text, 'testsite')

        # 2.6 main article header correct?
        header = t.find(".//article/h1")
        self.assertEqual(header.text, 'Documents starting with "a"')

    def test_generate_page_staticsite(self):
        self.repo.config.staticsite = True
        self.repo.config.removeinvalidlinks = False
        path = self.repo.toc_generate_page(
            'dcterms_title', 'a',
            self.documentlists[('dcterms_title', 'a')],
            self.pagesets)
        t = etree.parse(path)

        # TOC link should be relativized
        navlinks = t.findall(".//nav[@id='toc']//li/a")
        self.assertEqual('d.html', navlinks[0].get("href"))
        self.assertEqual('../dcterms_issued/1791.html', navlinks[3].get("href"))

        header = t.find(".//div[@class='navbar-header']/a")
        # from /base/toc/title/a.html -> /index.html = 3 levels up
        self.assertEqual('../../../index.html', header.get("href"))

        headernavlinks = t.findall(".//ul[@class='nav navbar-nav']/li/a")
        self.assertEqual('../index.html', headernavlinks[0].get("href"))

        # docs (which in this case use non-base-repo-contained URIs)
        # should be unaffected
        docs = t.findall(".//ul[@role='main']/li/a")
        self.assertEqual('http://example.org/books/And_Then_There_Were_None',
                         docs[0].get("href"))
        self.repo.config.removeinvalidlinks = True

    def test_generate_pages(self):
        paths = self.repo.toc_generate_pages(self.documentlists, self.pagesets)
        self.assertEqual(len(paths), 10)
        #print("=============%s====================" % paths[0])
        #with open(paths[0]) as fp:
        #    print(fp.read())
        for path in paths:
            self.assertTrue(os.path.exists(path))

    def test_generate_first_page(self):
        path = self.repo.toc_generate_first_page(self.documentlists,
                                                 self.pagesets)
        self.assertEqual(path, self.p("base/toc/index.html"))
        self.assertTrue(os.path.exists(path))
        tree = etree.parse(path)
        # check content of path, particularly that css/js refs
        # and pageset links are correct. Also, that the selected
        # indexpage is indeed the first (eg. title/a)
        # (NOTE: the first page in the first pageset (by title/a)
        # isn't linked. The second one (by title/d) is).
        self.assertEqual(
            "http://*****:*****@id='toc']").findall(".//a")[0].get("href"))
        self.assertEqual(
            "https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css",
            tree.find(".//link").get("href"))
        self.assertEqual('Documents starting with "a"',
                         tree.find(".//article/h1").text)

    def test_more(self):
        from ferenda import DocumentRepository
        d = DocumentRepository()
        rows = [{'uri': 'http://ex.org/1',
                 'dcterms_title': 'Abc',
                 'dcterms_issued': '2009-04-02'},
                {'uri': 'http://ex.org/2',
                 'dcterms_title': 'Abcd',
                 'dcterms_issued': '2010-06-30'},
                {'uri': 'http://ex.org/3',
                 'dcterms_title': 'Dfg',
                 'dcterms_issued': '2010-08-01'}]
        from rdflib.namespace import DCTERMS
        facets = [Facet(DCTERMS.title), Facet(DCTERMS.issued)]
        pagesets = d.toc_pagesets(rows, facets)
        expected = {('dcterms_title', 'a'): [[Link('Abc', uri='http://ex.org/1')],
                                             [Link('Abcd', uri='http://ex.org/2')]],
                    ('dcterms_title', 'd'): [[Link('Dfg', uri='http://ex.org/3')]],
                    ('dcterms_issued', '2009'): [[Link('Abc', uri='http://ex.org/1')]],
                    ('dcterms_issued', '2010'): [[Link('Abcd', uri='http://ex.org/2')],
                                                 [Link('Dfg', uri='http://ex.org/3')]]}
        got = d.toc_select_for_pages(rows, pagesets, facets)
        self.assertEqual(expected, got)
def facets(self):
    return super(Propositioner, self).facets() + [Facet(DCTERMS.title,
                                                        toplevel_only=False)]
def facets(self):
    return super(SOU, self).facets() + [Facet(DCTERMS.title)]