Example #1
0
 def test_more(self):
     """Verify that toc_select_for_pages buckets rows by facet page.

     Three rows are run through a title facet and an issued facet on a
     bare DocumentRepository; every (binding, value) page key must map
     to the expected list of single-Link entries.
     """
     from ferenda import DocumentRepository
     from rdflib.namespace import DCTERMS

     repo = DocumentRepository()
     rows = [{'uri': uri, 'dcterms_title': title, 'dcterms_issued': issued}
             for (uri, title, issued) in (
                 ('http://ex.org/1', 'Abc', '2009-04-02'),
                 ('http://ex.org/2', 'Abcd', '2010-06-30'),
                 ('http://ex.org/3', 'Dfg', '2010-08-01'))]
     facets = [Facet(DCTERMS.title), Facet(DCTERMS.issued)]
     pagesets = repo.toc_pagesets(rows, facets)

     # One single-link entry per document, reused in the expected pages.
     abc = [Link('Abc', uri='http://ex.org/1')]
     abcd = [Link('Abcd', uri='http://ex.org/2')]
     dfg = [Link('Dfg', uri='http://ex.org/3')]
     expected = {
         ('dcterms_title', 'a'): [abc, abcd],
         ('dcterms_title', 'd'): [dfg],
         ('dcterms_issued', '2009'): [abc],
         ('dcterms_issued', '2010'): [abcd, dfg],
     }
     got = repo.toc_select_for_pages(rows, pagesets, facets)
     self.assertEqual(expected, got)
Example #2
0
 def facets(self):
     """Return default facets for type, title, identifier and issued."""
     # DCTERMS.publisher is deliberately left out -- it is always w3c.
     predicates = (RDF.type,
                   DCTERMS.title,
                   DCTERMS.identifier,
                   DCTERMS.issued)
     return [Facet(predicate) for predicate in predicates]
Example #3
0
 def facets(self):
     """Return default facets for dcterms title, issued, subject and identifier."""
     from ferenda import Facet
     dcterms = self.ns['dcterms']
     predicates = (dcterms.title,
                   dcterms.issued,
                   dcterms.subject,
                   dcterms.identifier)
     return [Facet(predicate) for predicate in predicates]
Example #4
0
    def facets(self):
        """Return the facets for this repo: document kind and rpubl:arsutgava.

        The first facet is keyed on rdf:type but splits
        rpubl:Utredningsbetankande rows into "sou" and "ds" depending on
        the leaf of their rpubl:utrSerie URI. The second facet is on
        rpubl:arsutgava, indexed as a label, with descending selector
        order.
        """
        # Short identifier -> human readable label:
        # rdf:type rpubl:Kommittedirektiv => "Kommittédirektiv"
        # rdf:type rpubl:Utredningsbetankande, rpubl:utrSerie .*sou => "SOU"
        # rdf:type rpubl:Utredningsbetankande, rpubl:utrSerie .*ds => "Ds"
        # rdf:type rpubl:Proposition => "Propositioner"
        labels = {
            'dir': 'Kommittédirektiv',
            'sou': 'SOU',
            'ds': 'Ds',
            'prop': 'Propositioner'
        }

        def select(row, binding, extra):
            # Raises KeyError when ident() cannot classify the row
            # (it then returns None, which is not a key of labels).
            return labels[ident(row, binding, extra)]

        # This is a selector that can CLEARLY not run on arbitrary rows
        def ident(row, binding, extra):
            """Return 'dir', 'sou', 'ds' or 'prop' for a row, else None."""
            rdftype = row[binding]
            if rdftype == str(self.ns['rpubl'].Utredningsbetankande):
                if not row['rpubl_utrSerie']:
                    self.log.error(
                        "Row for %s is rpubl:Utredning but lacks rpubl:utrSerie"
                        % row['uri'])
                    return None
                leaf = util.uri_leaf(row['rpubl_utrSerie'])
                if leaf.startswith("ds"):
                    return "ds"
                if leaf.startswith("sou"):
                    return "sou"
                # Raised explicitly (instead of via an always-failing
                # assert statement) so the check survives python -O.
                raise AssertionError(
                    "leaf was %s, unsure whether this is a SOU or a Ds." % leaf)
            if rdftype == str(self.ns['rpubl'].Kommittedirektiv):
                return "dir"
            if rdftype == str(self.ns['rpubl'].Proposition):
                return "prop"
            # Unrecognized types are silently ignored (no error is logged).
            return None

        return [
            Facet(RDF.type,
                  selector=select,
                  pagetitle="Alla %(selected)s",
                  identificator=ident),
            Facet(RPUBL.arsutgava,
                  indexingtype=fulltextindex.Label(),
                  selector_descending=True)
        ]
Example #5
0
    def news_feedsets(self, data, facets):
        """Return two feedsets: one feed per rattsfallspublikation, plus "All".

        Note that the facets passed in are modified in place: their key
        function is replaced so that rows sort on self.news_sortkey, in
        descending order.
        """
        # Mangle the keyfunc of the existing facets so that feeds will
        # be sorted by 'rpubl_avgorandedatum' (aliased by feed_item to
        # 'published') -- this'll probably not end well...
        for facet in facets:
            facet.key = lambda row, binding, resource_graph: row[self.news_sortkey]
            facet.key_descending = True

        # Works pretty much like toc_pagesets, but yields ONE feedset
        # (not several) holding one feed per publisher.
        primary = facets[0]  # should be the RPUBL.rattsfallspublikation one
        pub_binding = 'rpubl_rattsfallspublikation'
        feeds_by_publication = {}
        for row in data:
            publication = row[pub_binding]
            if publication in feeds_by_publication:
                continue
            slug = Facet.term(row, pub_binding)
            feeds_by_publication[publication] = Feed(
                slug=slug,
                title="Rättsfall från %s" % self.slug_to_title[slug],
                binding=pub_binding,
                value=publication)

        ordered_feeds = sorted(feeds_by_publication.values(),
                               key=attrgetter('value'))
        per_publication = Feedset(label="Rättsfallspublikation",
                                  predicate=primary.rdftype,
                                  feeds=ordered_feeds)
        everything = Feedset(label="All",
                             feeds=[Feed(slug="main",
                                         title="Samtliga rättsfall",
                                         binding=None,
                                         value=None)])
        return [per_publication, everything]
Example #6
0
    def toc_pagesets(self, data, facets):
        """Build one TOC pageset per rpubl:rattsfallspublikation value.

        Every pageset gets one page per distinct rpubl_arsutgava value
        seen together with that publication, appended in descending
        sort order.

        :param data: iterable of row dicts; each row must have the keys
                     'rpubl_rattsfallspublikation' and 'rpubl_arsutgava'
        :param facets: accepted for interface compatibility; not consulted
        :returns: list of TocPageset objects, ordered by the position of
                  their label in self._rattsfallspublikation_order
        :raises AssertionError: if a pageset label is missing from
                                self._rattsfallspublikation_order
        """
        # our primary facet is RPUBL.rattsfallspublikation, but we
        # need to create one pageset for each value thereof.
        binding = 'rpubl_rattsfallspublikation'
        own_labels = self._rattsfallspublikation_label
        pagesetdict = {}
        selector_values = set()
        for row in data:
            pagesetid = row[binding]
            if pagesetid not in pagesetdict:
                # Get the preferred court label from our own mapping.
                # Only when it has none do we look up the label of the
                # publikation (the lookup is skipped otherwise).
                if pagesetid in own_labels:
                    label = own_labels[pagesetid]
                else:
                    label = Facet.resourcelabel(row, binding, self.commondata)
                pagesetdict[pagesetid] = TocPageset(label=label,
                                                    predicate=pagesetid,
                                                    pages=[])
            selector_values.add((pagesetid, row['rpubl_arsutgava']))

        for (pagesetid, value) in sorted(selector_values, reverse=True):
            pageset = pagesetdict[pagesetid]
            pageset.pages.append(TocPage(linktext=value,
                                         title="Rättsfall från %s under %s" % (pageset.label, value),
                                         binding=util.uri_leaf(pagesetid),
                                         value=value))

        # make sure pagesets are returned in the preferred, arbitrary
        # order specified by _rattsfallspublikation_order
        order = self._rattsfallspublikation_order
        pagesets = list(pagesetdict.values())
        for pageset in pagesets:
            if pageset.label not in order:
                # Raised explicitly so the check is not stripped by python -O.
                raise AssertionError(
                    "%s not in _rattsfallspublikation_order" % pageset.label)
        return sorted(pagesets, key=lambda x: order.index(x.label))
Example #7
0
 def test_pageset_resourcelabel(self):
     """toc_pagesets on a publisher facet must yield the pagesets2 fixture."""
     publisher_facet = Facet(DCTERMS.publisher,
                             pagetitle="Documents published in %(selected)s")
     self.repo.commondata = self.results2data
     got = self.repo.toc_pagesets(self.results2, [publisher_facet])
     self.assertEqual(self.pagesets2, got)
Example #8
0
    def facets(self):
        """Return a publisher facet (selected by foaf:name) and an issued facet."""
        # (resource URI, graph identifier) -> stringified foaf:name,
        # shared by all calls to the selector below.
        name_cache = {}

        def resourcename(row, binding, resource_graph):
            cache_key = (row[binding], resource_graph.identifier)
            try:
                return name_cache[cache_key]
            except KeyError:
                pass
            name = str(resource_graph.value(URIRef(row[binding]), FOAF.name))
            name_cache[cache_key] = name
            return name

        publisher = Facet(DCTERMS.publisher,
                          selector=resourcename,
                          identificator=Facet.resourcelabel)
        issued = Facet(DCTERMS.issued)
        return [publisher, issued]
Example #9
0
    def facets(self):
        """Return facets for title, issued, subject and (for the TOC) RFC number."""
        def select_rfcnum(row, binding, resource_graph):
            # "RFC 6998" -> "6900"
            leading_digits = row[binding][4:-2]
            return leading_digits + "00"

        from ferenda import Facet
        dcterms = self.ns['dcterms']
        title = Facet(dcterms.title)
        issued = Facet(dcterms.issued)
        # The subject facet uses the default selector for selection,
        # identification and sort key alike.
        subject = Facet(dcterms.subject,
                        selector=Facet.defaultselector,
                        identificator=Facet.defaultselector,
                        key=Facet.defaultselector)
        rfc_number = Facet(dcterms.identifier,
                           use_for_toc=True,
                           selector=select_rfcnum,
                           pagetitle="RFC %(selected)s00-%(selected)s99")
        return [title, issued, subject, rfc_number]
Example #10
0
 def facets(self):
     """Return one default facet per predicate, in a fixed order."""
     # NOTE(review): DCTERMS.publisher appears twice, exactly as in the
     # original list -- confirm whether the duplicate is intentional.
     predicates = (
         RDF.type,
         DCTERMS.title,
         DCTERMS.publisher,
         DCTERMS.identifier,
         DCTERMS.issued,
         DCTERMS.publisher,
         DCTERMS.abstract,
         DC.subject,
     )
     return [Facet(predicate) for predicate in predicates]
Example #11
0
    def facets(self):
        """Return this repo's facets; note that RDF.type is not one of them."""
        publisher = Facet(DC.publisher)
        # Having a dcterms_issued of type string conflicts with other
        # repos having dcterms_issued of type date (range queries
        # against the field do not work). Disabled for now:
        # Facet(DCTERMS.issued, indexingtype=fulltextindex.Label()),
        rights_holder = Facet(DCTERMS.rightsHolder,
                              indexingtype=fulltextindex.Resource(),
                              multiple_values=True)
        title = Facet(DCTERMS.title, toplevel_only=True)
        # NOTE(review): the label below has no conversion type after
        # "%(selected)" -- presumably "%(selected)s" was intended; confirm
        # before changing, the string is reproduced as-is.
        identifier = Facet(DCTERMS.identifier,
                           selector=self.my_id_selector,
                           key=self.lexicalkey,
                           label="IDs having %(selected) characters")
        creator = Facet(DC.creator, toplevel_only=False)
        return [publisher, rights_holder, title, identifier, creator]
Example #12
0
 def facets(self):
     """Return default facets plus a multi-valued publisher and a boolean ex:secret."""
     EX = self.ns['ex']
     rdf_type = Facet(RDF.type)
     title = Facet(DCTERMS.title)
     publisher = Facet(DCTERMS.publisher, multiple_values=True)
     identifier = Facet(DCTERMS.identifier)
     issued = Facet(DCTERMS.issued)
     secret = Facet(EX.secret, indexingtype=Boolean())
     references = Facet(DCTERMS.references)
     subject = Facet(DC.subject)
     return [rdf_type, title, publisher, identifier, issued, secret,
             references, subject]
Example #13
0
 def test_year(self):
     """Facet.year returns '2014' for valid dates and raises for invalid ones."""
     valid_dates = ('2014-06-05T12:00:00', '2014-06-05', '2014-06')
     for issued in valid_dates:
         self.assertEqual('2014', Facet.year({'dcterms_issued': issued}))

     invalid_dates = ('This is clearly an invalid date', '2014-14-99')
     for issued in invalid_dates:
         with self.assertRaises(Exception):
             Facet.year({'dcterms_issued': issued})
Example #14
0
    def facets(self):
        """Return the repo-specific facets followed by self.standardfacets.

        Only the rpubl:forfattningssamling facet is used for the TOC; it
        selects and identifies rows through the mainfs helper below.
        """
        # Design idea: maybe each entry in the list could be a tuple or
        # a single element. If it's a tuple, the first element's
        # selector values (eg organizations) become top level facets,
        # the second element's selector values become subsections
        # underneath, and possibly more levels.
        def altlabel(row, binding, resource_graph):
            # Currently not wired into any facet (see commented-out
            # selector below): skos:altLabel of the resource if there
            # is one, otherwise the raw row value.
            alt = resource_graph.value(URIRef(row[binding]), SKOS.altLabel)
            return str(alt) if alt else row[binding]

        def mainfs(row, binding, resource_graph):
            # Follow dcterms:isReplacedBy once (when present) and return
            # the leaf of the resulting URI.
            original = URIRef(row[binding])
            replacement = resource_graph.value(original, DCTERMS.isReplacedBy)
            return util.uri_leaf(replacement if replacement else original)

        forfattningssamling = Facet(
            RPUBL.forfattningssamling,
            # selector=altlabel,
            selector=mainfs,
            identificator=mainfs,
            use_for_toc=True,
            label="Ordnade efter författningssamling",
            pagetitle="Föreskrifter i %(selected)s")
        arsutgava = Facet(RPUBL.arsutgava,
                          indexingtype=fulltextindex.Label(),
                          selector_descending=True,
                          use_for_toc=False)
        konsolideringsunderlag = Facet(RPUBL.konsolideringsunderlag,
                                       indexingtype=fulltextindex.Identifier(),
                                       use_for_toc=False,
                                       use_for_feed=False,
                                       multiple_values=True)
        andrar = Facet(RPUBL.andrar,
                       indexingtype=fulltextindex.Identifier(),
                       use_for_toc=False,
                       use_for_feed=False,
                       multiple_values=True)
        rdf_type = Facet(RDF.type, use_for_toc=False, use_for_feed=False)
        title = Facet(DCTERMS.title, use_for_toc=False)
        publisher = Facet(DCTERMS.publisher,
                          use_for_toc=False,
                          pagetitle="Författningar utgivna av %(selected)s")
        identifier = Facet(DCTERMS.identifier)

        own_facets = [forfattningssamling, arsutgava, konsolideringsunderlag,
                      andrar, rdf_type, title, publisher, identifier]
        return own_facets + self.standardfacets
Example #15
0
 def facets(self):
     """Return the default facets plus an extra boolean "aprilfools" facet.

     The trailing comments name the fulltextindex type noted for each
     facet by the original author.
     """
     rdf_type = Facet(RDF.type)  # fulltextindex.URI
     title = Facet(DCTERMS.title)  # fulltextindex.Text(boost=4)
     identifier = Facet(DCTERMS.identifier)  # fulltextindex.Label(boost=16)
     issued = Facet(DCTERMS.issued)  # fulltextindex.Datetime()
     # A second facet on dcterms:issued, selected via is_april_fools
     # and exposed as a value dimension labelled "aprilfools".
     april_fools = Facet(DCTERMS.issued,
                         indexingtype=fulltextindex.Boolean(),
                         selector=self.is_april_fools,
                         dimension_type="value",
                         dimension_label="aprilfools")
     publisher = Facet(DCTERMS.publisher)  # fulltextindex.Resource()
     subject = Facet(DC.subject)  # fulltextindex.Keywords()
     free = Facet(SCHEMA.free)  # fulltextindex.Boolean()
     return [rdf_type, title, identifier, issued, april_fools,
             publisher, subject, free]
Example #16
0
    def facets(self):
        """Return a single title facet bucketed on the upper-cased first letter."""
        def kwselector(row, binding, resource_graph):
            # Alphabetic initials are upper-cased; anything else is
            # collected under "#".
            initial = row[binding][0]
            return initial.upper() if initial.isalpha() else "#"

        by_title = Facet(DCTERMS.title,
                         label="Ordnade efter titel",
                         pagetitle='Begrepp som b\xf6rjar p\xe5 "%(selected)s"',
                         selector=kwselector)
        return [by_title]
Example #17
0
 def facets(self):
     """Return a single rdfs:label facet, used for neither TOC nor feeds."""
     # The facets of a repo control indexing, particularly the
     # synthesized 'label', 'creator' and 'issued'. Defining a facet
     # for 'label' only avoids having to define issued and creator
     # for static pages. Or maybe we should try to do that?
     label_options = {
         'use_for_toc': False,
         'use_for_feed': False,
         'toplevel_only': False,
         'dimension_label': "label",
         'dimension_type': "value",
         'multiple_values': False,
         'indexingtype': fulltextindex.Label(boost=16),
     }
     return [Facet(RDFS.label, **label_options)]
Example #18
0
File: rfc.py Project: zigit/ferenda
    def facets(self):
        """Return the RFC facets: type, RFC number (TOC), title, stream, issued, status."""
        def select_rfcnum(row, binding, resource_graph):
            # "RFC 6998" -> "6900"
            leading_digits = row[binding][4:-2]
            return leading_digits + "00"

        rdf_type = Facet(self.ns['rdf'].type)
        dcterms = self.ns['dcterms']
        rfc_number = Facet(dcterms.identifier,
                           label="Sorted by RFC #",
                           pagetitle="RFC %(selected)s00-%(selected)s99",
                           selector=select_rfcnum,
                           use_for_toc=True)
        title = Facet(dcterms.title)
        stream = Facet(dcterms.publisher,
                       label="Sorted by stream",
                       pagetitle="The %(selected)s stream")
        issued = Facet(dcterms.issued)
        # should be rfc:category not dcterms:status?
        status = Facet(dcterms.subject,
                       label="Sorted by status",
                       pagetitle="Status: %(selected)s")
        return [rdf_type, rfc_number, title, stream, issued, status]
Example #19
0
 def facets(self):
     """Return a single default facet on dcterms:issued."""
     issued = Facet(DCTERMS.issued)
     return [issued]
Example #20
0
 def facets(self):
     """Return the inherited facets plus a title facet with toplevel_only=False."""
     inherited = super(Direktiv, self).facets()
     title = Facet(DCTERMS.title, toplevel_only=False)
     return inherited + [title]
Example #21
0
class Feedsets(RepoTester):
    """Tests for news_feedsets and news_select_for_feeds on the results2 data."""

    # Fixture data is loaded once, when the class body executes. The
    # files are opened in context managers so the handles are closed
    # deterministically instead of being left to the garbage collector.
    with open("test/files/datasets/results2-plus-entries.json") as _fp:
        results2 = json.load(
            _fp,
            object_hook=util.make_json_date_object_hook('published', 'updated'))
    with open("test/files/datasets/results2data.ttl") as _fp:
        results2data = rdflib.Graph().parse(_fp, format="turtle")
    del _fp  # do not leave the (closed) file object behind as a class attribute

    # Facets handed to the methods under test.
    facets = [
        Facet(rdftype=RDF.type),
        Facet(rdftype=DCTERMS.publisher),
        Facet(rdftype=DCTERMS.issued)
    ]

    # The feedsets that news_feedsets is expected to produce for the
    # data and facets above.
    feedsets = [
        Feedset(label="Sorted by type",
                predicate=RDF.type,
                feeds=[
                    Feed(title="All Book documents",
                         slug="type/book",
                         binding="rdf_type",
                         value="Book")
                ]),
        Feedset(
            label="Sorted by publisher",
            predicate=DCTERMS.publisher,
            feeds=[
                Feed(title="Documents published by Analytical Biochemistry",
                     slug="publisher/analytical",
                     binding="dcterms_publisher",
                     value="analytical"),
                Feed(title=
                     "Documents published by Journal of Biological Chemistry",
                     slug="publisher/biochem",
                     binding="dcterms_publisher",
                     value="biochem"),
                Feed(title="Documents published by Nature",
                     slug="publisher/nature",
                     binding="dcterms_publisher",
                     value="nature")
            ]),
        Feedset(
            label="All",
            predicate=None,
            feeds=[
                Feed(
                    title="All documents",  # "... in base" ?
                    slug="main",
                    binding=None,
                    value=None)
            ])
    ]

    def setUp(self):
        """Stub out news_facet_entries and install the common data graph."""
        super(Feedsets, self).setUp()
        self.repo.news_facet_entries = Mock(return_value=self.results2)
        self.repo.commondata = self.results2data

    def test_feedsets(self):
        """news_feedsets must return exactly the expected three feedsets."""
        got = self.repo.news_feedsets(self.results2, self.facets)
        want = self.feedsets

        # make sure 3 feedsets were created and their labels
        self.assertEqual(3, len(got))
        self.assertEqual("Sorted by type", got[0].label)
        self.assertEqual("Sorted by publisher", got[1].label)
        self.assertEqual("All", got[2].label)

        # make sure the title of the only feed in the first feedset
        # turned out OK
        self.assertEqual("All Book documents", got[0].feeds[0].title)

        # make sure the publisher feedset has the correct things
        self.assertEqual(3, len(got[1].feeds))  # 3 different journals
        self.assertEqual("publisher/analytical", got[1].feeds[0].slug)
        self.assertEqual("Documents published by Analytical Biochemistry",
                         got[1].feeds[0].title)

        # this test incorporates all of the above
        self.assertEqual(want, got)

    def test_select_for_feeds(self):
        """The final (main) feedset must hold one feed with all four entries."""
        got = self.repo.news_select_for_feeds(self.results2, self.feedsets,
                                              self.facets)
        # last feedset (main) should have one single feed and it
        # should contain all entries.
        main_feeds = got[-1].feeds
        self.assertEqual(len(main_feeds), 1)
        entries = main_feeds[0].entries
        self.assertEqual(len(entries), 4)
        self.assertEqual("http://example.org/articles/pm14907713",
                         entries[0]['uri'])
        self.assertEqual("http://example.org/articles/pm942051",
                         entries[3]['uri'])
Example #22
0
    def stats_slice(self, data, facet, resource_graph):
        """Count, for one facet, how many distinct documents have each value.

        :param data: iterable of row dicts; every row that yields an
                     observation must have a 'uri' key
        :param facet: object providing rdftype, dimension_label,
                      dimension_type and selector
        :param resource_graph: graph used to derive the binding name
                               (via qname) and passed on to the selector
        :returns: tuple (dimension_label, observations) where
                  observations is a Counter keyed on
                  (dimension_type, observation)

        Rows for which the selector raises any Exception are skipped.
        """
        binding = resource_graph.qname(facet.rdftype).replace(":", "_")
        if facet.dimension_label:
            dimension_label = facet.dimension_label
        elif self.config.legacyapi:
            dimension_label = util.uri_leaf(str(facet.rdftype))
        else:
            dimension_label = binding

        dimension_type = facet.dimension_type
        if self.config.legacyapi and dimension_type == "value":
            # legacyapi doesn't support the value type, we must
            # convert it into ref, and convert all string values to
            # fake resource ref URIs
            dimension_type = "ref"

            def transformer(x):
                return ("http://example.org/fake-resource/%s" % x).replace(
                    " ", "_")
        elif self.config.legacyapi and dimension_type == "term":
            # legacyapi expects "Standard" over "bibo:Standard", which is
            # what Facet.qname returns
            def transformer(x):
                return x.split(":")[1]
        else:
            def transformer(x):
                return x

        observations = Counter()
        # (uri, observation) pairs already counted -- avoids
        # double-counting the same document for the same value
        observed = set()
        for row in data:
            try:
                # maybe if facet.dimension_type == "ref", selector
                # should always be Facet.defaultselector?  NOTE: we
                # look at facet.dimension_type, not dimension_type, as
                # the latter may have been altered above if
                # legacyapi == True
                if facet.dimension_type == "ref":
                    observation = transformer(
                        Facet.defaultselector(row, binding))
                else:
                    observation = transformer(
                        facet.selector(row, binding, resource_graph))
            except Exception:
                # Deliberately best-effort: most of the time this is a
                # selector relying on information that is just not
                # present in the rows from some repos. Nothing else is
                # done here, so that the handler itself cannot fail
                # (e.g. on selectors lacking a __name__ attribute).
                continue
            if observation is None:
                continue
            seen = (row['uri'], observation)
            if seen not in observed:
                observed.add(seen)
                observations[(dimension_type, observation)] += 1
        return dimension_label, observations
Example #23
0
class TOC(RepoTester):
    # Fixture data, loaded once when the class body executes. The files
    # are opened in context managers so the handles are closed
    # deterministically instead of being left to the garbage collector.
    with open("test/files/datasets/results1.json") as _fp:
        results1 = json.load(_fp)
    with open("test/files/datasets/results2.json") as _fp:
        results2 = json.load(_fp)
    with open("test/files/datasets/results2data.ttl") as _fp:
        results2data = Graph().parse(_fp, format="turtle")
    del _fp  # do not leave the (closed) file object behind as a class attribute
    # Pagesets used as input by the page-generation tests: one pageset
    # keyed on dcterms_title (first letter) and one on dcterms_issued
    # (year).
    pagesets = [
        TocPageset('Sorted by title', [
            TocPage('a', 'Documents starting with "a"', 'dcterms_title', 'a'),
            TocPage('d', 'Documents starting with "d"', 'dcterms_title', 'd'),
            TocPage('h', 'Documents starting with "h"', 'dcterms_title', 'h'),
            TocPage('l', 'Documents starting with "l"', 'dcterms_title', 'l')
        ], DCTERMS.title),
        TocPageset('Sorted by publication year', [
            TocPage('1791', 'Documents published in 1791', 'dcterms_issued',
                    '1791'),
            TocPage('1859', 'Documents published in 1859', 'dcterms_issued',
                    '1859'),
            TocPage('1937', 'Documents published in 1937', 'dcterms_issued',
                    '1937'),
            TocPage('1939', 'Documents published in 1939', 'dcterms_issued',
                    '1939'),
            TocPage('1943', 'Documents published in 1943', 'dcterms_issued',
                    '1943'),
            TocPage('1954', 'Documents published in 1954', 'dcterms_issued',
                    '1954')
        ], DCTERMS.issued)
    ]

    # A single pageset keyed on dcterms_publisher, for tests that
    # work on the results2 data.
    pagesets2 = [
        TocPageset('Sorted by publisher', [
            TocPage('Analytical Biochemistry',
                    'Documents published in Analytical Biochemistry',
                    'dcterms_publisher', 'analytical'),
            TocPage('Journal of Biological Chemistry',
                    'Documents published in Journal of Biological Chemistry',
                    'dcterms_publisher', 'biochem'),
            TocPage('Nature', 'Documents published in Nature',
                    'dcterms_publisher', 'nature'),
        ], DCTERMS.publisher)
    ]

    # Maps each (binding, value) page key from `pagesets` above to the
    # documents on that page; every document is a one-element list
    # holding a Link.
    documentlists = {
        ('dcterms_issued', '1791'): [[
            Link("Dream of the Red Chamber",
                 uri='http://example.org/books/Dream_of_the_Red_Chamber')
        ]],
        ('dcterms_issued', '1859'): [[
            Link("A Tale of Two Cities",
                 uri='http://example.org/books/A_Tale_of_Two_Cities')
        ]],
        ('dcterms_issued', '1937'):
        [[Link("The Hobbit", uri='http://example.org/books/The_Hobbit')]],
        ('dcterms_issued', '1939'): [[
            Link("And Then There Were None",
                 uri='http://example.org/books/And_Then_There_Were_None')
        ]],
        ('dcterms_issued', '1943'): [[
            Link("The Little Prince",
                 uri='http://example.org/books/The_Little_Prince')
        ]],
        ('dcterms_issued', '1954'): [[
            Link("The Lord of the Rings",
                 uri='http://example.org/books/The_Lord_of_the_Rings')
        ]],
        ('dcterms_title', 'a'):
        [[
            Link("And Then There Were None",
                 uri='http://example.org/books/And_Then_There_Were_None')
        ],
         [
             Link("A Tale of Two Cities",
                  uri='http://example.org/books/A_Tale_of_Two_Cities')
         ]],
        ('dcterms_title', 'd'): [[
            Link("Dream of the Red Chamber",
                 uri='http://example.org/books/Dream_of_the_Red_Chamber')
        ]],
        ('dcterms_title', 'h'):
        [[Link("The Hobbit", uri='http://example.org/books/The_Hobbit')]],
        ('dcterms_title', 'l'):
        [[
            Link("The Little Prince",
                 uri='http://example.org/books/The_Little_Prince')
        ],
         [
             Link("The Lord of the Rings",
                  uri='http://example.org/books/The_Lord_of_the_Rings')
         ]]
    }

    # Facets on rdf:type, dcterms:title and dcterms:issued, all with
    # default settings.
    facets = [
        Facet(rdftype=RDF.type),
        Facet(rdftype=DCTERMS.title),
        Facet(rdftype=DCTERMS.issued)
    ]

    def setUp(self):
        """Copy the base resources.xml into <datadir>/rsrc before each test."""
        super(TOC, self).setUp()
        target = os.sep.join([self.datadir, "rsrc", "resources.xml"])
        util.ensure_dir(target)
        here = os.path.dirname(__file__)
        shutil.copy2("%s/files/base/rsrc/resources.xml" % here, target)

    def tearDown(self):
        """Drop any repo stored on the class so the next test gets a new one."""
        super(TOC, self).tearDown()
        # make sure self.repo is always newly initialized, not reused
        if not hasattr(TOC, 'repo'):
            return
        del TOC.repo

    def test_toc(self):
        # tests the main TOC method, not the helper methods (they are
        # tested separately)
        self.repo.facets = MagicMock()
        self.repo.facet_select = MagicMock()
        self.repo.facet_query = MagicMock()
        self.repo.faceted_data = MagicMock()
        self.repo.log = Mock()
        self.repo.toc_pagesets = Mock()
        self.repo.toc_select_for_pages = Mock()
        self.repo.toc_generate_pages = Mock()
        self.repo.toc_generate_first_page = Mock()
        with patch('json.dump'):
            self.repo.toc()

        # assert facet_query was properly called, error and info msg
        # was printed
        self.assertEqual("http://*****:*****@rel='stylesheet']")
        self.assertEqual(len(css), 3)  # bootstrap, ferenda and sfs (?!)

        self.assertRegex(css[2].get('href'), '^../../../rsrc/css')

        # 2.2 JS links, relativized correctly?
        js = t.findall("body/script")
        self.assertEqual(len(js),
                         5)  # jquery, bootstrap, hammer, typeahead, ferenda
        self.assertRegex(js[4].get('src'), '^../../../rsrc/js')
        # 2.3 <nav id="toc"> correct (c.f 1.2)
        navlinks = t.findall(".//nav[@id='toc']//li/a")
        self.assertEqual(len(navlinks), 9)

        self.assertEqual(navlinks[0].get("href"),
                         'http://*****:*****@class='main-container']/article (c.f 1.3)
        docs = t.findall(".//ul[@role='main']/li/a")
        self.assertEqual(len(docs), 2)
        # "And..." should go before "A Tale..."
        self.assertEqual(docs[0].text, 'And Then There Were None')
        self.assertEqual(docs[0].attrib['href'],
                         'http://example.org/books/And_Then_There_Were_None')

        # 2.5 site name correct
        header = t.find(".//div[@class='navbar-header']/a")
        self.assertEqual(header.text, 'testsite')

        # 2.6 main article header correct?
        header = t.find(".//article/h1")
        self.assertEqual(header.text, 'Documents starting with "a"')

    def test_generate_page_staticsite(self):
        """With staticsite enabled, generated TOC pages must use relative links."""
        self.repo.config.staticsite = True
        self.repo.config.removeinvalidlinks = False
        page_key = ('dcterms_title', 'a')
        binding, value = page_key
        page_path = self.repo.toc_generate_page(
            binding, value, self.documentlists[page_key], self.pagesets)
        tree = etree.parse(page_path)

        # TOC links should be relativized
        toc_links = tree.findall(".//nav[@id='toc']//li/a")
        self.assertEqual('d.html', toc_links[0].get("href"))
        self.assertEqual('../dcterms_issued/1791.html',
                         toc_links[3].get("href"))

        # from /base/toc/title/a.html -> /index.html = 3 levels up
        site_link = tree.find(".//div[@class='navbar-header']/a")
        self.assertEqual('../../../index.html', site_link.get("href"))

        navbar_links = tree.findall(".//ul[@class='nav navbar-nav']/li/a")
        self.assertEqual('../index.html', navbar_links[0].get("href"))

        # document links (which in this case use non-base-repo-contained
        # URIs) should be unaffected
        doc_links = tree.findall(".//ul[@role='main']/li/a")
        self.assertEqual('http://example.org/books/And_Then_There_Were_None',
                         doc_links[0].get("href"))
        self.repo.config.removeinvalidlinks = True

    def test_generate_pages(self):
        """toc_generate_pages must return ten paths that all exist on disk."""
        generated = self.repo.toc_generate_pages(self.documentlists,
                                                 self.pagesets)
        self.assertEqual(len(generated), 10)
        for page_path in generated:
            self.assertTrue(os.path.exists(page_path))

    def test_generate_first_page(self):
        # Generate the TOC front page and inspect the HTML written to disk.
        path = self.repo.toc_generate_first_page(self.documentlists,
                                                 self.pagesets)
        # The returned path must equal what self.p() gives for
        # "base/toc/index.html" (presumably a path below the test's data
        # directory -- confirm against the fixture), and the file must exist.
        self.assertEqual(path, self.p("base/toc/index.html"))
        self.assertTrue(os.path.exists(path))
        tree = etree.parse(path)
        # check content of path, particularly that css/js refs
        # and pageset links are correct. Also, that the selected
        # indexpage is indeed the first (eg. title/a)
        # (NOTE: the first page in the first pageset (by title/a) isn't linked. The second one (by title/d) is).
        # NOTE(review): the expected URL and the beginning of the lookup
        # expression on the next statement appear to have been masked out
        # ("*****"). As written the statement is not valid Python; restore
        # the original expected value and the tree.find(...) call from the
        # upstream test before relying on this method.
        self.assertEqual(
            "http://*****:*****@id='toc']").findall(".//a")[0].get("href"))
        # The first <link> element found in the page must point at the
        # Bootstrap 3.3.7 stylesheet on the CDN.
        self.assertEqual(
            "https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css",
            tree.find(".//link").get("href"))

        # The article heading must be the one for documents starting with "a".
        self.assertEqual('Documents starting with "a"',
                         tree.find(".//article/h1").text)

    def test_more(self):
        # Three documents, faceted by title and by issue date, must be
        # distributed onto the expected (binding, page) keys by
        # toc_select_for_pages.
        from ferenda import DocumentRepository
        repo = DocumentRepository()
        from rdflib.namespace import DCTERMS

        columns = ('uri', 'dcterms_title', 'dcterms_issued')
        records = (
            ('http://ex.org/1', 'Abc', '2009-04-02'),
            ('http://ex.org/2', 'Abcd', '2010-06-30'),
            ('http://ex.org/3', 'Dfg', '2010-08-01'),
        )
        rows = [dict(zip(columns, record)) for record in records]

        facets = [Facet(DCTERMS.title), Facet(DCTERMS.issued)]
        pagesets = repo.toc_pagesets(rows, facets)

        def entry(title, uri):
            # One document entry as it should appear on a TOC page: a
            # single-element list holding a Link to the document.
            return [Link(title, uri=uri)]

        expected = {
            ('dcterms_title', 'a'): [entry('Abc', 'http://ex.org/1'),
                                     entry('Abcd', 'http://ex.org/2')],
            ('dcterms_title', 'd'): [entry('Dfg', 'http://ex.org/3')],
            ('dcterms_issued', '2009'): [entry('Abc', 'http://ex.org/1')],
            ('dcterms_issued', '2010'): [entry('Abcd', 'http://ex.org/2'),
                                         entry('Dfg', 'http://ex.org/3')],
        }
        actual = repo.toc_select_for_pages(rows, pagesets, facets)
        self.assertEqual(expected, actual)
Example #24
0
 def facets(self):
     """Return the inherited facets followed by a title facet.

     The title facet is created with ``toplevel_only=False``.
     """
     inherited = super(Propositioner, self).facets()
     title_facets = [Facet(DCTERMS.title, toplevel_only=False)]
     return inherited + title_facets
Example #25
0
 def facets(self):
     """Return the inherited facets followed by a title facet."""
     inherited = super(SOU, self).facets()
     title_facets = [Facet(DCTERMS.title)]
     return inherited + title_facets
Example #26
0
    def stats_slice(self, data, facet, resource_graph):
        """Count how many distinct documents fall under each value of a facet.

        :param data: iterable of rows; every row that yields an observation
                     is indexed with ``'uri'``
        :param facet: object providing ``rdftype``, ``dimension_label``,
                      ``dimension_type`` and a ``selector`` callable
        :param resource_graph: graph whose ``qname`` shortens
                               ``facet.rdftype``; also handed to
                               ``facet.selector``
        :returns: a ``(dimension_label, observations)`` tuple, where
                  ``observations`` is a ``Counter`` keyed by
                  ``(dimension_type, observation)``

        Rows for which the selector raises are skipped, since a selector
        may rely on information that some rows simply do not carry.
        """
        binding = resource_graph.qname(facet.rdftype).replace(":", "_")
        if facet.dimension_label:
            dimension_label = facet.dimension_label
        elif self.config.legacyapi:
            dimension_label = util.uri_leaf(str(facet.rdftype))
        else:
            dimension_label = binding

        dimension_type = facet.dimension_type
        legacyapi = self.config.legacyapi
        if legacyapi and dimension_type == "value":
            # legacyapi doesn't support the value type, we must
            # convert it into ref, and convert all string values to
            # fake resource ref URIs
            dimension_type = "ref"

            def transformer(value):
                fake_uri = "http://example.org/fake-resource/%s" % value
                return fake_uri.replace(" ", "_")
        elif legacyapi and dimension_type == "term":
            # legacyapi expects "Standard" over "bibo:Standard", which is
            # what Facet.qname returns
            def transformer(value):
                return value.split(":")[1]
        else:
            def transformer(value):
                return value

        # NOTE: we look at facet.dimension_type, not dimension_type, as
        # the latter may have been altered above when legacyapi is set.
        use_default_selector = facet.dimension_type == "ref"

        observations = Counter()
        # (uri, observation) pairs already counted -- avoids counting
        # the same document twice for the same observation
        observed = set()
        for row in data:
            try:
                if use_default_selector:
                    raw = Facet.defaultselector(row, binding)
                else:
                    raw = facet.selector(row, binding, resource_graph)
                observation = transformer(raw)
            except Exception:
                # Deliberate best effort: the selector relies on
                # information that is just not present in the rows from
                # some repos. The handler intentionally does no further
                # work, so that it cannot itself raise (the selector may
                # be any callable, including ones without __name__).
                continue
            if observation is None:
                continue
            seen_key = (row['uri'], observation)
            if seen_key not in observed:
                observed.add(seen_key)
                observations[(dimension_type, observation)] += 1
        return dimension_label, observations