Ejemplo n.º 1
0
    def search(self, environ, start_response):
        """WSGI method, called by the wsgi app for requests that matches
           ``searchendpoint``.

        Parses the query string, runs the fulltext query (boosting hits
        from the "sfs" repo), and renders a result page with optional
        facets, highlighted hits (including inner hits) and a pager.
        """
        queryparams = self._search_parse_query(environ['QUERY_STRING'])
        # massage queryparams['issued'] if present, then restore it:
        # a single year is widened into a datetime range covering the
        # whole year before querying; afterwards the plain year string is
        # put back so pager/facet links echo the user's parameter.
        y = None
        if 'issued' in queryparams:
            y = int(queryparams['issued'])
            queryparams['issued'] = Between(datetime(y, 1, 1),
                                            datetime(y, 12, 31, 23, 59, 59))
        boost_types = [("sfs", 10)]  # rank hits from the "sfs" repo higher
        res, pager = self._search_run_query(queryparams,
                                            boost_types=boost_types)
        if y:
            queryparams['issued'] = str(y)

        # Page title: "N träffar för 'q'" (Swedish), singular-aware.
        if pager['totalresults'] == 1:
            title = "1 träff"
        else:
            title = "%s träffar" % pager['totalresults']
        title += " för '%s'" % queryparams.get("q")

        body = html.Body()
        # Facet navigation is only rendered when the search backend
        # returned aggregations for this query.
        if hasattr(res, 'aggregations'):
            body.append(
                self._search_render_facets(res.aggregations, queryparams,
                                           environ))
        for r in res:
            # Pick a link label: fall back to the URI, and stringify
            # list-valued labels.
            if 'label' not in r:
                label = r['uri']
            elif isinstance(r['label'], list):
                label = str(r['label'])  # flattens any nested element
                # structure, eg
                # <p><strong><em>foo</em></strong></p>
                # -> foo
            else:
                label = r['label']
            rendered_hit = html.Div([
                html.B([elements.Link(label, uri=r['uri'])], **
                       {'class': 'lead'})
            ], **{'class': 'hit'})
            if r.get('text'):
                rendered_hit.append(html.P([r.get('text', '')]))
            if 'innerhits' in r:
                for innerhit in r['innerhits']:
                    rendered_hit.append(self._search_render_innerhit(innerhit))
            body.append(rendered_hit)
        pagerelem = self._search_render_pager(pager, queryparams,
                                              environ['PATH_INFO'])
        body.append(
            html.Div([
                html.P([
                    "Träff %(firstresult)s-%(lastresult)s "
                    "av %(totalresults)s" % pager
                ]), pagerelem
            ], **{'class': 'pager'}))
        data = self._transform(title, body, environ, template="xsl/search.xsl")
        return self._return_response(data, start_response)
Ejemplo n.º 2
0
 def exception(self, environ, start_response):
     """Render the currently handled exception as a 500 error page.

     Shows the traceback plus request/interpreter diagnostics; the final
     formatted traceback line ("ExcType: message") is used as the title.
     """
     import traceback
     from pprint import pformat
     formatted = traceback.format_exception(*sys.exc_info())
     title = formatted[-1]
     variables = ("request_uri: %s\nos.getcwd(): %s" %
                  (request_uri(environ), os.getcwd()))
     # Assemble the diagnostic sections in order: heading, description,
     # traceback, then dumps of relevant environments.
     sections = [
         html.H1(self.exception_heading),
         html.P([self.exception_description]),
         html.H2("Traceback"),
         html.Pre(["\n".join(formatted)]),
         html.H2("Variables"),
         html.Pre([variables]),
         html.H2("environ"),
         html.Pre([pformat(environ)]),
         html.H2("sys.path"),
         html.Pre([pformat(sys.path)]),
         html.H2("os.environ"),
         html.Pre([pformat(dict(os.environ))])
     ]
     body = html.Body([html.Div(sections)])
     msg = self._transform(title, body, environ)
     return self._return_response(msg,
                                  start_response,
                                  status="500 Internal Server Error",
                                  contenttype="text/html")
Ejemplo n.º 3
0
    def search(self, environ, start_response):
        """WSGI method, called by the wsgi app for requests that matches
           ``searchendpoint``.

        Runs a fulltext query from the request's query string and renders
        a result page (hit list plus pager).
        """
        queryparams = self._search_parse_query(environ['QUERY_STRING'])
        res, pager = self._search_run_query(queryparams)

        # Singular-aware page title, echoing the free-text query.
        if pager['totalresults'] == 1:
            title = "1 match"
        else:
            title = "%s matches" % pager['totalresults']
        title += " for '%s'" % queryparams.get("q")
        body = html.Body()
        for r in res:
            # Fall back to the URI when a hit lacks a usable title, and
            # prefix the identifier when one is present.
            if 'dcterms_title' not in r or r['dcterms_title'] is None:
                r['dcterms_title'] = r['uri']
            if r.get('dcterms_identifier'):
                r['dcterms_title'] = r['dcterms_identifier'] + ": " + r[
                    'dcterms_title']
            body.append(
                html.Div([
                    html.H2([elements.Link(r['dcterms_title'], uri=r['uri'])]),
                    r.get('text', '')
                ], **{'class': 'hit'}))
        pagerelem = self._search_render_pager(pager, queryparams,
                                              environ['PATH_INFO'])
        body.append(
            html.Div([
                html.P([
                    "Results %(firstresult)s-%(lastresult)s "
                    "of %(totalresults)s" % pager
                ]), pagerelem
            ], **{'class': 'pager'}))
        data = self._transform(title, body, environ, template="xsl/search.xsl")
        return self._return_response(data, start_response)
Ejemplo n.º 4
0
    def test_highlighted_snippet(self):
        """A search hit's highlighted snippet markup should be rendered
        verbatim into the hit's <section> element."""
        snippet = html.P(['sollicitudin justo ',
                          html.Strong(['needle'], **{'class': 'match'}),
                          ' tempor ut eu enim ... himenaeos. ',
                          html.Strong(['Needle'], **{'class': 'match'}),
                          ' id tincidunt orci'])
        hits = [{'title': 'Example',
                 'uri': 'http://example.org/base/123/b1',
                 'text': snippet}]
        pager = {'pagenum': 1,
                 'pagecount': 1,
                 'firstresult': 1,
                 'lastresult': 1,
                 'totalresults': 1}

        self.builder.query_string = "q=needle"
        # Make the (mocked) fulltext index return our canned result.
        mockconfig = {'connect.return_value':
                      Mock(**{'query.return_value': (hits, pager)})}
        with patch('ferenda.wsgiapp.FulltextIndex', **mockconfig):
            status, headers, content = self.call_wsgi()

        self.assertResponse("200 OK",
                            {'Content-Type': 'text/html; charset=utf-8'},
                            None,
                            status, headers, None)

        tree = etree.fromstring(content)
        hitsections = tree.findall(".//section[@class='hit']")
        # The second child of the hit section is the snippet body.
        self.assertEqualXML(snippet.as_xhtml(),
                            hitsections[0][1],
                            namespace_aware=False)
Ejemplo n.º 5
0
 def _search_render_facets(self, facets, queryparams, environ):
     """Render the facet sidebar for a search result page.

     For each supported facet (type, creator, issued) present in
     *facets*: if the user already filtered on it, show the selected
     value with a "remove filter" link; otherwise list every bucket as a
     link that adds that filter, with its document count as a badge.

     Returns a ``html.Div`` with class ``facets``.
     """
     facetgroups = []
     # commondata of the first repo is used to resolve creator URIs to
     # human-readable labels.
     commondata = self.repos[0].commondata
     searchurl = request_uri(environ, include_query=False)
     for facetresult in ('type', 'creator', 'issued'):
         if facetresult in facets:
             if facetresult in queryparams:
                 # the user has selected a value for this
                 # particular facet, we should not display all
                 # buckets (but offer a link to reset the value)
                 qpcopy = dict(queryparams)
                 del qpcopy[facetresult]
                 href = "%s?%s" % (searchurl, urlencode(qpcopy))
                 val = queryparams[facetresult]
                 # Translate raw facet values to display labels.
                 if facetresult == "creator":
                     val = self.repos[0].lookup_label(val)
                 elif facetresult == "type":
                     val = self.repolabels.get(val, val)
                 lbl = "%s: %s" % (self.facetlabels.get(
                     facetresult, facetresult), val)
                 # "\xa0" (nbsp) gives the glyphicon link a clickable body.
                 facetgroups.append(
                     html.LI([
                         lbl,
                         html.A(
                             "\xa0", **{
                                 'href': href,
                                 'class': 'glyphicon glyphicon-remove'
                             })
                     ]))
             else:
                 # No filter selected: one list item per bucket, each a
                 # link that applies the bucket's value as a filter.
                 facetgroup = []
                 for bucket in facets[facetresult]['buckets']:
                     if facetresult == 'type':
                         lbl = self.repolabels.get(bucket['key'],
                                                   bucket['key'])
                         key = bucket['key']
                     elif facetresult == 'creator':
                         # Prefer the short SKOS altLabel when the
                         # creator resource has one, else FOAF name.
                         k = URIRef(bucket['key'])
                         pred = SKOS.altLabel if commondata.value(
                             k, SKOS.altLabel) else FOAF.name
                         lbl = commondata.value(k, pred)
                         key = bucket['key']
                     elif facetresult == "issued":
                         lbl = bucket["key_as_string"]
                         key = lbl
                     qpcopy = dict(queryparams)
                     qpcopy[facetresult] = key
                     href = "%s?%s" % (searchurl, urlencode(qpcopy))
                     facetgroup.append(
                         html.LI([
                             html.A("%s" % (lbl), **{'href': href}),
                             html.Span([str(bucket['doc_count'])],
                                       **{'class': 'badge pull-right'})
                         ]))
                 lbl = self.facetlabels.get(facetresult, facetresult)
                 facetgroups.append(
                     html.LI([html.P([lbl]),
                              html.UL(facetgroup)]))
     return html.Div(facetgroups, **{'class': 'facets'})
Ejemplo n.º 6
0
 def _search_render_innerhit(self, innerhit):
     """Render a single inner hit as a ``html.P`` with class ``innerhit``.

     The hit text is prefixed with a link to the inner hit's URI (using
     its label, or "(beteckning saknas)" when missing) followed by ": ".

     Unlike the previous implementation, the caller's *innerhit* dict is
     NOT mutated: the old code insert()-ed the link and separator into
     ``innerhit['text']`` in place, so rendering the same hit twice
     accumulated duplicate prefixes. We build a fresh content list
     instead; the rendered output is unchanged.
     """
     link = elements.LinkMarkup(innerhit.get('label', ['(beteckning saknas)']),
                                uri=innerhit['uri'])
     text = [link, ": "] + list(innerhit.get('text', []))
     return html.P(text, **{'class': 'innerhit'})
Ejemplo n.º 7
0
    def stream(self, environ, start_response):
        """WSGI method, called by the wsgi app for requests that indicate the
        need for a streaming response.

        Delegates to the first repo whose requesthandler supports the
        request; if none does, renders a 404 page listing each repo's
        reason for declining.
        """
        path = environ['PATH_INFO']
        if not isinstance(path, str):
            path = path.decode("utf-8")
        fullpath = self.config.documentroot + path
        # Static resources (/rsrc/... and /robots.txt) that exist on disk
        # are never streamed by a repo; for everything else we ask each
        # repo "do you handle this path?" and hand the request to the
        # first one that says yes. Declining repos may attach a .reason
        # to their (falsy) answer, which we collect for the 404 page.
        reasons = OrderedDict()
        if not ((path.startswith("/rsrc") or path == "/robots.txt")
                and os.path.exists(fullpath)):
            for repo in self.repos:
                supports = repo.requesthandler.supports(environ)
                if supports:
                    return repo.requesthandler.stream(environ, start_response)
                elif hasattr(supports, 'reason'):
                    reasons[repo.alias] = supports.reason
                else:
                    reasons[repo.alias] = '(unknown reason)'
        # if we reach this, no repo handled the path
        mimetype = "text/html"
        reasonmsg = "\n".join("%s: %s" % (alias, reason)
                              for alias, reason in reasons.items())
        msgbody = html.Body([
            html.H1("Document not found"),
            html.P(["The path %s was not found at %s" % (path, fullpath)]),
            html.P(["Examined %s repos" % (len(self.repos))]),
            html.Pre([reasonmsg])
        ])
        iterdata = self._transform("404 Not found", msgbody, environ)
        status = "404 Not Found"
        length = None
        return self._return_response(iterdata, start_response, status,
                                     mimetype, length)
Ejemplo n.º 8
0
    def test_html(self):
        """Documents built from elements.html only should serialize to the
        expected XHTML, with HTML5-only elements (summary, section)
        rendered as classed <div>s."""
        # test 2: use element.html elements only, to make a similar
        # document (although without metadata about
        # sections/subsection and classses). Uses some HTML5 elements
        # that are converted to divs when rendering as XHTML 1.1
        body = html.Body([
            html.H1(['Toplevel heading']),
            html.Summary(['Introductory preamble']),
            html.Section([
                html.H2(['First section']),
                html.P(['Some text']),
                html.Section(
                    [html.H3(['First subsection']),
                     html.P(['More text'])])
            ]),
            html.Section(
                [html.H2(['Second section']),
                 html.P(['Even more text'])])
        ])
        want = """
<body xmlns="http://www.w3.org/1999/xhtml"
      about="http://localhost:8000/res/base/basefile">
  <h1>Toplevel heading</h1>
  <div class="summary">Introductory preamble</div>
  <div class="section">
    <h2>First section</h2>
    <p>Some text</p>
    <div class="section">
      <h3>First subsection</h3>
      <p>More text</p>
    </div>
  </div>
  <div class="section">
    <h2>Second section</h2>
    <p>Even more text</p>
  </div>
</body>
"""
        self._test_asxhtml(want, body)
Ejemplo n.º 9
0
 def mkres(page=1, pagesize=10, total=25):
     """Build a fake (hits, pager) search result tuple for *page*.

     Mimics what a fulltext index query returns: a list of hit dicts for
     the requested page, plus pager metadata covering all *total* hits.
     """
     hits = []
     for i in range((page - 1) * pagesize, min(page * pagesize, total)):
         hits.append(
             {'title': '',
              'uri': 'http://example.org/base/123/c#S%d' % ((i * 2) - 1),
              'text': html.P(['This is a needle document'])})
     # Ceiling division for the page count: the old int(total/pagesize)+1
     # overcounted by a whole page whenever total was an exact multiple
     # of pagesize (e.g. total=20, pagesize=10 gave 3 instead of 2).
     return (hits,
             {'pagenum': page,
              'pagecount': -(-total // pagesize),
              'firstresult': (page - 1) * pagesize + 1,
              'lastresult': (page - 1) * pagesize + len(hits),
              'totalresults': total})
Ejemplo n.º 10
0
    def test_elements_from_soup(self):
        """elements_from_soup should keep supported tags (html, head,
        title, body, p) and drop unsupported elements (<marquee>,
        <center>) as well as HTML comments."""
        soup = BeautifulSoup(
            """<html>
<head>
  <title>Example doc</title>
</head>
<body>
  <marquee>Hello world</marquee>
  <!-- Hello world -->
  <center>Hello world</center>
  <p>That's enough of this nonsense</p>
</body>""", "lxml")
        got = html.elements_from_soup(soup.html)
        self.assertEqual(
            html.HTML([
                html.Head([html.Title(["Example doc"])]),
                html.Body([html.P(["That's enough of this nonsense"])])
            ]), got)
Ejemplo n.º 11
0
    def test_elements_from_soup(self):
        """elements_from_soup should map nested markup (divs, images,
        inline elements, definition lists, links) to the corresponding
        elements.html classes, preserving attributes like class, src,
        href and id."""
        from ferenda.elements import html
        soup = BeautifulSoup(
            """<body>
<h1>Sample</h1>
<div class="main">
<img src="xyz.png"/>
<p>Some <b>text</b></p>
<dl>
<dt>Term 1</dt>
<dd>Definition 1</dd>
</dl>
</div>
<div id="foot">
<hr/>
<a href="/">home</a> - <a href="/about">about</a>
</div>
</body>""", "lxml")
        body = html.elements_from_soup(soup.body)
        # print("Body: \n%s" % serialize(body))
        result = html.Body([
            html.H1(["Sample"]),
            html.Div([
                html.Img(src="xyz.png"),
                html.P(["Some ", html.B(["text"])]),
                html.DL([html.DT(["Term 1"]),
                         html.DD(["Definition 1"])])
            ], **{"class": "main"}),
            html.Div([
                html.HR(),
                html.A(["home"], href="/"), " - ",
                html.A(["about"], href="/about")
            ],
                     id="foot")
        ])
        # Compare serialized forms for a readable diff on failure.
        self.maxDiff = 4096
        self.assertEqual(serialize(body), serialize(result))
Ejemplo n.º 12
0
    def handle_search(self, request, **values):
        """Run a fulltext query from *request* and render a result page.

        Werkzeug-style counterpart to the WSGI ``search`` method: builds
        the same hit list and pager markup, but returns a ``Response``
        object instead of calling ``start_response``.
        """
        res, pager = self._search_run_query(request.args)

        # Singular-aware page title, echoing the free-text query.
        if pager['totalresults'] == 1:
            title = "1 match"
        else:
            title = "%s matches" % pager['totalresults']
        title += " for '%s'" % request.args.get("q")

        body = html.Body()
        for r in res:
            # Fall back to the URI when a hit lacks a usable title, and
            # prefix the identifier when one is present.
            if 'dcterms_title' not in r or r['dcterms_title'] is None:
                r['dcterms_title'] = r['uri']
            if r.get('dcterms_identifier'):
                r['dcterms_title'] = r['dcterms_identifier'] + ": " + r[
                    'dcterms_title']
            body.append(
                html.Div([
                    html.H2([elements.Link(r['dcterms_title'], uri=r['uri'])]),
                    r.get('text', '')
                ], **{'class': 'hit'}))
        pagerelem = self._search_render_pager(pager, dict(request.args),
                                              request.path)
        body.append(
            html.Div([
                html.P([
                    "Results %(firstresult)s-%(lastresult)s "
                    "of %(totalresults)s" % pager
                ]), pagerelem
            ], **{'class': 'pager'}))
        data = self._transform(title,
                               body,
                               request.environ,
                               template="xsl/search.xsl")
        return Response(data, mimetype="text/html")
Ejemplo n.º 13
0
    def test_search_multiple(self):
        """A query matching several documents should render one hit
        section per document (title link, identifier prefix when present,
        and the highlighted snippet), plus the expected page chrome
        (stylesheets and scripts)."""
        self.env['QUERY_STRING'] = "q=part"
        # Canned (hits, pager) tuple as returned by the fulltext index;
        # the second hit deliberately lacks dcterms_title to exercise the
        # URI fallback.
        res = (
            [
                {
                    'dcterms_title':
                    'Introduction',
                    'dcterms_identifier':
                    '123/a¶1',
                    'uri':
                    'http://example.org/base/123/a#S1',
                    'text':
                    html.P([
                        'This is ',
                        html.Strong(['part'], **{'class':
                                                 'match'}), ' of document-',
                        html.Strong(['part'], **{'class': 'match'}),
                        ' section 1</p>'
                    ])
                },
                {  #'title':'Definitions and Abbreviations',
                    'uri':
                    'http://example.org/base/123/a#S2',
                    'text':
                    html.P([
                        'second main document ',
                        html.Strong(['part'], **{'class': 'match'})
                    ])
                },
                {
                    'dcterms_title':
                    'Example',
                    'uri':
                    'http://example.org/base/123/a',
                    'text':
                    html.P([
                        'This is ',
                        html.Strong(['part'], **{'class': 'match'}),
                        ' of the main document'
                    ])
                }
            ],
            {
                'pagenum': 1,
                'pagecount': 1,
                'firstresult': 1,
                'lastresult': 3,
                'totalresults': 3
            })

        config = {'connect.return_value': Mock(**{'query.return_value': res})}
        with patch('ferenda.wsgiapp.FulltextIndex', **config):
            status, headers, content = self.call_wsgi(self.env)
        self.assertResponse("200 OK",
                            {'Content-Type': 'text/html; charset=utf-8'}, None,
                            status, headers, None)
        t = etree.parse(BytesIO(content))
        css = t.findall("head/link[@rel='stylesheet']")
        self.assertEqual(len(css),
                         3)  # bootstrap, bootstrap-theme, ferenda
                             # NOTE(review): the old comment also listed
                             # "sfs", but only three links are asserted
        self.assertEqual(
            'https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css',
            css[0].get('href'))
        js = t.findall("body/script")
        self.assertEqual(len(js), 4)  # jquery, bootstrap, ferenda, typeahead
        resulthead = t.find(".//article/h1").text
        self.assertEqual(resulthead, "3 matches for 'part'")
        docs = t.findall(".//section[@class='hit']")
        self.assertEqual(len(docs), 3)
        self.assertEqual(docs[0][0].tag, 'h2')
        expect = res[0]
        # Hit 1: identifier-prefixed title, link href, snippet markup.
        self.assertIn(expect[0]['dcterms_title'], docs[0][0][0].text)
        self.assertEqual(expect[0]['uri'], docs[0][0][0].get('href'))
        self.assertEqualXML(expect[0]['text'].as_xhtml(),
                            docs[0][1],
                            namespace_aware=False)

        # Hit 2: no dcterms_title supplied, so the URI is used as title.
        self.assertIn(expect[1]['dcterms_title'], docs[1][0][0].text)
        self.assertEqual(expect[1]['uri'], docs[1][0][0].get('href'))
        self.assertEqualXML(expect[1]['text'].as_xhtml(),
                            docs[1][1],
                            namespace_aware=False)

        self.assertIn(expect[2]['dcterms_title'], docs[2][0][0].text)
        self.assertEqual(expect[2]['uri'], docs[2][0][0].get('href'))
        self.assertEqualXML(expect[2]['text'].as_xhtml(),
                            docs[2][1],
                            namespace_aware=False)
Ejemplo n.º 14
0
    def test_meta(self):
        """Mixed ferenda/HTML elements with attached RDF graphs should
        serialize to XHTML carrying the corresponding RDFa attributes
        (about/property/content/rel/typeof, plus datatype and
        xml:lang where the graph provides them)."""
        # test 3: use a mix of our own elements and html elements,
        # with meta + uri attached to some nodes
        g1 = Graph().parse(format='n3',
                           data="""
@prefix bibo: <http://purl.org/ontology/bibo/> .
@prefix dcterms: <http://purl.org/dc/terms/> .

<http://localhost:8000/res/base/basefile#S1> a bibo:DocumentPart;
        dcterms:title "First section";
        bibo:chapter "1" .
        """)
        g2 = Graph().parse(format='n3',
                           data="""
@prefix bibo: <http://purl.org/ontology/bibo/> .
@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<http://localhost:8000/res/base/basefile#S2> a bibo:DocumentPart;
        dcterms:title "Second section";
        bibo:chapter "2";
        dcterms:creator "Fred Bloggs"@en-GB;
        dcterms:issued "2013-05-10"^^xsd:date;
        owl:sameAs <http://example.org/s2> .

<http://example.org/s2> dcterms:title "Same same but different" .
       
<http://localhost:8000/res/base/unlrelated> dcterms:title "Unrelated document" .
        
        """)

        body = el.Body([
            el.Heading(['Toplevel heading'], level=1),
            html.P(['Introductory preamble']),
            html.Div([
                html.P(['Some text']),
                el.Subsection([el.Paragraph(['More text'])],
                              ordinal='1.1',
                              title="First subsection")
            ],
                     uri='http://localhost:8000/res/base/basefile#S1',
                     meta=g1),
            el.Section([el.Paragraph(['Even more text'])],
                       uri='http://localhost:8000/res/base/basefile#S2',
                       meta=g2)
        ])
        want = """
<body xmlns="http://www.w3.org/1999/xhtml"
      about="http://localhost:8000/res/base/basefile">
  <h1>Toplevel heading</h1>
  <p>Introductory preamble</p>
  <div about="http://localhost:8000/res/base/basefile#S1"
       content="First section"
       property="dcterms:title"
       typeof="bibo:DocumentPart">
    <span href="http://localhost:8000/res/base/basefile"
          rel="dcterms:isPartOf"/>
    <span content="1"
          property="bibo:chapter"
          xml:lang=""/>
    <p>Some text</p>
    <div about="http://localhost:8000/res/base/basefile#S1.1"
         content="First subsection"
         property="dcterms:title"
         typeof="bibo:DocumentPart"
         class="subsection">
      <span href="http://localhost:8000/res/base/basefile#S1"
            rel="dcterms:isPartOf"/>
      <span about="http://localhost:8000/res/base/basefile#S1.1"
            content="1.1"
            property="bibo:chapter"/>
      <p>More text</p>
    </div>
  </div>
  <div about="http://localhost:8000/res/base/basefile#S2"
      class="section"
      content="Second section"
      property="dcterms:title"
      typeof="bibo:DocumentPart">
    <span href="http://localhost:8000/res/base/basefile"
          rel="dcterms:isPartOf"/>
    <span href="http://example.org/s2"
          rel="owl:sameAs">
      <span content="Same same but different"
            property="dcterms:title"
            xml:lang=""/>
    </span>
    <span content="2"
          property="bibo:chapter"
          xml:lang=""/>
    <span content="2013-05-10"
          property="dcterms:issued"
          datatype="xsd:date"/>
    <span content="Fred Bloggs"
          property="dcterms:creator"
          xml:lang="en-GB"/>
    <p>Even more text</p>
  </div>
</body>"""
        self._test_asxhtml(want, body)
Ejemplo n.º 15
0
    def static(self, environ, start_response):
        """WSGI method, called by the wsgi app for all other requests not
        handled by :py:func:`~ferenda.Manager.search` or
        :py:func:`~ferenda.Manager.api`

        """
        path = environ['PATH_INFO']
        if not isinstance(path, str):
            path = path.decode("utf-8")
        fullpath = self.config.documentroot + path
        # we start by asking all repos "do you handle this path"?
        # default impl is to say yes if 1st seg == self.alias and the
        # rest can be treated as basefile yielding a existing
        # generated file.  a yes answer contains a FileWrapper around
        # the repo-selected file and optionally length (but not
        # status, always 200, or mimetype, always text/html). None
        # means no.
        fp = None
        # Collected per-repo reasons for declining; shown on the 404 page.
        reasons = OrderedDict()
        # /rsrc/... and /robots.txt that exist on disk bypass the repos
        # entirely and are served as plain static files below.
        if not ((path.startswith("/rsrc") or path == "/robots.txt")
                and os.path.exists(fullpath)):
            for repo in self.repos:
                supports = repo.requesthandler.supports(environ)
                if supports:
                    fp, length, status, mimetype = repo.requesthandler.handle(
                        environ)
                elif hasattr(supports, 'reason'):
                    reasons[repo.alias] = supports.reason
                else:
                    reasons[repo.alias] = '(unknown reason)'
                if fp:
                    # Map the numeric status from the handler to the
                    # WSGI status line it corresponds to.
                    status = {
                        200: "200 OK",
                        404: "404 Not found",
                        406: "406 Not Acceptable",
                        500: "500 Server error"
                    }[status]
                    iterdata = FileWrapper(fp)
                    break
        # no repo handled the path
        if not fp:
            if self.config.legacyapi:  # rewrite the path to some resources. FIXME:
                # shouldn't hardcode the "rsrc" path of the path
                if path == "/json-ld/context.json":
                    fullpath = self.config.documentroot + "/rsrc/api/context.json"
                elif path == "/var/terms":
                    fullpath = self.config.documentroot + "/rsrc/api/terms.json"
                elif path == "/var/common":
                    fullpath = self.config.documentroot + "/rsrc/api/common.json"
            # Directory requests fall back to their index.html.
            if os.path.isdir(fullpath):
                fullpath = fullpath + "index.html"
            if os.path.exists(fullpath):
                # Serve the file straight from disk, guessing the MIME
                # type from the extension.
                ext = os.path.splitext(fullpath)[1]
                # if not mimetypes.inited:
                #     mimetypes.init()
                mimetype = mimetypes.types_map.get(ext, 'text/plain')
                status = "200 OK"
                length = os.path.getsize(fullpath)
                fp = open(fullpath, "rb")
                iterdata = FileWrapper(fp)
            else:
                # Nothing on disk either: render a 404 page that lists
                # why each repo declined the request.
                mimetype = "text/html"
                reasonmsg = "\n".join(
                    ["%s: %s" % (k, reasons[k]) for k in reasons])
                msgbody = html.Body([
                    html.H1("Document not found"),
                    html.P([
                        "The path %s was not found at %s" % (path, fullpath)
                    ]),
                    html.P(["Examined %s repos" % (len(self.repos))]),
                    html.Pre([reasonmsg])
                ])
                iterdata = self._transform("404 Not found", msgbody, environ)
                status = "404 Not Found"
                length = None
        return self._return_response(iterdata, start_response, status,
                                     mimetype, length)