def exception(self, environ, start_response):
    import traceback
    from pprint import pformat
    exc_type, exc_value, tb = sys.exc_info()
    tblines = traceback.format_exception(exc_type, exc_value, tb)
    tbstr = "\n".join(tblines)
    # render the error
    # the last formatted traceback line is the "ExceptionType: message" summary
    title = tblines[-1]
    body = html.Body([
        html.Div([
            html.H1(self.exception_heading),
            html.P([self.exception_description]),
            html.H2("Traceback"),
            html.Pre([tbstr]),
            html.H2("Variables"),
            html.Pre(["request_uri: %s\nos.getcwd(): %s" %
                      (request_uri(environ), os.getcwd())]),
            html.H2("environ"),
            html.Pre([pformat(environ)]),
            html.H2("sys.path"),
            html.Pre([pformat(sys.path)]),
            html.H2("os.environ"),
            html.Pre([pformat(dict(os.environ))])
        ])
    ])
    msg = self._transform(title, body, environ)
    return self._return_response(msg, start_response,
                                 status="500 Internal Server Error",
                                 contenttype="text/html")
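# Hedged sketch, not part of the original source: exception() reads
# sys.exc_info(), so it only makes sense when called from inside an "except"
# block. A minimal WSGI entry point wiring it up might look like this;
# self.dispatch() is a hypothetical stand-in for whatever method actually
# routes the request.
def __call__(self, environ, start_response):
    try:
        return self.dispatch(environ, start_response)
    except Exception:
        # sys.exc_info() still describes the active exception here, which is
        # exactly what exception() formats into the 500 error page
        return self.exception(environ, start_response)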
def search(self, environ, start_response):
    """WSGI method, called by the wsgi app for requests that match
    ``searchendpoint``."""
    queryparams = self._search_parse_query(environ['QUERY_STRING'])
    res, pager = self._search_run_query(queryparams)

    if pager['totalresults'] == 1:
        title = "1 match"
    else:
        title = "%s matches" % pager['totalresults']
    title += " for '%s'" % queryparams.get("q")

    body = html.Body()
    for r in res:
        if 'dcterms_title' not in r or r['dcterms_title'] is None:
            r['dcterms_title'] = r['uri']
        if r.get('dcterms_identifier', False):
            # prefer an identifier-prefixed title when one is available
            r['dcterms_title'] = (r['dcterms_identifier'] + ": " +
                                  r['dcterms_title'])
        body.append(html.Div(
            [html.H2([elements.Link(r['dcterms_title'], uri=r['uri'])]),
             r.get('text', '')], **{'class': 'hit'}))

    pagerelem = self._search_render_pager(pager, queryparams,
                                          environ['PATH_INFO'])
    body.append(html.Div(
        [html.P(["Results %(firstresult)s-%(lastresult)s "
                 "of %(totalresults)s" % pager]),
         pagerelem], **{'class': 'pager'}))

    data = self._transform(title, body, environ, template="xsl/search.xsl")
    return self._return_response(data, start_response)
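# Hedged usage sketch (not from the source): search() is an ordinary WSGI
# callable, so it can be exercised with a hand-built environ dict and a
# start_response capture. The app argument, endpoint path and query values
# are assumptions for illustration only.
def call_search_example(app):
    captured = {}

    def start_response(status, headers):
        captured['status'] = status
        captured['headers'] = headers

    environ = {'QUERY_STRING': 'q=copyright', 'PATH_INFO': '/mysearch/'}
    body_iter = app.search(environ, start_response)
    # WSGI bodies are iterables of bytes; join them for inspection
    return captured['status'], b"".join(body_iter)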
def search(self, environ, start_response):
    """WSGI method, called by the wsgi app for requests that match
    ``searchendpoint``."""
    queryparams = self._search_parse_query(environ['QUERY_STRING'])
    # massage queryparams['issued'] if present, then restore it
    y = None
    if 'issued' in queryparams:
        y = int(queryparams['issued'])
        queryparams['issued'] = Between(datetime(y, 1, 1),
                                        datetime(y, 12, 31, 23, 59, 59))
    boost_types = [("sfs", 10)]
    res, pager = self._search_run_query(queryparams,
                                        boost_types=boost_types)
    if y:
        queryparams['issued'] = str(y)

    # user-facing strings are Swedish ("träff" = "match", "för" = "for")
    if pager['totalresults'] == 1:
        title = "1 träff"
    else:
        title = "%s träffar" % pager['totalresults']
    title += " för '%s'" % queryparams.get("q")

    body = html.Body()
    if hasattr(res, 'aggregations'):
        body.append(self._search_render_facets(res.aggregations,
                                               queryparams, environ))
    for r in res:
        if 'label' not in r:
            label = r['uri']
        elif isinstance(r['label'], list):
            # str() flattens any nested element structure, eg
            # <p><strong><em>foo</em></strong></p> -> foo
            label = str(r['label'])
        else:
            label = r['label']
        rendered_hit = html.Div(
            [html.B([elements.Link(label, uri=r['uri'])],
                    **{'class': 'lead'})],
            **{'class': 'hit'})
        if r.get('text'):
            rendered_hit.append(html.P([r.get('text', '')]))
        if 'innerhits' in r:
            for innerhit in r['innerhits']:
                rendered_hit.append(self._search_render_innerhit(innerhit))
        body.append(rendered_hit)

    pagerelem = self._search_render_pager(pager, queryparams,
                                          environ['PATH_INFO'])
    body.append(html.Div(
        [html.P(["Träff %(firstresult)s-%(lastresult)s "
                 "av %(totalresults)s" % pager]),
         pagerelem], **{'class': 'pager'}))

    data = self._transform(title, body, environ, template="xsl/search.xsl")
    return self._return_response(data, start_response)
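# Hedged illustration (not from the source) of the 'issued' handling above:
# a plain year in the query string is widened to a full-year Between range
# for the query, then put back as a string afterwards so pager and facet
# links can still render it. Between and datetime are the same names the
# method above uses; the query values are made up.
def issued_range_example():
    queryparams = {'q': 'avtal', 'issued': '1960'}
    y = int(queryparams['issued'])
    queryparams['issued'] = Between(datetime(y, 1, 1),
                                    datetime(y, 12, 31, 23, 59, 59))
    # ... the fulltext query would run here with the widened range ...
    queryparams['issued'] = str(y)
    return queryparams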
def test_elements_from_soup(self):
    soup = BeautifulSoup("""<html>
<head>
  <title>Example doc</title>
</head>
<body>
  <marquee>Hello world</marquee>
  <!-- Hello world -->
  <center>Hello world</center>
  <p>That's enough of this nonsense</p>
</body>""", "lxml")
    got = html.elements_from_soup(soup.html)
    self.assertEqual(
        html.HTML([
            html.Head([html.Title(["Example doc"])]),
            html.Body([html.P(["That's enough of this nonsense"])])
        ]),
        got)
def stream(self, environ, start_response):
    """WSGI method, called by the wsgi app for requests that indicate
    the need for a streaming response."""
    path = environ['PATH_INFO']
    if not isinstance(path, str):
        path = path.decode("utf-8")
    fullpath = self.config.documentroot + path
    # we start by asking all repos "do you handle this path?"
    # default impl is to say yes if 1st seg == self.alias and the
    # rest can be treated as a basefile yielding an existing
    # generated file. in the streaming case a yes answer means the
    # repo's requesthandler takes over and streams the entire
    # response itself. None means no.
    fp = None
    reasons = OrderedDict()
    if not ((path.startswith("/rsrc") or path == "/robots.txt") and
            os.path.exists(fullpath)):
        for repo in self.repos:
            supports = repo.requesthandler.supports(environ)
            if supports:
                return repo.requesthandler.stream(environ, start_response)
            elif hasattr(supports, 'reason'):
                reasons[repo.alias] = supports.reason
            else:
                reasons[repo.alias] = '(unknown reason)'
    # if we reach this, no repo handled the path
    mimetype = "text/html"
    reasonmsg = "\n".join(["%s: %s" % (k, reasons[k]) for k in reasons])
    msgbody = html.Body([
        html.H1("Document not found"),
        html.P(["The path %s was not found at %s" % (path, fullpath)]),
        html.P(["Examined %s repos" % (len(self.repos))]),
        html.Pre([reasonmsg])
    ])
    iterdata = self._transform("404 Not found", msgbody, environ)
    status = "404 Not Found"
    length = None
    return self._return_response(iterdata, start_response, status,
                                 mimetype, length)
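# Hedged sketch (hypothetical, not from the source) of the contract stream()
# relies on: a repo's requesthandler answers supports(environ) truthily and
# then takes over the WSGI response entirely via stream(); a falsy answer may
# carry a .reason attribute explaining why the path was declined.
class ExampleStreamHandler:
    def __init__(self, repo):
        self.repo = repo

    def supports(self, environ):
        # claim any path under this repo's alias
        return environ['PATH_INFO'].startswith("/" + self.repo.alias + "/")

    def stream(self, environ, start_response):
        start_response("200 OK", [("Content-Type", "text/plain")])
        # yield the body in chunks instead of building it up front
        yield b"first chunk\n"
        yield b"second chunk\n"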
def test_html(self):
    # test 2: use element.html elements only, to make a similar
    # document (although without metadata about
    # sections/subsections and classes). Uses some HTML5 elements
    # that are converted to divs when rendering as XHTML 1.1
    body = html.Body([
        html.H1(['Toplevel heading']),
        html.Summary(['Introductory preamble']),
        html.Section([
            html.H2(['First section']),
            html.P(['Some text']),
            html.Section([html.H3(['First subsection']),
                          html.P(['More text'])])
        ]),
        html.Section([html.H2(['Second section']),
                      html.P(['Even more text'])])
    ])
    want = """
<body xmlns="http://www.w3.org/1999/xhtml" about="http://localhost:8000/res/base/basefile">
  <h1>Toplevel heading</h1>
  <div class="summary">Introductory preamble</div>
  <div class="section">
    <h2>First section</h2>
    <p>Some text</p>
    <div class="section">
      <h3>First subsection</h3>
      <p>More text</p>
    </div>
  </div>
  <div class="section">
    <h2>Second section</h2>
    <p>Even more text</p>
  </div>
</body>
"""
    self._test_asxhtml(want, body)
def test_elements_from_soup(self):
    from ferenda.elements import html
    soup = BeautifulSoup("""<body>
<h1>Sample</h1>
<div class="main">
  <img src="xyz.png"/>
  <p>Some <b>text</b></p>
  <dl>
    <dt>Term 1</dt>
    <dd>Definition 1</dd>
  </dl>
</div>
<div id="foot">
  <hr/>
  <a href="/">home</a> - <a href="/about">about</a>
</div>
</body>""", "lxml")
    body = html.elements_from_soup(soup.body)
    # print("Body: \n%s" % serialize(body))
    result = html.Body([
        html.H1(["Sample"]),
        html.Div([
            html.Img(src="xyz.png"),
            html.P(["Some ", html.B(["text"])]),
            html.DL([html.DT(["Term 1"]), html.DD(["Definition 1"])])
        ], **{"class": "main"}),
        html.Div([
            html.HR(),
            html.A(["home"], href="/"),
            " - ",
            html.A(["about"], href="/about")
        ], id="foot")
    ])
    self.maxDiff = 4096
    self.assertEqual(serialize(body), serialize(result))
def handle_search(self, request, **values):
    # return Response("<h1>Hello search: " + request.args.get("q") + "</h1>",
    #                 mimetype="text/html")
    res, pager = self._search_run_query(request.args)

    if pager['totalresults'] == 1:
        title = "1 match"
    else:
        title = "%s matches" % pager['totalresults']
    title += " for '%s'" % request.args.get("q")

    body = html.Body()
    for r in res:
        if 'dcterms_title' not in r or r['dcterms_title'] is None:
            r['dcterms_title'] = r['uri']
        if r.get('dcterms_identifier', False):
            r['dcterms_title'] = (r['dcterms_identifier'] + ": " +
                                  r['dcterms_title'])
        body.append(html.Div(
            [html.H2([elements.Link(r['dcterms_title'], uri=r['uri'])]),
             r.get('text', '')], **{'class': 'hit'}))

    pagerelem = self._search_render_pager(pager, dict(request.args),
                                          request.path)
    body.append(html.Div(
        [html.P(["Results %(firstresult)s-%(lastresult)s "
                 "of %(totalresults)s" % pager]),
         pagerelem], **{'class': 'pager'}))

    data = self._transform(title, body, request.environ,
                           template="xsl/search.xsl")
    return Response(data, mimetype="text/html")
def static(self, environ, start_response):
    """WSGI method, called by the wsgi app for all other requests not
    handled by :py:func:`~ferenda.Manager.search` or
    :py:func:`~ferenda.Manager.api`
    """
    path = environ['PATH_INFO']
    if not isinstance(path, str):
        path = path.decode("utf-8")
    fullpath = self.config.documentroot + path
    # we start by asking all repos "do you handle this path?"
    # default impl is to say yes if 1st seg == self.alias and the
    # rest can be treated as a basefile yielding an existing
    # generated file. a yes answer contains a FileWrapper around
    # the repo-selected file and optionally length (but not
    # status, always 200, or mimetype, always text/html). None
    # means no.
    fp = None
    reasons = OrderedDict()
    if not ((path.startswith("/rsrc") or path == "/robots.txt") and
            os.path.exists(fullpath)):
        for repo in self.repos:
            supports = repo.requesthandler.supports(environ)
            if supports:
                fp, length, status, mimetype = repo.requesthandler.handle(
                    environ)
            elif hasattr(supports, 'reason'):
                reasons[repo.alias] = supports.reason
            else:
                reasons[repo.alias] = '(unknown reason)'
            if fp:
                status = {200: "200 OK",
                          404: "404 Not found",
                          406: "406 Not Acceptable",
                          500: "500 Server error"}[status]
                iterdata = FileWrapper(fp)
                break
    # no repo handled the path
    if not fp:
        if self.config.legacyapi:
            # rewrite the path to some resources. FIXME:
            # shouldn't hardcode the "rsrc" part of the path
            if path == "/json-ld/context.json":
                fullpath = self.config.documentroot + "/rsrc/api/context.json"
            elif path == "/var/terms":
                fullpath = self.config.documentroot + "/rsrc/api/terms.json"
            elif path == "/var/common":
                fullpath = self.config.documentroot + "/rsrc/api/common.json"
        if os.path.isdir(fullpath):
            fullpath = fullpath + "index.html"
        if os.path.exists(fullpath):
            ext = os.path.splitext(fullpath)[1]
            # if not mimetypes.inited:
            #     mimetypes.init()
            mimetype = mimetypes.types_map.get(ext, 'text/plain')
            status = "200 OK"
            length = os.path.getsize(fullpath)
            fp = open(fullpath, "rb")
            iterdata = FileWrapper(fp)
        else:
            mimetype = "text/html"
            reasonmsg = "\n".join(["%s: %s" % (k, reasons[k])
                                   for k in reasons])
            msgbody = html.Body([
                html.H1("Document not found"),
                html.P(["The path %s was not found at %s" %
                        (path, fullpath)]),
                html.P(["Examined %s repos" % (len(self.repos))]),
                html.Pre([reasonmsg])
            ])
            iterdata = self._transform("404 Not found", msgbody, environ)
            status = "404 Not Found"
            length = None
    return self._return_response(iterdata, start_response, status,
                                 mimetype, length)
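# Hedged sketch (hypothetical, not from the source) of the tuple contract
# static() expects from requesthandler.handle(environ): an open binary file
# object (or a falsy value to decline), its length, a numeric status code
# and a mimetype. documentroot is an assumed constructor argument, and os is
# assumed imported at module level as in the functions above.
class ExampleStaticHandler:
    def __init__(self, repo, documentroot):
        self.repo = repo
        self.documentroot = documentroot

    def supports(self, environ):
        # claim any path under this repo's alias
        return environ['PATH_INFO'].startswith("/" + self.repo.alias + "/")

    def handle(self, environ):
        fullpath = self.documentroot + environ['PATH_INFO']
        if not os.path.exists(fullpath):
            # a falsy fp makes static() try other repos and then the filesystem
            return None, None, 404, "text/html"
        fp = open(fullpath, "rb")
        return fp, os.path.getsize(fullpath), 200, "text/html"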