def jinja2_environment():
    """Return the Jinja2 environment shared by all views.

    The environment is created on the first call and stored in the
    module-level ``__jinja2_environment`` global; every later call returns
    that same object.

    For background on custom filters see
    http://jinja.pocoo.org/docs/api/#custom-filters
    """
    global __jinja2_environment
    if __jinja2_environment:
        return __jinja2_environment

    def uid_to_name(uid):
        return scimm.imm().uid_to_name(uid)

    def uid_to_acro(uid):
        return scimm.imm().uid_to_acro(uid)

    def sub_filter(string, pattern, repl):
        # Same as regex.sub, but with the subject first so that it can be
        # used as ``{{ value|sub(pattern, repl) }}``.
        return regex.sub(pattern, repl, string)

    def sht_expansion(string):
        """Add links from SHT vol/page references to the sht-lookup page."""
        if 'SHT' not in string:
            return string

        # NOTE(review): the original comment here read "Replace with
        # spaces"; the first argument was presumably a non-breaking space
        # (or an entity) before the file was copied -- confirm the literal.
        string = string.replace(' ', ' ')
        baseurl = '/sht-lookup/'

        def replacement(m):
            first, second = m[1], m[2]
            # Leave page references such as 'also cf. ix p. 393ff' and
            # numbers preceded by ' A' unlinked.
            is_page_ref = regex.match(r'.+p\.\s*', first)
            if is_page_ref or regex.match(r'.+A', first):
                return '{}{}'.format(first, second)
            # The link target uses an ASCII hyphen instead of the dash
            # character found in the text.
            path = second.replace('−', '-')
            return '{}<a href="{}{}" target="_blank">{}</a>'.format(
                first, baseurl, path, second)

        return regex.sub(
            r'([^0-9]+)([0-9]{1,4}(?:\.?[\−\+0-9a-zA-Z]+)?)',
            replacement, string)

    env = jinja2.Environment(
        loader=jinja2.FileSystemLoader(str(sc.templates_dir)),
        extensions=[AssetsExtension],
        trim_blocks=True,
        lstrip_blocks=True,
    )
    env.assets_environment = assets.get_env()
    env.filters.update({
        'date': util.format_date,
        'time': util.format_time,
        'max': max,
        'min': min,
        'datetime': util.format_datetime,
        'timedelta': util.format_timedelta,
        'uid_to_name': uid_to_name,
        'uid_to_acro': uid_to_acro,
        'sub': sub_filter,
        'sht_expansion': sht_expansion,
    })

    __jinja2_environment = env
    return env
def search(query=None, limit=25, offset=0):
    """Search suttas and return one page of results.

    Args:
        query: The search string.  Double quotes are stripped; if any were
            present only "exact" results are returned.  A query of at most
            three characters that is a known language uid lists that
            language instead.  ``None`` is treated as an empty query.
        limit: Maximum number of results on the page.
        offset: Index of the first result on the page.

    Returns:
        A ``classes.SuttaResultsCategory``.  ``total`` is the number of
        matching results across all pages, and ``footurl`` is set to a
        "next page" link whenever results remain beyond this page.
    """
    # Local import: only this function needs it.
    from urllib.parse import quote_plus

    out = classes.SuttaResultsCategory()
    query = query or ''
    quoted = '"' in query
    query = query.replace('"', '')

    searchlang = None
    if len(query) <= 3 and query in scimm.imm().languages:
        searchlang = query

    if not searchlang and len(query) <= 2:
        out.total = 0
        out.add("Search term too short.", [])
        return out

    if searchlang:
        ranks, suttas = get_and_rank_results(query='', lang=searchlang)
    else:
        ranks, suttas = get_and_rank_results(query)

    # Ranks up to 700 count as exact matches, the rest as similar ones.
    # Assumes ``ranks`` is sorted ascending, as bisect requires.
    split = bisect.bisect(ranks, 700)
    exact = suttas[:split]
    similar = [] if quoted else suttas[split:]

    # Count BEFORE paginating: the original counted the already-sliced
    # page, so the "next page" link below could never be produced.
    total = len(exact) + len(similar)
    out.total = total

    end = offset + limit
    e_results = exact[offset:end]
    s_results = similar[max(0, offset - len(exact)):max(0, end - len(exact))]

    if not e_results and not s_results:
        out.add("No results", [])
        return out

    if total > end:
        # URL-encode the query so '&', '#', spaces etc. survive the link.
        href = '/search/?query={}&target=suttas&limit={}&offset={}'.format(
            quote_plus(query), limit, end)
        out.footurl = '<a href="{}">Results {}–{}</a>'.format(
            escape(href), end + 1, min(total, end + limit))

    if e_results:
        out.add("Exact results", e_results)
    if s_results:
        out.add("Similar results", s_results)
    return out
def stress(count=1000):
    """Run ``count`` searches on four threads and print the throughput.

    Queries are the first four characters of sutta names, drawn at random
    from a shuffled pool of up to 1000 names that is repeated often enough
    to supply ``count`` samples.

    Args:
        count: Number of queries to perform.
    """
    import concurrent.futures
    import random
    import time

    imm = scimm.imm()
    terms = [s.name[:4] for s in imm.suttas.values()]
    random.shuffle(terms)
    pool = terms[:1000]

    # random.sample needs at least ``count`` items; with fewer than 1000
    # names the original repeat factor could leave the pool too small.
    repeats = int(1 + count / 1000)
    if pool:
        repeats = max(repeats, -(-count // len(pool)))
    queries = random.sample(pool * repeats, count)

    start = time.perf_counter()
    # The context manager shuts the worker threads down when finished.
    with concurrent.futures.ThreadPoolExecutor(4) as getter:
        for _ in getter.map(search, queries):
            pass
        elapsed = time.perf_counter() - start

    print("Performed {} queries in {} seconds.".format(len(queries), elapsed))
    # Guard against a zero interval (e.g. count == 0).
    rate = len(queries) / elapsed if elapsed else float('inf')
    print("{} queries per second.".format(rate))
def search(query=None, limit=25, offset=0):
    """Search suttas and return one page of results.

    Args:
        query: The search string.  Double quotes are stripped; if any were
            present only "exact" results are returned.  A query of at most
            three characters that is a known language uid lists that
            language instead.  ``None`` is treated as an empty query.
        limit: Maximum number of results on the page.
        offset: Index of the first result on the page.

    Returns:
        A ``classes.SuttaResultsCategory``.  ``total`` is the number of
        matching results across all pages, and ``footurl`` is set to a
        "next page" link whenever results remain beyond this page.
    """
    # Local import: only this function needs it.
    from urllib.parse import quote_plus

    out = classes.SuttaResultsCategory()
    query = query or ''
    quoted = '"' in query
    query = query.replace('"', '')

    searchlang = None
    if len(query) <= 3 and query in scimm.imm().languages:
        searchlang = query

    if not searchlang and len(query) <= 2:
        out.total = 0
        out.add("Search term too short.", [])
        return out

    if searchlang:
        ranks, suttas = get_and_rank_results(query='', lang=searchlang)
    else:
        ranks, suttas = get_and_rank_results(query)

    # Ranks up to 700 count as exact matches, the rest as similar ones.
    # Assumes ``ranks`` is sorted ascending, as bisect requires.
    split = bisect.bisect(ranks, 700)
    exact = suttas[:split]
    similar = [] if quoted else suttas[split:]

    # Count BEFORE paginating: the original counted the already-sliced
    # page, so the "next page" link below could never be produced.
    total = len(exact) + len(similar)
    out.total = total

    end = offset + limit
    e_results = exact[offset:end]
    s_results = similar[max(0, offset - len(exact)):max(0, end - len(exact))]

    if not e_results and not s_results:
        out.add("No results", [])
        return out

    if total > end:
        # URL-encode the query so '&', '#', spaces etc. survive the link.
        href = '/search/?query={}&target=suttas&limit={}&offset={}'.format(
            quote_plus(query), limit, end)
        out.footurl = '<a href="{}">Results {}–{}</a>'.format(
            escape(href), end + 1, min(total, end + limit))

    if e_results:
        out.add("Exact results", e_results)
    if s_results:
        out.add("Similar results", s_results)
    return out
def stress(count=1000):
    """Run ``count`` searches on four threads and print the throughput.

    Queries are the first four characters of sutta names, drawn at random
    from a shuffled pool of up to 1000 names that is repeated often enough
    to supply ``count`` samples.

    Args:
        count: Number of queries to perform.
    """
    import concurrent.futures
    import random
    import time

    imm = scimm.imm()
    terms = [s.name[:4] for s in imm.suttas.values()]
    random.shuffle(terms)
    pool = terms[:1000]

    # random.sample needs at least ``count`` items; with fewer than 1000
    # names the original repeat factor could leave the pool too small.
    repeats = int(1 + count / 1000)
    if pool:
        repeats = max(repeats, -(-count // len(pool)))
    queries = random.sample(pool * repeats, count)

    start = time.perf_counter()
    # The context manager shuts the worker threads down when finished.
    with concurrent.futures.ThreadPoolExecutor(4) as getter:
        for _ in getter.map(search, queries):
            pass
        elapsed = time.perf_counter() - start

    print("Performed {} queries in {} seconds.".format(len(queries), elapsed))
    # Guard against a zero interval (e.g. count == 0).
    rate = len(queries) / elapsed if elapsed else float('inf')
    print("{} queries per second.".format(rate))
def search_imm(query, lang):
    """Return the set of ``imm.searchstrings`` entries matching the query.

    Per the original note, each entry is a tuple of
    ``(sutta, searchstring, searchstring_cased, suttaname simplified)``.

    When ``lang`` is given, entries whose searchstring contains
    ``" <lang> "`` are returned and ``query`` is ignored.  Otherwise the
    casefolded query is matched against the searchstring and, if the
    Pali-simplified form of the query differs, that form is additionally
    matched against the simplified sutta name.
    """
    imm = scimm.imm()

    if lang:
        needle = " " + lang + " "
        simplified = None
    else:
        needle = query.casefold()
        simplified = textfunctions.simplify_pali(query)

    # First pass: the query as a whole against the searchstring.
    matches = {entry for entry in imm.searchstrings if needle in entry[1]}

    # Second pass: only worthwhile when simplification changed the query.
    if simplified and needle != simplified:
        matches |= {entry for entry in imm.searchstrings
                    if simplified in entry[3]}
    return matches
def paths():
    """Yield ``(url, target)`` pairs for the site's internal URLs.

    In order: every static page (target is the string ``'STATIC'``), then
    each division with its ``/full`` page and subdivisions when it has
    any, then each sutta followed by its own text and its translations
    whenever their URL is site-relative (starts with ``'/'``).
    """
    imm = scimm.imm()

    yield from (('/' + page, 'STATIC') for page in show.STATIC_PAGES)

    for division in imm.divisions.values():
        yield ('/' + division.uid, division)
        if not division.has_subdivisions():
            continue
        yield ('/' + division.uid + '/full', division)
        yield from (('/' + sub.uid, sub) for sub in division.subdivisions)

    for sutta in imm.suttas.values():
        yield ('/' + sutta.uid, sutta)

        if sutta.text_ref:
            text_url = sutta.text_ref.url
            if text_url and text_url.startswith('/'):
                yield (text_url, sutta)

        yield from (
            (translation.url, translation)
            for translation in sutta.translations
            if translation.url and translation.url.startswith('/')
        )
def setup_context(self, context):
    """Fill ``context`` with the data needed to render this text.

    Sets the uid, the matching sutta/division (if any), canonical flag,
    text data and title, the extracted text body, a snippet, quote and
    language information, the list of text references and the next/prev
    navigation data.
    """
    from sc.tools import html

    match = self.content_regex.search(self.get_html())
    match.detach_string()  # Free up memory now.

    imm = scimm.imm()
    uid = self.uid
    lang_code = self.lang_code

    context.uid = uid
    context.sutta = imm.suttas.get(uid)
    context.division = imm.divisions.get(uid)
    context.canonical = self.canonical
    textdata = imm.get_text_data(uid, lang_code)
    context.textdata = textdata
    if textdata:
        context.title = textdata.name
    else:
        context.title = '?'

    context.text = match['content']
    if context.embed:
        context.text = self.shorter_text(context.text)
    context.has_quotes = any(mark in context.text for mark in ('‘', '“'))

    # A missing snippet must not prevent the page from rendering.
    try:
        context.snippet = self.get_snippet(context.text)
    except Exception as e:
        logger.error('Failed to generated snippet for {} ({})'.format(self.uid, str(e)))
        context.snippet = ''

    # Eliminate newlines from Full-width-glyph languages like Chinese
    # because they convert into spaces when rendered.
    # TODO: This check should use 'language' table
    if lang_code in {'zh'}:
        context.text = self.massage_cjk(context.text)
    context.lang_code = lang_code

    # Suttas list their own text first, then every translation; divisions
    # only list their own text.
    context.text_refs = []
    if context.sutta:
        if context.sutta.text_ref:
            context.text_refs.append(context.sutta.text_ref)
        context.text_refs.extend(context.sutta.translations)
    elif context.division and context.division.text_ref:
        context.text_refs.append(context.division.text_ref)

    neighbours = imm.get_next_prev(uid=self.uid, lang_uid=self.lang_code)
    context.next_data = neighbours['next']
    context.prev_data = neighbours['prev']
def setup_context(self, context):
    """Expose which uid components are in use and which are still free.

    Sets on ``context``:
        imm: the imm instance.
        atoz: the lowercase letters ``a``..``z`` as one string.
        used: sorted alphabetic components (split on ``-``) of all
            division, language and subdivision uids.
        unused: sorted two-letter lowercase combinations that do not
            occur among those components.
    """
    imm = scimm.imm()
    context.imm = imm

    letters = ''.join(map(chr, range(97, 97 + 26)))
    every_pair = {first + second for first in letters for second in letters}

    seen = set()
    for uids in (imm.divisions, imm.languages, imm.subdivisions):
        for uid in uids:
            seen.update(uid.split('-'))

    context.unused = sorted(every_pair - seen)
    context.used = sorted(part for part in seen if part.isalpha())
    context.atoz = letters
def setup_context(self, context):
    """Expose which uid components are in use and which are still free.

    Sets on ``context``:
        imm: the imm instance.
        atoz: the lowercase letters ``a``..``z`` as one string.
        used: sorted alphabetic components (split on ``-``) of all
            division, language and subdivision uids.
        unused: sorted two-letter lowercase combinations that do not
            occur among those components.
    """
    imm = scimm.imm()
    context.imm = imm

    letters = "".join(map(chr, range(97, 97 + 26)))
    every_pair = {first + second for first in letters for second in letters}

    seen = set()
    for uids in (imm.divisions, imm.languages, imm.subdivisions):
        for uid in uids:
            seen.update(uid.split("-"))

    context.unused = sorted(every_pair - seen)
    context.used = sorted(part for part in seen if part.isalpha())
    context.atoz = letters
def setup_context(self, context):
    """Extend the parent's context with data-repository and imm details.

    Adds ``data_last_update_request`` (from ``data_repo.last_update()``),
    ``data_scm`` and ``imm_build_time`` to ``context``.
    """
    super().setup_context(context)

    last_update = data_repo.last_update()
    context.data_last_update_request = last_update
    context.data_scm = data_scm

    imm = scimm.imm()
    context.imm_build_time = imm.build_time
def path(self):
    """Return the location of this text under ``sc.text_dir``.

    Returns ``None`` when imm has no path for this uid/language pair.
    """
    relative = scimm.imm().text_path(self.uid, self.lang_code)
    return sc.text_dir / relative if relative else None
def setup_context(self, context):
    """Fill ``context`` with the data needed to render this text.

    Sets the uid, the matching sutta/subdivision/division (if any), the
    canonical flag, text data and title, the extracted text body, optional
    font class, discourse link/results, snippet, quote and language
    information, the root language, text references, next/prev navigation
    data and the ``uid``/``lang`` exports.

    Discourse data is only looked up for suttas, when a forum URL is
    configured, and when the text is English or in a root language.
    """
    # Kept from the original although nothing below references ``html``.
    from sc.tools import html

    m = self.content_regex.search(self.get_html())
    m.detach_string()  # Free up memory now.
    imm = scimm.imm()

    context.uid = self.uid
    context.sutta = imm.suttas.get(self.uid)
    context.subdivision = imm.subdivisions.get(self.uid)
    context.division = imm.divisions.get(self.uid)
    context.canonical = self.canonical
    context.textdata = textdata = imm.get_text_data(self.uid, self.lang_code)
    context.title = textdata.name if textdata else "?"
    context.text = m["content"]

    if self.lang_code in imm.font_data["css_font_class"]:
        context.font_class = imm.font_data["css_font_class"][self.lang_code]

    context.discourse_link = None
    context.discourse_results = None
    # The language test is parenthesised on purpose: without it, ``and``
    # binds tighter than ``or`` and every root-language text entered this
    # branch, even non-suttas or when no forum URL was configured.
    if (
        self.uid in imm.suttas
        and sc.config.discourse["forum_url"]
        and (self.lang_code == "en" or imm.languages[self.lang_code].isroot)
    ):
        forum_url = sc.config.discourse["forum_url"]
        # NOTE(review): replace(" ", " ") looks like a no-op as written;
        # one side was presumably a different whitespace character or an
        # escape before the file was copied -- confirm.
        query = "%22{}%22|{}".format(uid_to_acro(self.uid).replace(" ", " "), self.uid)
        context.discourse_link = "{}?search={}".format(forum_url, query)
        context.discourse_url = forum_url
        try:
            context.discourse_results = sc.search.discourse.search(self.uid)
        except Exception:
            # Best effort: the page still renders without forum results.
            logger.exception("Failed to retrieve discourse_results")

    if context.embed:
        context.text = self.shorter_text(context.text)
    context.has_quotes = "‘" in context.text or "“" in context.text

    try:
        context.snippet = self.get_snippet(context.text)
    except Exception as e:
        logger.error("Failed to generated snippet for {} ({})".format(self.uid, str(e)))
        context.snippet = ""

    # Eliminate newlines from Full-width-glyph languages like Chinese
    # because they convert into spaces when rendered.
    # TODO: This check should use 'language' table
    if self.lang_code in {"zh", "lzh", "ko", "jp"}:
        context.text = self.massage_cjk(context.text)
    context.lang_code = self.lang_code

    # Root language comes from whichever object the uid resolved to.
    context.root_lang = None
    if context.sutta:
        context.root_lang = context.sutta.lang
    elif context.division:
        context.root_lang = context.division.collection.lang
    elif context.subdivision:
        context.root_lang = context.subdivision.division.collection.lang

    context.text_refs = imm.get_text_refs(context.uid)

    nextprev = imm.get_next_prev(uid=self.uid, lang_uid=self.lang_code)
    context.next_data = nextprev["next"]
    context.prev_data = nextprev["prev"]

    context.exports["uid"] = self.uid
    context.exports["lang"] = self.lang_code
def default(*args, **kwargs):
    """Parse the path arguments and call the correct view function.

    We use the following (fairly flat) URL structure:

    <static_page>         : static page, e.g., help
    <pitaka_uid>          : pitaka view
    uids                  : uid overview
    <division_uid>        : division view or subdivision headings view
                            (depending on if the division supports full
                            division view)
    <division_uid>/full   : full division view (for those divisions that
                            support it)
    <subdivision_uid>     : subdivision view
    <sutta_uid>           : sutta parallel view
    <uid>/citation.txt    : plain-text citation for a sutta
    <lang>/<uid>          : sutta or translation text view
    <lang>/<uid>/<extra>  : same text, rendered as non-canonical (or as a
                            text selection when ``embed`` is requested)

    Old-style ``<uid>/<lang>`` text URLs, and ``zh`` URLs whose text only
    exists as ``lzh``, are answered with a 301 redirect to the new form.

    Raises ``cherrypy.NotFound`` when nothing matches.

    TODO: Collections?
    """
    imm = scimm.imm()
    full = len(args) == 2 and args[1] == 'full'
    if len(args) == 1 or full:
        uid = args[0]
        # Static Pages
        if uid in STATIC_PAGES:
            return InfoView(uid).render()
        if uid in imm.pitakas:
            return PitakaView(imm.pitakas[uid]).render()
        if uid == 'uids':
            return UidsView().render()
        # uids are looked up with '#' where the URL carries '_'.
        uid = uid.replace('_', '#')
        # Divisions
        division = imm.divisions.get(uid)
        if division:
            # Pitakas flagged always_full force the full view for any
            # division with more than one subdivision.
            if division.collection.pitaka.always_full:
                if len(division.subdivisions) > 1:
                    if division.collection.pitaka.always_full:
                        full = True
            if division.has_subdivisions():
                if full:
                    return DivisionView(division).render()
                else:
                    return SubdivisionHeadingsView(division).render()
            elif not full:
                return DivisionView(division).render()
        # Subdivisions
        subdivision = imm.subdivisions.get(uid)
        if subdivision:
            return SubdivisionView(subdivision).render()
        # Sutta Parallels
        sutta = imm.suttas.get(uid)
        if sutta:
            return ParallelView(sutta).render()
    elif len(args) >= 2:
        if args[1] == 'citation.txt':
            # Citation
            cherrypy.response.headers['Content-Type'] = "text/plain"
            sutta = imm.suttas.get(args[0])
            if sutta:
                return SuttaCitationView(sutta).render()
            else:
                raise cherrypy.NotFound()
        # Sutta or Translation Texts
        # New style urls have the language code first then the uid
        lang_code = args[0]
        uid = args[1]
        if not imm.text_exists(uid, lang_code):
            redirect = False
            if imm.text_exists(lang_code, uid):
                # The two path segments are simply swapped (old style).
                redirect = True
                uid, lang_code = lang_code, uid
            elif lang_code == 'zh' and imm.text_exists(uid, 'lzh'):
                # The text exists under 'lzh' rather than 'zh'.
                redirect = True
                lang_code = 'lzh'
            if redirect:
                # This is an old-style url, redirect to new-style url.
                if len(args) == 2:
                    new_url = '/{}/{}'.format(lang_code, uid)
                else:
                    new_url = '/{}/{}/{}'.format(lang_code, uid, args[2])
                # Don't be transparent, we want to keep things canonical
                # and also, use 301. This is a permanent change.
                raise cherrypy.HTTPRedirect(new_url, 301)
            else:
                raise cherrypy.NotFound()
        sutta = imm.suttas.get(uid)
        lang = imm.languages[lang_code]
        if len(args) == 3 and 'embed' in cherrypy.request.params:
            return TextSelectionView(uid, lang_code, args[2]).render()
        # Only the two-segment URL is the canonical address of a text.
        canonical = False if len(args) == 3 else True
        if 'raw' in kwargs:
            return TextRawView(uid, lang_code).render()
        if 'edit' in kwargs:
            # When the editor is disabled, fall through to the normal view.
            if sc.config.app['editor']:
                return EditView(sutta, lang_code, canonical).render()
        if sutta:
            return SuttaView(sutta, lang, canonical).render()
        else:
            return TextView(uid, lang_code, canonical).render()
    raise cherrypy.NotFound()
def external_urls():
    """Yield ``(url, translation)`` for every externally hosted translation.

    A translation is external when it has a URL that does not start with
    ``'/'``.  Translations without a URL are skipped; the original called
    ``startswith`` on the URL unconditionally and so raised
    ``AttributeError`` when the URL was ``None``.
    """
    imm = scimm.imm()
    for sutta in imm.suttas.values():
        for translation in sutta.translations:
            url = translation.url
            if url and not url.startswith('/'):
                yield (url, translation)
def setup_context(self, context):
    """Fill ``context`` with the data needed to render this text.

    Sets the uid, the matching sutta/subdivision/division (if any), the
    canonical flag, text data and title, the extracted text body, optional
    font class, discourse link/results, snippet, quote and language
    information, the root language, text references, next/prev navigation
    data and the ``uid``/``lang`` exports.

    Discourse data is only looked up for suttas, when a forum URL is
    configured, and when the text is English or in a root language.
    """
    # Kept from the original although nothing below references ``html``.
    from sc.tools import html

    m = self.content_regex.search(self.get_html())
    m.detach_string()  # Free up memory now.
    imm = scimm.imm()

    context.uid = self.uid
    context.sutta = imm.suttas.get(self.uid)
    context.subdivision = imm.subdivisions.get(self.uid)
    context.division = imm.divisions.get(self.uid)
    context.canonical = self.canonical
    context.textdata = textdata = imm.get_text_data(
        self.uid, self.lang_code)
    context.title = textdata.name if textdata else '?'
    context.text = m['content']

    if self.lang_code in imm.font_data['css_font_class']:
        context.font_class = imm.font_data['css_font_class'][
            self.lang_code]

    context.discourse_link = None
    context.discourse_results = None
    # The language test is parenthesised on purpose: without it, ``and``
    # binds tighter than ``or`` and every root-language text entered this
    # branch, even non-suttas or when no forum URL was configured.
    if (self.uid in imm.suttas
            and sc.config.discourse['forum_url']
            and (self.lang_code == 'en'
                 or imm.languages[self.lang_code].isroot)):
        forum_url = sc.config.discourse['forum_url']
        # NOTE(review): replace(' ', ' ') looks like a no-op as written,
        # and the format string has a single placeholder so ``self.uid``
        # is ignored -- confirm both against the intended query.
        query = '%22{}%20%22'.format(
            uid_to_acro(self.uid).replace(' ', ' '), self.uid)
        context.discourse_link = '{}search?q={}'.format(forum_url, query)
        context.discourse_url = forum_url
        try:
            context.discourse_results = sc.search.discourse.search(
                self.uid)
        except Exception:
            # Best effort: the page still renders without forum results.
            logger.exception('Failed to retrieve discourse_results')

    if context.embed:
        context.text = self.shorter_text(context.text)
    context.has_quotes = '‘' in context.text or '“' in context.text

    try:
        context.snippet = self.get_snippet(context.text)
    except Exception as e:
        logger.error('Failed to generated snippet for {} ({})'.format(
            self.uid, str(e)))
        context.snippet = ''

    # Eliminate newlines from Full-width-glyph languages like Chinese
    # because they convert into spaces when rendered.
    # TODO: This check should use 'language' table
    if self.lang_code in {'zh', 'lzh', 'ko', 'jp'}:
        context.text = self.massage_cjk(context.text)
    context.lang_code = self.lang_code

    # Root language comes from whichever object the uid resolved to.
    context.root_lang = None
    if context.sutta:
        context.root_lang = context.sutta.lang
    elif context.division:
        context.root_lang = context.division.collection.lang
    elif context.subdivision:
        context.root_lang = context.subdivision.division.collection.lang

    context.text_refs = imm.get_text_refs(context.uid)

    nextprev = imm.get_next_prev(uid=self.uid, lang_uid=self.lang_code)
    context.next_data = nextprev['next']
    context.prev_data = nextprev['prev']

    context.exports['uid'] = self.uid
    context.exports['lang'] = self.lang_code
def default(*args, **kwargs):
    """Parse the path arguments and call the correct view function.

    We use the following (fairly flat) URL structure:

    panel                   : the panel, cacheable for a week
    [<lang>/]<uid>/discussion : discussion view for a text
    <lang>[/<div_uid>]      : language view (unless a text exists for
                              that language/uid pair)
    <static_page>           : static page, e.g., help
    <pitaka_uid>            : pitaka view
    uids                    : uid overview
    <division_uid>          : division view or subdivision headings view
                              (depending on if the division supports
                              full division view)
    <division_uid>/full     : full division view (for those divisions
                              that support it)
    <subdivision_uid>       : subdivision view
    <sutta_uid>             : sutta parallel view
    <uid>/citation.txt      : plain-text citation for a sutta
    <lang>/<uid>            : sutta or translation text view
    <lang>/<uid>/<extra>    : same text, rendered as non-canonical (or as
                              a text selection when ``embed`` is given)

    Old-style URLs (``<uid>/<lang>``, ``zh`` for ``lzh`` texts, ``it…``
    uids now named ``iti…``, and uids stored inside another file) are
    answered with a 301 redirect to the canonical address.

    Raises ``cherrypy.NotFound`` when nothing matches, including when no
    path segments are given at all.

    TODO: Collections?
    """
    # Every branch below indexes into args; an empty path is a 404, not
    # an IndexError.
    if not args:
        raise cherrypy.NotFound()

    imm = scimm.imm()

    if args[0] == 'panel':
        max_age = 86400 * 7
        cherrypy.response.headers['cache-control'] = 'public, max-age={}'.format(max_age)
        return GenericView('panel', {}).render()

    full = len(args) == 2 and args[1] == 'full'

    # A uid must precede 'discussion'; the bare path '/discussion' is
    # left to the ordinary lookups below instead of failing on args[-2].
    if len(args) >= 2 and args[-1] == 'discussion':
        uid = args[-2]
        lang_code = None if len(args) == 2 else args[-3]
        result = TextDiscussionView(uid, lang_code, embed=kwargs.get('embed')).render()
        if result:
            # Permit these to be cached for 1 hour (note, we make
            # sure we have a valid result first!)
            cherrypy.response.headers['cache-control'] = 'public, max-age=3600'
        # NOTE(review): the source this was recovered from had lost its
        # indentation; this return is assumed to be unconditional --
        # confirm it was not meant to sit inside ``if result``.
        return result

    if len(args) <= 2:
        if args[0] in imm.languages:
            div_uid = None if len(args) == 1 else args[1]
            # Show the language view for the bare language, or for a
            # (sub)division that has no text of its own in that language.
            if div_uid is None or (
                    (div_uid in imm.divisions or div_uid in imm.subdivisions)
                    and not imm.tim.get(lang_uid=args[0], uid=div_uid)):
                return LanguageView(lang=args[0], div_uid=div_uid).render()

    if len(args) == 1 or full:
        uid = args[0]
        # Static Pages
        if uid in STATIC_PAGES:
            return InfoView('static/{}'.format(uid)).render()
        if uid in imm.pitakas:
            return PitakaView(imm.pitakas[uid]).render()
        if uid == 'uids':
            return UidsView().render()
        # uids are looked up with '#' where the URL carries '_'.
        uid = uid.replace('_', '#')
        # 'it<number>' uids have been renamed to 'iti<number>'.
        if regex.match(r'it\d+', uid):
            new_url = '/{}'.format(uid.replace('it', 'iti'))
            raise cherrypy.HTTPRedirect(new_url, 301)
        # Divisions
        division = imm.divisions.get(uid)
        if division:
            # Pitakas flagged always_full force the full view for any
            # division with more than one subdivision.
            if (division.collection.pitaka.always_full
                    and len(division.subdivisions) > 1):
                full = True
            if division.has_subdivisions():
                if full:
                    return DivisionView(division).render()
                else:
                    return SubdivisionHeadingsView(division).render()
            elif not full:
                return DivisionView(division).render()
        # Subdivisions
        subdivision = imm.subdivisions.get(uid)
        if subdivision:
            return SubdivisionView(subdivision).render()
        # Sutta Parallels
        sutta = imm.suttas.get(uid)
        if sutta:
            return ParallelView(sutta).render()
    elif len(args) >= 2:
        if args[1] == 'citation.txt':
            # Citation
            cherrypy.response.headers['Content-Type'] = "text/plain"
            sutta = imm.suttas.get(args[0])
            if sutta:
                return SuttaCitationView(sutta).render()
            else:
                raise cherrypy.NotFound()
        # Sutta or Translation Texts
        # New style urls have the language code first then the uid
        lang_code = args[0]
        uid = args[1]
        redirect = False
        bookmark = None
        if not imm.text_exists(uid, lang_code):
            if imm.text_exists(lang_code, uid):
                # The two path segments are simply swapped (old style).
                redirect = True
                uid, lang_code = lang_code, uid
            elif lang_code == 'zh' and imm.text_exists(uid, 'lzh'):
                redirect = True
                lang_code = 'lzh'
            elif uid.startswith('it') and imm.text_exists(uid.replace('it', 'iti'), lang_code):
                redirect = True
                uid = uid.replace('it', 'iti')
            else:
                raise cherrypy.NotFound()
        textinfo = imm.tim.get(uid, lang_code)
        # The text lives inside another uid's file: redirect there and
        # point at it with a fragment.
        if textinfo.file_uid != textinfo.uid:
            redirect = True
            uid = textinfo.file_uid
            if textinfo.bookmark:
                bookmark = textinfo.bookmark
            else:
                bookmark = textinfo.uid
        if redirect:
            # This is an old-style url, redirect to new-style url.
            if len(args) == 2:
                new_url = '/{}/{}'.format(lang_code, uid)
            else:
                new_url = '/{}/{}/{}'.format(lang_code, uid, args[2])
            if bookmark:
                new_url += '#' + bookmark
            # Don't be transparent, we want to keep things canonical
            # and also, use 301. This is a permanent change.
            raise cherrypy.HTTPRedirect(new_url, 301)
        sutta = imm.suttas.get(uid)
        lang = imm.languages[lang_code]
        if len(args) == 3 and 'embed' in cherrypy.request.params:
            return TextSelectionView(uid, lang_code, args[2]).render()
        # Only the two-segment URL is the canonical address of a text.
        canonical = len(args) != 3
        if 'raw' in kwargs:
            return TextRawView(uid, lang_code).render()
        if 'edit' in kwargs:
            # When the editor is disabled, fall through to the normal view.
            if sc.config.app['editor']:
                return EditView(sutta, lang_code, canonical).render()
        if sutta:
            return SuttaView(sutta, lang, canonical).render()
        else:
            return TextView(uid, lang_code, canonical).render()
    raise cherrypy.NotFound()