def get_active_lists_in_random(limit=20, preload=True):
    """Return up to `limit` random active lists as model objects.

    Results come through a memoized wrapper around
    _get_active_lists_in_random; timeout=0 means the cached value does
    not expire on its own.
    """
    memoized = cache.memcache_memoize(
        _get_active_lists_in_random,
        key_prefix="lists.get_active_lists_in_random",
        timeout=0,
    )
    raw_lists = memoized(limit=limit, preload=preload)
    # The memoized call yields raw dicts; wrap each one in a site model.
    return [web.ctx.site.new(xlist['key'], xlist) for xlist in raw_lists]
def cached_work_authors_and_subjects(work_id):
    """Half-day-cached lookup of a work's authors and related subjects.

    If the underlying lookup raises AttributeError, an empty result is
    returned instead of propagating the error.
    """
    try:
        memoized = cache.memcache_memoize(
            get_work_authors_and_related_subjects,
            'works_authors_and_subjects',
            timeout=dateutil.HALF_DAY_SECS,
        )
        return memoized(work_id)
    except AttributeError:
        # NOTE(review): key is 'subject' (singular) — presumably what
        # consumers expect; verify against callers.
        return {'authors': [], 'subject': []}
def get_cached_featured_subjects():
    """Featured subjects for the homepage, cached per-language for an hour."""
    cache_key = f"home.featured_subjects.{web.ctx.lang}"
    memoized = cache.memcache_memoize(
        get_featured_subjects,
        cache_key,
        timeout=dateutil.HOUR_SECS,
        # prethread carries request state over to the background recompute thread.
        prethread=caching_prethread(),
    )
    return memoized()
def square_memoize(self):
    """Build a memcache-memoized square function backed by a mock client."""
    def square(x):
        return x * x

    memoized = cache.memcache_memoize(square, key_prefix="square")
    # Swap in a fake memcache client so tests run without a real server.
    memoized._memcache = mock_memcache.Client([])
    return memoized
def get_cached_homepage():
    """Return the (possibly cached) homepage.

    The cache key varies by language and by the 'pd' cookie so that
    different audiences see the appropriate cached rendering.
    """
    timeout = 5 * dateutil.MINUTE_SECS
    lang = web.ctx.lang
    key = "home.homepage." + lang
    if web.cookies().get('pd', False):
        key += '.pd'

    # Because of caching, memcache may call `get_homepage` on another
    # thread, where all the web.* request state is dummy. The closure
    # below is built on the main thread (where web.ctx is correct) and
    # replays the captured state on the worker thread before the
    # computation runs.
    def prethread():
        # web.ctx.lang is undefined on the new thread, so capture it here.
        captured_lang = web.ctx.lang

        def main():
            # Leaving this in since this is a bit strange, but you can see it
            # clearly in action with this debug line:
            # web.debug(f'XXXXXXXXXXX web.ctx.lang={web.ctx.get("lang")}; {lang=}')
            delegate.fakeload()
            web.ctx.lang = captured_lang

        return main

    return cache.memcache_memoize(
        get_homepage, key, timeout=timeout, prethread=prethread()
    )()
def generic_carousel(query=None, subject=None, work_id=None, _type=None,
                     sorts=None, limit=None, timeout=None):
    """Fetch carousel books from the IA-backed cache.

    An empty cached value is treated as stale: the cache entry is
    refreshed via .update() before returning.
    """
    memoized = cache.memcache_memoize(
        get_ia_carousel_books,
        'home.ia_carousel_books',
        timeout=timeout or cache.DEFAULT_CACHE_LIFETIME,
    )
    kwargs = dict(query=query, subject=subject, work_id=work_id,
                  _type=_type, sorts=sorts, limit=limit)
    books = memoized(**kwargs)
    if not books:
        # update() recomputes, recaches, and returns (value, timestamp).
        books = memoized.update(**kwargs)[0]
    return storify(books) if books else books
def get_cached_recently_modified_lists(limit, offset=0):
    """Memoized fetch of the most recently modified lists.

    timeout=0 disables expiry (previously dateutil.HALF_HOUR_SECS).
    """
    memoized = cache.memcache_memoize(
        _get_recently_modified_lists,
        key_prefix="lists.get_recently_modified_lists",
        timeout=0,
    )
    return memoized(limit, offset=offset)
def cached_work_authors_and_subjects(work_id):
    """Return a work's authors and related subjects via memcache.

    Falls back to an empty result when the underlying lookup raises
    AttributeError.
    """
    try:
        return cache.memcache_memoize(
            get_work_authors_and_related_subjects,
            'works_authors_and_subjects',
            timeout=dateutil.HALF_DAY_SECS,
        )(work_id)
    except AttributeError:
        # NOTE(review): 'subject' is singular here — confirm consumers agree.
        return {'authors': [], 'subject': []}
def get_cached_relatedcarousels_component(*args, **kwargs):
    """Cached render of the related-carousels component.

    A falsy cached value triggers an immediate recompute via .update().
    """
    memoized = cache.memcache_memoize(
        _get_relatedcarousels_component,
        "book.bookspage.component.relatedcarousels",
        timeout=dateutil.HALF_DAY_SECS,
    )
    cached = memoized(*args, **kwargs)
    if cached:
        return cached
    # update() returns (value, timestamp); we only want the value.
    return memoized.update(*args, **kwargs)[0]
def get_active_lists_in_random(limit=20, preload=True):
    """Return `limit` randomly chosen active lists as model objects."""
    fetch = cache.memcache_memoize(
        _get_active_lists_in_random,
        key_prefix="lists.get_active_lists_in_random",
        timeout=0,  # never expires on its own
    )
    # Convert the raw dicts returned by the cache into site models.
    return [
        web.ctx.site.new(xlist['key'], xlist)
        for xlist in fetch(limit=limit, preload=preload)
    ]
def rewrite_list_query(q, page, offset, limit):
    """Takes a solr query. If it doesn't contain a /lists/ key, then
    return the query, unchanged, exactly as it entered the
    function. If it does contain a lists key, then use the pagination
    information to fetch the right block of keys from the
    lists_editions and lists_works API and then feed these editions resulting
    work keys into solr with the form key:(OL123W, OL234W). This way, we
    can use the solr API to fetch list works and render them in
    carousels in the right format.
    """
    def fetch_book_keys(key, offset, limit):
        # make cacheable: the background thread has no web env of its own
        if 'env' not in web.ctx:
            delegate.fakeload()
        lst = web.ctx.site.get(key)
        return lst.get_book_keys(offset=offset, limit=limit)

    if '/lists/' not in q:
        return q, page, offset, limit

    # we're making an assumption that q is just a list key
    book_keys = cache.memcache_memoize(
        fetch_book_keys, "search.list_books_query", timeout=5 * 60
    )(q, offset, limit)
    q = f"key:({' OR '.join(book_keys)})"
    # The offset was already consumed above to select the discrete block
    # of work IDs, so the solr query itself must not paginate again.
    return q, 1, 0, limit
def get_cached_homepage():
    """Cached homepage keyed by language and the 'pd' cookie; 5-minute TTL."""
    timeout = 5 * dateutil.MINUTE_SECS
    lang = web.ctx.get("lang", "en")
    key = "home.homepage." + lang
    if web.cookies().get('pd', False):
        key += '.pd'
    return cache.memcache_memoize(get_homepage, key, timeout=timeout)()
def generic_carousel(query=None, subject=None, work_id=None, _type=None,
                     sorts=None, limit=None, timeout=None):
    """Cached lookup of IA carousel books, storified for template use."""
    memoized = cache.memcache_memoize(
        get_ia_carousel_books,
        'home.ia_carousel_books',
        timeout=timeout or cache.DEFAULT_CACHE_LIFETIME,
    )
    books = memoized(query=query, subject=subject, work_id=work_id,
                     _type=_type, sorts=sorts, limit=limit)
    return storify(books)
def test_encode_args(self):
    """encode_args should render args as compact JSON-ish key fragments."""
    m = cache.memcache_memoize(None, key_prefix="foo")
    cases = [
        ([], ''),
        (["a"], '"a"'),
        ([1], '1'),
        (["a", 1], '"a",1'),
        ([{"a": 1}], '{"a":1}'),
        ([["a", 1]], '["a",1]'),
    ]
    for args, expected in cases:
        assert m.encode_args(args) == expected
def test_random_string(self):
    """_random_string(n) returns a str of exactly n characters."""
    m = cache.memcache_memoize(None, "foo")
    assert m._random_string(0) == ""
    for length in (1, 10):
        s = m._random_string(length)
        assert isinstance(s, str)
        assert len(s) == length
def get_cached_homepage():
    """Cached homepage keyed by language + 'pd' cookie, using caching_prethread."""
    timeout = 5 * dateutil.MINUTE_SECS
    key = "home.homepage." + web.ctx.lang
    if web.cookies().get('pd', False):
        key += '.pd'
    return cache.memcache_memoize(
        get_homepage, key, timeout=timeout, prethread=caching_prethread()
    )()
def cached_get_amazon_metadata(*args, **kwargs):
    """If the cached data is `None`, likely a 503 throttling occurred on
    Amazon's side. Try again to fetch the value instead of using the
    cached value. It may 503 again, in which case the next access of
    this page will trigger another re-cache. If the amazon API call
    succeeds but the book has no price data, then {"price": None} will
    be cached as to not trigger a re-cache (only the value `None` will
    cause re-cache)
    """
    # cache controller for "upstream.code._get_amazon_metadata"
    memoized = cache.memcache_memoize(
        _get_amazon_metadata,
        "upstream.code._get_amazon_metadata",
        timeout=dateutil.WEEK_SECS,
    )
    result = memoized(*args, **kwargs)
    if result is None:
        # None signals throttling: bypass the cache and recompute now.
        # update() returns (value, timestamp).
        result = memoized.update(*args, **kwargs)[0]
    return result
def get_recently_modified_lists(limit, offset=0): """Returns the most recently modified lists as list of dictionaries. This function is memoized for better performance. """ # this function is memozied with background=True option. # web.ctx must be initialized as it won't be avaiable to the background thread. if 'env' not in web.ctx: delegate.fakeload() keys = web.ctx.site.things({"type": "/type/list", "sort": "-last_modified", "limit": limit, "offset": offset}) lists = web.ctx.site.get_many(keys) return [lst.dict() for lst in lists] get_recently_modified_lists = cache.memcache_memoize(get_recently_modified_lists, key_prefix="get_recently_modified_lists", timeout=5*60) def _preload_lists(lists): """Preloads all referenced documents for each list. List can be either a dict of a model object. """ keys = set() for xlist in lists: if not isinstance(xlist, dict): xlist = xlist.dict() owner = xlist['key'].rsplit("/lists/", 1)[0] keys.add(owner) for seed in xlist.get("seeds", []):
return { 'url': product_url[0] if product_url else None, 'price': price, 'qlt': qlt } return result except urllib2.HTTPError as e: try: response = e.read() except simplejson.decoder.JSONDecodeError: return {'error': e.read(), 'code': e.code} return simplejson.loads(response) cached_get_betterworldbooks_metadata = cache.memcache_memoize( _get_betterworldbooks_metadata, "upstream.code._get_betterworldbooks_metadata", timeout=dateutil.HALF_DAY_SECS) class DynamicDocument: """Dynamic document is created by concatinating various rawtext documents in the DB. Used to generate combined js/css using multiple js/css files in the system. """ def __init__(self, root): self.root = web.rstrips(root, '/') self.docs = None self._text = None self.last_modified = None def update(self): keys = web.ctx.site.things({'type': '/type/rawtext', 'key~': self.root + '/*'}) docs = sorted(web.ctx.site.get_many(keys), key=lambda doc: doc.key) if docs:
value='false', limit=limit) identifiers = [ doc['identifier'] for k, doc in items if 'identifier' in doc ] keys = web.ctx.site.things({"type": "/type/edition", "ocaid": identifiers}) books = web.ctx.site.get_many(keys) return [ format_book_data(book) for book in books if book.type.key == '/type/edition' ] # cache the results of get_returncart in memcache for 60 sec get_returncart = cache.memcache_memoize(get_returncart, "home.get_returncart", timeout=60) @public def readonline_carousel(cssid='classics_carousel', pixel="CarouselClassics"): """Return template code for books pulled from search engine. TODO: If problems, use stock list. """ try: data = random_ebooks() if len(data) > 120: data = random.sample(data, 120) return render_template("books/carousel", storify(data), id=cssid,
def get_cached_homepage():
    """Return the homepage, cached in memcache for five minutes."""
    timeout = 5 * dateutil.MINUTE_SECS
    return cache.memcache_memoize(get_homepage, "home.homepage", timeout=timeout)()
query='has_fulltext:true -public_scan_b:false', rows=limit, sort=sort, fields=[ 'has_fulltext', 'key', 'ia', "title", "cover_edition_key", "author_key", "author_name", ]) return [format_work_data(doc) for doc in result.get('docs', []) if doc.get('ia')] # cache the results of random_ebooks in memcache for 15 minutes random_ebooks = cache.memcache_memoize(random_ebooks, "home.random_ebooks", timeout=15*60) def format_list_editions(key): """Formats the editions of a list suitable for display in carousel. """ if 'env' not in web.ctx: delegate.fakeload() seed_list = web.ctx.site.get(key) if not seed_list: return [] editions = {} for seed in seed_list.seeds: if not isinstance(seed, basestring): if seed.type.key == "/type/edition":
'ia', "title", "cover_edition_key", "author_key", "author_name", ]) return [ format_work_data(doc) for doc in result.get('docs', []) if doc.get('ia') ] # cache the results of random_ebooks in memcache for 15 minutes random_ebooks = cache.memcache_memoize(random_ebooks, "home.random_ebooks", timeout=15 * 60) def format_list_editions(key): """Formats the editions of a list suitable for display in carousel. """ if 'env' not in web.ctx: delegate.fakeload() seed_list = web.ctx.site.get(key) if not seed_list: return [] editions = {} for seed in seed_list.seeds:
def cached_work_authors_and_subjects(work_id):
    """Half-day-cached lookup of a work's authors and related subjects."""
    memoized = cache.memcache_memoize(
        get_work_authors_and_related_subjects,
        'works_authors_and_subjects',
        timeout=dateutil.HALF_DAY_SECS,
    )
    return memoized(work_id)
def get_cached_homepage():
    """Cached homepage keyed per-language; five-minute TTL."""
    timeout = 5 * dateutil.MINUTE_SECS
    lang = web.ctx.get("lang", "en")
    return cache.memcache_memoize(
        get_homepage, "home.homepage." + lang, timeout=timeout
    )()
def get_cached_featured_subjects():
    """Featured subjects for the homepage, cached for an hour."""
    memoized = cache.memcache_memoize(
        get_featured_subjects,
        "home.featured_subjects",
        timeout=dateutil.HOUR_SECS,
    )
    return memoized()
def get_cached_sponsorable_editions():
    """Hour-cached list of sponsorable editions, storified for templates."""
    memoized = cache.memcache_memoize(
        get_cachable_sponsorable_editions,
        "books.sponsorable_editions",
        timeout=dateutil.HOUR_SECS,
    )
    return storify(memoized())
delegate.fakeload() site = site or web.ctx.site keys = site.things(query={ "type": "/type/library", "limit": 1000, "status": "approved" }) libraries = site.get_many(sorted(keys)) return [lib.dict() for lib in libraries] # cache the result for an hour in memcache _get_libraries_memoized = cache.memcache_memoize(_get_libraries, "inlibrary._get_libraries", timeout=60 * 60) def _get_default_library(): """Returns the default library when the IP doesn't fall in any of the registered libraries. This is used to enable lending world-wide by making everyone else part of "Open Library of Richmond". """ libraries = _get_libraries_memoized() d = dict((lib['key'], lib) for lib in libraries) return d.get("/libraries/openlibrary_of_richmond") @cache.memoize(engine="memcache", key=lambda: "inlibrary.libraries-hash") def _get_libraries_hash():
def get_cached_featured_subjects():
    """Featured subjects for the homepage, cached for one hour."""
    memoized = cache.memcache_memoize(
        get_featured_subjects,
        "home.featured_subjects",
        timeout=cache.HOUR,
    )
    return memoized()
random.shuffle(data) data = data[:limit] return render_template("books/carousel", storify(data), id="returncart_carousel") def get_returncart(limit): if 'env' not in web.ctx: delegate.fakeload() items = web.ctx.site.store.items(type='ebook', name='borrowed', value='false', limit=limit) identifiers = [doc['identifier'] for k, doc in items if 'identifier' in doc] keys = web.ctx.site.things({"type": "/type/edition", "ocaid": identifiers}) books = web.ctx.site.get_many(keys) return [format_book_data(book) for book in books if book.type.key == '/type/edition'] # cache the results of get_returncart in memcache for 60 sec get_returncart = cache.memcache_memoize(get_returncart, "home.get_returncart", timeout=60) @public def readonline_carousel(id="read-carousel"): try: data = random_ebooks() if len(data) > 120: data = random.sample(data, 120) return render_template("books/carousel", storify(data), id=id) except Exception: logger.error("Failed to compute data for readonline_carousel", exc_info=True) return None def random_ebooks(limit=2000): solr = search.get_works_solr() sort = "edition_count desc"
except Exception: return {'price': ''} used = product._safe_get_element_text('OfferSummary.LowestUsedPrice.Amount') new = product._safe_get_element_text('OfferSummary.LowestNewPrice.Amount') price, qlt = (None, None) if used and new: price, qlt = (used, 'used') if int(used) < int(new) else (new, 'new') elif used or new: price, qlt = (used, 'used') if used else (new, 'new') return { 'price': "$%s (%s)" % ('{:00,.2f}'.format(int(price)/100.), qlt) if price and qlt else '' } cached_get_amazon_metadata = cache.memcache_memoize( _get_amazon_metadata, "upstream.code._get_amazon_metadata", timeout=HALF_DAY) @public def get_betterworldbooks_metadata(isbn): try: isbn = normalize_isbn(isbn) if isbn: return _get_betterworldbooks_metadata(isbn) except Exception: return {} def _get_betterworldbooks_metadata(isbn): url = BETTERWORLDBOOKS_API_URL + isbn try: req = urllib2.Request(url) f = urllib2.urlopen(req)
def get_cached_featured_subjects():
    """Hour-cached featured subjects for the homepage."""
    fetch = cache.memcache_memoize(
        get_featured_subjects, "home.featured_subjects", timeout=cache.HOUR
    )
    return fetch()
def cached_work_authors_and_subjects(work_id):
    """Cached lookup of a work's authors and related subjects."""
    fetch = cache.memcache_memoize(
        get_work_authors_and_related_subjects,
        'works_authors_and_subjects',
        timeout=CACHE_WORKS_DURATION,
    )
    return fetch(work_id)
price = _price qlt = 'new' market_price = ('$' + market_price[0]) if market_price else None return betterworldbooks_fmt(isbn, qlt, price, market_price) def betterworldbooks_fmt(isbn, qlt=None, price=None, market_price=None): """Defines a standard interface for returning bwb price info :param str isbn: :param str qlt: Quality of the book, e.g. "new", "used" :param str price: Price of the book as a decimal str, e.g. "4.28" :rtype: dict """ price_fmt = "$%s (%s)" % (price, qlt) if price and qlt else None return { 'url': BWB_AFFILIATE_LINK % isbn, 'isbn': isbn, 'market_price': market_price, 'price': price_fmt, 'price_amt': price, 'qlt': qlt } cached_get_betterworldbooks_metadata = cache.memcache_memoize( _get_betterworldbooks_metadata, "upstream.code._get_betterworldbooks_metadata", timeout=dateutil.HALF_DAY_SECS)
def get_cached_recently_modified_lists(limit, offset=0):
    """Memoized fetch of recently modified lists.

    timeout=0 disables expiry (previously dateutil.HALF_HOUR_SECS).
    """
    fetch = cache.memcache_memoize(
        _get_recently_modified_lists,
        key_prefix="lists.get_recently_modified_lists",
        timeout=0,
    )
    return fetch(limit, offset=offset)
def test_generate_key_prefix(self):
    """Default key_prefix is derived from the wrapped function's name."""
    def foo():
        pass

    memoized = cache.memcache_memoize(foo)
    assert memoized.key_prefix[:4] == "foo_"
# Handle error gracefully. logging.getLogger("openlibrary").error("Failed to fetch blog feeds", exc_info=True) return [] finally: stats.end() def parse_item(item): pubdate = datetime.datetime.strptime(item.find("pubDate").text, '%a, %d %b %Y %H:%M:%S +0000').isoformat() return dict( title=item.find("title").text, link=item.find("link").text, pubdate=pubdate ) return [parse_item(item) for item in tree.findall("//item")] _get_blog_feeds = cache.memcache_memoize(_get_blog_feeds, key_prefix="upstream.get_blog_feeds", timeout=5*60) @public def get_blog_feeds(): def process(post): post = web.storage(post) post.pubdate = parse_datetime(post.pubdate) return post return [process(post) for post in _get_blog_feeds()] class Request: path = property(lambda self: web.ctx.path) home = property(lambda self: web.ctx.home) domain = property(lambda self: web.ctx.host) @property
Fetches metadata by querying the archive.org metadata API, without local cacheing. :param str itemid: :param bool cache: if false, requests uncached metadata from archive.org :param bool only_metadata: whether to get the metadata without any processing :rtype: dict """ url = '%s/metadata/%s' % (IA_BASE_URL, web.safestr(itemid.strip())) params = {} if cache: params['dontcache'] = 1 full_json = get_api_response(url, params) return extract_item_metadata(full_json) if only_metadata else full_json get_metadata = cache.memcache_memoize(get_metadata_direct, key_prefix='ia.get_metadata', timeout=5 * cache.MINUTE_SECS) def extract_item_metadata(item_json): metadata = process_metadata_dict(item_json.get('metadata', {})) if metadata: # if any of the files is access restricted, consider it as # an access-restricted item. files = item_json.get('files', []) metadata['access-restricted'] = any( f.get('private') == 'true' for f in files) # remember the filenames to construct download links metadata['_filenames'] = [f['name'] for f in files] return metadata
finally: stats.end() def parse_item(item): pubdate = datetime.datetime.strptime( item.find("pubDate").text, '%a, %d %b %Y %H:%M:%S +0000').isoformat() return dict(title=item.find("title").text, link=item.find("link").text, pubdate=pubdate) return [parse_item(item) for item in tree.findall("//item")] _get_blog_feeds = cache.memcache_memoize(_get_blog_feeds, key_prefix="upstream.get_blog_feeds", timeout=5 * 60) def get_donation_include(): return urllib2.urlopen("https://archive.org/includes/donate.php").read() get_donation_include = cache.memcache_memoize( get_donation_include, key_prefix="upstream.get_donation_include", timeout=60) @public def get_blog_feeds():
def get_recently_modified_lists(limit, offset=0): """Returns the most recently modified lists as list of dictionaries. This function is memoized for better performance. """ # this function is memozied with background=True option. # web.ctx must be initialized as it won't be avaiable to the background thread. if 'env' not in web.ctx: delegate.fakeload() keys = web.ctx.site.things({"type": "/type/list", "sort": "-last_modified", "limit": limit, "offset": offset}) lists = web.ctx.site.get_many(keys) return [list.dict() for list in lists] get_recently_modified_lists = cache.memcache_memoize(get_recently_modified_lists, key_prefix="get_recently_modified_lists", timeout=5*60) def _preload_lists(lists): """Preloads all referenced documents for each list. List can be either a dict of a model object. """ keys = set() for xlist in lists: if not isinstance(xlist, dict): xlist = xlist.dict() owner = xlist['key'].rsplit("/lists/", 1)[0] keys.add(owner) for seed in xlist.get("seeds", []):
finally: stats.end() def parse_item(item): pubdate = datetime.datetime.strptime( item.find("pubDate").text, '%a, %d %b %Y %H:%M:%S +0000').isoformat() return dict(title=item.find("title").text, link=item.find("link").text, pubdate=pubdate) return [parse_item(item) for item in tree.findall("//item")] _get_blog_feeds = cache.memcache_memoize(_get_blog_feeds, key_prefix="upstream.get_blog_feeds", timeout=5 * 60) def get_donation_include(include): web_input = web.input() # The following allows archive.org staff to test banners without # needing to reload openlibrary services: dev_host = web_input.pop("dev_host", "") # e.g. `www-user` if dev_host and re.match('^[a-zA-Z0-9-.]+$', dev_host): dev_host += "." # e.g. `www-user.` url_banner_source = "https://%sarchive.org/includes/donate.php" % dev_host param = '?platform=ol' if 'ymd' in web_input: param += '&ymd=' + web_input.ymd
# Handle error gracefully. logging.getLogger("openlibrary").error("Failed to fetch blog feeds", exc_info=True) return [] finally: stats.end() def parse_item(item): pubdate = datetime.datetime.strptime(item.find("pubDate").text, '%a, %d %b %Y %H:%M:%S +0000').isoformat() return dict( title=item.find("title").text, link=item.find("link").text, pubdate=pubdate ) return [parse_item(item) for item in tree.findall("//item")] _get_blog_feeds = cache.memcache_memoize(_get_blog_feeds, key_prefix="upstream.get_blog_feeds", timeout=5*60) def get_donation_include(): return urllib2.urlopen("https://archive.org/includes/donate.php").read() get_donation_include = cache.memcache_memoize(get_donation_include, key_prefix="upstream.get_donation_include", timeout=60) @public def get_blog_feeds(): def process(post): post = web.storage(post) post.pubdate = parse_datetime(post.pubdate) return post return [process(post) for post in _get_blog_feeds()] class Request:
# web.ctx must be initialized as it won't be avaiable to the background thread. if 'env' not in web.ctx: delegate.fakeload() subjects = {} FEATURED_SUBJECTS = [ 'art', 'science_fiction', 'fantasy', 'biographies', 'recipes', 'romance', 'textbooks', 'children', 'history', 'medicine', 'religion', 'mystery_and_detective_stories', 'plays', 'music', 'science' ] for subject in FEATURED_SUBJECTS: subjects[subject] = get_subject('/subjects/' + subject, sort='edition_count') return subjects # cache the results in memcache for 1 hour get_featured_subjects = cache.memcache_memoize( get_featured_subjects, "get_featured_subjects", timeout=ONE_HOUR) class book_availability(delegate.page): path = "/availability/v2" def GET(self): i = web.input(type='', ids='') id_type = i.type ids = i.ids.split(',') result = self.get_book_availability(id_type, ids) return delegate.RawText(simplejson.dumps(result), content_type="application/json") def POST(self): i = web.input(type='') j = simplejson.loads(web.data())
if 'env' not in web.ctx: delegate.fakeload() versions = web.ctx.site.versions(query) for v in versions: v.created = v.created.isoformat() v.author = v.author and v.author.key # XXX-Anand: hack to avoid too big data to be stored in memcache. # v.changes is not used and it contrinutes to memcache bloat in a big way. v.changes = '[]' return versions _get_changes_v1_raw = cache.memcache_memoize(_get_changes_v1_raw, key_prefix="upstream._get_changes_v1_raw", timeout=10*60) def get_changes_v1(query, revision=None): # uses the cached function _get_changes_v1_raw to get the raw data # and processes to before returning. def process(v): v = web.storage(v) v.created = parse_datetime(v.created) v.author = v.author and web.ctx.site.get(v.author, lazy=True) return v return [process(v) for v in _get_changes_v1_raw(query, revision)] def _get_changes_v2_raw(query, revision=None): """Returns the raw recentchanges response.