def location_by_ips(ips):
    ips, is_single = tup(ips, ret_is_single=True)
    location_by_ip = sgm(g.cache, ips, miss_fn=_location_by_ips,
                         prefix="location_by_ip", time=GEOIP_CACHE_TIME)
    if is_single and location_by_ip:
        return location_by_ip[ips[0]]
    else:
        return location_by_ip
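Every snippet in this section funnels through sgm() ("simple get_multi"), a read-through cache helper. The sketch below shows the contract the call sites assume: a get_multi over prefixed keys, an optional found_fn hook that can veto cache hits and report stats, and a miss_fn that computes and backfills whatever was not cached. This is a minimal reconstruction, not the exact r2.lib.cache implementation; the stale and stat_subname keywords are shown only as pass-throughs and are assumptions about the cache API.

def sgm(cache, keys, miss_fn, prefix='', time=0, stale=False,
        found_fn=None, stat_subname=None):
    """Sketch of a read-through get_multi; returns {key: value}."""
    # map the prefixed cache keys back to the caller's keys
    s_keys = dict((prefix + str(key), key) for key in keys)

    ret = {}
    cached = cache.get_multi(s_keys.keys(), stale=stale)  # assumed signature
    for s_key, value in cached.iteritems():
        ret[s_keys[s_key]] = value

    still_need = set(s_keys.itervalues()) - set(ret)
    if found_fn:
        # hook may delete "bad" hits from ret and add them back to
        # still_need (see reject_bad_partials below), and report stats
        found_fn(ret, still_need)

    if miss_fn and still_need:
        # miss_fn takes the set of missing keys and returns {key: value}
        calculated = miss_fn(still_need)
        ret.update(calculated)
        cache.set_multi(
            dict((prefix + str(key), value)
                 for key, value in calculated.iteritems()),
            time=time)

    return ret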
def _byID(cls, ids):
    ids, is_single = tup(ids, True)

    if not len(ids):
        if is_single:
            raise InvariantException("whastis?")
        else:
            return {}

    # all keys must be strings or directly convertible to strings
    assert all(isinstance(_id, basestring) and str(_id) for _id in ids)

    def lookup(l_ids):
        rows = cls.cf.multiget(l_ids, column_count=max_column_count)

        l_ret = {}
        for t_id, row in rows.iteritems():
            t = cls._from_serialized_columns(t_id, row)
            l_ret[t._id] = t

        return l_ret

    ret = cache.sgm(thing_cache, ids, lookup, prefix=cls._cache_prefix())

    if is_single and not ret:
        raise NotFound("<%s %r>" % (cls.__name__, ids[0]))
    elif is_single:
        assert len(ret) == 1
        return ret.values()[0]

    return ret
def get_live_promotions(sr_names):
    sanitized_names = [SPECIAL_NAMES.get(name, name) for name in sr_names]
    promos_by_sanitized_name = sgm(
        g.cache, sanitized_names, miss_fn=_get_live_promotions,
        prefix="live_promotions", time=60, stale=True)
    promos_by_srname = {
        REVERSED_NAMES.get(name, name): val
        for name, val in promos_by_sanitized_name.iteritems()
    }
    return itertools.chain.from_iterable(promos_by_srname.itervalues())
def _byID(cls, ids, data=False, return_dict=True, extra_props=None):
    ids, single = tup(ids, True)
    prefix = thing_prefix(cls.__name__)

    def items_db(ids):
        items = cls._get_item(cls._type_id, ids)
        for i in items.keys():
            items[i] = cls._build(i, items[i])
        return items

    bases = sgm(cache, ids, items_db, prefix)

    # check to see if we found everything we asked for
    if any(i not in bases for i in ids):
        missing = [i for i in ids if i not in bases]
        raise NotFound, '%s %s' % (cls.__name__, missing)

    if data:
        need = [v for v in bases.itervalues() if not v._loaded]
        if need:
            cls._load_multi(need)

    # e.g. add the sort prop
    if extra_props:
        for _id, props in extra_props.iteritems():
            for k, v in props.iteritems():
                bases[_id].__setattr__(k, v, False)

    if single:
        return bases[ids[0]]
    elif return_dict:
        return bases
    else:
        return filter(None, (bases.get(i) for i in ids))
def _byID(cls, ids, data=False, return_dict=True, extra_props=None,
          stale=False, ignore_missing=False):
    ids, single = tup(ids, True)
    prefix = thing_prefix(cls.__name__)

    if not all(x <= tdb.MAX_THING_ID for x in ids):
        raise NotFound('huge thing_id in %r' % ids)

    def count_found(ret, still_need):
        cls._cache.stats.cache_report(
            hits=len(ret), misses=len(still_need),
            cache_name='sgm.%s' % cls.__name__)

    if not cls._cache.stats:
        count_found = None

    def items_db(ids):
        items = cls._get_item(cls._type_id, ids)
        for i in items.keys():
            items[i] = cls._build(i, items[i])
        return items

    bases = sgm(cls._cache, ids, items_db, prefix, stale=stale,
                found_fn=count_found)

    # Check to see if we found everything we asked for
    missing = []
    for i in ids:
        if i not in bases:
            missing.append(i)
        elif bases[i] and bases[i]._id != i:
            g.log.error("thing.py: Doppelganger on byID: %s got %s for %s" %
                        (cls.__name__, bases[i]._id, i))
            bases[i] = items_db([i]).values()[0]
            bases[i]._cache_myself()

    if missing and not ignore_missing:
        raise NotFound, '%s %s' % (cls.__name__, missing)
    for i in missing:
        ids.remove(i)

    if data:
        need = []
        for v in bases.itervalues():
            if not v._loaded:
                need.append(v)
        if need:
            cls._load_multi(need)

    if extra_props:
        for _id, props in extra_props.iteritems():
            for k, v in props.iteritems():
                bases[_id].__setattr__(k, v, False)

    if single:
        return bases[ids[0]] if ids else None
    elif return_dict:
        return bases
    else:
        return filter(None, (bases.get(i) for i in ids))
def normalized_hot(sr_ids, obey_age_limit=True, ageweight=None):
    timer = g.stats.get_timer("normalized_hot")
    timer.start()

    if not sr_ids:
        return []

    if ageweight and feature.is_enabled("scaled_normalized_hot"):
        tuples_by_srid = get_hot_tuples(sr_ids, ageweight=ageweight)
    else:
        tuples_by_srid = sgm(g.cache, sr_ids, miss_fn=get_hot_tuples,
                             prefix='normalized_hot',
                             time=g.page_cache_time)

    if obey_age_limit:
        cutoff = datetime.now(g.tz) - timedelta(days=g.HOT_PAGE_AGE)
        oldest = epoch_seconds(cutoff)
    else:
        oldest = 0.

    merged = heapq.merge(*tuples_by_srid.values())
    generator = (link_name for ehot, hot, link_name, timestamp in merged
                 if timestamp > oldest)
    ret = list(itertools.islice(generator, MAX_LINKS))
    timer.stop()
    return ret
def _byID(cls, ids, data=False, return_dict=True, extra_props=None,
          stale=False):
    ids, single = tup(ids, True)
    prefix = thing_prefix(cls.__name__)

    if not all(x <= tdb.MAX_THING_ID for x in ids):
        raise NotFound("huge thing_id in %r" % ids)

    def count_found(ret, still_need):
        cache.stats.cache_report(hits=len(ret), misses=len(still_need),
                                 cache_name="sgm.%s" % cls.__name__)

    if not cache.stats:
        count_found = None

    def items_db(ids):
        items = cls._get_item(cls._type_id, ids)
        for i in items.keys():
            items[i] = cls._build(i, items[i])
        return items

    bases = sgm(cache, ids, items_db, prefix, stale=stale,
                found_fn=count_found)

    # check to see if we found everything we asked for
    for i in ids:
        if i not in bases:
            missing = [i for i in ids if i not in bases]
            raise NotFound, "%s %s" % (cls.__name__, missing)
        if bases[i] and bases[i]._id != i:
            g.log.error("thing.py: Doppelganger on byID: %s got %s for %s" %
                        (cls.__name__, bases[i]._id, i))
            bases[i] = items_db([i]).values()[0]
            bases[i]._cache_myself()

    if data:
        need = []
        for v in bases.itervalues():
            v._asked_for_data = True
            if not v._loaded:
                need.append(v)
        if need:
            cls._load_multi(need)
    ### The following is really handy for debugging who's forgetting data=True:
    # else:
    #     for v in bases.itervalues():
    #         if v._id in (1, 2, 123):
    #             raise ValueError

    # e.g. add the sort prop
    if extra_props:
        for _id, props in extra_props.iteritems():
            for k, v in props.iteritems():
                bases[_id].__setattr__(k, v, False)

    if single:
        return bases[ids[0]]
    elif return_dict:
        return bases
    else:
        return filter(None, (bases.get(i) for i in ids))
def _by_name(cls, names, stale=False, _update=False):
    '''
    Usages:
    1. Subreddit._by_name('funny') # single sr name
       Searches for a single subreddit. Returns a single Subreddit object or
       raises NotFound if the subreddit doesn't exist.
    2. Subreddit._by_name(['aww','iama']) # list of sr names
       Searches for a list of subreddits. Returns a dict mapping srnames to
       Subreddit objects. Items that were not found are omitted from the
       dict. If no items are found, an empty dict is returned.
    '''
    # lower name here so there is only one cache
    names, single = tup(names, True)

    to_fetch = {}
    ret = {}

    for name in names:
        ascii_only = str(name.decode("ascii", errors="ignore"))
        lname = ascii_only.lower()

        if lname in cls._specials:
            ret[name] = cls._specials[lname]
        elif len(lname) > Subreddit.MAX_SRNAME_LENGTH:
            g.log.debug("Subreddit._by_name() ignoring invalid srname "
                        "(too long): %s", lname)
        else:
            to_fetch[lname] = name

    if to_fetch:
        def _fetch(lnames):
            q = cls._query(lower(cls.c.name) == lnames,
                           cls.c._spam == (True, False),
                           limit=len(lnames),
                           data=True)
            try:
                srs = list(q)
            except UnicodeEncodeError:
                print "Error looking up SRs %r" % (lnames,)
                raise

            return dict((sr.name.lower(), sr._id) for sr in srs)

        srs = {}
        srids = sgm(g.cache, to_fetch.keys(), _fetch,
                    prefix='subreddit.byname', stale=stale)
        if srids:
            srs = cls._byID(srids.values(), data=True, return_dict=False,
                            stale=stale)

        for sr in srs:
            ret[to_fetch[sr.name.lower()]] = sr

    if ret and single:
        return ret.values()[0]
    elif not ret and single:
        raise NotFound, 'Subreddit %s' % name
    else:
        return ret
def organization_by_ips(ips):
    ips, is_single = tup(ips, ret_is_single=True)
    organization_by_ip = sgm(g.cache, ips, miss_fn=_organization_by_ips,
                             prefix='organization_by_ip',
                             time=GEOIP_CACHE_TIME)
    if is_single and organization_by_ip:
        return organization_by_ip[ips[0]]
    else:
        return organization_by_ip
def _fast_query(cls, thing1s, thing2s, name, data=True):
    """looks up all the relationships between thing1_ids and
       thing2_ids and caches them"""
    prefix = thing_prefix(cls.__name__)

    thing1_dict = dict((t._id, t) for t in tup(thing1s))
    thing2_dict = dict((t._id, t) for t in tup(thing2s))

    thing1_ids = thing1_dict.keys()
    thing2_ids = thing2_dict.keys()

    name = tup(name)

    pairs = set((x, y, n)
                for x in thing1_ids
                for y in thing2_ids
                for n in name)

    def items_db(pairs):
        t1_ids = set()
        t2_ids = set()
        names = set()
        for t1, t2, name in pairs:
            t1_ids.add(t1)
            t2_ids.add(t2)
            names.add(name)

        q = cls._query(cls.c._thing1_id == t1_ids,
                       cls.c._thing2_id == t2_ids,
                       cls.c._name == names,
                       eager_load=True,
                       data=data)

        rel_ids = {}
        for rel in q:
            # TODO: an alternative for multiple relations with the same keys
            # l = rel_ids.setdefault((rel._thing1_id, rel._thing2_id), [])
            # l.append(rel._id)
            rel_ids[(rel._thing1._id, rel._thing2._id, rel._name)] = rel._id

        for p in pairs:
            if p not in rel_ids:
                rel_ids[p] = None

        return rel_ids

    res = sgm(cache, pairs, items_db, prefix)

    # convert the keys back into objects; we can assume the rels
    # will be in the cache and just call _byID lots
    res_obj = {}
    for k, rid in res.iteritems():
        obj_key = (thing1_dict[k[0]], thing2_dict[k[1]], k[2])
        res_obj[obj_key] = cls._byID(rid, data=data) if rid else None

    return res_obj
def normalized_rising(sr_ids):
    if not sr_ids:
        return []

    tuples_by_srid = sgm(g.cache, sr_ids, miss_fn=get_rising_tuples,
                         prefix='normalized_rising',
                         time=g.page_cache_time)

    merged = heapq.merge(*tuples_by_srid.values())

    return [link_name for norm_score, score, link_name in merged]
def _fast_query(cls, thing1s, thing2s, name, data=True, eager_load=True,
                thing_data=False):
    """looks up all the relationships between thing1_ids and
       thing2_ids and caches them"""
    prefix = thing_prefix(cls.__name__)

    thing1_dict = dict((t._id, t) for t in tup(thing1s))
    thing2_dict = dict((t._id, t) for t in tup(thing2s))

    thing1_ids = thing1_dict.keys()
    thing2_ids = thing2_dict.keys()

    name = tup(name)

    # permute all of the pairs
    pairs = set((x, y, n)
                for x in thing1_ids
                for y in thing2_ids
                for n in name)

    def lookup_rel_ids(pairs):
        rel_ids = {}

        t1_ids = set()
        t2_ids = set()
        names = set()
        for t1, t2, name in pairs:
            t1_ids.add(t1)
            t2_ids.add(t2)
            names.add(name)

        if t1_ids and t2_ids and names:
            q = cls._query(cls.c._thing1_id == t1_ids,
                           cls.c._thing2_id == t2_ids,
                           cls.c._name == names)
        else:
            q = []

        for rel in q:
            rel_ids[(rel._thing1_id, rel._thing2_id, rel._name)] = rel._id

        for p in pairs:
            if p not in rel_ids:
                rel_ids[p] = None

        return rel_ids

    # get the relation ids from the cache or query the db
    res = sgm(cls._cache, pairs, lookup_rel_ids, prefix)

    # get the relation objects
    rel_ids = {rel_id for rel_id in res.itervalues()
               if rel_id is not None}
    rels = cls._byID_rel(rel_ids, data=data, eager_load=eager_load,
                         thing_data=thing_data)

    res_obj = {}
    for (thing1_id, thing2_id, name), rel_id in res.iteritems():
        pair = (thing1_dict[thing1_id], thing2_dict[thing2_id], name)
        rel = rels[rel_id] if rel_id is not None else None
        res_obj[pair] = rel

    return res_obj
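For orientation, a hypothetical call site for the _fast_query variants in this section. The classes and variable names below are illustrative assumptions, not actual call sites; what the code above guarantees is the return shape: a dict keyed by (thing1, thing2, name) tuples whose values are Relation objects or None.

# hypothetical usage; Friend/alice/bob/carol are illustrative only
rels = Friend._fast_query([alice, bob], [carol], ['friend'])
rel = rels[(alice, carol, 'friend')]   # a Friend relation, or None
is_friend = rel is not None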
def location_by_ips(ips):
    ips, is_single = tup(ips, ret_is_single=True)
    location_by_ip = sgm(g.cache, ips, miss_fn=_location_by_ips,
                         prefix='location_by_ip', time=GEOIP_CACHE_TIME)
    if is_single and location_by_ip:
        return location_by_ip[ips[0]]
    else:
        return location_by_ip
def get(cls, sr_ids):
    """Return a dictionary of sr_id -> list of ads for each of sr_ids"""
    # Mangling: Caller convention is to use empty string for FRONT_PAGE
    sr_ids = [(sr_id or cls.FRONT_PAGE) for sr_id in sr_ids]
    adweights = sgm(cls.cache, sr_ids, cls._load_multi,
                    prefix=cls.cache_prefix, stale=True)
    results = {sr_id: cls.from_columns(adweights[sr_id])
               for sr_id in adweights}
    if cls.FRONT_PAGE in results:
        results[''] = results.pop(cls.FRONT_PAGE)
    return results
def _byID(cls, ids, data=False, return_dict=True, extra_props=None):
    ids, single = tup(ids, True)
    prefix = thing_prefix(cls.__name__)

    if not all(x <= tdb.MAX_THING_ID for x in ids):
        raise NotFound('huge thing_id in %r' % ids)

    def items_db(ids):
        items = cls._get_item(cls._type_id, ids)
        for i in items.keys():
            items[i] = cls._build(i, items[i])
        return items

    bases = sgm(cache, ids, items_db, prefix)

    # check to see if we found everything we asked for
    for i in ids:
        if i not in bases:
            missing = [i for i in ids if i not in bases]
            raise NotFound, '%s %s' % (cls.__name__, missing)
        if bases[i] and bases[i]._id != i:
            g.log.error("thing.py: Doppelganger on byID: %s got %s for %s" %
                        (cls.__name__, bases[i]._id, i))
            bases[i] = items_db([i]).values()[0]
            bases[i]._cache_myself()

    if data:
        need = []
        for v in bases.itervalues():
            v._asked_for_data = True
            if not v._loaded:
                need.append(v)
        if need:
            cls._load_multi(need)
    ### The following is really handy for debugging who's forgetting data=True:
    # else:
    #     for v in bases.itervalues():
    #         if v._id in (1, 2, 123):
    #             raise ValueError

    # e.g. add the sort prop
    if extra_props:
        for _id, props in extra_props.iteritems():
            for k, v in props.iteritems():
                bases[_id].__setattr__(k, v, False)

    if single:
        return bases[ids[0]]
    elif return_dict:
        return bases
    else:
        return filter(None, (bases.get(i) for i in ids))
def _fast_query_all_names(cls, thing1s, thing2s, data=True):
    """looks up all the relationships between thing1_ids and
       thing2_ids and caches them
    """
    prefix = thing_prefix(cls.__name__)

    thing1_dict = dict((t._id, t) for t in thing1s)
    thing2_dict = dict((t._id, t) for t in thing2s)

    thing1_ids = thing1_dict.keys()
    thing2_ids = thing2_dict.keys()

    pairs = set((x, y)
                for x in thing1_ids
                for y in thing2_ids)

    def items_db(pairs):
        t1_ids = set()
        t2_ids = set()
        for t1, t2 in pairs:
            t1_ids.add(t1)
            t2_ids.add(t2)

        q = cls._query(cls.c._thing1_id == t1_ids,
                       cls.c._thing2_id == t2_ids,
                       eager_load=True,
                       data=data)

        rel_ids = {}
        for rel in q:
            l = rel_ids.setdefault((rel._thing1_id, rel._thing2_id), [])
            l.append(rel._id)

        for p in pairs:
            if p not in rel_ids:
                rel_ids[p] = []

        return rel_ids

    res = sgm(cache, pairs, items_db, prefix)

    # convert the keys back into objects; we can assume the rels
    # will be in the cache and just call _byID lots
    res_obj = {}
    for k, rids in res.iteritems():
        for rid in rids:
            obj_key = (thing1_dict[k[0]], thing2_dict[k[1]])
            result = cls._byID(rid, data=data) if rid else None
            if res_obj.get(obj_key) is None:
                res_obj[obj_key] = result

    return res_obj
def location_by_ips(ips):
    ips, is_single = tup(ips, ret_is_single=True)
    location_by_ip = sgm(
        cache=g.gencache,
        keys=ips,
        miss_fn=_location_by_ips,
        prefix='geoip:loc_',
        time=GEOIP_CACHE_TIME,
    )
    if is_single and location_by_ip:
        return location_by_ip[ips[0]]
    else:
        return location_by_ip
def organization_by_ips(ips):
    ips, is_single = tup(ips, ret_is_single=True)
    organization_by_ip = sgm(
        cache=g.gencache,
        keys=ips,
        miss_fn=_organization_by_ips,
        prefix='geoip:org_',
        time=GEOIP_CACHE_TIME,
    )
    if is_single and organization_by_ip:
        return organization_by_ip[ips[0]]
    else:
        return organization_by_ip
def get_live_promotions(sr_names):
    sanitized_names = [SPECIAL_NAMES.get(name, name) for name in sr_names]
    promos_by_sanitized_name = sgm(g.cache, sanitized_names,
                                   miss_fn=_get_live_promotions,
                                   prefix='live_promotions', time=60,
                                   stale=True)
    promos_by_srname = {
        REVERSED_NAMES.get(name, name): val
        for name, val in promos_by_sanitized_name.iteritems()
    }
    return itertools.chain.from_iterable(promos_by_srname.itervalues())
def _by_name(cls, names, stale=False, _update=False):
    # lower name here so there is only one cache
    names, single = tup(names, True)

    to_fetch = {}
    ret = {}

    for name in names:
        lname = name.lower()

        if lname in cls._specials:
            ret[name] = cls._specials[lname]
        else:
            to_fetch[lname] = name

    if to_fetch:
        def _fetch(lnames):
            q = cls._query(lower(cls.c.name) == lnames,
                           cls.c._spam == (True, False),
                           limit=len(lnames),
                           data=True)
            try:
                srs = list(q)
            except UnicodeEncodeError:
                print "Error looking up SRs %r" % (lnames,)
                raise

            return dict((sr.name.lower(), sr._id) for sr in srs)

        srs = {}
        srids = sgm(g.cache, to_fetch.keys(), _fetch,
                    prefix='subreddit.byname', stale=stale)
        if srids:
            srs = cls._byID(srids.values(), data=True, return_dict=False,
                            stale=stale)

        for sr in srs:
            ret[to_fetch[sr.name.lower()]] = sr

    if ret and single:
        return ret.values()[0]
    elif not ret and single:
        raise NotFound, 'Subreddit %s' % name
    else:
        return ret
def _by_name(cls, names, _update=False):
    # lower name here so there is only one cache
    names, single = tup(names, True)

    to_fetch = {}
    ret = {}

    _specials = dict(friends=Friends,
                     randnsfw=RandomNSFW,
                     random=Random,
                     mod=Mod,
                     contrib=Contrib,
                     all=All)

    for name in names:
        lname = name.lower()

        if lname in _specials:
            ret[name] = _specials[lname]
        else:
            to_fetch[lname] = name

    if to_fetch:
        def _fetch(lnames):
            q = cls._query(lower(cls.c.name) == lnames,
                           cls.c._spam == (True, False),
                           limit=len(lnames),
                           data=True)
            try:
                srs = list(q)
            except UnicodeEncodeError:
                print "Error looking up SRs %r" % (lnames,)
                raise

            return dict((sr.name.lower(), sr._id) for sr in srs)

        srs = {}
        srids = sgm(g.cache, to_fetch.keys(), _fetch,
                    prefix='subreddit.byname')
        if srids:
            srs = cls._byID(srids.values(), data=True, return_dict=False)

        for sr in srs:
            ret[to_fetch[sr.name.lower()]] = sr

    if ret and single:
        return ret.values()[0]
    elif not ret and single:
        raise NotFound, 'Subreddit %s' % name
    else:
        return ret
def moderator_messages(sr_ids):
    from r2.models import Subreddit

    def multi_load_tree(sr_ids):
        srs = Subreddit._byID(sr_ids, return_dict=False)
        res = {}
        for sr in srs:
            trees = subreddit_messages_nocache(sr)
            if trees:
                res[sr._id] = trees
        return res

    res = sgm(g.permacache, sr_ids, miss_fn=multi_load_tree,
              prefix=sr_messages_key(""))

    return sorted(chain(*res.values()), key=tree_sort_fn, reverse=True)
def get(cls, sr_ids):
    """Return a dictionary of sr_id -> list of ads for each of sr_ids"""
    # Mangling: Caller convention is to use empty string for FRONT_PAGE
    sr_ids = [(sr_id or cls.FRONT_PAGE) for sr_id in sr_ids]
    adweights = sgm(cls.cache, sr_ids, cls._load_multi,
                    prefix=cls.cache_prefix, stale=True)
    results = {
        sr_id: cls.from_columns(adweights[sr_id])
        for sr_id in adweights
    }
    if cls.FRONT_PAGE in results:
        results[''] = results.pop(cls.FRONT_PAGE)
    return results
def moderator_messages(sr_ids):
    from r2.models import Subreddit

    srs = Subreddit._byID(sr_ids)
    sr_ids = [sr_id for sr_id, sr in srs.iteritems()
              if sr.is_moderator_with_perms(c.user, "mail")]

    def multi_load_tree(sr_ids):
        res = {}
        for sr_id in sr_ids:
            trees = subreddit_messages_nocache(srs[sr_id])
            if trees:
                res[sr_id] = trees
        return res

    res = sgm(g.permacache, sr_ids, miss_fn=multi_load_tree,
              prefix=sr_messages_key(""))

    return sorted(chain(*res.values()), key=tree_sort_fn, reverse=True)
def moderator_messages(user):
    from r2.models import Subreddit

    sr_ids = Subreddit.reverse_moderator_ids(user)

    def multi_load_tree(sr_ids):
        srs = Subreddit._byID(sr_ids, return_dict=False)
        res = {}
        for sr in srs:
            trees = subreddit_messages_nocache(sr)
            if trees:
                res[sr._id] = trees
        return res

    res = sgm(g.permacache, sr_ids, miss_fn=multi_load_tree,
              prefix=sr_messages_key(""))

    return sorted(chain(*res.values()), key=tree_sort_fn, reverse=True)
def _byID(cls, ids, data=False, return_dict=True, extra_props=None):
    ids, single = tup(ids, True)
    prefix = thing_prefix(cls.__name__)

    if not all(x <= tdb.MAX_THING_ID for x in ids):
        raise NotFound('huge thing_id in %r' % ids)

    def items_db(ids):
        items = cls._get_item(cls._type_id, ids)
        for i in items.keys():
            items[i] = cls._build(i, items[i])
        return items

    bases = sgm(cache, ids, items_db, prefix)

    # check to see if we found everything we asked for
    for i in ids:
        if i not in bases:
            missing = [i for i in ids if i not in bases]
            raise NotFound, '%s %s' % (cls.__name__, missing)
        if bases[i] and bases[i]._id != i:
            g.log.error("thing.py: Doppelganger on byID: %s got %s for %s" %
                        (cls.__name__, bases[i]._id, i))
            bases[i] = items_db([i]).values()[0]
            bases[i]._cache_myself()

    if data:
        need = [v for v in bases.itervalues() if not v._loaded]
        if need:
            cls._load_multi(need)

    # e.g. add the sort prop
    if extra_props:
        for _id, props in extra_props.iteritems():
            for k, v in props.iteritems():
                bases[_id].__setattr__(k, v, False)

    if single:
        return bases[ids[0]]
    elif return_dict:
        return bases
    else:
        return filter(None, (bases.get(i) for i in ids))
def get_items(self, *a, **kw):
    """Wrapper around builder's get_items that caches the rendering."""
    builder_items = self.builder.get_items(*a, **kw)

    # render cache
    # fn to render non-boring items
    fullnames = {}
    for i in self.builder.item_iter(builder_items):
        rs = c.render_style
        key = i.render_class.cache_key(i)
        if key:
            fullnames[key + rs + c.lang] = i

    def render_items(names):
        r = {}
        for i in names:
            item = fullnames[i]
            r[i] = item.render()
        return r

    rendered_items = sgm(g.rendercache, fullnames, render_items,
                         'render_', time=g.page_cache_time)

    # replace the render function
    for k, v in rendered_items.iteritems():
        def make_fn(v):
            default = c.render_style
            default_render = fullnames[k].render
            def r(style=default):
                if style != c.render_style:
                    return default_render(style=style)
                return v
            return r
        fullnames[k].render = make_fn(v)

    return builder_items
def moderator_messages(sr_ids):
    from r2.models import Subreddit

    srs = Subreddit._byID(sr_ids)
    sr_ids = [sr_id for sr_id, sr in srs.iteritems()
              if sr.is_moderator_with_perms(c.user, 'mail')]

    def multi_load_tree(sr_ids):
        res = {}
        for sr_id in sr_ids:
            trees = subreddit_messages_nocache(srs[sr_id])
            if trees:
                res[sr_id] = trees
        return res

    res = sgm(g.permacache, sr_ids, miss_fn=multi_load_tree,
              prefix=sr_messages_key(""))

    return sorted(chain(*res.values()), key=tree_sort_fn, reverse=True)
def _byID(cls, ids, data=False, return_dict=True, extra_props=None):
    ids, single = tup(ids, True)
    prefix = thing_prefix(cls.__name__)

    def items_db(ids):
        items = cls._get_item(cls._type_id, ids)
        for i in items.keys():
            items[i] = cls._build(i, items[i])

        # avoid race condition when incrementing int props (data int
        # props are set in load_multi)
        for prop in cls._int_props:
            keys = dict((i, getattr(item, prop))
                        for i, item in items.iteritems())
            cache.set_multi(keys, prefix + prop + '_')

        return items

    bases = sgm(cache, ids, items_db, prefix)

    # check to see if we found everything we asked for
    if any(i not in bases for i in ids):
        missing = [i for i in ids if i not in bases]
        raise NotFound, '%s %s' % (cls.__name__, missing)

    if data:
        need = [v for v in bases.itervalues() if not v._loaded]
        if need:
            cls._load_multi(need)

    # e.g. add the sort prop
    if extra_props:
        for _id, props in extra_props.iteritems():
            for k, v in props.iteritems():
                bases[_id].__setattr__(k, v, False)

    if single:
        return bases[ids[0]]
    elif return_dict:
        return bases
    else:
        return filter(None, (bases.get(i) for i in ids))
def _by_name(cls, names, stale=False, _update=False):
    # lower name here so there is only one cache
    names, single = tup(names, True)

    to_fetch = {}
    ret = {}

    for name in names:
        lname = name.lower()

        if lname in cls._specials:
            ret[name] = cls._specials[lname]
        else:
            to_fetch[lname] = name

    if to_fetch:
        def _fetch(lnames):
            q = cls._query(lower(cls.c.name) == lnames,
                           cls.c._spam == (True, False),
                           limit=len(lnames),
                           data=True)
            try:
                srs = list(q)
            except UnicodeEncodeError:
                print "Error looking up SRs %r" % (lnames,)
                raise

            return dict((sr.name.lower(), sr._id) for sr in srs)

        srs = {}
        srids = sgm(g.cache, to_fetch.keys(), _fetch,
                    prefix="subreddit.byname", stale=stale)
        if srids:
            srs = cls._byID(srids.values(), data=True, return_dict=False,
                            stale=stale)

        for sr in srs:
            ret[to_fetch[sr.name.lower()]] = sr

    if ret and single:
        return ret.values()[0]
    elif not ret and single:
        raise NotFound, "Subreddit %s" % name
    else:
        return ret
def _byID(cls, ids, data=False, return_dict=True, extra_props=None):
    ids, single = tup(ids, True)
    prefix = thing_prefix(cls.__name__)

    def items_db(ids):
        items = cls._get_item(cls._type_id, ids)
        for i in items.keys():
            items[i] = cls._build(i, items[i])

        # avoid race condition when incrementing int props (data int
        # props are set in load_multi)
        for prop in cls._int_props:
            keys = dict((i, getattr(item, prop))
                        for i, item in items.iteritems())
            cache.set_multi(keys, prefix + prop + '_')

        return items

    bases = sgm(cache, ids, items_db, prefix)

    if not bases:
        raise NotFound, '%s %s' % (cls.__name__, ids)

    if data:
        need = [v for v in bases.itervalues() if not v._loaded]
        if need:
            cls._load_multi(need)

    # e.g. add the sort prop
    if extra_props:
        for _id, props in extra_props.iteritems():
            for k, v in props.iteritems():
                bases[_id].__setattr__(k, v, False)

    if single:
        return bases[ids[0]]
    elif return_dict:
        return bases
    else:
        return filter(None, (bases.get(i) for i in ids))
def _byID(cls, ids):
    ids, is_single = tup(ids, True)

    if not len(ids):
        if is_single:
            raise InvariantException("whastis?")
        else:
            return {}

    # all keys must be strings or directly convertible to strings
    assert all(isinstance(_id, basestring) and str(_id) for _id in ids)

    def lookup(l_ids):
        # TODO: if we get back max_column_count columns for a
        # given row, check a flag on the class as to whether to
        # refetch for more of them. This could be important with
        # large Views, for instance
        rows = cls._cf.multiget(l_ids, column_count=max_column_count)

        l_ret = {}
        for t_id, row in rows.iteritems():
            t = cls._from_serialized_columns(t_id, row)
            l_ret[t._id] = t

        return l_ret

    ret = cache.sgm(thing_cache, ids, lookup, prefix=cls._cache_prefix())

    if is_single and not ret:
        raise NotFound("<%s %r>" % (cls.__name__, ids[0]))
    elif is_single:
        assert len(ret) == 1
        return ret.values()[0]

    return ret
def get_items(self, *a, **kw):
    """Wrapper around builder's get_items that caches the rendering."""
    builder_items = self.builder.get_items(*a, **kw)

    # render cache
    # fn to render non-boring items
    fullnames = {}
    for i in self.builder.item_iter(builder_items):
        rs = c.render_style
        key = i.cache_key(i)
        if key:
            fullnames[key + rs + c.lang] = i

    def render_items(names):
        r = {}
        for i in names:
            item = fullnames[i]
            r[i] = item.render()
        return r

    rendered_items = sgm(cache, fullnames, render_items, 'render_',
                         time=g.page_cache_time)

    # replace the render function
    for k, v in rendered_items.iteritems():
        def make_fn(v):
            default = c.render_style
            default_render = fullnames[k].render
            def r(style=default):
                if style != c.render_style:
                    return default_render(style=style)
                return v
            return r
        fullnames[k].render = make_fn(v)

    return builder_items
def _fast_query(cls, thing1s, thing2s, name, data=True, eager_load=True,
                thing_data=False, timestamp_optimize=False):
    """looks up all the relationships between thing1_ids and
       thing2_ids and caches them"""
    prefix = thing_prefix(cls.__name__)

    thing1_dict = dict((t._id, t) for t in tup(thing1s))
    thing2_dict = dict((t._id, t) for t in tup(thing2s))

    thing1_ids = thing1_dict.keys()
    thing2_ids = thing2_dict.keys()

    name = tup(name)

    # permute all of the pairs
    pairs = set((x, y, n)
                for x in thing1_ids
                for y in thing2_ids
                for n in name)

    def items_db(pairs):
        rel_ids = {}

        t1_ids = set()
        t2_ids = set()
        names = set()
        for t1, t2, name in pairs:
            t1_ids.add(t1)
            t2_ids.add(t2)
            names.add(name)

        if t1_ids and t2_ids and names:
            q = cls._query(cls.c._thing1_id == t1_ids,
                           cls.c._thing2_id == t2_ids,
                           cls.c._name == names,
                           eager_load=eager_load,
                           thing_data=thing_data,
                           data=data)
        else:
            q = []

        for rel in q:
            # TODO: an alternative for multiple relations with the same keys
            # l = rel_ids.setdefault((rel._thing1_id, rel._thing2_id), [])
            # l.append(rel._id)
            rel_ids[(rel._thing1_id, rel._thing2_id, rel._name)] = rel._id

        for p in pairs:
            if p not in rel_ids:
                rel_ids[p] = None

        return rel_ids

    res = sgm(cache, pairs, items_db, prefix)

    # convert the keys back into objects:
    # populate the local cache in batch
    cls._byID(filter(None, res.values()), data=data)

    # now we can assume the rels will be in the cache and just
    # call _byID lots
    res_obj = {}
    for k, rid in res.iteritems():
        obj_key = (thing1_dict[k[0]], thing2_dict[k[1]], k[2])
        res_obj[obj_key] = cls._byID(rid, data=data) if rid else None

    return res_obj
def reported(cls, users=None, things=None, return_dict=True, amount=None):
    # nothing given, nothing to give back
    if not users and not things:
        return {} if return_dict else []

    if users:
        users = tup(users)
    if things:
        things = tup(things)

    # if both are given, we can use fast_query
    if users and things:
        return cls.fastreported(users, things)

    # type_dict stores id keyed on (type, rel_key)
    type_dict = {}

    # if users, we have to search all the rel types on thing1_id
    if users:
        db_key = '_thing1_id'
        uid = [t._id for t in users]
        for key in cls.rels.keys():
            type_dict[(Account, key)] = uid

    # if things, we have to search only on types present in the list
    if things:
        db_key = '_thing2_id'
        for t in things:
            key = (t.__class__, (Account, t.__class__))
            type_dict.setdefault(key, []).append(t._id)

    def db_func(rel, db_key, amount):
        def _db_func(ids):
            q = rel._query(getattr(rel.c, db_key) == ids, data=True)
            if amount is not None:
                q._filter(rel.c._name == str(amount))

            r_ids = {}
            # fill up the report listing from the query
            for r in q:
                key = getattr(r, db_key)
                r_ids.setdefault(key, []).append(r._id)

            # add blanks where no results were returned
            for i in ids:
                if i not in r_ids:
                    r_ids[i] = []

            return r_ids
        return _db_func

    rval = []
    for (thing_class, rel_key), ids in type_dict.iteritems():
        rel = cls.rels[rel_key]
        prefix = cls._cache_prefix(rel, thing_class, amount=amount)
        # load from cache
        res = sgm(cache, ids, db_func(rel, db_key, amount), prefix)
        # append *objects* to end of list
        res1 = []
        for x in res.values():
            res1.extend(x)
        if res1:
            rval.extend(rel._byID(res1, data=True, return_dict=False))

    if return_dict:
        return dict(((r._thing1, r._thing2, cls._field), r) for r in rval)
    return rval
def _fast_query(cls, thing1s, thing2s, name, data=True, eager_load=True,
                thing_data=False, thing_stale=False):
    """looks up all the relationships between thing1_ids and
       thing2_ids and caches them"""
    cache_key_lookup = dict()

    # We didn't find these keys in the cache, look them up in the
    # database
    def lookup_rel_ids(uncached_keys):
        rel_ids = {}

        # Lookup thing ids and name from cache key
        t1_ids = set()
        t2_ids = set()
        names = set()
        for cache_key in uncached_keys:
            (thing1, thing2, name) = cache_key_lookup[cache_key]
            t1_ids.add(thing1._id)
            t2_ids.add(thing2._id)
            names.add(name)

        q = cls._query(cls.c._thing1_id == t1_ids,
                       cls.c._thing2_id == t2_ids,
                       cls.c._name == names)

        for rel in q:
            rel_ids[cls._fast_cache_key_from_parts(
                cls.__name__,
                rel._thing1_id,
                rel._thing2_id,
                str(rel._name))] = rel._id

        for cache_key in uncached_keys:
            if cache_key not in rel_ids:
                rel_ids[cache_key] = None

        return rel_ids

    # make lookups for thing ids and names
    thing1_dict = dict((t._id, t) for t in tup(thing1s))
    thing2_dict = dict((t._id, t) for t in tup(thing2s))

    names = map(str, tup(name))

    # permute all of the pairs via cartesian product
    rel_tuples = itertools.product(thing1_dict.values(),
                                   thing2_dict.values(),
                                   names)

    # create cache keys for all permutations and initialize lookup
    for t in rel_tuples:
        thing1, thing2, name = t
        cache_key = cls._fast_cache_key_from_parts(cls.__name__,
                                                   thing1._id,
                                                   thing2._id,
                                                   name)
        cache_key_lookup[cache_key] = t

    # get the relation ids from the cache or query the db
    res = sgm(cls._fast_cache, cache_key_lookup.keys(), lookup_rel_ids)

    # get the relation objects
    rel_ids = {rel_id for rel_id in res.itervalues()
               if rel_id is not None}
    rels = cls._byID_rel(rel_ids,
                         data=data,
                         eager_load=eager_load,
                         thing_data=thing_data,
                         thing_stale=thing_stale)

    # Takes aggregated results from cache and db (res) and transforms
    # the values from ids to Relations.
    res_obj = {}
    for cache_key, rel_id in res.iteritems():
        t = cache_key_lookup[cache_key]
        rel = rels[rel_id] if rel_id is not None else None
        res_obj[t] = rel

    return res_obj
def _byID(cls, ids, properties=None):
    ids, is_single = tup(ids, True)

    if properties is not None:
        asked_properties = frozenset(properties)
        willask_properties = set(properties)

    if not len(ids):
        if is_single:
            raise InvariantException("whastis?")
        return {}

    # all keys must be strings or directly convertible to strings
    assert all(isinstance(_id, basestring) and str(_id) for _id in ids)

    def reject_bad_partials(cached, still_need):
        # tell sgm that the match it found in the cache isn't good
        # enough if it's a partial that doesn't include our
        # properties. we still need to look those items up to get
        # the properties that we're after
        stillfind = set()

        for k, v in cached.iteritems():
            if properties is None:
                if v._partial is not None:
                    # there's a partial in the cache but we're not
                    # looking for partials
                    stillfind.add(k)
            elif v._partial is not None and not asked_properties.issubset(
                    v._partial):
                # we asked for a partial, and this is a partial,
                # but it doesn't have all of the properties that
                # we need
                stillfind.add(k)

                # other callers in our request are now expecting
                # to find the properties that were on that
                # partial, so we'll have to preserve them
                for prop in v._partial:
                    willask_properties.add(prop)

        for k in stillfind:
            del cached[k]
            still_need.add(k)

    def lookup(l_ids):
        # TODO: if we get back max_column_count columns for a
        # given row, check a flag on the class as to whether to
        # refetch for more of them. This could be important with
        # large Views, for instance
        if properties is None:
            rows = cls._cf.multiget(l_ids, column_count=max_column_count)
        else:
            rows = cls._cf.multiget(l_ids, columns=willask_properties)

        l_ret = {}
        for t_id, row in rows.iteritems():
            t = cls._from_serialized_columns(t_id, row)
            if properties is not None:
                # make sure that the item is marked as a _partial
                t._partial = willask_properties
            l_ret[t._id] = t

        return l_ret

    ret = cache.sgm(thing_cache, ids, lookup, prefix=cls._cache_prefix(),
                    found_fn=reject_bad_partials)

    if is_single and not ret:
        raise NotFound("<%s %r>" % (cls.__name__, ids[0]))
    elif is_single:
        assert len(ret) == 1
        return ret.values()[0]

    return ret
def _fast_query(cls, thing1s, thing2s, name, data=True, eager_load=True,
                thing_data=False, thing_stale=False):
    """looks up all the relationships between thing1_ids and
       thing2_ids and caches them"""
    cache_key_lookup = dict()

    # We didn't find these keys in the cache, look them up in the
    # database
    def lookup_rel_ids(uncached_keys):
        rel_ids = {}

        # Lookup thing ids and name from cache key
        t1_ids = set()
        t2_ids = set()
        names = set()
        for cache_key in uncached_keys:
            (thing1, thing2, name) = cache_key_lookup[cache_key]
            t1_ids.add(thing1._id)
            t2_ids.add(thing2._id)
            names.add(name)

        q = cls._query(
            cls.c._thing1_id == t1_ids,
            cls.c._thing2_id == t2_ids,
            cls.c._name == names)

        for rel in q:
            rel_ids[cls._fast_cache_key_from_parts(
                cls.__name__,
                rel._thing1_id,
                rel._thing2_id,
                str(rel._name)
            )] = rel._id

        for cache_key in uncached_keys:
            if cache_key not in rel_ids:
                rel_ids[cache_key] = None

        return rel_ids

    # make lookups for thing ids and names
    thing1_dict = dict((t._id, t) for t in tup(thing1s))
    thing2_dict = dict((t._id, t) for t in tup(thing2s))

    names = map(str, tup(name))

    # permute all of the pairs via cartesian product
    rel_tuples = itertools.product(
        thing1_dict.values(),
        thing2_dict.values(),
        names)

    # create cache keys for all permutations and initialize lookup
    for t in rel_tuples:
        thing1, thing2, name = t
        cache_key = cls._fast_cache_key_from_parts(
            cls.__name__, thing1._id, thing2._id, name)
        cache_key_lookup[cache_key] = t

    # get the relation ids from the cache or query the db
    res = sgm(cls._fast_cache, cache_key_lookup.keys(), lookup_rel_ids)

    # get the relation objects
    rel_ids = {rel_id for rel_id in res.itervalues() if rel_id is not None}
    rels = cls._byID_rel(
        rel_ids,
        data=data,
        eager_load=eager_load,
        thing_data=thing_data,
        thing_stale=thing_stale)

    # Takes aggregated results from cache and db (res) and transforms
    # the values from ids to Relations.
    res_obj = {}
    for cache_key, rel_id in res.iteritems():
        t = cache_key_lookup[cache_key]
        rel = rels[rel_id] if rel_id is not None else None
        res_obj[t] = rel

    return res_obj
def _byID(cls, ids, data=False, return_dict=True, stale=False,
          ignore_missing=False):
    ids, single = tup(ids, ret_is_single=True)
    prefix = thing_prefix(cls.__name__)

    for x in ids:
        if not isinstance(x, (int, long)):
            raise ValueError('non-integer thing_id in %r' % ids)
        if x > tdb.MAX_THING_ID:
            raise NotFound('huge thing_id in %r' % ids)
        elif x < tdb.MIN_THING_ID:
            raise NotFound('negative thing_id in %r' % ids)

    if not single and not ids:
        if return_dict:
            return {}
        else:
            return []

    cls.record_lookup(data=data, delta=len(ids))

    def count_found(ret, still_need):
        cls._cache.stats.cache_report(hits=len(ret),
                                      misses=len(still_need),
                                      cache_name='sgm.%s' % cls.__name__)

    if not cls._cache.stats:
        count_found = None

    def items_db(ids):
        items = cls._get_item(cls._type_id, ids)
        for i in items.keys():
            items[i] = cls._build(i, items[i])

        # caching happens in sgm, but is less intrusive to count here
        cls.record_cache_write(event="cache", delta=len(items))

        return items

    bases = sgm(cls._cache, ids, items_db, prefix, time=THING_CACHE_TTL,
                stale=stale, found_fn=count_found,
                stat_subname=cls.__name__)

    # Check to see if we found everything we asked for
    missing = []
    for i in ids:
        if i not in bases:
            missing.append(i)
        elif bases[i] and bases[i]._id != i:
            g.log.error(
                "thing.py: Doppelganger on byID: %s got %s for %s" %
                (cls.__name__, bases[i]._id, i))
            bases[i] = items_db([i]).values()[0]
            bases[i]._cache_myself()

    if missing and not ignore_missing:
        raise NotFound, '%s %s' % (cls.__name__, missing)
    for i in missing:
        ids.remove(i)

    if data:
        need = []
        for v in bases.itervalues():
            if not v._loaded:
                need.append(v)
        if need:
            cls._load_multi(need)

    if single:
        return bases[ids[0]] if ids else None
    elif return_dict:
        return bases
    else:
        return filter(None, (bases.get(i) for i in ids))
def _fast_query(cls, thing1s, thing2s, name, data=True, eager_load=True,
                thing_data=False, timestamp_optimize=False):
    """looks up all the relationships between thing1_ids and
       thing2_ids and caches them"""
    prefix = thing_prefix(cls.__name__)

    thing1_dict = dict((t._id, t) for t in tup(thing1s))
    thing2_dict = dict((t._id, t) for t in tup(thing2s))

    thing1_ids = thing1_dict.keys()
    thing2_ids = thing2_dict.keys()

    name = tup(name)

    def can_skip_lookup(t1, t2, name):
        # we can't possibly have voted on things that were
        # created after the last time we voted. for relations
        # that have an invariant like this we can avoid doing
        # these lookups as long as the relation takes
        # responsibility for keeping the timestamp up-to-date
        thing1 = thing1_dict[t1]
        thing2 = thing2_dict[t2]

        last_done = thing_utils.get_last_modified_for_cls(
            thing1, cls._type_name)

        if not last_done:
            return False

        if thing2._date > last_done:
            return True

        return False

    # permute all of the pairs
    pairs = set((x, y, n)
                for x in thing1_ids
                for y in thing2_ids
                for n in name)

    def items_db(pairs):
        rel_ids = {}

        t1_ids = set()
        t2_ids = set()
        names = set()
        for t1, t2, name in pairs:
            if timestamp_optimize and can_skip_lookup(t1, t2, name):
                continue
            t1_ids.add(t1)
            t2_ids.add(t2)
            names.add(name)

        if t1_ids and t2_ids and names:
            q = cls._query(cls.c._thing1_id == t1_ids,
                           cls.c._thing2_id == t2_ids,
                           cls.c._name == names,
                           eager_load=eager_load,
                           thing_data=thing_data,
                           data=data)
        else:
            q = []

        for rel in q:
            # TODO: an alternative for multiple relations with the same keys
            # l = rel_ids.setdefault((rel._thing1_id, rel._thing2_id), [])
            # l.append(rel._id)
            rel_ids[(rel._thing1_id, rel._thing2_id, rel._name)] = rel._id

        for p in pairs:
            if p not in rel_ids:
                rel_ids[p] = None

        return rel_ids

    res = sgm(cache, pairs, items_db, prefix)

    # convert the keys back into objects:
    # populate the local cache in batch
    cls._byID(filter(None, res.values()), data=data)

    # now we can assume the rels will be in the cache and just
    # call _byID lots
    res_obj = {}
    for k, rid in res.iteritems():
        obj_key = (thing1_dict[k[0]], thing2_dict[k[1]], k[2])
        res_obj[obj_key] = cls._byID(rid, data=data) if rid else None

    return res_obj
def _fast_query(cls, thing1s, thing2s, name, data=True, eager_load=True,
                timestamp_optimize=False):
    """looks up all the relationships between thing1_ids and
       thing2_ids and caches them"""
    prefix = thing_prefix(cls.__name__)

    thing1_dict = dict((t._id, t) for t in tup(thing1s))
    thing2_dict = dict((t._id, t) for t in tup(thing2s))

    thing1_ids = thing1_dict.keys()
    thing2_ids = thing2_dict.keys()

    name = tup(name)

    def can_skip_lookup(t1, t2, name):
        # we can't possibly have voted on things that were
        # created after the last time we voted. for relations
        # that have an invariant like this we can avoid doing
        # these lookups as long as the relation takes
        # responsibility for keeping the timestamp up-to-date
        thing1 = thing1_dict[t1]
        thing2 = thing2_dict[t2]

        # check to see if we have the history information
        if not thing1._loaded:
            return False
        if not hasattr(thing1, 'fast_query_timestamp'):
            return False

        last_done = thing1.fast_query_timestamp.get(cls._type_name, None)

        if not last_done:
            return False

        if thing2._date > last_done:
            return True

        return False

    # permute all of the pairs
    pairs = set((x, y, n)
                for x in thing1_ids
                for y in thing2_ids
                for n in name)

    def items_db(pairs):
        rel_ids = {}

        t1_ids = set()
        t2_ids = set()
        names = set()
        for t1, t2, name in pairs:
            if timestamp_optimize and can_skip_lookup(t1, t2, name):
                continue
            t1_ids.add(t1)
            t2_ids.add(t2)
            names.add(name)

        q = cls._query(cls.c._thing1_id == t1_ids,
                       cls.c._thing2_id == t2_ids,
                       cls.c._name == names,
                       eager_load=eager_load,
                       data=data)

        for rel in q:
            # TODO: an alternative for multiple relations with the same keys
            # l = rel_ids.setdefault((rel._thing1_id, rel._thing2_id), [])
            # l.append(rel._id)
            rel_ids[(rel._thing1_id, rel._thing2_id, rel._name)] = rel._id

        for p in pairs:
            if p not in rel_ids:
                rel_ids[p] = None

        return rel_ids

    res = sgm(cache, pairs, items_db, prefix)

    # convert the keys back into objects:
    # populate the local cache in batch
    cls._byID(filter(None, res.values()), data=data)

    # now we can assume the rels will be in the cache and just
    # call _byID lots
    res_obj = {}
    for k, rid in res.iteritems():
        obj_key = (thing1_dict[k[0]], thing2_dict[k[1]], k[2])
        res_obj[obj_key] = cls._byID(rid, data=data) if rid else None

    return res_obj
def _byID(cls, ids, return_dict=True, properties=None):
    ids, is_single = tup(ids, True)

    if properties is not None:
        asked_properties = frozenset(properties)
        willask_properties = set(properties)

    if not len(ids):
        if is_single:
            raise InvariantException("whastis?")
        return {}

    # all keys must be strings or directly convertible to strings
    assert all(isinstance(_id, basestring) or str(_id) for _id in ids)

    def reject_bad_partials(cached, still_need):
        # tell sgm that the match it found in the cache isn't good
        # enough if it's a partial that doesn't include our
        # properties. we still need to look those items up to get
        # the properties that we're after
        stillfind = set()

        for k, v in cached.iteritems():
            if properties is None:
                if v._partial is not None:
                    # there's a partial in the cache but we're not
                    # looking for partials
                    stillfind.add(k)
            elif v._partial is not None and not asked_properties.issubset(
                    v._partial):
                # we asked for a partial, and this is a partial,
                # but it doesn't have all of the properties that
                # we need
                stillfind.add(k)

                # other callers in our request are now expecting
                # to find the properties that were on that
                # partial, so we'll have to preserve them
                for prop in v._partial:
                    willask_properties.add(prop)

        for k in stillfind:
            del cached[k]
            still_need.add(k)

    def lookup(l_ids):
        # TODO: if we get back max_column_count columns for a
        # given row, check a flag on the class as to whether to
        # refetch for more of them. This could be important with
        # large Views, for instance
        if properties is None:
            rows = cls._cf.multiget(l_ids, column_count=max_column_count)
        else:
            rows = cls._cf.multiget(l_ids, columns=willask_properties)

        l_ret = {}
        for t_id, row in rows.iteritems():
            t = cls._from_serialized_columns(t_id, row)
            if properties is not None:
                # make sure that the item is marked as a _partial
                t._partial = willask_properties
            l_ret[t._id] = t

        return l_ret

    ret = cache.sgm(thing_cache, ids, lookup, prefix=cls._cache_prefix(),
                    found_fn=reject_bad_partials)

    if is_single and not ret:
        raise NotFound("<%s %r>" % (cls.__name__, ids[0]))
    elif is_single:
        assert len(ret) == 1
        return ret.values()[0]
    elif return_dict:
        return ret
    else:
        return filter(None, (ret.get(i) for i in ids))
def _byID(cls, ids, data=False, return_dict=True, extra_props=None,
          stale=False, ignore_missing=False):
    ids, single = tup(ids, True)
    prefix = thing_prefix(cls.__name__)

    for x in ids:
        if not isinstance(x, (int, long)):
            raise ValueError('non-integer thing_id in %r' % ids)
        if x > tdb.MAX_THING_ID:
            raise NotFound('huge thing_id in %r' % ids)
        elif x < tdb.MIN_THING_ID:
            raise NotFound('negative thing_id in %r' % ids)

    def count_found(ret, still_need):
        cls._cache.stats.cache_report(hits=len(ret),
                                      misses=len(still_need),
                                      cache_name='sgm.%s' % cls.__name__)

    if not cls._cache.stats:
        count_found = None

    def items_db(ids):
        items = cls._get_item(cls._type_id, ids)
        for i in items.keys():
            items[i] = cls._build(i, items[i])
        return items

    bases = sgm(cls._cache, ids, items_db, prefix, stale=stale,
                found_fn=count_found)

    # Check to see if we found everything we asked for
    missing = []
    for i in ids:
        if i not in bases:
            missing.append(i)
        elif bases[i] and bases[i]._id != i:
            g.log.error(
                "thing.py: Doppelganger on byID: %s got %s for %s" %
                (cls.__name__, bases[i]._id, i))
            bases[i] = items_db([i]).values()[0]
            bases[i]._cache_myself()

    if missing and not ignore_missing:
        raise NotFound, '%s %s' % (cls.__name__, missing)
    for i in missing:
        ids.remove(i)

    if data:
        need = []
        for v in bases.itervalues():
            if not v._loaded:
                need.append(v)
        if need:
            cls._load_multi(need)

    if extra_props:
        for _id, props in extra_props.iteritems():
            for k, v in props.iteritems():
                bases[_id].__setattr__(k, v, False)

    if single:
        return bases[ids[0]] if ids else None
    elif return_dict:
        return bases
    else:
        return filter(None, (bases.get(i) for i in ids))
def get_live_promotions(sr_names):
    promos_by_srname = sgm(g.cache, sr_names, miss_fn=_get_live_promotions,
                           prefix='live_promotions', time=60)
    return itertools.chain.from_iterable(promos_by_srname.itervalues())
def _byID(cls, ids, data=True, return_dict=True, stale=False,
          ignore_missing=False):
    # data props are ALWAYS loaded, data keyword is meaningless
    ids, single = tup(ids, ret_is_single=True)

    for x in ids:
        if not isinstance(x, (int, long)):
            raise ValueError('non-integer thing_id in %r' % ids)
        if x > tdb.MAX_THING_ID:
            raise NotFound('huge thing_id in %r' % ids)
        elif x < tdb.MIN_THING_ID:
            raise NotFound('negative thing_id in %r' % ids)

    if not single and not ids:
        if return_dict:
            return {}
        else:
            return []

    cls.record_lookup(data=data, delta=len(ids))

    def count_found_and_reject_unloaded(ret, still_need):
        unloaded_ids = {
            _id for _id, thing in ret.iteritems() if not thing._loaded}
        for _id in unloaded_ids:
            del ret[_id]
            still_need.add(_id)

        # guard the stats call rather than disabling the found_fn
        # entirely, since we still need to reject unloaded things
        if cls._cache.stats:
            cls._cache.stats.cache_report(
                hits=len(ret),
                misses=len(still_need),
                cache_name='sgm.%s' % cls.__name__)

    def get_things_from_db(ids):
        props_by_id = cls._get_item(cls._type_id, ids)
        data_props_by_id = cls._get_data(cls._type_id, ids)

        try:
            essentials = object.__getattribute__(cls, "_essentials")
        except AttributeError:
            essentials = ()

        things_by_id = {}
        for _id, props in props_by_id.iteritems():
            thing = cls._build(_id, props)
            data_props = data_props_by_id.get(_id, {})
            thing._t.update(data_props)
            thing._loaded = True

            for data_prop in essentials:
                if data_prop not in thing._t:
                    print "Warning: %s is missing %s" % (thing._fullname,
                                                         data_prop)

            things_by_id[_id] = thing

        # caching happens in sgm, but is less intrusive to count here
        cls.record_cache_write(event="cache", delta=len(things_by_id))

        return things_by_id

    things_by_id = sgm(cls._cache, ids, miss_fn=get_things_from_db,
                       prefix=cls._cache_prefix(), time=THING_CACHE_TTL,
                       stale=stale,
                       found_fn=count_found_and_reject_unloaded,
                       stat_subname=cls.__name__)

    # Check to see if we found everything we asked for
    missing = [_id for _id in ids if _id not in things_by_id]
    if missing and not ignore_missing:
        raise NotFound, '%s %s' % (cls.__name__, missing)

    if missing:
        ids = [_id for _id in ids if _id not in missing]

    if single:
        return things_by_id[ids[0]] if ids else None
    elif return_dict:
        return things_by_id
    else:
        return filter(None, (things_by_id.get(_id) for _id in ids))
def _fast_query(cls, thing1s, thing2s, name, data=True, eager_load=True,
                thing_data=False):
    """looks up all the relationships between thing1_ids and
       thing2_ids and caches them"""
    prefix = thing_prefix(cls.__name__)

    thing1_dict = dict((t._id, t) for t in tup(thing1s))
    thing2_dict = dict((t._id, t) for t in tup(thing2s))

    thing1_ids = thing1_dict.keys()
    thing2_ids = thing2_dict.keys()

    name = tup(name)

    # permute all of the pairs
    pairs = set((x, y, n)
                for x in thing1_ids
                for y in thing2_ids
                for n in name)

    def lookup_rel_ids(pairs):
        rel_ids = {}

        t1_ids = set()
        t2_ids = set()
        names = set()
        for t1, t2, name in pairs:
            t1_ids.add(t1)
            t2_ids.add(t2)
            names.add(name)

        if t1_ids and t2_ids and names:
            q = cls._query(cls.c._thing1_id == t1_ids,
                           cls.c._thing2_id == t2_ids,
                           cls.c._name == names)
        else:
            q = []

        for rel in q:
            rel_ids[(rel._thing1_id, rel._thing2_id, rel._name)] = rel._id

        for p in pairs:
            if p not in rel_ids:
                rel_ids[p] = None

        return rel_ids

    # get the relation ids from the cache or query the db
    res = sgm(cls._cache, pairs, lookup_rel_ids, prefix)

    # get the relation objects
    rel_ids = {rel_id for rel_id in res.itervalues()
               if rel_id is not None}
    rels = cls._byID_rel(rel_ids, data=data, eager_load=eager_load,
                         thing_data=thing_data)

    res_obj = {}
    for (thing1_id, thing2_id, name), rel_id in res.iteritems():
        pair = (thing1_dict[thing1_id], thing2_dict[thing2_id], name)
        rel = rels[rel_id] if rel_id is not None else None
        res_obj[pair] = rel

    return res_obj
def _byID(cls, ids, data=True, return_dict=True, stale=False,
          ignore_missing=False):
    # data props are ALWAYS loaded, data keyword is meaningless
    ids, single = tup(ids, ret_is_single=True)

    for x in ids:
        if not isinstance(x, (int, long)):
            raise ValueError('non-integer thing_id in %r' % ids)
        if x > tdb.MAX_THING_ID:
            raise NotFound('huge thing_id in %r' % ids)
        elif x < tdb.MIN_THING_ID:
            raise NotFound('negative thing_id in %r' % ids)

    if not single and not ids:
        if return_dict:
            return {}
        else:
            return []

    cls.record_lookup(data=data, delta=len(ids))

    def count_found_and_reject_unloaded(ret, still_need):
        unloaded_ids = {
            _id for _id, thing in ret.iteritems() if not thing._loaded
        }
        for _id in unloaded_ids:
            del ret[_id]
            still_need.add(_id)

        if cls._cache.stats:
            cls._cache.stats.cache_report(hits=len(ret),
                                          misses=len(still_need),
                                          cache_name='sgm.%s' % cls.__name__)

    def get_things_from_db(ids):
        props_by_id = cls._get_item(cls._type_id, ids)
        data_props_by_id = cls._get_data(cls._type_id, ids)

        things_by_id = {}
        for _id, props in props_by_id.iteritems():
            thing = cls._build(_id, props)
            data_props = data_props_by_id.get(_id, {})
            thing._t.update(data_props)
            thing._loaded = True

            if not all(data_prop in thing._t
                       for data_prop in cls._essentials):
                # a Thing missing an essential prop is invalid; this can
                # happen if a process looks up the Thing as it's being
                # created, between when the props and the data props are
                # written
                g.log.error("%s missing essentials, got %s",
                            thing, thing._t)
                g.stats.simple_event("thing.load.missing_essentials")
                continue

            things_by_id[_id] = thing

        # caching happens in sgm, but is less intrusive to count here
        cls.record_cache_write(event="cache", delta=len(things_by_id))

        return things_by_id

    things_by_id = sgm(cls._cache, ids, miss_fn=get_things_from_db,
                       prefix=cls._cache_prefix(), time=THING_CACHE_TTL,
                       stale=stale,
                       found_fn=count_found_and_reject_unloaded,
                       stat_subname=cls.__name__)

    # Check to see if we found everything we asked for
    missing = [_id for _id in ids if _id not in things_by_id]
    if missing and not ignore_missing:
        raise NotFound, '%s %s' % (cls.__name__, missing)

    if missing:
        ids = [_id for _id in ids if _id not in missing]

    if single:
        return things_by_id[ids[0]] if ids else None
    elif return_dict:
        return things_by_id
    else:
        return filter(None, (things_by_id.get(_id) for _id in ids))
def cup_info_multi(cls, ids):
    ids = [int(i) for i in ids]
    # Is this dumb? Why call sgm() with miss_fn=None, rather than just
    # calling g.hardcache.get_multi()?
    return sgm(g.hardcache, ids, miss_fn=None, prefix="cup_info-")
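On the question in the comment above: with miss_fn=None, sgm() reduces to a prefixed get_multi() that also translates the prefixed cache keys back into the caller's keys, so the call is roughly equivalent to the sketch below (built on the sgm() sketch near the top of this section; cup_info_multi_direct is a hypothetical name, not an r2 function).

def cup_info_multi_direct(ids):
    ids = [int(i) for i in ids]
    # prefix the keys the way sgm() would, then strip the prefix again
    by_cache_key = dict(("cup_info-%d" % i, i) for i in ids)
    cached = g.hardcache.get_multi(by_cache_key.keys())
    return dict((by_cache_key[k], v) for k, v in cached.iteritems())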
def _by_name(cls, names, stale=False, _update=False):
    '''
    Usages:
    1. Subreddit._by_name('funny') # single sr name
       Searches for a single subreddit. Returns a single Subreddit object or
       raises NotFound if the subreddit doesn't exist.
    2. Subreddit._by_name(['aww','iama']) # list of sr names
       Searches for a list of subreddits. Returns a dict mapping srnames to
       Subreddit objects. Items that were not found are omitted from the
       dict. If no items are found, an empty dict is returned.
    '''
    # lower name here so there is only one cache
    names, single = tup(names, True)

    to_fetch = {}
    ret = {}

    for name in names:
        lname = name.lower()

        if lname in cls._specials:
            ret[name] = cls._specials[lname]
        elif len(lname) > Subreddit.MAX_SRNAME_LENGTH:
            g.log.debug(
                "Subreddit._by_name() ignoring invalid srname (too long): %s",
                lname)
        else:
            to_fetch[lname] = name

    if to_fetch:
        def _fetch(lnames):
            q = cls._query(lower(cls.c.name) == lnames,
                           cls.c._spam == (True, False),
                           limit=len(lnames),
                           data=True)
            try:
                srs = list(q)
            except UnicodeEncodeError:
                print "Error looking up SRs %r" % (lnames,)
                raise

            return dict((sr.name.lower(), sr._id) for sr in srs)

        srs = {}
        srids = sgm(g.cache, to_fetch.keys(), _fetch,
                    prefix='subreddit.byname', stale=stale)
        if srids:
            srs = cls._byID(srids.values(), data=True, return_dict=False,
                            stale=stale)

        for sr in srs:
            ret[to_fetch[sr.name.lower()]] = sr

    if ret and single:
        return ret.values()[0]
    elif not ret and single:
        raise NotFound, 'Subreddit %s' % name
    else:
        return ret