@classmethod
def store_keys(cls, key, listing):
    """Look up query based on key, and update with provided listing.

    :param str key: key generated by :py:meth:`make_key`
    :param list listing: sorted listing generated by
        `mr_reduce_max_per_key`, generally by :py:meth:`write_permacache`

    """
    category, thing_cls, sort, time, uid = cls.split_key(key)

    query = None
    if category == "user":
        if thing_cls == "link":
            query = queries._get_submitted(int(uid), sort, time)
        elif thing_cls == "comment":
            query = queries._get_comments(int(uid), sort, time)
    elif category == "sr":
        if thing_cls == "link":
            query = queries._get_links(int(uid), sort, time)
    elif category == "domain":
        if thing_cls == "link":
            query = queries.get_domain_links(uid, sort, time)
    assert query, 'unknown query type for {}'.format(key)

    item_tuples = [
        (thing_fullname, float(value), float(timestamp))
        for value, timestamp, thing_fullname in listing
    ]

    # we only need locking updates for non-time-based listings, since for
    # time-based ones we're the only ones that ever update it
    lock = time == 'all'

    query._replace(item_tuples, lock=lock)
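# A minimal invocation sketch, not from the source: the owning class name
# ("TopQueryUpdater") is hypothetical, the "/"-delimited key layout is
# inferred from the sibling store_keys variants below that split on "/",
# and each listing row is assumed to be the (value, timestamp,
# thing_fullname) triple described in the docstring above.
example_listing = [
    ("1052.0", "1300000000.0", "t3_abc123"),
    ("987.0", "1300000500.0", "t3_def456"),
]
TopQueryUpdater.store_keys("user/link/top/all/1234", example_listing)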
def store_keys(key, maxes):
    category, thing_cls, sort, time, id = key.split("/")

    query = None
    if category == "user":
        if thing_cls == "link":
            query = queries._get_submitted(int(id), sort, time)
        elif thing_cls == "comment":
            query = queries._get_comments(int(id), sort, time)
    elif category == "sr":
        if thing_cls == "link":
            query = queries._get_links(int(id), sort, time)
    elif category == "domain":
        if thing_cls == "link":
            query = queries.get_domain_links(id, sort, time)
    assert query, 'unknown query type for %s' % (key,)

    item_tuples = [tuple([item[-1]] + [float(x) for x in item[:-1]])
                   for item in maxes]

    # we only need locking updates for non-time-based listings, since for
    # time-based ones we're the only ones that ever update it
    lock = time == 'all'

    query._replace(item_tuples, lock=lock)
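# To make the tuple shuffle above concrete (with made-up data): each row
# of maxes arrives as (value, timestamp, fullname) and is reordered so the
# fullname leads, with the numeric fields cast to float.
example_maxes = [("1052.0", "1300000000.0", "t3_abc123")]
example_tuples = [tuple([item[-1]] + [float(x) for x in item[:-1]])
                  for item in example_maxes]
# example_tuples == [("t3_abc123", 1052.0, 1300000000.0)]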
def process_message(msgs, chan):
    """Update get_domain_links(), the Links by domain precomputed query.

    get_domain_links() is a CachedResult which is stored in permacache. To
    update these objects we need to do a read-modify-write which requires
    obtaining a lock. Sharding these updates by domain allows us to run
    multiple consumers (but ideally just one per shard) to avoid lock
    contention.

    """
    from r2.lib.db.queries import add_queries, get_domain_links

    link_names = {msg.body for msg in msgs}
    links = Link._by_fullname(link_names, return_dict=False)

    print 'Processing %r' % (links,)

    links_by_domain = defaultdict(list)
    for link in links:
        parsed = UrlParser(link.url)

        # update the listings for all permutations of the link's domain
        for domain in parsed.domain_permutations():
            links_by_domain[domain].append(link)

    for d, links in links_by_domain.iteritems():
        with g.stats.get_timer("link_vote_processor.domain_queries"):
            add_queries(
                queries=[
                    get_domain_links(d, sort, "all")
                    for sort in SORTS],
                insert_items=links,
            )
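# An illustrative stand-in for the domain fan-out above. This helper is
# hypothetical: it assumes domain_permutations() yields the hostname plus
# each parent suffix, and the real UrlParser may differ in details.
def _domain_suffixes(hostname):
    # "news.example.com" -> ["news.example.com", "example.com"]
    parts = hostname.split(".")
    return [".".join(parts[i:]) for i in range(len(parts) - 1)]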
def store_keys(key, maxes):
    # we're building queries using queries.py, but we could make the
    # queries ourselves if we wanted to avoid the individual lookups
    # for accounts and subreddits.

    # Note that we're only generating the 'sr-' type queries here, but
    # we're also able to process the other listings generated by the
    # old migrate.mr_permacache for convenience
    userrel_fns = dict(liked=queries.get_liked,
                       disliked=queries.get_disliked,
                       saved=queries.get_saved,
                       hidden=queries.get_hidden)

    if key.startswith('user-'):
        acc_str, keytype, account_id = key.split('-')
        account_id = int(account_id)
        fn = (queries.get_submitted if keytype == 'submitted'
              else queries.get_comments)
        q = fn(Account._byID(account_id), 'new', 'all')
        q._insert_tuples([(fname, float(timestamp))
                          for (timestamp, fname) in maxes])
    elif key.startswith('sr-'):
        sr_str, sort, time, sr_id = key.split('-')
        sr_id = int(sr_id)

        if sort == 'controversy':
            # I screwed this up in the mapper and it's too late to fix it
            sort = 'controversial'

        q = queries.get_links(Subreddit._byID(sr_id), sort, time)
        q._insert_tuples([tuple([item[-1]] + map(float, item[:-1]))
                          for item in maxes])
    elif key.startswith('domain/'):
        d_str, sort, time, domain = key.split('/')
        q = queries.get_domain_links(domain, sort, time)
        q._insert_tuples([tuple([item[-1]] + map(float, item[:-1]))
                          for item in maxes])
    elif key.split('-')[0] in userrel_fns:
        key_type, account_id = key.split('-')
        account_id = int(account_id)
        fn = userrel_fns[key_type]
        q = fn(Account._byID(account_id))
        q._insert_tuples([tuple([item[-1]] + map(float, item[:-1]))
                          for item in maxes])
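# Hypothetical keys matching each branch of the dispatcher above, with
# made-up fullnames and timestamps. Per the parsing above, the 'user-'
# branch expects (timestamp, fname) pairs while the other branches expect
# rows whose last element is the fullname:
store_keys("user-submitted-1234", [("1300000000.0", "t3_abc")])
store_keys("sr-top-week-42", [("99.0", "1300000000.0", "t3_def")])
store_keys("domain/top/all/example.com", [("42.0", "1300000000.0", "t3_ghi")])
store_keys("liked-1234", [("7.0", "1300000000.0", "t3_jkl")])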
def store_keys(key, maxes):
    category, thing_cls, sort, time, id = key.split("/")

    query = None
    if category == "user":
        if thing_cls == "link":
            query = queries._get_submitted(int(id), sort, time)
        elif thing_cls == "comment":
            query = queries._get_comments(int(id), sort, time)
    elif category == "sr":
        if thing_cls == "link":
            query = queries._get_links(int(id), sort, time)
    elif category == "domain":
        if thing_cls == "link":
            query = queries.get_domain_links(id, sort, time)
    assert query

    item_tuples = [tuple([item[-1]] + [float(x) for x in item[:-1]])
                   for item in maxes]
    query._replace(item_tuples)
def get_links(self, sort, time):
    from r2.lib.db import queries
    return queries.get_domain_links(self.domain, sort, time)
def get_links(self, sort, time):
    from r2.lib.db import queries
    # TODO: once the lists are precomputed properly, this can be
    # switched over to use the non-_old variety.
    return queries.get_domain_links(self.domain, sort, time)