def process_message(msgs, chan): """Update get_domain_links(), the Links by domain precomputed query. get_domain_links() is a CachedResult which is stored in permacache. To update these objects we need to do a read-modify-write which requires obtaining a lock. Sharding these updates by domain allows us to run multiple consumers (but ideally just one per shard) to avoid lock contention. """ from r2.lib.db.queries import add_queries, get_domain_links link_names = {msg.body for msg in msgs} links = Link._by_fullname(link_names, return_dict=False) print 'Processing %r' % (links,) links_by_domain = defaultdict(list) for link in links: parsed = UrlParser(link.url) # update the listings for all permutations of the link's domain for domain in parsed.domain_permutations(): links_by_domain[domain].append(link) for d, links in links_by_domain.iteritems(): with g.stats.get_timer("link_vote_processor.domain_queries"): add_queries( queries=[ get_domain_links(d, sort, "all") for sort in SORTS], insert_items=links, )
def process_message(msgs, chan): """Update get_domain_links(), the Links by domain precomputed query. get_domain_links() is a CachedResult which is stored in permacache. To update these objects we need to do a read-modify-write which requires obtaining a lock. Sharding these updates by domain allows us to run multiple consumers (but ideally just one per shard) to avoid lock contention. """ from r2.lib.db.queries import add_queries, get_domain_links link_names = {msg.body for msg in msgs} links = Link._by_fullname(link_names, return_dict=False) print 'Processing %r' % (links, ) links_by_domain = defaultdict(list) for link in links: parsed = UrlParser(link.url) # update the listings for all permutations of the link's domain for domain in parsed.domain_permutations(): links_by_domain[domain].append(link) for d, links in links_by_domain.iteritems(): with g.stats.get_timer("link_vote_processor.domain_queries"): add_queries( queries=[ get_domain_links(d, sort, "all") for sort in SORTS ], insert_items=links, )
def add_to_domain_query_q(link, num_shards=10):
    """Enqueue *link* for domain-listing updates.

    link: the Link whose domain listings need recomputing.
    num_shards: number of sharded queues used when
        g.shard_domain_query_queues is enabled; the default of 10 preserves
        the historical behavior.

    Links whose URL yields no valid domain permutations are silently
    skipped.
    """
    parsed = UrlParser(link.url)
    if not parsed.domain_permutations():
        # no valid domains found
        return

    if g.shard_domain_query_queues:
        # NOTE(review): shard routing relies on hash() being stable across
        # processes for equal hostnames — true for str under Python 2 unless
        # hash randomization (-R) is enabled; confirm for the deployment.
        domain_shard = hash(parsed.hostname) % num_shards
        queue_name = "domain_query_%s_q" % domain_shard
    else:
        queue_name = "domain_query_q"
    amqp.add_item(queue_name, link._fullname)
def add_to_domain_query_q(link):
    """Queue *link* onto the appropriate domain-query queue.

    Does nothing when the link's URL produces no domain permutations.
    """
    parsed = UrlParser(link.url)
    domains = parsed.domain_permutations()
    if not domains:
        # nothing to update for this URL
        return

    queue_name = "domain_query_q"
    if g.shard_domain_query_queues:
        # spread work across the sharded queues by hostname
        shard = hash(parsed.hostname) % 10
        queue_name = "domain_query_%s_q" % shard
    amqp.add_item(queue_name, link._fullname)
def process(thing):
    """Yield (listing_key, sort_value, timestamp, fullname) tuples for every
    time-filtered listing (per-user, per-subreddit, per-domain) that should
    include *thing*. Deleted things produce nothing; spam things only
    produce the per-user entries.
    """
    if thing.deleted:
        return

    cls = thingcls_by_name[thing.thing_type]
    fullname = make_fullname(cls, thing.thing_id)
    ts = thing.timestamp

    # evaluate each sort once per thing
    val_top = score(thing.ups, thing.downs)
    val_up = upvotes(thing.ups)
    val_contro = controversy(thing.ups, thing.downs)

    for interval, cutoff in cutoff_by_interval.iteritems():
        if ts < cutoff:
            continue

        author = thing.author_id
        yield ("user/%s/top/%s/%d" % (thing.thing_type, interval, author),
               val_top, ts, fullname)
        yield ("user/%s/%s/%s/%d" % (thing.thing_type, g.voting_upvote_path,
                                     interval, author),
               val_up, ts, fullname)
        yield ("user/%s/%s/%s/%d" % (thing.thing_type,
                                     g.voting_controversial_path, interval,
                                     author),
               val_contro, ts, fullname)

        # spam is kept out of the shared (sr/domain) listings
        if thing.spam:
            continue

        if thing.thing_type == "link":
            yield ("sr/link/top/%s/%d" % (interval, thing.sr_id),
                   val_top, ts, fullname)
            yield ("sr/link/%s/%s/%d" % (g.voting_upvote_path, interval,
                                         thing.sr_id),
                   val_up, ts, fullname)
            yield ("sr/link/%s/%s/%d" % (g.voting_controversial_path,
                                         interval, thing.sr_id),
                   val_contro, ts, fullname)

            if thing.url:
                try:
                    parsed = UrlParser(thing.url)
                except ValueError:
                    # unparseable URL: no domain listings for this interval
                    continue
                for domain in parsed.domain_permutations():
                    yield ("domain/link/top/%s/%s" % (interval, domain),
                           val_top, ts, fullname)
                    yield ("domain/link/%s/%s/%s" % (g.voting_upvote_path,
                                                     interval, domain),
                           val_up, ts, fullname)
                    yield ("domain/link/%s/%s/%s"
                           % (g.voting_controversial_path, interval, domain),
                           val_contro, ts, fullname)
def process(thing):
    """Yield (listing_key, sort_value, timestamp, fullname) tuples for each
    top/controversial time listing (user, subreddit, domain) that should
    include *thing*. Deleted things yield nothing; spam things yield only
    the per-user entries.
    """
    if thing.deleted:
        return

    cls = thingcls_by_name[thing.thing_type]
    fullname = make_fullname(cls, thing.thing_id)
    ts = thing.timestamp

    # both sort values are computed once per thing
    val_top = score(thing.ups, thing.downs)
    val_contro = controversy(thing.ups, thing.downs)

    for interval, cutoff in cutoff_by_interval.iteritems():
        if ts < cutoff:
            continue

        author = thing.author_id
        yield ("user/%s/top/%s/%d" % (thing.thing_type, interval, author),
               val_top, ts, fullname)
        yield ("user/%s/controversial/%s/%d" % (thing.thing_type, interval,
                                                author),
               val_contro, ts, fullname)

        # spam stays out of the shared (sr/domain) listings
        if thing.spam:
            continue

        if thing.thing_type == "link":
            yield ("sr/link/top/%s/%d" % (interval, thing.sr_id),
                   val_top, ts, fullname)
            yield ("sr/link/controversial/%s/%d" % (interval, thing.sr_id),
                   val_contro, ts, fullname)

            if thing.url:
                try:
                    parsed = UrlParser(thing.url)
                except ValueError:
                    # unparseable URL: no domain listings for this interval
                    continue
                for domain in parsed.domain_permutations():
                    yield ("domain/link/top/%s/%s" % (interval, domain),
                           val_top, ts, fullname)
                    yield ("domain/link/controversial/%s/%s" % (interval,
                                                                domain),
                           val_contro, ts, fullname)
def time_listing_iter(self, thing, cutoff_by_interval):
    """Yield (key, sort_value, timestamp, fullname) for every time listing
    (user/sr/domain x sort x interval) that should include *thing*.

    Deleted things yield nothing; spam things yield only the user listings.
    """
    if thing.deleted:
        return

    fullname = make_fullname(self.thing_cls, thing.thing_id)
    ts = thing.timestamp
    # evaluate every configured sort exactly once per thing
    sort_values = {name: fn(thing)
                   for name, fn in self.LISTING_SORTS.iteritems()}

    for interval, cutoff in cutoff_by_interval.iteritems():
        if ts < cutoff:
            continue

        for sort_name, val in sort_values.iteritems():
            key = self.make_key("user", sort_name, interval, thing.author_id)
            yield (key, val, ts, fullname)

        # shared (sr/domain) listings exclude spam and non-links
        if thing.spam:
            continue
        if thing.thing_type != "link":
            continue

        for sort_name, val in sort_values.iteritems():
            key = self.make_key("sr", sort_name, interval, thing.sr_id)
            yield (key, val, ts, fullname)

        if not thing.url:
            continue
        try:
            parsed = UrlParser(thing.url)
        except ValueError:
            continue
        for domain in parsed.domain_permutations():
            for sort_name, val in sort_values.iteritems():
                key = self.make_key("domain", sort_name, interval, domain)
                yield (key, val, ts, fullname)
def time_listing_iter(self, thing, cutoff_by_interval):
    """Yield (key, sort_value, timestamp, fullname) for each time listing
    (user/sr/domain per sort per interval) that *thing* belongs to.

    Deleted things produce nothing; spam things produce only user listings.
    """
    if thing.deleted:
        return

    fname = make_fullname(self.thing_cls, thing.thing_id)
    # each configured sort is computed once and reused for every interval
    computed = dict((sort, func(thing))
                    for sort, func in self.LISTING_SORTS.iteritems())

    def keyed(scope, ident, interval):
        # one (key, value, timestamp, fullname) tuple per configured sort
        for sort, value in computed.iteritems():
            yield (self.make_key(scope, sort, interval, ident),
                   value, thing.timestamp, fname)

    for interval, cutoff in cutoff_by_interval.iteritems():
        if thing.timestamp < cutoff:
            continue

        for tup in keyed("user", thing.author_id, interval):
            yield tup

        if thing.spam:
            continue

        if thing.thing_type == "link":
            for tup in keyed("sr", thing.sr_id, interval):
                yield tup

            if not thing.url:
                continue
            try:
                parsed = UrlParser(thing.url)
            except ValueError:
                continue
            for d in parsed.domain_permutations():
                for tup in keyed("domain", d, interval):
                    yield tup