def process_message(msgs, chan): """Update get_links(), the Links by Subreddit precomputed query. get_links() is a CachedResult which is stored in permacache. To update these objects we need to do a read-modify-write which requires obtaining a lock. Sharding these updates by subreddit allows us to run multiple consumers (but ideally just one per shard) to avoid lock contention. """ from r2.lib.db.queries import add_queries, get_links link_names = {msg.body for msg in msgs} links = Link._by_fullname(link_names, return_dict=False) print 'Processing %r' % (links, ) links_by_sr_id = defaultdict(list) for link in links: links_by_sr_id[link.sr_id].append(link) srs_by_id = Subreddit._byID(links_by_sr_id.keys(), stale=True) for sr_id, links in links_by_sr_id.iteritems(): with g.stats.get_timer("link_vote_processor.subreddit_queries"): sr = srs_by_id[sr_id] add_queries( queries=[get_links(sr, sort, "all") for sort in SORTS], insert_items=links, )
def process_message(msgs, chan): """Update get_domain_links(), the Links by domain precomputed query. get_domain_links() is a CachedResult which is stored in permacache. To update these objects we need to do a read-modify-write which requires obtaining a lock. Sharding these updates by domain allows us to run multiple consumers (but ideally just one per shard) to avoid lock contention. """ from r2.lib.db.queries import add_queries, get_domain_links link_names = {msg.body for msg in msgs} links = Link._by_fullname(link_names, return_dict=False) print 'Processing %r' % (links, ) links_by_domain = defaultdict(list) for link in links: parsed = UrlParser(link.url) # update the listings for all permutations of the link's domain for domain in parsed.domain_permutations(): links_by_domain[domain].append(link) for d, links in links_by_domain.iteritems(): with g.stats.get_timer("link_vote_processor.domain_queries"): add_queries( queries=[ get_domain_links(d, sort, "all") for sort in SORTS ], insert_items=links, )
def process_message(msg):
    from r2.lib.db.queries import (
        add_queries,
        add_to_commentstree_q,
        get_comments,
    )

    vote_data = json.loads(msg.body)
    hook = hooks.get_hook('vote.validate_vote_data')
    if hook.call_until_return(msg=msg, vote_data=vote_data) is False:
        # Corrupt records in the queue. Ignore them.
        print "Ignoring invalid vote by %s on %s %s" % (
            vote_data.get('user_id', '<unknown>'),
            vote_data.get('thing_fullname', '<unknown>'),
            vote_data)
        return

    timer = g.stats.get_timer("comment_vote_processor")
    timer.start()

    user = Account._byID(vote_data.pop("user_id"))
    comment = Comment._by_fullname(vote_data.pop("thing_fullname"))

    print "Processing vote by %s on %s %s" % (user, comment, vote_data)

    try:
        vote = Vote(
            user,
            comment,
            direction=vote_data["direction"],
            date=datetime.utcfromtimestamp(vote_data["date"]),
            data=vote_data["data"],
            event_data=vote_data.get("event_data"),
        )
    except TypeError as e:
        # a vote on an invalid type got in the queue, just skip it
        g.log.exception("Invalid type: %r", e.message)
        return

    vote.commit()
    timer.intermediate("create_vote_object")

    vote_valid = vote.is_automatic_initial_vote or vote.effects.affects_score
    comment_valid = not (comment._spam or comment._deleted)
    if vote_valid and comment_valid:
        author = Account._byID(comment.author_id)
        add_queries(
            queries=[get_comments(author, sort, 'all') for sort in SORTS],
            insert_items=comment,
        )
        timer.intermediate("author_queries")

        # update the score periodically when a comment has many votes
        update_threshold = g.live_config['comment_vote_update_threshold']
        update_period = g.live_config['comment_vote_update_period']
        num_votes = comment.num_votes
        if num_votes <= update_threshold or num_votes % update_period == 0:
            add_to_commentstree_q(comment)

    timer.stop()
    timer.flush()
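# Illustrative sketch, not r2 code: the shape of the JSON payload this consumer
# expects in msg.body, reconstructed from the fields read above. cast_vote()
# and the literal values are hypothetical; the real producer lives elsewhere in
# r2 and may attach additional fields.
import json
import time

def cast_vote(user_id, thing_fullname, direction):
    vote_data = {
        "user_id": user_id,                # resolved via Account._byID
        "thing_fullname": thing_fullname,  # resolved via Comment._by_fullname
        "direction": direction,            # passed straight to Vote()
        "date": time.time(),               # read back with utcfromtimestamp
        "data": {},                        # extra vote attributes
        "event_data": None,                # optional; .get() tolerates absence
    }
    return json.dumps(vote_data)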
def set_status(l, status, onchange=None):
    # keep this out here. Useful for updating the queue if there is a bug
    # and for initial migration
    add_queries(
        [
            _sponsored_link_query(None, l.author_id),
            _sponsored_link_query(None),
            _sponsored_link_query(status, l.author_id),
            _sponsored_link_query(status),
        ],
        insert_items=[l],
    )

    # no need to delete or commit if the status is unchanged
    if status != getattr(l, "promote_status", None):
        # new links won't even have a promote_status yet
        if hasattr(l, "promote_status"):
            add_queries(
                [_sponsored_link_query(l.promote_status, l.author_id),
                 _sponsored_link_query(l.promote_status)],
                delete_items=[l],
            )
        l.promote_status = status
        l._commit()
        if onchange:
            onchange()
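# Illustrative sketch, not r2 code: how set_status() might be driven from the
# promotion workflow. PROMOTE_STATUS and accept_promotion() below are
# hypothetical stand-ins for names in r2.lib.promote. Note that the old-status
# listings are only cleaned up, and onchange only invoked, when the status
# actually changes.
class PROMOTE_STATUS(object):
    unseen, accepted, rejected, promoted = range(4)

def accept_promotion(link):
    set_status(
        link,
        PROMOTE_STATUS.accepted,
        onchange=lambda: g.log.debug("promotion accepted: %s", link._fullname),
    )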
def process_message(msgs, chan): """Update get_domain_links(), the Links by domain precomputed query. get_domain_links() is a CachedResult which is stored in permacache. To update these objects we need to do a read-modify-write which requires obtaining a lock. Sharding these updates by domain allows us to run multiple consumers (but ideally just one per shard) to avoid lock contention. """ from r2.lib.db.queries import add_queries, get_domain_links link_names = {msg.body for msg in msgs} links = Link._by_fullname(link_names, return_dict=False) print 'Processing %r' % (links,) links_by_domain = defaultdict(list) for link in links: parsed = UrlParser(link.url) # update the listings for all permutations of the link's domain for domain in parsed.domain_permutations(): links_by_domain[domain].append(link) for d, links in links_by_domain.iteritems(): with g.stats.get_timer("link_vote_processor.domain_queries"): add_queries( queries=[ get_domain_links(d, sort, "all") for sort in SORTS], insert_items=links, )
def process_message(msgs, chan): """Update get_links(), the Links by Subreddit precomputed query. get_links() is a CachedResult which is stored in permacache. To update these objects we need to do a read-modify-write which requires obtaining a lock. Sharding these updates by subreddit allows us to run multiple consumers (but ideally just one per shard) to avoid lock contention. """ from r2.lib.db.queries import add_queries, get_links link_names = {msg.body for msg in msgs} links = Link._by_fullname(link_names, return_dict=False) print 'Processing %r' % (links,) links_by_sr_id = defaultdict(list) for link in links: links_by_sr_id[link.sr_id].append(link) srs_by_id = Subreddit._byID(links_by_sr_id.keys(), stale=True) for sr_id, links in links_by_sr_id.iteritems(): with g.stats.get_timer("link_vote_processor.subreddit_queries"): sr = srs_by_id[sr_id] add_queries( queries=[get_links(sr, sort, "all") for sort in SORTS], insert_items=links, )
def set_status(l, status, onchange=None): # keep this out here. Useful for updating the queue if there is a bug # and for initial migration add_queries([ _sponsored_link_query(None, l.author_id), _sponsored_link_query(None), _sponsored_link_query(status, l.author_id), _sponsored_link_query(status) ], insert_items=[l]) # no need to delete or commit of the status is unchanged if status != getattr(l, "promote_status", None): # new links won't even have a promote_status yet if hasattr(l, "promote_status"): add_queries([ _sponsored_link_query(l.promote_status, l.author_id), _sponsored_link_query(l.promote_status) ], delete_items=[l]) l.promote_status = status l._commit() if onchange: onchange()
def process_message(msg):
    from r2.lib.comment_tree import write_comment_scores
    from r2.lib.db.queries import (
        add_queries,
        add_to_commentstree_q,
        get_comments,
    )
    from r2.models.builder import get_active_sort_orders_for_link

    vote_data = json.loads(msg.body)
    hook = hooks.get_hook('vote.validate_vote_data')
    if hook.call_until_return(msg=msg, vote_data=vote_data) is False:
        # Corrupt records in the queue. Ignore them.
        print "Ignoring invalid vote by %s on %s %s" % (
            vote_data.get('user_id', '<unknown>'),
            vote_data.get('thing_fullname', '<unknown>'),
            vote_data)
        return

    timer = g.stats.get_timer("comment_vote_processor")
    timer.start()

    user = Account._byID(vote_data.pop("user_id"))
    comment = Comment._by_fullname(vote_data.pop("thing_fullname"))

    print "Processing vote by %s on %s %s" % (user, comment, vote_data)

    try:
        vote = Vote(
            user,
            comment,
            direction=vote_data["direction"],
            date=datetime.utcfromtimestamp(vote_data["date"]),
            data=vote_data["data"],
            event_data=vote_data.get("event_data"),
        )
    except TypeError as e:
        # a vote on an invalid type got in the queue, just skip it
        g.log.exception("Invalid type: %r", e.message)
        return

    vote.commit()
    timer.intermediate("create_vote_object")

    vote_invalid = (not vote.effects.affects_score and
        not vote.is_automatic_initial_vote)
    comment_invalid = comment._spam or comment._deleted
    if vote_invalid or comment_invalid:
        timer.stop()
        timer.flush()
        return

    author = Account._byID(comment.author_id)
    add_queries(
        queries=[get_comments(author, sort, 'all') for sort in SORTS],
        insert_items=comment,
    )
    timer.intermediate("author_queries")

    update_threshold = g.live_config['comment_vote_update_threshold']
    update_period = g.live_config['comment_vote_update_period']
    skip_score_update = (comment.num_votes > update_threshold and
        comment.num_votes % update_period != 0)

    # skip updating scores if this was the automatic initial vote. those
    # updates will be handled by new_comment. Also only update scores
    # periodically once a comment has many votes.
    if not vote.is_automatic_initial_vote and not skip_score_update:
        # check whether this link is using precomputed sorts, if it is
        # we'll need to push an update to commentstree_q
        link = Link._byID(comment.link_id)
        if get_active_sort_orders_for_link(link):
            # send this comment to commentstree_q where we will update
            # CommentScoresByLink, CommentTree (noop), and CommentOrderer
            add_to_commentstree_q(comment)
        else:
            # the link isn't using precomputed sorts, so just update the
            # scores
            write_comment_scores(link, [comment])
        timer.intermediate("update_scores")

    timer.stop()
    timer.flush()
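# Illustrative sketch, not r2 code: the throttling rule used above to decide
# whether a vote should trigger a score update. Every vote counts while the
# comment has few votes; past comment_vote_update_threshold, only every
# comment_vote_update_period-th vote does. The default values here are made
# up; the real ones come from g.live_config.
def should_update_scores(num_votes, update_threshold=10, update_period=5):
    if num_votes <= update_threshold:
        return True
    return num_votes % update_period == 0

# should_update_scores(3)   -> True  (below the threshold: always update)
# should_update_scores(13)  -> False (past threshold, not a multiple of 5)
# should_update_scores(15)  -> True  (past threshold, every 5th vote)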