def process_message(msg):
    """Handle one queued comment vote.

    The message body is decoded as JSON and run through the
    'vote.validate_vote_data' hook.  When the hook returns False the
    payload is reported on stdout and dropped.  Otherwise a Vote is built
    from the payload and committed; if the vote counts and the comment is
    neither spam nor deleted, the comment is inserted into its author's
    comment listings and, depending on its vote count, queued for a
    comments tree update.

    Arguments:

    * msg -- queue message whose `body` attribute holds the JSON payload.
      The payload keys read here are "user_id", "thing_fullname",
      "direction", "date", "data" and, optionally, "event_data".

    Returns None.

    """
    from r2.lib.db.queries import (
        add_queries,
        add_to_commentstree_q,
        get_comments,
    )

    vote_data = json.loads(msg.body)

    validator = hooks.get_hook('vote.validate_vote_data')
    if validator.call_until_return(msg=msg, vote_data=vote_data) is False:
        # The hook rejected this record: report it and move on.
        voter_id = vote_data.get('user_id', '<unknown>')
        target_name = vote_data.get('thing_fullname', '<unknown>')
        print("Ignoring invalid vote by %s on %s %s" % (
            voter_id, target_name, vote_data))
        return

    timer = g.stats.get_timer("comment_vote_processor")
    timer.start()

    # pop() so that the remaining payload is what gets printed below
    voter = Account._byID(vote_data.pop("user_id"))
    target = Comment._by_fullname(vote_data.pop("thing_fullname"))

    print("Processing vote by %s on %s %s" % (voter, target, vote_data))

    try:
        vote = Vote(
            voter,
            target,
            direction=vote_data["direction"],
            date=datetime.utcfromtimestamp(vote_data["date"]),
            data=vote_data["data"],
            event_data=vote_data.get("event_data"),
        )
    except TypeError as err:
        # The queue held a vote on an unsupported type of thing; skip it.
        g.log.exception("Invalid type: %r", err.message)
        return

    vote.commit()
    timer.intermediate("create_vote_object")

    vote_counts = (
        vote.is_automatic_initial_vote or vote.effects.affects_score)
    comment_hidden = target._spam or target._deleted

    if vote_counts and not comment_hidden:
        author = Account._byID(target.author_id)
        author_listings = [
            get_comments(author, sort, 'all') for sort in SORTS]
        add_queries(queries=author_listings, insert_items=target)
        timer.intermediate("author_queries")

        # Every vote triggers an update up to the threshold; beyond it,
        # only every update_period-th vote does.
        threshold = g.live_config['comment_vote_update_threshold']
        period = g.live_config['comment_vote_update_period']
        vote_count = target.num_votes
        off_period = vote_count > threshold and vote_count % period != 0
        if not off_period:
            add_to_commentstree_q(target)

    timer.stop()
    timer.flush()
def get_comment_scores(link, sort, comment_ids, timer):
    """Look up the sort value of each requested comment on a link.

    Arguments:

    * link -- the Link containing the comments; handed to
      CommentScoresByLink.get_scores and _get_qa_comment_scores.
    * sort -- a string naming the comment attribute to sort by.  "_date"
      uses the comment id itself as the value and "_qa" goes through
      _get_qa_comment_scores for comments missing from the cache.
    * comment_ids -- the comment ids that need a score (matched against
      integer ids decoded from the cached id36 keys).
    * timer -- its intermediate('sort') is called once scores are built.

    Returns a dictionary from comment id to sort value.  For sorts other
    than "_date" the dictionary holds every score cached for the link
    plus any requested ids the cache was missing.

    """
    from r2.lib.db import queries
    from r2.models import CommentScoresByLink

    if not comment_ids:
        # nothing requested, nothing to score
        return {}

    if sort == "_date":
        # ids increase monotonically, so an id can stand in for the
        # creation date
        scores = {cid: cid for cid in comment_ids}
        timer.intermediate('sort')
        return scores

    cached = CommentScoresByLink.get_scores(link, sort)

    # the cache is keyed by id36, callers here work with integer ids
    scores = {}
    for id36, score in cached.iteritems():
        scores[int(id36, 36)] = score

    unscored_ids = set(comment_ids).difference(scores)
    if unscored_ids:
        g.stats.simple_event('comment_tree_bad_sorter')

        unscored = Comment._byID(
            unscored_ids, data=True, return_dict=False)

        # queueing these for the comments tree will trigger adding their
        # scores
        for comment in unscored:
            queries.add_to_commentstree_q(comment)

        if sort == "_qa":
            qa_scores = _get_qa_comment_scores(link, unscored)
            for id36, score in qa_scores.iteritems():
                scores[int(id36, 36)] = score
        else:
            for comment in unscored:
                scores[comment._id] = getattr(comment, sort)

    timer.intermediate('sort')
    return scores
def process_message(msg):
    """Handle one queued comment vote and refresh the comment's scores.

    The message body is decoded as JSON and run through the
    'vote.validate_vote_data' hook.  When the hook returns False the
    payload is reported on stdout and dropped.  Otherwise a Vote is built
    from the payload and committed.  If the vote counts and the comment
    is neither spam nor deleted, the comment is inserted into its
    author's comment listings and its scores are refreshed, either via
    commentstree_q or by writing them directly.

    Arguments:

    * msg -- queue message whose `body` attribute holds the JSON payload.
      The payload keys read here are "user_id", "thing_fullname",
      "direction", "date", "data" and, optionally, "event_data".

    Returns None.

    """
    from r2.lib.comment_tree import write_comment_scores
    from r2.lib.db.queries import (
        add_queries,
        add_to_commentstree_q,
        get_comments,
    )
    from r2.models.builder import get_active_sort_orders_for_link

    vote_data = json.loads(msg.body)

    validator = hooks.get_hook('vote.validate_vote_data')
    if validator.call_until_return(msg=msg, vote_data=vote_data) is False:
        # The hook rejected this record: report it and move on.
        voter_id = vote_data.get('user_id', '<unknown>')
        target_name = vote_data.get('thing_fullname', '<unknown>')
        print("Ignoring invalid vote by %s on %s %s" % (
            voter_id, target_name, vote_data))
        return

    timer = g.stats.get_timer("comment_vote_processor")
    timer.start()

    # pop() so that the remaining payload is what gets printed below
    voter = Account._byID(vote_data.pop("user_id"))
    target = Comment._by_fullname(vote_data.pop("thing_fullname"))

    print("Processing vote by %s on %s %s" % (voter, target, vote_data))

    try:
        vote = Vote(
            voter,
            target,
            direction=vote_data["direction"],
            date=datetime.utcfromtimestamp(vote_data["date"]),
            data=vote_data["data"],
            event_data=vote_data.get("event_data"),
        )
    except TypeError as err:
        # The queue held a vote on an unsupported type of thing; skip it.
        g.log.exception("Invalid type: %r", err.message)
        return

    vote.commit()
    timer.intermediate("create_vote_object")

    vote_counts = (
        vote.effects.affects_score or vote.is_automatic_initial_vote)
    comment_hidden = target._spam or target._deleted
    if not vote_counts or comment_hidden:
        # Nothing further to update; close out the timer and finish.
        timer.stop()
        timer.flush()
        return

    author = Account._byID(target.author_id)
    author_listings = [get_comments(author, sort, 'all') for sort in SORTS]
    add_queries(queries=author_listings, insert_items=target)
    timer.intermediate("author_queries")

    # Every vote refreshes the scores up to the threshold; beyond it,
    # only every update_period-th vote does.
    threshold = g.live_config['comment_vote_update_threshold']
    period = g.live_config['comment_vote_update_period']
    refresh_due = (
        target.num_votes <= threshold or target.num_votes % period == 0)

    # Scores for the automatic initial vote are left to new_comment.
    if not vote.is_automatic_initial_vote and refresh_due:
        link = Link._byID(target.link_id)
        if not get_active_sort_orders_for_link(link):
            # No precomputed sorts on this link: write the scores here.
            write_comment_scores(link, [target])
            # NOTE(review): this timing is recorded on the direct-write
            # path only -- confirm that is the intended placement.
            timer.intermediate("update_scores")
        else:
            # Precomputed sorts are active, so hand the comment to
            # commentstree_q, where CommentScoresByLink, CommentTree
            # (noop) and CommentOrderer get updated.
            add_to_commentstree_q(target)

    timer.stop()
    timer.flush()
def get_comment_scores(link, sort, comment_ids, timer):
    """Return sort values for the given comments on a link.

    Arguments:

    * link -- the Link containing the comments; handed to
      CommentScoresByLink.get_scores and _get_qa_comment_scores.
    * sort -- a string naming the comment attribute to use for sort
      values.  "_date" maps every id to itself; "_qa" computes values for
      uncached comments through _get_qa_comment_scores.
    * comment_ids -- the comment ids that need a sort value (matched
      against integer ids decoded from the cached id36 keys).
    * timer -- its intermediate('sort') is called before returning a
      non-empty lookup.

    Returns a dictionary from comment id to sort value; empty when no
    comment ids are given.  For sorts other than "_date" it contains all
    scores cached for the link as well as the requested ids that were
    not cached.

    """
    from r2.lib.db import queries
    from r2.models import CommentScoresByLink

    if not comment_ids:
        # no ids requested means there is nothing to look up
        return {}

    if sort == "_date":
        # comment ids only ever grow, which makes them a usable stand-in
        # for the creation date
        scores_by_cid = dict((cid, cid) for cid in comment_ids)
    else:
        stored = CommentScoresByLink.get_scores(link, sort)

        # stored keys are id36 strings; convert to integer ids
        scores_by_cid = dict(
            (int(id36, 36), value) for id36, value in stored.iteritems())

        absent_ids = set(comment_ids) - set(scores_by_cid)
        if absent_ids:
            g.stats.simple_event('comment_tree_bad_sorter')

            absent = Comment._byID(absent_ids, data=True, return_dict=False)

            # put the uncached comments on the comments tree queue, which
            # will trigger adding their scores
            for comment in absent:
                queries.add_to_commentstree_q(comment)

            if sort == "_qa":
                qa_by_id36 = _get_qa_comment_scores(link, absent)
                recovered = dict(
                    (int(id36, 36), value)
                    for id36, value in qa_by_id36.iteritems())
            else:
                recovered = dict(
                    (comment._id, getattr(comment, sort))
                    for comment in absent)

            scores_by_cid.update(recovered)

    timer.intermediate('sort')
    return scores_by_cid