def add_comments(comments):
    """Add comments to the CommentTree and update scores.

    The comments are grouped by link and every link is handled on its own:
    the sort scores are written first, then the comments are added to the
    link's CommentTree and the comment orders are rewritten inside the
    tree's mutation context.

    When a link's tree is inconsistent the failure is logged and counted
    and processing moves on to the next link, so one bad tree does not
    discard the comments queued for the other links in the batch.

    Arguments:

    * comments -- passed through tup(); presumably a single Comment or a
      sequence of Comments.

    """
    from r2.models.builder import write_comment_orders

    comments = tup(comments)
    link_ids = [comment.link_id for comment in comments]
    links = Link._byID(link_ids, data=True)

    comments_by_link_id = defaultdict(list)
    for comment in comments:
        comments_by_link_id[comment.link_id].append(comment)

    for link_id, link_comments in comments_by_link_id.iteritems():
        link = links[link_id]

        timer = g.stats.get_timer(
            'comment_tree.add.%s' % link.comment_tree_version)
        timer.start()

        # write scores before CommentTree because the scores must exist for all
        # comments in the tree
        for sort in ("_controversy", "_confidence", "_score"):
            scores_by_comment = {
                comment._id36: getattr(comment, sort)
                for comment in link_comments
            }
            CommentScoresByLink.set_scores(link, sort, scores_by_comment)

        scores_by_comment = _get_qa_comment_scores(link, link_comments)
        CommentScoresByLink.set_scores(link, "_qa", scores_by_comment)
        timer.intermediate('scores')

        with CommentTree.mutation_context(link, timeout=180):
            try:
                timer.intermediate('lock')
                comment_tree = CommentTree.by_link(link, timer)
                timer.intermediate('get')
                comment_tree.add_comments(link_comments)
                timer.intermediate('update')
            except InconsistentCommentTreeError:
                # failed to add a comment to the CommentTree because its parent
                # is missing from the tree. this comment will be lost forever
                # unless a rebuild is performed.
                comment_ids = [comment._id for comment in link_comments]
                g.log.error(
                    "comment_tree_inconsistent: %s %s" % (link, comment_ids))
                g.stats.simple_event('comment_tree_inconsistent')
                # give up on this link only; a `return` here would also drop
                # the comments of every link not yet processed
                continue

            # do this under the same lock because we want to ensure we are using
            # the same version of the CommentTree as was just written
            write_comment_orders(link)
            timer.intermediate('write_order')

        timer.stop()
def add_comments(comments):
    """Add comments to the CommentTree and update scores.

    Comments are bucketed by their link_id. For each link the sort scores
    are stored, the comments are handed to CommentTree.add_comments and the
    comment orders are rewritten, with a timer checkpoint after each step.

    Arguments:

    * comments -- passed through tup(); presumably a single Comment or a
      sequence of Comments.

    """
    from r2.models.builder import write_comment_orders

    links = Link._byID([c.link_id for c in tup(comments)], data=True)

    grouped = defaultdict(list)
    for c in tup(comments):
        grouped[c.link_id].append(c)

    attribute_sorts = ("_controversy", "_confidence", "_score")

    for link_id, link_comments in grouped.iteritems():
        link = links[link_id]

        timer = g.stats.get_timer("comment_tree.add.1")
        timer.start()

        # scores go in ahead of the tree update: every comment in the tree
        # must already have its scores
        for sort in attribute_sorts:
            attr_scores = dict(
                (c._id36, getattr(c, sort)) for c in link_comments)
            CommentScoresByLink.set_scores(link, sort, attr_scores)

        qa_scores = _get_qa_comment_scores(link, link_comments)
        CommentScoresByLink.set_scores(link, "_qa", qa_scores)
        timer.intermediate("scores")

        CommentTree.add_comments(link, link_comments)
        timer.intermediate("update")

        write_comment_orders(link)
        timer.intermediate("write_order")

        timer.stop()
def get_comment_scores(link, sort, comment_ids, timer):
    """Retrieve cached sort values for the comments on a post.

    Arguments:

    * link -- the Link containing the comments.
    * sort -- a string indicating the attribute on the comments to use for
      generating sort values. "_date" is answered from the ids themselves.
    * comment_ids -- integer ids of the comments that need a score.
    * timer -- timer on which the 'sort' intermediate is recorded.

    Returns a dictionary from comment id to a numeric sort value. Scores
    missing from CommentScoresByLink are recalculated from the comments and
    written back at most once per minute for a given link and sort.

    """
    from r2.lib.db import queries
    from r2.models import CommentScoresByLink

    if not comment_ids:
        # no comments means no scores
        return {}

    if sort == "_date":
        # comment ids are monotonically increasing, so an id can stand in
        # for the creation date of its comment
        scores_by_id = dict((cid, cid) for cid in comment_ids)
    else:
        # scores are stored under id36 keys, but there are still bits of
        # the code that want to deal in integer IDs
        stored_scores = CommentScoresByLink.get_scores(link, sort)
        scores_by_id = dict(
            (int(id36, 36), score)
            for id36, score in stored_scores.iteritems()
        )

    missing_ids = set(comment_ids) - set(scores_by_id.keys())
    if missing_ids:
        # some scores were missing from CommentScoresByLink--lookup the
        # comments and calculate the scores.
        g.stats.simple_event("comment_tree_bad_sorter")
        missing_comments = Comment._byID(
            missing_ids, data=True, return_dict=False)

        if sort == "_qa":
            missing_by_id36 = _get_qa_comment_scores(link, missing_comments)
        else:
            missing_by_id36 = dict(
                (comment._id36, getattr(comment, sort))
                for comment in missing_comments
            )

        # either way the keys are id36 strings; convert once for the caller
        missing_by_id = dict(
            (int(id36, 36), score)
            for id36, score in missing_by_id36.iteritems()
        )

        # up to once per minute write the scores to limit writes but
        # eventually return us to the correct state.
        if not g.disallow_db_writes:
            write_key = "lock:score_{link}{sort}".format(
                link=link._id36, sort=sort)
            if g.lock_cache.add(write_key, "", time=60):
                CommentScoresByLink.set_scores(link, sort, missing_by_id36)

        scores_by_id.update(missing_by_id)

    timer.intermediate("sort")
    return scores_by_id
def add_comments(comments):
    """Add comments to the CommentTree and update scores.

    Each link referenced by the comments is processed separately: its sort
    scores are written, its comments are added to the CommentTree and the
    comment orders are rewritten inside the tree's mutation context.

    An InconsistentCommentTreeError for one link is logged and counted, and
    the remaining links are still processed instead of being dropped.

    Arguments:

    * comments -- passed through tup(); presumably a single Comment or a
      sequence of Comments.

    """
    from r2.models.builder import write_comment_orders

    comments = tup(comments)
    links = Link._byID(
        [comment.link_id for comment in comments], data=True)

    comments_by_link_id = defaultdict(list)
    for comment in comments:
        comments_by_link_id[comment.link_id].append(comment)

    for link_id, link_comments in comments_by_link_id.iteritems():
        link = links[link_id]

        timer = g.stats.get_timer(
            'comment_tree.add.%s' % link.comment_tree_version)
        timer.start()

        # write scores before CommentTree because the scores must exist for all
        # comments in the tree
        for sort in ("_controversy", "_confidence", "_score"):
            scores_by_comment = {
                comment._id36: getattr(comment, sort)
                for comment in link_comments
            }
            CommentScoresByLink.set_scores(link, sort, scores_by_comment)

        scores_by_comment = _get_qa_comment_scores(link, link_comments)
        CommentScoresByLink.set_scores(link, "_qa", scores_by_comment)
        timer.intermediate('scores')

        with CommentTree.mutation_context(link, timeout=180):
            try:
                timer.intermediate('lock')
                comment_tree = CommentTree.by_link(link, timer)
                timer.intermediate('get')
                comment_tree.add_comments(link_comments)
                timer.intermediate('update')
            except InconsistentCommentTreeError:
                # failed to add a comment to the CommentTree because its parent
                # is missing from the tree. this comment will be lost forever
                # unless a rebuild is performed.
                comment_ids = [comment._id for comment in link_comments]
                g.log.error(
                    "comment_tree_inconsistent: %s %s" % (link, comment_ids))
                g.stats.simple_event('comment_tree_inconsistent')
                # skip the rest of this link's work but keep going: returning
                # here would abandon every link still waiting in the batch
                continue

            # do this under the same lock because we want to ensure we are using
            # the same version of the CommentTree as was just written
            write_comment_orders(link)
            timer.intermediate('write_order')

        timer.stop()
def add_comments(comments):
    """Add comments to the CommentTree and update scores.

    Groups the comments by link, then for every link writes the sort
    scores, adds the comments through CommentTree.add_comments and rewrites
    the comment orders. Timer intermediates mark each of the three steps.

    Arguments:

    * comments -- passed through tup(); presumably a single Comment or a
      sequence of Comments.

    """
    from r2.models.builder import write_comment_orders

    link_ids = [item.link_id for item in tup(comments)]
    links_by_id = Link._byID(link_ids, data=True)

    per_link = defaultdict(list)
    for item in tup(comments):
        per_link[item.link_id].append(item)

    for link_id in per_link:
        link_comments = per_link[link_id]
        link = links_by_id[link_id]

        timer = g.stats.get_timer('comment_tree.add.1')
        timer.start()

        # the tree requires scores to exist for all of its comments, so the
        # scores are written before the tree is touched
        for sort in ("_controversy", "_confidence", "_score"):
            sort_scores = {}
            for item in link_comments:
                sort_scores[item._id36] = getattr(item, sort)
            CommentScoresByLink.set_scores(link, sort, sort_scores)

        CommentScoresByLink.set_scores(
            link, "_qa", _get_qa_comment_scores(link, link_comments))
        timer.intermediate('scores')

        CommentTree.add_comments(link, link_comments)
        timer.intermediate('update')

        write_comment_orders(link)
        timer.intermediate('write_order')

        timer.stop()
def add_comments(comments):
    """Add comments to the CommentTree and update scores.

    Comments are grouped by link. For each link the sort scores of all its
    comments are written, then, inside the tree's mutation context,
    non-deleted comments are added to the CommentTree and deleted ones are
    removed from it. If the tree is inconsistent it is rebuilt and the link
    is scheduled for search reindexing. The comment orders are rewritten
    in either case.

    Arguments:

    * comments -- passed through tup(); presumably a single Comment or a
      sequence of Comments.

    """
    from r2.models.builder import write_comment_orders

    link_ids = [c.link_id for c in tup(comments)]
    links = Link._byID(link_ids, data=True)

    grouped = defaultdict(list)
    for c in tup(comments):
        grouped[c.link_id].append(c)

    for link_id, link_comments in grouped.iteritems():
        link = links[link_id]

        # split this link's comments into additions and deletions
        new_comments = []
        deleted_comments = []
        for c in link_comments:
            if c._deleted:
                deleted_comments.append(c)
            else:
                new_comments.append(c)

        timer_name = 'comment_tree.add.%s' % link.comment_tree_version
        timer = g.stats.get_timer(timer_name)
        timer.start()

        # write scores before CommentTree because the scores must exist for all
        # comments in the tree
        for sort in ("_controversy", "_confidence", "_score"):
            attr_scores = dict(
                (c._id36, getattr(c, sort)) for c in link_comments)
            CommentScoresByLink.set_scores(link, sort, attr_scores)

        qa_scores = _get_qa_comment_scores(link, link_comments)
        CommentScoresByLink.set_scores(link, "_qa", qa_scores)
        timer.intermediate('scores')

        with CommentTree.mutation_context(link, timeout=180):
            try:
                timer.intermediate('lock')
                tree = CommentTree.by_link(link, timer)
                timer.intermediate('get')

                if new_comments:
                    tree.add_comments(new_comments)

                for removed in deleted_comments:
                    tree.delete_comment(removed, link)

                timer.intermediate('update')
            except InconsistentCommentTreeError:
                # this exception occurs when we add a comment to the tree but
                # its parent isn't in the tree yet, need to rebuild the tree
                # from scratch
                comment_ids = [c._id for c in link_comments]
                g.log.exception(
                    'add_comments_nolock failed for link %s %s, recomputing',
                    link_id, comment_ids)

                tree = CommentTree.rebuild(link)
                timer.intermediate('rebuild')

                # the tree rebuild updated the link's comment count, so
                # schedule it for search reindexing
                link.update_search_index()
                timer.intermediate('update_search_index')
                g.stats.simple_event('comment_tree_inconsistent')

            # do this under the same lock because we want to ensure we are using
            # the same version of the CommentTree as was just written
            write_comment_orders(link)
            timer.intermediate('write_order')

        timer.stop()
def get_comment_scores(link, sort, comment_ids, timer):
    """Retrieve cached sort values for the comments on a post.

    Arguments:

    * link -- the Link containing the comments.
    * sort -- a string indicating the attribute on the comments to use for
      generating sort values.
    * comment_ids -- integer ids of the comments that need a score.
    * timer -- timer on which the 'sort' intermediate is recorded.

    Returns a dictionary from comment id to a numeric sort value. An empty
    comment_ids yields an empty dictionary straight away.

    """
    from r2.lib.db import queries
    from r2.models import CommentScoresByLink

    if not comment_ids:
        # no comments means no scores
        return {}

    if sort != "_date":
        # we store these id36ed, but there are still bits of the code that
        # want to deal in integer IDs
        id36_scores = CommentScoresByLink.get_scores(link, sort)
        scores_by_id = {
            int(key, 36): value
            for key, value in id36_scores.iteritems()
        }
    else:
        # comment ids are monotonically increasing, so we can use them as a
        # substitute for creation date
        scores_by_id = {cid: cid for cid in comment_ids}

    scores_needed = set(comment_ids).difference(scores_by_id.keys())
    if not scores_needed:
        # everything requested was already available
        timer.intermediate('sort')
        return scores_by_id

    # some scores were missing from CommentScoresByLink--lookup the
    # comments and calculate the scores.
    g.stats.simple_event('comment_tree_bad_sorter')
    missing_comments = Comment._byID(
        scores_needed, data=True, return_dict=False)

    if sort == "_qa":
        recalculated_id36 = _get_qa_comment_scores(link, missing_comments)
    else:
        recalculated_id36 = {
            comment._id36: getattr(comment, sort)
            for comment in missing_comments
        }

    recalculated = {
        int(key, 36): value
        for key, value in recalculated_id36.iteritems()
    }

    # up to once per minute write the scores to limit writes but
    # eventually return us to the correct state.
    if not g.disallow_db_writes:
        write_key = "lock:score_{link}{sort}".format(
            link=link._id36,
            sort=sort,
        )
        should_write = g.lock_cache.add(write_key, "", time=60)
        if should_write:
            CommentScoresByLink.set_scores(link, sort, recalculated_id36)

    scores_by_id.update(recalculated)

    timer.intermediate('sort')
    return scores_by_id
def write_comment_scores(link, comments):
    """Calculate and store the scores of comments for every sort.

    For each of the sorts "_upvotes", "_controversy", "_confidence",
    "_score" and "_qa", the scores come from calculate_comment_scores and
    are saved with CommentScoresByLink.set_scores.

    Arguments:

    * link -- the link the scores are stored under.
    * comments -- the comments to score.

    """
    all_sorts = ("_upvotes", "_controversy", "_confidence", "_score", "_qa")
    for sort_name in all_sorts:
        CommentScoresByLink.set_scores(
            link,
            sort_name,
            calculate_comment_scores(link, sort_name, comments),
        )
def add_comments(comments):
    """Add comments to the CommentTree and update scores.

    Works link by link. The sort scores of the link's comments are written
    first. Then, inside the tree's mutation context, non-deleted comments
    are added to the tree and deleted comments are removed from it; an
    inconsistent tree is rebuilt and the link is scheduled for search
    reindexing. Finally the comment orders are rewritten, with the timer
    passed along to write_comment_orders.

    Arguments:

    * comments -- passed through tup(); presumably a single Comment or a
      sequence of Comments.

    """
    from r2.models.builder import write_comment_orders

    links_by_id = Link._byID(
        [entry.link_id for entry in tup(comments)], data=True)

    per_link = defaultdict(list)
    for entry in tup(comments):
        per_link[entry.link_id].append(entry)

    for link_id, link_comments in per_link.iteritems():
        link = links_by_id[link_id]

        new_comments = [
            entry for entry in link_comments if not entry._deleted]
        deleted_comments = [
            entry for entry in link_comments if entry._deleted]

        timer = g.stats.get_timer(
            'comment_tree.add.%s' % link.comment_tree_version)
        timer.start()

        # the tree needs scores for all of its comments, so they are
        # written before the tree itself is updated
        for sort in ("_controversy", "_confidence", "_score"):
            sort_scores = {}
            for entry in link_comments:
                sort_scores[entry._id36] = getattr(entry, sort)
            CommentScoresByLink.set_scores(link, sort, sort_scores)

        CommentScoresByLink.set_scores(
            link, "_qa", _get_qa_comment_scores(link, link_comments))
        timer.intermediate('scores')

        with CommentTree.mutation_context(link, timeout=180):
            try:
                timer.intermediate('lock')
                comment_tree = CommentTree.by_link(link, timer)
                timer.intermediate('get')

                if new_comments:
                    comment_tree.add_comments(new_comments)

                for entry in deleted_comments:
                    comment_tree.delete_comment(entry, link)

                timer.intermediate('update')
            except InconsistentCommentTreeError:
                # raised when a comment is added to the tree before its
                # parent is in the tree; rebuild the tree from scratch
                failed_ids = [entry._id for entry in link_comments]
                g.log.exception(
                    'add_comments_nolock failed for link %s %s, recomputing',
                    link_id, failed_ids)

                comment_tree = CommentTree.rebuild(link)
                timer.intermediate('rebuild')

                # the tree rebuild updated the link's comment count, so
                # schedule it for search reindexing
                link.update_search_index()
                timer.intermediate('update_search_index')
                g.stats.simple_event('comment_tree_inconsistent')

            # do this under the same lock because we want to ensure we are using
            # the same version of the CommentTree as was just written
            write_comment_orders(link, timer)
            timer.intermediate('write_order')

        timer.stop()
def write_comment_scores(link, comments):
    """Calculate and store the scores of comments for every sort.

    Iterates over the sorts "_controversy", "_confidence", "_score" and
    "_qa"; for each one the result of calculate_comment_scores is saved
    through CommentScoresByLink.set_scores.

    Arguments:

    * link -- the link the scores are stored under.
    * comments -- the comments to score.

    """
    sort_names = ("_controversy", "_confidence", "_score", "_qa")
    for sort_name in sort_names:
        sort_scores = calculate_comment_scores(link, sort_name, comments)
        CommentScoresByLink.set_scores(link, sort_name, sort_scores)