def add_comments(comments):
    """Add comments to the CommentTree and update scores.

    Comments are grouped by their link. For each link the sort scores are
    written first, then the comments are added to the link's CommentTree and
    the comment orders are rewritten while the tree's mutation lock is held.

    If adding to a link's tree raises InconsistentCommentTreeError, the
    failure is logged and counted and that link's comment orders are not
    rewritten. Processing then continues with the remaining links instead of
    abandoning them, and the link's timer is always stopped.

    Arguments:

    * comments -- a single comment or an iterable of comments (normalized
      with tup()).

    """
    from r2.models.builder import write_comment_orders

    link_ids = [comment.link_id for comment in tup(comments)]
    links = Link._byID(link_ids, data=True)

    comments = tup(comments)
    comments_by_link_id = defaultdict(list)
    for comment in comments:
        comments_by_link_id[comment.link_id].append(comment)

    for link_id, link_comments in comments_by_link_id.iteritems():
        link = links[link_id]

        timer = g.stats.get_timer(
            'comment_tree.add.%s' % link.comment_tree_version)
        timer.start()

        # write scores before CommentTree because the scores must exist for all
        # comments in the tree
        for sort in ("_controversy", "_confidence", "_score"):
            scores_by_comment = {
                comment._id36: getattr(comment, sort)
                for comment in link_comments
            }
            CommentScoresByLink.set_scores(link, sort, scores_by_comment)

        scores_by_comment = _get_qa_comment_scores(link, link_comments)
        CommentScoresByLink.set_scores(link, "_qa", scores_by_comment)
        timer.intermediate('scores')

        with CommentTree.mutation_context(link, timeout=180):
            try:
                timer.intermediate('lock')
                comment_tree = CommentTree.by_link(link, timer)
                timer.intermediate('get')
                comment_tree.add_comments(link_comments)
                timer.intermediate('update')
            except InconsistentCommentTreeError:
                # failed to add a comment to the CommentTree because its parent
                # is missing from the tree. this comment will be lost forever
                # unless a rebuild is performed. only this link is skipped; the
                # other links in the batch are still processed.
                comment_ids = [comment._id for comment in link_comments]
                g.log.error(
                    "comment_tree_inconsistent: %s %s" % (link, comment_ids))
                g.stats.simple_event('comment_tree_inconsistent')
            else:
                # do this under the same lock because we want to ensure we are
                # using the same version of the CommentTree as was just written
                write_comment_orders(link)
                timer.intermediate('write_order')

        timer.stop()
def add_comments(comments):
    """Add comments to the CommentTree and update scores.

    The comments are bucketed by link. For every link the per-sort scores
    are stored, the comments are handed to CommentTree.add_comments, and the
    comment orders are rewritten, with timer checkpoints between the steps.
    """
    from r2.models.builder import write_comment_orders

    links_by_id = Link._byID(
        [item.link_id for item in tup(comments)], data=True)

    comments = tup(comments)
    batches = defaultdict(list)
    for item in comments:
        batches[item.link_id].append(item)

    attribute_sorts = ("_controversy", "_confidence", "_score")

    for link_id, batch in batches.iteritems():
        link = links_by_id[link_id]
        timer = g.stats.get_timer("comment_tree.add.1")
        timer.start()

        # the scores have to be in place for every comment in the tree, so
        # they are written ahead of the tree update
        for sort in attribute_sorts:
            sort_scores = {}
            for item in batch:
                sort_scores[item._id36] = getattr(item, sort)
            CommentScoresByLink.set_scores(link, sort, sort_scores)

        qa_scores = _get_qa_comment_scores(link, batch)
        CommentScoresByLink.set_scores(link, "_qa", qa_scores)
        timer.intermediate("scores")

        CommentTree.add_comments(link, batch)
        timer.intermediate("update")

        write_comment_orders(link)
        timer.intermediate("write_order")

        timer.stop()
def add_comments(comments):
    """Add comments to the CommentTree and update scores.

    Comments are bucketed by link; each link then gets its scores written,
    its CommentTree extended and its comment orders rewritten, with a timer
    checkpoint after each step.
    """
    from r2.models.builder import write_comment_orders

    lookup = Link._byID([item.link_id for item in tup(comments)])

    comments = tup(comments)
    batches = defaultdict(list)
    for item in comments:
        batches[item.link_id].append(item)

    for link_id, batch in batches.iteritems():
        link = lookup[link_id]

        timer = g.stats.get_timer('comment_tree.add.1')
        timer.start()

        # each step is followed by the timer checkpoint that names it
        write_comment_scores(link, batch)
        timer.intermediate('scores')

        CommentTree.add_comments(link, batch)
        timer.intermediate('update')

        write_comment_orders(link)
        timer.intermediate('write_order')

        timer.stop()
def rebuild_comment_tree(link, timer):
    """Rebuild link's CommentTree under its mutation lock and return it.

    Arguments:

    * link -- the Link whose tree is rebuilt.
    * timer -- a timer object; an intermediate checkpoint is recorded after
      the lock is taken, after the rebuild and after the search index update.

    """
    with CommentTree.mutation_context(link, timeout=180):
        timer.intermediate('lock')

        rebuilt_tree = CommentTree.rebuild(link)
        timer.intermediate('rebuild')

        # rebuilding changed the link's comment count, so the link has to be
        # queued for search reindexing
        link.update_search_index()
        timer.intermediate('update_search_index')

        return rebuilt_tree
def add_comments(comments):
    """Add comments to the CommentTree and update scores.

    Comments are grouped by their link. For each link the sort scores are
    written first, then the comments are added to the link's CommentTree and
    the comment orders are rewritten while the tree's mutation lock is held.

    When a link's tree raises InconsistentCommentTreeError the failure is
    logged and counted and the comment orders for that link are left alone.
    The remaining links are still processed (an early return here used to
    silently drop them) and the link's timer is always stopped.

    Arguments:

    * comments -- a single comment or an iterable of comments (normalized
      with tup()).

    """
    from r2.models.builder import write_comment_orders

    link_ids = [comment.link_id for comment in tup(comments)]
    links = Link._byID(link_ids, data=True)

    comments = tup(comments)
    comments_by_link_id = defaultdict(list)
    for comment in comments:
        comments_by_link_id[comment.link_id].append(comment)

    for link_id, link_comments in comments_by_link_id.iteritems():
        link = links[link_id]

        timer = g.stats.get_timer('comment_tree.add.%s' % link.comment_tree_version)
        timer.start()

        # write scores before CommentTree because the scores must exist for all
        # comments in the tree
        for sort in ("_controversy", "_confidence", "_score"):
            scores_by_comment = {
                comment._id36: getattr(comment, sort)
                for comment in link_comments
            }
            CommentScoresByLink.set_scores(link, sort, scores_by_comment)

        scores_by_comment = _get_qa_comment_scores(link, link_comments)
        CommentScoresByLink.set_scores(link, "_qa", scores_by_comment)
        timer.intermediate('scores')

        with CommentTree.mutation_context(link, timeout=180):
            tree_updated = False
            try:
                timer.intermediate('lock')
                comment_tree = CommentTree.by_link(link, timer)
                timer.intermediate('get')
                comment_tree.add_comments(link_comments)
                timer.intermediate('update')
                tree_updated = True
            except InconsistentCommentTreeError:
                # failed to add a comment to the CommentTree because its parent
                # is missing from the tree. this comment will be lost forever
                # unless a rebuild is performed.
                comment_ids = [comment._id for comment in link_comments]
                g.log.error("comment_tree_inconsistent: %s %s" % (link, comment_ids))
                g.stats.simple_event('comment_tree_inconsistent')

            if tree_updated:
                # do this under the same lock because we want to ensure we are
                # using the same version of the CommentTree as was just written
                write_comment_orders(link)
                timer.intermediate('write_order')

        timer.stop()
def add_comments(comments):
    """Apply added and deleted comments to each affected link's CommentTree.

    Comments are grouped by link. Under the link's mutation lock the
    non-deleted comments are added to the tree and the deleted ones are
    removed from it. If the tree reports an inconsistency it is rebuilt.
    Afterwards the votes of the link's comments are refreshed.
    """
    links_by_id = Link._byID(
        [item.link_id for item in tup(comments)], data=True)
    comments = tup(comments)

    per_link = {}
    for item in comments:
        if item.link_id not in per_link:
            per_link[item.link_id] = []
        per_link[item.link_id].append(item)

    for link_id, batch in per_link.iteritems():
        link = links_by_id[link_id]
        live = [item for item in batch if not item._deleted]

        timer = g.stats.get_timer(
            'comment_tree.add.%s' % link.comment_tree_version)
        timer.start()

        try:
            with CommentTree.mutation_context(link, timeout=30):
                timer.intermediate('lock')
                tree = get_comment_tree(link, timer=timer)
                timer.intermediate('get')

                if live:
                    tree.add_comments(live)

                # deletions are picked out lazily, after the additions ran
                for item in batch:
                    if item._deleted:
                        tree.delete_comment(item, link)

                timer.intermediate('update')
        except InconsistentCommentTreeError:
            failed_ids = [item._id for item in batch]
            g.log.exception(
                'add_comments_nolock failed for link %s %s, recomputing',
                link_id, failed_ids)
            rebuild_comment_tree(link, timer=timer)

        timer.stop()
        update_comment_votes(batch)
def add_comments(comments):
    """Apply added and deleted comments to each affected link's CommentTree.

    Comments are grouped by link. Under the link's mutation lock the
    non-deleted comments are added to the tree and the deleted ones are
    removed from it. If anything goes wrong the failure is logged and the
    tree is recomputed from scratch. Afterwards the votes of the link's
    comments are refreshed.

    Only Exception subclasses trigger the recompute; KeyboardInterrupt and
    SystemExit propagate to the caller instead of being swallowed.

    Arguments:

    * comments -- a single comment or an iterable of comments (normalized
      with tup()).

    """
    links = Link._byID([com.link_id for com in tup(comments)], data=True)

    comments = tup(comments)

    link_map = {}
    for com in comments:
        link_map.setdefault(com.link_id, []).append(com)

    for link_id, coms in link_map.iteritems():
        link = links[link_id]
        # named new_comments so the local does not shadow this function
        new_comments = [comment for comment in coms if not comment._deleted]
        delete_comments = (comment for comment in coms if comment._deleted)
        timer = g.stats.get_timer(
            "comment_tree.add.%s" % link.comment_tree_version)
        timer.start()

        try:
            with CommentTree.mutation_context(link):
                timer.intermediate("lock")
                cache = get_comment_tree(link, timer=timer)
                timer.intermediate("get")
                if new_comments:
                    cache.add_comments(new_comments)
                for comment in delete_comments:
                    cache.delete_comment(comment, link)
                timer.intermediate("update")
        except Exception:
            g.log.exception(
                "add_comments_nolock failed for link %s, recomputing tree",
                link_id)

            # calculate it from scratch
            get_comment_tree(link, _update=True, timer=timer)

        timer.stop()
        update_comment_votes(coms)
def get_comment_tree(link, _update=False, timer=None):
    """Return link's CommentTree, rebuilding it when necessary.

    The stored tree is returned as-is when it is truthy and _update is not
    set. Otherwise the tree is rebuilt under the mutation lock and the link
    is scheduled for search reindexing.

    Arguments:

    * link -- the Link whose tree is wanted.
    * _update -- force a rebuild even if a tree was loaded.
    * timer -- optional timer; a SimpleSillyStub is used when omitted.

    """
    timer = SimpleSillyStub() if timer is None else timer

    loaded = CommentTree.by_link(link)
    timer.intermediate('load')

    must_rebuild = not loaded or _update
    if not must_rebuild:
        return loaded

    with CommentTree.mutation_context(link, timeout=180):
        timer.intermediate('lock')
        rebuilt = CommentTree.rebuild(link)
        timer.intermediate('rebuild')

        # the rebuild changed the link's comment count, so queue the link
        # for search reindexing
        link.update_search_index()
        timer.intermediate('update_search_index')
        return rebuilt
def add_comments(comments):
    """Add comments to each affected link's CommentTree.

    Comments are grouped by link and added to the link's tree under its
    mutation lock. If anything goes wrong the failure is logged and the tree
    is recomputed from scratch. Afterwards the votes of the link's comments
    are refreshed.

    Only Exception subclasses trigger the recompute; KeyboardInterrupt and
    SystemExit propagate to the caller instead of being swallowed.

    Arguments:

    * comments -- a single comment or an iterable of comments (normalized
      with tup()).

    """
    links = Link._byID([com.link_id for com in tup(comments)], data=True)

    comments = tup(comments)

    link_map = {}
    for com in comments:
        link_map.setdefault(com.link_id, []).append(com)

    for link_id, coms in link_map.iteritems():
        link = links[link_id]
        timer = g.stats.get_timer(
            'comment_tree.add.%s' % link.comment_tree_version)
        timer.start()

        try:
            with CommentTree.mutation_context(link):
                timer.intermediate('lock')
                cache = get_comment_tree(link, timer=timer)
                timer.intermediate('get')
                cache.add_comments(coms)
                timer.intermediate('update')
        except Exception:
            g.log.exception(
                'add_comments_nolock failed for link %s, recomputing tree',
                link_id)

            # calculate it from scratch
            get_comment_tree(link, _update=True, timer=timer)

        timer.stop()
        update_comment_votes(coms)
def activate_names_requested_in(link):
    """Activate names requested in link's top-level comments.

    Top-level comments are visited from most to least upvoted. Spam and
    deleted comments are skipped, and a comment qualifies when its stripped
    body matches valid_name_re. The first link.revenue_bucket qualifying
    names are activated and recorded on the link together with its flair,
    and the link is committed.
    """
    comment_tree = CommentTree.by_link(link)
    candidates = []

    if comment_tree.tree:
        top_level_cids = comment_tree.tree[None]
        batches = (
            Comment._byID(chunk, return_dict=False, data=True)
            for chunk in in_chunks(top_level_cids)
        )
        by_upvotes = sorted(
            chain.from_iterable(batches),
            key=lambda c: c._ups,
            reverse=True,
        )

        for comment in by_upvotes:
            if not (comment._spam or comment._deleted):
                found = valid_name_re.search(comment.body.strip())
                if found:
                    candidates.append((comment, found.group(1)))

    # one name gets activated for each 100% of the revenue goal that was met
    names = candidates[:link.revenue_bucket]
    activate_names(link, names)

    activated_names = [name for _, name in names]
    link.server_names = activated_names
    if names:
        link.flair_text = ", ".join(activated_names)
    else:
        link.flair_text = "/dev/null"
    link.flair_css_class = "goal-bucket-%d" % link.revenue_bucket
    link._commit()
def add_comments(comments):
    """Apply added and deleted comments to each affected link's CommentTree.

    Comments are grouped by link. Under the link's mutation lock the
    non-deleted comments are added to the tree and the deleted ones are
    removed from it. If the tree reports an inconsistency it is rebuilt and
    a 'comment_tree_inconsistent' event is counted. Afterwards the votes of
    the link's comments are refreshed.
    """
    links_by_id = Link._byID(
        [item.link_id for item in tup(comments)], data=True)
    comments = tup(comments)

    per_link = {}
    for item in comments:
        if item.link_id not in per_link:
            per_link[item.link_id] = []
        per_link[item.link_id].append(item)

    for link_id, batch in per_link.iteritems():
        link = links_by_id[link_id]
        live = [item for item in batch if not item._deleted]

        timer = g.stats.get_timer(
            'comment_tree.add.%s' % link.comment_tree_version)
        timer.start()

        try:
            with CommentTree.mutation_context(link, timeout=30):
                timer.intermediate('lock')
                tree = get_comment_tree(link, timer=timer)
                timer.intermediate('get')

                if live:
                    tree.add_comments(live)

                # deletions are picked out lazily, after the additions ran
                for item in batch:
                    if item._deleted:
                        tree.delete_comment(item, link)

                timer.intermediate('update')
        except InconsistentCommentTreeError:
            failed_ids = [item._id for item in batch]
            g.log.exception(
                'add_comments_nolock failed for link %s %s, recomputing',
                link_id, failed_ids)
            rebuild_comment_tree(link, timer=timer)
            g.stats.simple_event('comment_tree_inconsistent')

        timer.stop()
        update_comment_votes(batch)
def get_comment_tree(link, _update=False, timer=None):
    """Return link's CommentTree, rebuilding it when necessary.

    The stored tree is returned as-is when it is truthy and _update is not
    set. Otherwise the tree is rebuilt under the mutation lock and the link
    is passed to changed() so that it gets reindexed for search.

    Arguments:

    * link -- the Link whose tree is wanted.
    * _update -- force a rebuild even if a tree was loaded.
    * timer -- optional timer; a SimpleSillyStub is used when omitted.

    """
    timer = SimpleSillyStub() if timer is None else timer

    loaded = CommentTree.by_link(link)
    timer.intermediate("load")

    must_rebuild = not loaded or _update
    if not must_rebuild:
        return loaded

    with CommentTree.mutation_context(link, timeout=180):
        timer.intermediate("lock")
        rebuilt = CommentTree.rebuild(link)
        timer.intermediate("rebuild")

        # the rebuild changed the link's comment count, so queue the link
        # for search reindexing
        from r2.lib.db.queries import changed
        changed([link])
        timer.intermediate("changed")
        return rebuilt
def add_comments(comments):
    """Add comments to the CommentTree and update scores.

    The comments are bucketed by link. For every link the per-sort scores
    are stored, the comments are handed to CommentTree.add_comments, and the
    comment orders are rewritten, with timer checkpoints between the steps.
    """
    from r2.models.builder import write_comment_orders

    links_by_id = Link._byID(
        [item.link_id for item in tup(comments)], data=True)

    comments = tup(comments)
    batches = defaultdict(list)
    for item in comments:
        batches[item.link_id].append(item)

    attribute_sorts = ("_controversy", "_confidence", "_score")

    for link_id, batch in batches.iteritems():
        link = links_by_id[link_id]
        timer = g.stats.get_timer('comment_tree.add.1')
        timer.start()

        # the scores have to be in place for every comment in the tree, so
        # they are written ahead of the tree update
        for sort in attribute_sorts:
            sort_scores = {}
            for item in batch:
                sort_scores[item._id36] = getattr(item, sort)
            CommentScoresByLink.set_scores(link, sort, sort_scores)

        qa_scores = _get_qa_comment_scores(link, batch)
        CommentScoresByLink.set_scores(link, "_qa", qa_scores)
        timer.intermediate('scores')

        CommentTree.add_comments(link, batch)
        timer.intermediate('update')

        write_comment_orders(link)
        timer.intermediate('write_order')

        timer.stop()
def calculate_comment_scores(link, sort, comments):
    """Return a dict of comment id36 -> score for the requested sort.

    For "_controversy", "_confidence" and "_score" the score is simply the
    attribute of that name on each comment. For "_qa" the scores are
    computed from the link's comment tree. Any other sort raises ValueError.
    """
    if sort == "_qa":
        cid_tree = CommentTree.by_link(link).tree
        return _calculate_qa_comment_scores(link, cid_tree, comments)

    if sort not in ("_controversy", "_confidence", "_score"):
        raise ValueError("unsupported comment sort %s" % sort)

    scores = {}
    for comment in comments:
        scores[comment._id36] = getattr(comment, sort)
    return scores
def calculate_comment_scores(link, sort, comments):
    """Return a dict of comment id36 -> score for the requested sort.

    For "_upvotes", "_controversy", "_confidence" and "_score" the score is
    simply the attribute of that name on each comment. For "_qa" the scores
    are computed from the link's comment tree. Any other sort raises
    ValueError.
    """
    if sort == "_qa":
        cid_tree = CommentTree.by_link(link).tree
        return _calculate_qa_comment_scores(link, cid_tree, comments)

    if sort not in ("_upvotes", "_controversy", "_confidence", "_score"):
        raise ValueError("unsupported comment sort %s" % sort)

    scores = {}
    for comment in comments:
        scores[comment._id36] = getattr(comment, sort)
    return scores
def delete_comment(comment):
    """Remove comment from its link's CommentTree and flag the link as changed.

    The removal and the changed() call both happen while the tree's mutation
    lock is held; a timer checkpoint is recorded after each step.
    """
    link = Link._byID(comment.link_id, data=True)

    timer_name = 'comment_tree.delete.%s' % link.comment_tree_version
    timer = g.stats.get_timer(timer_name)
    timer.start()

    with CommentTree.mutation_context(link):
        timer.intermediate('lock')

        tree = get_comment_tree(link)
        timer.intermediate('get')

        tree.delete_comment(comment, link)
        timer.intermediate('update')

        # imported here rather than at module level, as in the original
        from r2.lib.db.queries import changed
        changed([link])
        timer.intermediate('changed')

    timer.stop()
def make_comment_tree(link):
    """Build a CommentTree for link from the module-level TREE structure.

    The parent -> children mapping is filled by a preorder walk of TREE
    (key None holds the top-level ids). The remaining tree details are
    derived from that mapping with get_tree_details and calc_num_children.
    """
    tree = {}
    tree[None] = [root.id for root in TREE]

    # explicit-stack preorder walk; children are pushed reversed so that they
    # are visited in their original order
    pending = list(TREE)[::-1]
    while pending:
        node = pending.pop()
        kids = list(node.children)
        tree[node.id] = [kid.id for kid in kids]
        pending.extend(kids[::-1])

    cids, depth, parents = get_tree_details(tree)
    num_children = defaultdict(int, calc_num_children(tree))
    return CommentTree(link, cids, tree, depth, parents, num_children)
def make_comment_tree(link):
    """Build a CommentTree for link from the module-level TREE structure.

    A preorder walk of TREE collects the comment ids, each comment's depth
    (0 for top-level comments), the parent -> children mapping (key None
    holds the top-level ids) and the child -> parent mapping.
    """
    cids = []
    depth = {}
    parents = {}
    tree = {}
    tree[None] = [root.id for root in TREE]

    # explicit-stack preorder walk over (comment, parent) pairs; children are
    # pushed reversed so that they are visited in their original order
    pending = [(root, None) for root in TREE][::-1]
    while pending:
        node, parent = pending.pop()
        kids = list(node.children)

        cids.append(node.id)
        if parent is None:
            depth[node.id] = 0
        else:
            depth[node.id] = depth[parent.id] + 1
        tree[node.id] = [kid.id for kid in kids]
        if parent is None:
            parents[node.id] = None
        else:
            parents[node.id] = parent.id

        pending.extend((kid, node) for kid in kids[::-1])

    return CommentTree(link, cids, tree, depth, parents)
def _get_qa_comment_scores(link, comments):
    """Return a dict of comment_id36 -> qa score.

    Scores are computed for the given comments and also for the parent of
    every comment written by one of the link's responders, because a
    responder's reply changes the sort value of its parent.

    The caller's ``comments`` sequence is left untouched: the parents are
    appended to a private copy, so they do not leak into whatever the caller
    does with its list afterwards.

    Arguments:

    * link -- the Link the comments belong to; provides responder_ids.
    * comments -- the comments to score (any iterable of comments).

    """
    # Responder is usually the OP, but there could be support for adding
    # other answerers in the future.
    responder_ids = link.responder_ids

    # An OP response will change the sort value for its parent, so we need
    # to process the parent, too.
    parent_cids = [
        comment.parent_id
        for comment in comments
        if comment.author_id in responder_ids and comment.parent_id
    ]
    parent_comments = Comment._byID(parent_cids, data=True, return_dict=False)

    # copy before extending so the caller's list is not mutated
    to_score = list(comments)
    to_score.extend(parent_comments)

    comment_tree = CommentTree.by_link(link)
    cid_tree = comment_tree.tree

    # Fetch the comments in batch to avoid a bunch of separate calls down
    # the line.
    all_child_cids = []
    for comment in to_score:
        child_cids = cid_tree.get(comment._id, None)
        if child_cids:
            all_child_cids.extend(child_cids)
    all_child_comments = Comment._byID(all_child_cids, data=True)

    comment_sorter = {}
    for comment in to_score:
        child_cids = cid_tree.get(comment._id, ())
        child_comments = (all_child_comments[cid] for cid in child_cids)
        sort_value = comment._qa(child_comments, responder_ids)
        comment_sorter[comment._id36] = sort_value

    return comment_sorter
def add_comments(comments):
    """Add comments to the CommentTree and update scores.

    Comments are bucketed by link. For each link the sort scores are stored
    first. Then, under the tree's mutation lock, non-deleted comments are
    added to the tree and deleted ones are removed from it. If the tree
    turns out to be inconsistent it is rebuilt and the link is queued for
    search reindexing. The comment orders are rewritten under the same lock.
    """
    from r2.models.builder import write_comment_orders

    links_by_id = Link._byID(
        [item.link_id for item in tup(comments)], data=True)

    comments = tup(comments)
    batches = defaultdict(list)
    for item in comments:
        batches[item.link_id].append(item)

    for link_id, batch in batches.iteritems():
        link = links_by_id[link_id]

        # split the batch before any scores are calculated
        new_comments = []
        deleted_comments = []
        for item in batch:
            if item._deleted:
                deleted_comments.append(item)
            else:
                new_comments.append(item)

        timer = g.stats.get_timer(
            'comment_tree.add.%s' % link.comment_tree_version)
        timer.start()

        # the scores have to be in place for every comment in the tree, so
        # they are written ahead of the tree update
        for sort in ("_controversy", "_confidence", "_score"):
            sort_scores = {}
            for item in batch:
                sort_scores[item._id36] = getattr(item, sort)
            CommentScoresByLink.set_scores(link, sort, sort_scores)

        qa_scores = _get_qa_comment_scores(link, batch)
        CommentScoresByLink.set_scores(link, "_qa", qa_scores)
        timer.intermediate('scores')

        with CommentTree.mutation_context(link, timeout=180):
            try:
                timer.intermediate('lock')
                comment_tree = CommentTree.by_link(link, timer)
                timer.intermediate('get')

                if new_comments:
                    comment_tree.add_comments(new_comments)

                for item in deleted_comments:
                    comment_tree.delete_comment(item, link)

                timer.intermediate('update')
            except InconsistentCommentTreeError:
                # raised when a comment is added whose parent is not in the
                # tree yet; the tree then has to be rebuilt from scratch
                failed_ids = [item._id for item in batch]
                g.log.exception(
                    'add_comments_nolock failed for link %s %s, recomputing',
                    link_id, failed_ids)

                comment_tree = CommentTree.rebuild(link)
                timer.intermediate('rebuild')

                # the rebuild changed the link's comment count, so queue the
                # link for search reindexing
                link.update_search_index()
                timer.intermediate('update_search_index')
                g.stats.simple_event('comment_tree_inconsistent')

            # still under the lock, so the orders are based on the same
            # version of the CommentTree that was just written
            write_comment_orders(link)
            timer.intermediate('write_order')

        timer.stop()
def link_comments_and_sort(link, sort):
    """Fetch the comment tree of link together with sort values for it.

    Returns a tuple (cids, tree, depth, num_children, parents, sorter), where
    the first five items come from the link's CommentTree and sorter maps
    comment ids to the values used for the requested sort.
    """
    from r2.models import CommentSortsCache

    # The cached comment tree is spread over three keys:
    #   1. The comments_key: a tuple of
    #        (cids, comment_tree, depth, num_children)
    #      with:
    #        cids         =:= [comment_id]
    #        comment_tree =:= dict(comment_id -> [comment_id])
    #        depth        =:= dict(comment_id -> int depth)
    #        num_children =:= dict(comment_id -> int num_children)
    #   2. The parent_comments_key =:= dict(comment_id -> parent_id)
    #   3. The comments_sorts keys =:= dict(comment_id36 -> float).
    #      These live in a Cassandra model (CommentSortsCache) instead of a
    #      permacache key, one per sort (hot, new, etc)
    timer = g.stats.get_timer(
        'comment_tree.get.%s' % link.comment_tree_version)
    timer.start()

    link_id = link._id
    cache = get_comment_tree(link, timer=timer)
    cids = cache.cids
    parents = cache.parents

    # load the sorter
    sorter = _get_comment_sorter(link_id, sort)
    if cids and not sorter:
        g.log.debug("comment_tree.py: sorter (%s) cache miss for Link %s"
                    % (sort, link_id))
        sorter = {}

    # comments that have no sort value yet
    missing = [cid for cid in cids if cid not in sorter]
    if cids and missing:
        g.log.debug(
            "Error in comment_tree: sorter %r inconsistent (missing %d e.g. %r)"
            % (sort_comments_key(link_id, sort), len(missing), missing[:10]))

        if not g.disallow_db_writes:
            stale = Comment._byID(missing, data=True, return_dict=False)
            update_comment_votes(stale)

        sorter.update(_comment_sorter_from_cids(missing, sort))
        timer.intermediate('sort')

    if parents is None:
        g.log.debug("comment_tree.py: parents cache miss for Link %s"
                    % link_id)
        parents = {}
    elif cids and any(cid not in parents for cid in cids):
        g.log.debug("Error in comment_tree: parents inconsistent for Link %s"
                    % link_id)
        parents = {}

    if len(cids) > 0 and not parents:
        with CommentTree.mutation_context(link):
            # reload under lock so the sorter and parents are consistent
            timer.intermediate('lock')
            cache = get_comment_tree(link, timer=timer)
            cache.parents = cache.parent_dict_from_tree(cache.tree)

    timer.stop()

    return (cache.cids, cache.tree, cache.depth, cache.num_children,
            cache.parents, sorter)
def add_comments(comments):
    """Add comments to the CommentTree and update scores.

    Comments are bucketed by link. For each link the sort scores are stored
    first. Then, under the tree's mutation lock, non-deleted comments are
    added to the tree and deleted ones are removed from it. If the tree
    turns out to be inconsistent it is rebuilt and the link is queued for
    search reindexing. The comment orders are rewritten under the same lock.
    """
    from r2.models.builder import write_comment_orders

    links_by_id = Link._byID(
        [item.link_id for item in tup(comments)], data=True)

    comments = tup(comments)
    batches = defaultdict(list)
    for item in comments:
        batches[item.link_id].append(item)

    for link_id, batch in batches.iteritems():
        link = links_by_id[link_id]

        # split the batch before any scores are calculated
        new_comments = []
        deleted_comments = []
        for item in batch:
            if item._deleted:
                deleted_comments.append(item)
            else:
                new_comments.append(item)

        timer = g.stats.get_timer(
            'comment_tree.add.%s' % link.comment_tree_version)
        timer.start()

        # the scores have to be in place for every comment in the tree, so
        # they are written ahead of the tree update
        for sort in ("_controversy", "_confidence", "_score"):
            sort_scores = {}
            for item in batch:
                sort_scores[item._id36] = getattr(item, sort)
            CommentScoresByLink.set_scores(link, sort, sort_scores)

        qa_scores = _get_qa_comment_scores(link, batch)
        CommentScoresByLink.set_scores(link, "_qa", qa_scores)
        timer.intermediate('scores')

        with CommentTree.mutation_context(link, timeout=180):
            try:
                timer.intermediate('lock')
                comment_tree = CommentTree.by_link(link, timer)
                timer.intermediate('get')

                if new_comments:
                    comment_tree.add_comments(new_comments)

                for item in deleted_comments:
                    comment_tree.delete_comment(item, link)

                timer.intermediate('update')
            except InconsistentCommentTreeError:
                # raised when a comment is added whose parent is not in the
                # tree yet; the tree then has to be rebuilt from scratch
                failed_ids = [item._id for item in batch]
                g.log.exception(
                    'add_comments_nolock failed for link %s %s, recomputing',
                    link_id, failed_ids)

                comment_tree = CommentTree.rebuild(link)
                timer.intermediate('rebuild')

                # the rebuild changed the link's comment count, so queue the
                # link for search reindexing
                link.update_search_index()
                timer.intermediate('update_search_index')
                g.stats.simple_event('comment_tree_inconsistent')

            # still under the lock, so the orders are based on the same
            # version of the CommentTree that was just written
            write_comment_orders(link, timer)
            timer.intermediate('write_order')

        timer.stop()
def get_comment_tree(link, timer=None):
    """Return the CommentTree for link.

    Arguments:

    * link -- the Link whose tree is wanted.
    * timer -- optional timer handed on to CommentTree.by_link; a
      SimpleSillyStub is used when omitted.

    """
    effective_timer = SimpleSillyStub() if timer is None else timer
    return CommentTree.by_link(link, effective_timer)
def link_comments_and_sort(link, sort):
    """Fetch the comment tree of a post together with sort values for it.

    Arguments:

    * link -- the Link whose comments are wanted.
    * sort -- a string naming the comment attribute the sort values are
      generated from.

    Returns a tuple (cids, cid_tree, depth, parents, sorter):

    * cids -- a list of the ids of all comments in the thread.
    * cid_tree -- a dictionary from parent cid to children cids.
    * depth -- a dictionary from cid to the depth of that comment in the
      tree. A top-level comment has depth 0.
    * parents -- a dictionary from child cid to parent cid.
    * sorter -- a dictionary from cid to a numeric value to be used for
      sorting.

    """
    # The cached comment tree is spread over three keys:
    #   1. The comments_key: a tuple of
    #        (cids, comment_tree, depth)
    #      with:
    #        cids         =:= [comment_id]
    #        comment_tree =:= dict(comment_id -> [comment_id])
    #        depth        =:= dict(comment_id -> int depth)
    #   2. The parent_comments_key =:= dict(comment_id -> parent_id)
    #   3. The comments_sorts keys =:= dict(comment_id36 -> float).
    #      These live in a Cassandra model (CommentScoresByLink) instead of
    #      a permacache key, one per sort (hot, new, etc)
    timer = g.stats.get_timer(
        'comment_tree.get.%s' % link.comment_tree_version)
    timer.start()

    cache = get_comment_tree(link, timer=timer)
    cids = cache.cids
    tree = cache.tree
    parents = cache.parents

    # load the sorter
    sorter = _get_comment_sorter(link, sort)
    if cids and not sorter:
        g.log.debug("comment_tree.py: sorter %s cache miss for %s", sort, link)
        sorter = {}

    # comments whose sort values were not in the cache
    missing = [cid for cid in cids if cid not in sorter]
    if cids and missing:
        g.log.debug(
            "Error in comment_tree: sorter %s/%s inconsistent (missing %d e.g. %r)"
            % (link, sort, len(missing), missing[:10]))

        if not g.disallow_db_writes:
            stale = Comment._byID(missing, data=True, return_dict=False)
            update_comment_votes(stale)

        # Only the Q&A sort reads attributes that need the data lookup, so
        # skip it for the other sorts.
        data_needed = (sort == '_qa')
        comments = Comment._byID(
            missing, data=data_needed, return_dict=False)
        sorter.update(_comment_sorter_from_cids(comments, sort, link, tree))
        timer.intermediate('sort')

    if parents is None:
        g.log.debug("comment_tree.py: parents cache miss for %s", link)
        parents = {}
    elif cids and any(cid not in parents for cid in cids):
        g.log.debug("Error in comment_tree: parents inconsistent for %s", link)
        parents = {}

    if len(cids) > 0 and not parents:
        with CommentTree.mutation_context(link):
            # reload under lock so the sorter and parents are consistent
            timer.intermediate('lock')
            cache = get_comment_tree(link, timer=timer)
            cache.parents = cache.parent_dict_from_tree(cache.tree)

    timer.stop()

    return (cache.cids, cache.tree, cache.depth, cache.parents, sorter)
def link_comments_and_sort(link, sort):
    """Fetch the comment tree of link together with sort values for it.

    Returns a tuple (cids, tree, depth, num_children, parents, sorter), where
    the first five items come from the link's CommentTree and sorter maps
    comment ids to the values used for the requested sort.
    """
    from r2.models import CommentSortsCache

    # The cached comment tree is spread over three keys:
    #   1. The comments_key: a tuple of
    #        (cids, comment_tree, depth, num_children)
    #      with:
    #        cids         =:= [comment_id]
    #        comment_tree =:= dict(comment_id -> [comment_id])
    #        depth        =:= dict(comment_id -> int depth)
    #        num_children =:= dict(comment_id -> int num_children)
    #   2. The parent_comments_key =:= dict(comment_id -> parent_id)
    #   3. The comments_sorts keys =:= dict(comment_id36 -> float).
    #      These live in a Cassandra model (CommentSortsCache) instead of a
    #      permacache key, one per sort (hot, new, etc)
    timer = g.stats.get_timer(
        'comment_tree.get.%s' % link.comment_tree_version)
    timer.start()

    link_id = link._id
    cache = get_comment_tree(link, timer=timer)
    cids = cache.cids
    parents = cache.parents

    # load the sorter
    sorter = _get_comment_sorter(link_id, sort)
    if cids and not sorter:
        g.log.debug("comment_tree.py: sorter (%s) cache miss for Link %s"
                    % (sort, link_id))
        sorter = {}

    # comments that have no sort value yet
    missing = [cid for cid in cids if cid not in sorter]
    if cids and missing:
        g.log.debug(
            "Error in comment_tree: sorter %r inconsistent (missing %d e.g. %r)"
            % (sort_comments_key(link_id, sort), len(missing), missing[:10]))

        if not g.disallow_db_writes:
            stale = Comment._byID(missing, data=True, return_dict=False)
            update_comment_votes(stale)

        sorter.update(_comment_sorter_from_cids(missing, sort))
        timer.intermediate('sort')

    if parents is None:
        g.log.debug("comment_tree.py: parents cache miss for Link %s"
                    % link_id)
        parents = {}
    elif cids and any(cid not in parents for cid in cids):
        g.log.debug("Error in comment_tree: parents inconsistent for Link %s"
                    % link_id)
        parents = {}

    if len(cids) > 0 and not parents:
        with CommentTree.mutation_context(link):
            # reload under lock so the sorter and parents are consistent
            timer.intermediate('lock')
            cache = get_comment_tree(link, timer=timer)
            cache.parents = cache.parent_dict_from_tree(cache.tree)

    timer.stop()

    return (cache.cids, cache.tree, cache.depth, cache.num_children,
            cache.parents, sorter)
def link_comments_and_sort(link, sort):
    """Fetch the comment tree of a post together with sort values for it.

    Arguments:

    * link -- the Link whose comments are wanted.
    * sort -- a string naming the comment attribute the sort values are
      generated from.

    Returns a tuple (cids, cid_tree, depth, parents, sorter):

    * cids -- a list of the ids of all comments in the thread.
    * cid_tree -- a dictionary from parent cid to children cids.
    * depth -- a dictionary from cid to the depth of that comment in the
      tree. A top-level comment has depth 0.
    * parents -- a dictionary from child cid to parent cid.
    * sorter -- a dictionary from cid to a numeric value to be used for
      sorting.

    """
    # The cached comment tree is spread over three keys:
    #   1. The comments_key: a tuple of
    #        (cids, comment_tree, depth)
    #      with:
    #        cids         =:= [comment_id]
    #        comment_tree =:= dict(comment_id -> [comment_id])
    #        depth        =:= dict(comment_id -> int depth)
    #   2. The parent_comments_key =:= dict(comment_id -> parent_id)
    #   3. The comments_sorts keys =:= dict(comment_id36 -> float).
    #      These live in a Cassandra model (CommentScoresByLink) instead of
    #      a permacache key, one per sort (hot, new, etc)
    timer = g.stats.get_timer(
        'comment_tree.get.%s' % link.comment_tree_version)
    timer.start()

    cache = get_comment_tree(link, timer=timer)
    cids = cache.cids
    tree = cache.tree
    parents = cache.parents

    # load the sorter
    sorter = _get_comment_sorter(link, sort)
    if cids and not sorter:
        g.log.debug("comment_tree.py: sorter %s cache miss for %s", sort, link)
        sorter = {}

    # comments whose sort values were not in the cache
    missing = [cid for cid in cids if cid not in sorter]
    if cids and missing:
        g.log.debug(
            "Error in comment_tree: sorter %s/%s inconsistent (missing %d e.g. %r)"
            % (link, sort, len(missing), missing[:10]))
        g.stats.simple_event('comment_tree_bad_sorter')

        if not g.disallow_db_writes:
            stale = Comment._byID(missing, data=True, return_dict=False)
            update_comment_votes(stale)

        # Only the Q&A sort reads attributes that need the data lookup, so
        # skip it for the other sorts.
        data_needed = (sort == '_qa')
        comments = Comment._byID(
            missing, data=data_needed, return_dict=False)
        sorter.update(_comment_sorter_from_cids(comments, sort, link, tree))
        timer.intermediate('sort')

    if parents is None:
        g.log.debug("comment_tree.py: parents cache miss for %s", link)
        parents = {}
    elif cids and any(cid not in parents for cid in cids):
        g.log.debug("Error in comment_tree: parents inconsistent for %s", link)
        parents = {}

    if len(cids) > 0 and not parents:
        with CommentTree.mutation_context(link):
            # reload under lock so the sorter and parents are consistent
            timer.intermediate('lock')
            cache = get_comment_tree(link, timer=timer)
            cache.parents = cache.parent_dict_from_tree(cache.tree)

    timer.stop()

    return (cache.cids, cache.tree, cache.depth, cache.parents, sorter)