Exemple #1
3
def add_comments(comments):
    """Add comments to the CommentTree and update scores.

    The comments are grouped by link and every link is handled on its own:
    the sort scores are written first, then the link's CommentTree is
    updated under its mutation lock and the comment orders are rewritten
    under that same lock.

    If the tree update raises InconsistentCommentTreeError the failure is
    logged and counted, the order rewrite is skipped for that link, and
    processing moves on to the next link. The per-link timer is stopped in
    either case.

    comments -- the comment(s) to add; passed through tup() before use.
    """
    from r2.models.builder import write_comment_orders

    comments = tup(comments)
    link_ids = [comment.link_id for comment in comments]
    links = Link._byID(link_ids, data=True)

    comments_by_link_id = defaultdict(list)
    for comment in comments:
        comments_by_link_id[comment.link_id].append(comment)

    for link_id, link_comments in comments_by_link_id.iteritems():
        link = links[link_id]

        timer = g.stats.get_timer(
            'comment_tree.add.%s' % link.comment_tree_version)
        timer.start()

        # write scores before CommentTree because the scores must exist for all
        # comments in the tree
        for sort in ("_controversy", "_confidence", "_score"):
            scores_by_comment = {
                comment._id36: getattr(comment, sort)
                for comment in link_comments
            }
            CommentScoresByLink.set_scores(link, sort, scores_by_comment)

        scores_by_comment = _get_qa_comment_scores(link, link_comments)
        CommentScoresByLink.set_scores(link, "_qa", scores_by_comment)
        timer.intermediate('scores')

        with CommentTree.mutation_context(link, timeout=180):
            try:
                timer.intermediate('lock')
                comment_tree = CommentTree.by_link(link, timer)
                timer.intermediate('get')
                comment_tree.add_comments(link_comments)
                timer.intermediate('update')
            except InconsistentCommentTreeError:
                # failed to add a comment to the CommentTree because its parent
                # is missing from the tree. this comment will be lost forever
                # unless a rebuild is performed.
                #
                # Do not `return` here: that would silently drop every other
                # link still waiting in this batch and leave the timer
                # running.
                comment_ids = [comment._id for comment in link_comments]
                g.log.error(
                    "comment_tree_inconsistent: %s %s" % (link, comment_ids))
                g.stats.simple_event('comment_tree_inconsistent')
            else:
                # do this under the same lock because we want to ensure we are
                # using the same version of the CommentTree as was just written
                write_comment_orders(link)
                timer.intermediate('write_order')

        timer.stop()
Exemple #2
2
def add_comments(comments):
    """Write sort scores for the comments, then add them to their trees.

    Comments are bucketed by link_id. For each link the "_controversy",
    "_confidence", "_score" and "_qa" scores are stored through
    CommentScoresByLink, the comments are handed to CommentTree.add_comments
    and finally the comment orders are rewritten.

    comments -- the comment(s) to add; passed through tup() before use.
    """
    from r2.models.builder import write_comment_orders

    comments = tup(comments)
    links = Link._byID([c.link_id for c in comments], data=True)

    grouped = defaultdict(list)
    for c in comments:
        grouped[c.link_id].append(c)

    attribute_sorts = ("_controversy", "_confidence", "_score")

    for link_id, batch in grouped.iteritems():
        link = links[link_id]

        timer = g.stats.get_timer("comment_tree.add.1")
        timer.start()

        # every comment in the tree needs a score, so the scores go in
        # before the tree itself is touched
        for sort in attribute_sorts:
            CommentScoresByLink.set_scores(
                link, sort,
                dict((c._id36, getattr(c, sort)) for c in batch))

        qa_scores = _get_qa_comment_scores(link, batch)
        CommentScoresByLink.set_scores(link, "_qa", qa_scores)
        timer.intermediate("scores")

        CommentTree.add_comments(link, batch)
        timer.intermediate("update")

        write_comment_orders(link)
        timer.intermediate("write_order")

        timer.stop()
Exemple #3
1
def add_comments(comments):
    """Score the comments and add them to the CommentTree of their link.

    For every link referenced by the comments this writes the comment
    scores, adds the comments to the tree and rewrites the comment orders,
    recording a timer intermediate after each step.

    comments -- the comment(s) to add; passed through tup() before use.
    """
    from r2.models.builder import write_comment_orders

    comments = tup(comments)
    links_by_id = Link._byID([c.link_id for c in comments])

    grouped = defaultdict(list)
    for c in comments:
        grouped[c.link_id].append(c)

    # (label recorded on the timer, step to run) in execution order
    steps = (
        ('scores', lambda link, batch: write_comment_scores(link, batch)),
        ('update', lambda link, batch: CommentTree.add_comments(link, batch)),
        ('write_order', lambda link, batch: write_comment_orders(link)),
    )

    for link_id, batch in grouped.iteritems():
        link = links_by_id[link_id]

        timer = g.stats.get_timer('comment_tree.add.1')
        timer.start()

        for label, step in steps:
            step(link, batch)
            timer.intermediate(label)

        timer.stop()
Exemple #4
0
def add_comments(comments):
    """Add comments to their link's CommentTree and refresh the scores.

    The comments are grouped per link; each group has its scores written,
    is added to the tree, and then triggers a rewrite of the link's comment
    orders. Each of the three steps is recorded on a timer.

    comments -- the comment(s) to add; passed through tup() before use.
    """
    from r2.models.builder import write_comment_orders

    comments = tup(comments)
    wanted_link_ids = [c.link_id for c in comments]
    links_by_id = Link._byID(wanted_link_ids)

    per_link = defaultdict(list)
    for c in comments:
        per_link[c.link_id].append(c)

    for link_id, batch in per_link.iteritems():
        link = links_by_id[link_id]
        timer = g.stats.get_timer('comment_tree.add.1')
        timer.start()

        # 1. scores
        write_comment_scores(link, batch)
        timer.intermediate('scores')

        # 2. the tree itself
        CommentTree.add_comments(link, batch)
        timer.intermediate('update')

        # 3. precomputed orderings
        write_comment_orders(link)
        timer.intermediate('write_order')

        timer.stop()
Exemple #5
0
def rebuild_comment_tree(link, timer):
    """Rebuild the link's CommentTree while holding its mutation lock.

    Records the 'lock', 'rebuild' and 'update_search_index' intermediates
    on `timer` and returns the result of CommentTree.rebuild(link).
    """
    tree_lock = CommentTree.mutation_context(link, timeout=180)
    with tree_lock:
        timer.intermediate('lock')

        rebuilt = CommentTree.rebuild(link)
        timer.intermediate('rebuild')

        # rebuilding the tree updated the link's comment count, so the link
        # has to be scheduled for search reindexing
        link.update_search_index()
        timer.intermediate('update_search_index')

        return rebuilt
Exemple #6
0
def rebuild_comment_tree(link, timer):
    """Recompute the CommentTree of `link` from scratch and return it.

    The rebuild runs inside CommentTree.mutation_context(link, timeout=180);
    `timer` receives one intermediate per step.
    """
    with CommentTree.mutation_context(link, timeout=180):
        timer.intermediate('lock')

        fresh_tree = CommentTree.rebuild(link)
        timer.intermediate('rebuild')

        # the rebuild changed the link's comment count; queue the link for
        # search reindexing so the index picks it up
        link.update_search_index()
        timer.intermediate('update_search_index')

        return fresh_tree
Exemple #7
0
def add_comments(comments):
    """Add comments to the CommentTree and update scores.

    Comments are grouped by link. For each link the sort scores are
    written, then, under the link's mutation lock, the comments are added
    to the CommentTree and the comment orders are rewritten.

    An InconsistentCommentTreeError for one link is logged and counted and
    that link's order rewrite is skipped; the other links in the batch are
    still processed and the timer of the failing link is still stopped.

    comments -- the comment(s) to add; passed through tup() before use.
    """
    from r2.models.builder import write_comment_orders

    comments = tup(comments)
    link_ids = [comment.link_id for comment in comments]
    links = Link._byID(link_ids, data=True)

    comments_by_link_id = defaultdict(list)
    for comment in comments:
        comments_by_link_id[comment.link_id].append(comment)

    for link_id, link_comments in comments_by_link_id.iteritems():
        link = links[link_id]

        timer = g.stats.get_timer('comment_tree.add.%s' %
                                  link.comment_tree_version)
        timer.start()

        # write scores before CommentTree because the scores must exist for all
        # comments in the tree
        for sort in ("_controversy", "_confidence", "_score"):
            scores_by_comment = {
                comment._id36: getattr(comment, sort)
                for comment in link_comments
            }
            CommentScoresByLink.set_scores(link, sort, scores_by_comment)

        scores_by_comment = _get_qa_comment_scores(link, link_comments)
        CommentScoresByLink.set_scores(link, "_qa", scores_by_comment)
        timer.intermediate('scores')

        with CommentTree.mutation_context(link, timeout=180):
            try:
                timer.intermediate('lock')
                comment_tree = CommentTree.by_link(link, timer)
                timer.intermediate('get')
                comment_tree.add_comments(link_comments)
                timer.intermediate('update')
            except InconsistentCommentTreeError:
                # failed to add a comment to the CommentTree because its parent
                # is missing from the tree. this comment will be lost forever
                # unless a rebuild is performed.
                #
                # Returning from here would skip the remaining links of the
                # batch and never stop the timer, so just fall through.
                comment_ids = [comment._id for comment in link_comments]
                g.log.error("comment_tree_inconsistent: %s %s" %
                            (link, comment_ids))
                g.stats.simple_event('comment_tree_inconsistent')
            else:
                # do this under the same lock because we want to ensure we are
                # using the same version of the CommentTree as was just written
                write_comment_orders(link)
                timer.intermediate('write_order')

        timer.stop()
Exemple #8
0
def add_comments(comments):
    """Apply new and deleted comments to the comment tree of each link.

    Comments are grouped by link_id. Under the link's mutation lock the
    non-deleted comments are added to the tree and the deleted ones are
    removed from it. If that raises InconsistentCommentTreeError the tree
    is rebuilt with rebuild_comment_tree(). Afterwards
    update_comment_votes() is called for the link's comments.

    comments -- the comment(s) to process; passed through tup() before use.
    """
    comments = tup(comments)
    links = Link._byID([c.link_id for c in comments], data=True)

    by_link = {}
    for c in comments:
        if c.link_id not in by_link:
            by_link[c.link_id] = []
        by_link[c.link_id].append(c)

    for link_id, coms in by_link.iteritems():
        link = links[link_id]
        live = [c for c in coms if not c._deleted]

        timer_name = 'comment_tree.add.%s' % link.comment_tree_version
        timer = g.stats.get_timer(timer_name)
        timer.start()
        try:
            with CommentTree.mutation_context(link, timeout=30):
                timer.intermediate('lock')
                tree = get_comment_tree(link, timer=timer)
                timer.intermediate('get')

                if live:
                    tree.add_comments(live)

                # deletions are applied after the additions
                for c in coms:
                    if c._deleted:
                        tree.delete_comment(c, link)

                timer.intermediate('update')
        except InconsistentCommentTreeError:
            failed_ids = [c._id for c in coms]
            g.log.exception(
                'add_comments_nolock failed for link %s %s, recomputing',
                link_id, failed_ids)
            rebuild_comment_tree(link, timer=timer)

        timer.stop()
        update_comment_votes(coms)
def add_comments(comments):
    """Apply new and deleted comments to the comment tree of each link.

    Comments are grouped by link_id. Under the link's mutation lock the
    non-deleted comments are added to the tree and the deleted ones are
    removed. If anything in that step raises an Exception, the failure is
    logged and the tree is recomputed from scratch via
    get_comment_tree(link, _update=True). update_comment_votes() is then
    called for the link's comments.

    comments -- the comment(s) to process; passed through tup() before use.
    """
    links = Link._byID([com.link_id for com in tup(comments)], data=True)
    comments = tup(comments)

    link_map = {}
    for com in comments:
        link_map.setdefault(com.link_id, []).append(com)

    for link_id, coms in link_map.iteritems():
        link = links[link_id]
        # named so they do not shadow this function inside its own body
        new_comments = [comment for comment in coms if not comment._deleted]
        deleted_comments = (comment for comment in coms if comment._deleted)
        timer = g.stats.get_timer("comment_tree.add.%s" % link.comment_tree_version)
        timer.start()
        try:
            with CommentTree.mutation_context(link):
                timer.intermediate("lock")
                cache = get_comment_tree(link, timer=timer)
                timer.intermediate("get")
                if new_comments:
                    cache.add_comments(new_comments)
                for comment in deleted_comments:
                    cache.delete_comment(comment, link)
                timer.intermediate("update")
        except Exception:
            # `except Exception` rather than a bare `except`, so that
            # KeyboardInterrupt / SystemExit propagate instead of being
            # answered with a full tree rebuild
            g.log.exception("add_comments_nolock failed for link %s, recomputing tree", link_id)

            # calculate it from scratch
            get_comment_tree(link, _update=True, timer=timer)
        timer.stop()
        update_comment_votes(coms)
Exemple #10
0
def get_comment_tree(link, _update=False, timer=None):
    """Return the CommentTree for `link`, rebuilding it when required.

    The stored tree is returned as-is when it is truthy and `_update` is
    false. Otherwise the tree is rebuilt under the link's mutation lock,
    the link is scheduled for search reindexing, and the rebuilt tree is
    returned.

    timer -- optional timer that receives the intermediates; a
        SimpleSillyStub is used when none is given.
    """
    if timer is None:
        timer = SimpleSillyStub()

    tree = CommentTree.by_link(link)
    timer.intermediate('load')

    if not tree or _update:
        with CommentTree.mutation_context(link, timeout=180):
            timer.intermediate('lock')
            tree = CommentTree.rebuild(link)
            timer.intermediate('rebuild')
            # the rebuild updated the link's comment count, so schedule the
            # link for search reindexing
            link.update_search_index()
            timer.intermediate('update_search_index')
            return tree
    else:
        return tree
Exemple #11
0
def add_comments(comments):
    """Add comments to the comment tree of each link they belong to.

    Comments are grouped by link_id and added to the link's tree under its
    mutation lock. If that raises an Exception, the failure is logged and
    the tree is recomputed from scratch via
    get_comment_tree(link, _update=True). update_comment_votes() is then
    called for the link's comments.

    comments -- the comment(s) to add; passed through tup() before use.
    """
    links = Link._byID([com.link_id for com in tup(comments)], data=True)
    comments = tup(comments)

    link_map = {}
    for com in comments:
        link_map.setdefault(com.link_id, []).append(com)

    for link_id, coms in link_map.iteritems():
        link = links[link_id]
        timer = g.stats.get_timer('comment_tree.add.%s' %
                                  link.comment_tree_version)
        timer.start()
        try:
            with CommentTree.mutation_context(link):
                timer.intermediate('lock')
                cache = get_comment_tree(link, timer=timer)
                timer.intermediate('get')
                cache.add_comments(coms)
                timer.intermediate('update')
        except Exception:
            # not a bare `except:` -- KeyboardInterrupt and SystemExit must
            # propagate rather than trigger a tree rebuild
            g.log.exception(
                'add_comments_nolock failed for link %s, recomputing tree',
                link_id)

            # calculate it from scratch
            get_comment_tree(link, _update=True, timer=timer)
        timer.stop()
        update_comment_votes(coms)
Exemple #12
0
def add_comments(comments):
    """Add comments to the comment tree of each link they belong to.

    For every link the comments are added to the tree while holding the
    link's mutation lock. An Exception during that step is logged and
    answered by recomputing the tree from scratch with
    get_comment_tree(link, _update=True). Finally update_comment_votes()
    is called with the link's comments.

    comments -- the comment(s) to add; passed through tup() before use.
    """
    links = Link._byID([com.link_id for com in tup(comments)], data=True)
    comments = tup(comments)

    link_map = {}
    for com in comments:
        link_map.setdefault(com.link_id, []).append(com)

    for link_id, coms in link_map.iteritems():
        link = links[link_id]
        timer = g.stats.get_timer('comment_tree.add.%s'
                                  % link.comment_tree_version)
        timer.start()
        try:
            with CommentTree.mutation_context(link):
                timer.intermediate('lock')
                cache = get_comment_tree(link, timer=timer)
                timer.intermediate('get')
                cache.add_comments(coms)
                timer.intermediate('update')
        except Exception:
            # catching Exception (instead of everything) lets
            # KeyboardInterrupt / SystemExit stop the process as intended
            g.log.exception(
                'add_comments_nolock failed for link %s, recomputing tree',
                link_id)

            # calculate it from scratch
            get_comment_tree(link, _update=True, timer=timer)
        timer.stop()
        update_comment_votes(coms)
def activate_names_requested_in(link):
    """Activate the names requested in the link's top-level comments.

    Top-level comments are visited from most to fewest `_ups`; spam and
    deleted comments are skipped. Each comment whose stripped body matches
    valid_name_re contributes a candidate (comment, name) pair. The first
    `link.revenue_bucket` candidates are activated, and the link's
    server_names, flair_text and flair_css_class are updated and committed.
    """
    tree = CommentTree.by_link(link).tree
    candidates = []

    if tree:
        top_level = chain.from_iterable(
            Comment._byID(chunk, return_dict=False, data=True)
            for chunk in in_chunks(tree[None]))
        by_upvotes = sorted(top_level, key=lambda c: c._ups, reverse=True)

        for comment in by_upvotes:
            if not (comment._spam or comment._deleted):
                found = valid_name_re.search(comment.body.strip())
                if found:
                    candidates.append((comment, found.group(1)))

    # we activate one name for each 100% of rev goal met
    chosen = candidates[:link.revenue_bucket]
    activate_names(link, chosen)

    chosen_names = [name for _, name in chosen]
    link.server_names = chosen_names
    if chosen:
        link.flair_text = ", ".join(chosen_names)
    else:
        link.flair_text = "/dev/null"
    link.flair_css_class = "goal-bucket-%d" % link.revenue_bucket
    link._commit()
Exemple #14
0
def add_comments(comments):
    """Apply new and deleted comments to the comment tree of each link.

    Comments are grouped by link_id. Under the link's mutation lock the
    non-deleted comments are added to the tree and the deleted ones are
    removed. On InconsistentCommentTreeError the failure is logged, the
    tree is rebuilt with rebuild_comment_tree() and a
    'comment_tree_inconsistent' event is recorded. Afterwards
    update_comment_votes() is called for the link's comments.

    comments -- the comment(s) to process; passed through tup() before use.
    """
    comments = tup(comments)
    links = Link._byID([c.link_id for c in comments], data=True)

    by_link = {}
    for c in comments:
        if c.link_id not in by_link:
            by_link[c.link_id] = []
        by_link[c.link_id].append(c)

    for link_id, coms in by_link.iteritems():
        link = links[link_id]
        to_insert = [c for c in coms if not c._deleted]

        timer_name = 'comment_tree.add.%s' % link.comment_tree_version
        timer = g.stats.get_timer(timer_name)
        timer.start()
        try:
            with CommentTree.mutation_context(link, timeout=30):
                timer.intermediate('lock')
                tree = get_comment_tree(link, timer=timer)
                timer.intermediate('get')

                if to_insert:
                    tree.add_comments(to_insert)

                # removals run after the insertions
                for c in coms:
                    if c._deleted:
                        tree.delete_comment(c, link)

                timer.intermediate('update')
        except InconsistentCommentTreeError:
            failed_ids = [c._id for c in coms]
            g.log.exception(
                'add_comments_nolock failed for link %s %s, recomputing',
                link_id, failed_ids)
            rebuild_comment_tree(link, timer=timer)
            g.stats.simple_event('comment_tree_inconsistent')

        timer.stop()
        update_comment_votes(coms)
Exemple #15
0
def get_comment_tree(link, _update=False, timer=None):
    """Load the CommentTree of `link`, rebuilding it if missing or forced.

    When the loaded tree is truthy and `_update` is false it is returned
    directly. Otherwise the tree is rebuilt while holding the link's
    mutation lock, the link is scheduled for search reindexing and the new
    tree is returned.

    timer -- optional timer for the intermediates; defaults to a
        SimpleSillyStub.
    """
    timer = SimpleSillyStub() if timer is None else timer

    loaded = CommentTree.by_link(link)
    timer.intermediate('load')

    if not loaded or _update:
        with CommentTree.mutation_context(link, timeout=180):
            timer.intermediate('lock')
            rebuilt = CommentTree.rebuild(link)
            timer.intermediate('rebuild')
            # the link's comment count was updated by the rebuild, so the
            # link needs to be scheduled for search reindexing
            link.update_search_index()
            timer.intermediate('update_search_index')
            return rebuilt
    else:
        return loaded
Exemple #16
0
def get_comment_tree(link, _update=False, timer=None):
    """Load the CommentTree of `link`, rebuilding it if missing or forced.

    A truthy stored tree is returned unchanged unless `_update` is set. In
    every other case the tree is rebuilt under the link's mutation lock,
    the link is passed to queries.changed() and the rebuilt tree is
    returned.

    timer -- optional timer for the intermediates; defaults to a
        SimpleSillyStub.
    """
    timer = SimpleSillyStub() if timer is None else timer

    loaded = CommentTree.by_link(link)
    timer.intermediate("load")

    if not loaded or _update:
        with CommentTree.mutation_context(link, timeout=180):
            timer.intermediate("lock")
            rebuilt = CommentTree.rebuild(link)
            timer.intermediate("rebuild")
            # the rebuild updated the link's comment count, so the link is
            # scheduled for search reindexing
            # NOTE(review): imported locally, presumably to avoid an import
            # cycle -- confirm before hoisting to module level
            from r2.lib.db.queries import changed

            changed([link])
            timer.intermediate("changed")
            return rebuilt
    else:
        return loaded
Exemple #17
0
def add_comments(comments):
    """Store sort scores for the comments and add them to their trees.

    The comments are grouped by link_id. Per link, the "_controversy",
    "_confidence", "_score" and "_qa" scores are written with
    CommentScoresByLink.set_scores, the comments are added through
    CommentTree.add_comments and the comment orders are rewritten.

    comments -- the comment(s) to add; passed through tup() before use.
    """
    from r2.models.builder import write_comment_orders

    comments = tup(comments)
    links = Link._byID([c.link_id for c in comments], data=True)

    per_link = defaultdict(list)
    for c in comments:
        per_link[c.link_id].append(c)

    for link_id, batch in per_link.iteritems():
        link = links[link_id]

        timer = g.stats.get_timer('comment_tree.add.1')
        timer.start()

        # scores have to exist for all comments in the tree, which is why
        # they are written ahead of the tree update
        for sort in ("_controversy", "_confidence", "_score"):
            per_comment = dict(
                (c._id36, getattr(c, sort)) for c in batch)
            CommentScoresByLink.set_scores(link, sort, per_comment)

        CommentScoresByLink.set_scores(
            link, "_qa", _get_qa_comment_scores(link, batch))
        timer.intermediate('scores')

        CommentTree.add_comments(link, batch)
        timer.intermediate('update')

        write_comment_orders(link)
        timer.intermediate('write_order')

        timer.stop()
Exemple #18
0
def calculate_comment_scores(link, sort, comments):
    """Return a dict of comment _id36 -> score for the given sort.

    For "_controversy", "_confidence" and "_score" the score is read from
    the attribute of that name on each comment. For "_qa" the scores come
    from _calculate_qa_comment_scores using the link's comment tree.

    Raises ValueError for any other sort.
    """
    if sort == "_qa":
        cid_tree = CommentTree.by_link(link).tree
        return _calculate_qa_comment_scores(link, cid_tree, comments)

    if sort not in ("_controversy", "_confidence", "_score"):
        raise ValueError("unsupported comment sort %s" % sort)

    return dict((c._id36, getattr(c, sort)) for c in comments)
Exemple #19
0
def calculate_comment_scores(link, sort, comments):
    """Return a dict of comment _id36 -> score for the given sort.

    "_upvotes", "_controversy", "_confidence" and "_score" are read
    straight from the attribute of that name on each comment. "_qa" scores
    are computed by _calculate_qa_comment_scores from the link's comment
    tree.

    Raises ValueError for any other sort.
    """
    attribute_sorts = ("_upvotes", "_controversy", "_confidence", "_score")

    if sort in attribute_sorts:
        return dict((c._id36, getattr(c, sort)) for c in comments)

    if sort != "_qa":
        raise ValueError("unsupported comment sort %s" % sort)

    cid_tree = CommentTree.by_link(link).tree
    return _calculate_qa_comment_scores(link, cid_tree, comments)
Exemple #20
0
def delete_comment(comment):
    """Remove `comment` from the comment tree of its link.

    The link is looked up from comment.link_id. Under the link's mutation
    lock the tree is fetched, the comment is deleted from it and the link
    is passed to queries.changed(). Each step is recorded on a
    'comment_tree.delete.<version>' timer.
    """
    link = Link._byID(comment.link_id, data=True)

    timer_name = 'comment_tree.delete.%s' % link.comment_tree_version
    timer = g.stats.get_timer(timer_name)
    timer.start()

    with CommentTree.mutation_context(link):
        timer.intermediate('lock')

        tree = get_comment_tree(link)
        timer.intermediate('get')

        tree.delete_comment(comment, link)
        timer.intermediate('update')

        from r2.lib.db.queries import changed
        changed([link])
        timer.intermediate('changed')

    timer.stop()
Exemple #21
0
def delete_comment(comment):
    """Delete `comment` from its link's comment tree.

    Runs under CommentTree.mutation_context(link): fetches the tree,
    removes the comment and then reports the link through
    queries.changed(). Timings go to a 'comment_tree.delete.<version>'
    timer.
    """
    parent_link = Link._byID(comment.link_id, data=True)
    version = parent_link.comment_tree_version
    timer = g.stats.get_timer('comment_tree.delete.%s' % version)
    timer.start()

    with CommentTree.mutation_context(parent_link):
        timer.intermediate('lock')
        comment_tree = get_comment_tree(parent_link)
        timer.intermediate('get')
        comment_tree.delete_comment(comment, parent_link)
        timer.intermediate('update')

        from r2.lib.db.queries import changed
        changed([parent_link])
        timer.intermediate('changed')

    timer.stop()
Exemple #22
0
def make_comment_tree(link):
    """Build a CommentTree for `link` from the module-level TREE fixture.

    `tree` maps every comment id to the ids of its children, with the
    top-level ids stored under the None key. cids, depth and parents are
    derived with get_tree_details and the child counts with
    calc_num_children.
    """
    tree = {None: [root.id for root in TREE]}

    def _register(node):
        # record this node's children, then descend into each of them
        tree[node.id] = [kid.id for kid in node.children]
        for kid in node.children:
            _register(kid)

    for root in TREE:
        _register(root)

    cids, depth, parents = get_tree_details(tree)
    num_children = defaultdict(int, calc_num_children(tree))

    return CommentTree(link, cids, tree, depth, parents, num_children)
Exemple #23
0
def make_comment_tree(link):
    """Build a CommentTree for `link` from the module-level TREE fixture.

    Walks TREE depth-first and fills in, per comment id: the visit order
    (cids), the nesting depth (0 for top-level comments), the list of child
    ids (tree) and the parent id (None for top-level comments). The
    top-level ids are stored in `tree` under the None key.
    """
    cids = []
    depth = {}
    parents = {}
    tree = {None: [root.id for root in TREE]}

    def _visit(node, parent_id, level):
        node_id = node.id
        cids.append(node_id)
        depth[node_id] = level
        tree[node_id] = [kid.id for kid in node.children]
        parents[node_id] = parent_id

        for kid in node.children:
            _visit(kid, node_id, level + 1)

    for root in TREE:
        _visit(root, None, 0)

    return CommentTree(link, cids, tree, depth, parents)
Exemple #24
0
def _get_qa_comment_scores(link, comments):
    """Return a dict of comment_id36 -> qa score.

    Scores are computed for every comment in `comments` and, in addition,
    for the parent of every comment written by one of the link's
    responders.

    link -- the link the comments belong to; supplies `responder_ids`.
    comments -- iterable of comments to score. It is NOT modified: the
        parent comments are appended to a private copy only.
    """
    # copy up front so the caller's sequence is never extended in place (and
    # so tuples or one-shot iterables are accepted as well)
    comments = list(comments)

    # Responder is usually the OP, but there could be support for adding
    # other answerers in the future.
    responder_ids = link.responder_ids

    # An OP response will change the sort value for its parent, so we need
    # to process the parent, too.
    parent_cids = [
        comment.parent_id for comment in comments
        if comment.author_id in responder_ids and comment.parent_id
    ]
    parent_comments = Comment._byID(parent_cids, data=True, return_dict=False)
    comments.extend(parent_comments)

    comment_tree = CommentTree.by_link(link)
    cid_tree = comment_tree.tree

    # Fetch the comments in batch to avoid a bunch of separate calls down
    # the line.
    all_child_cids = []
    for comment in comments:
        child_cids = cid_tree.get(comment._id, None)
        if child_cids:
            all_child_cids.extend(child_cids)
    all_child_comments = Comment._byID(all_child_cids, data=True)

    comment_sorter = {}
    for comment in comments:
        child_cids = cid_tree.get(comment._id, ())
        child_comments = (all_child_comments[cid] for cid in child_cids)
        sort_value = comment._qa(child_comments, responder_ids)
        comment_sorter[comment._id36] = sort_value

    return comment_sorter
Exemple #25
0
def _get_qa_comment_scores(link, comments):
    """Return a dict of comment_id36 -> qa score.

    Besides the given comments, the parent of each comment authored by one
    of the link's responders is scored too, because a responder's reply
    changes the sort value of its parent.

    link -- the link the comments belong to; supplies `responder_ids`.
    comments -- iterable of comments to score. The argument itself is left
        untouched; parents are added to an internal copy.
    """
    # Responder is usually the OP, but there could be support for adding
    # other answerers in the future.
    responder_ids = link.responder_ids

    # Work on our own list: extending the caller's list in place would make
    # the parent comments show up in whatever the caller does with it next.
    to_score = list(comments)

    # An OP response will change the sort value for its parent, so we need
    # to process the parent, too.
    parent_cids = []
    for comment in to_score:
        if comment.author_id in responder_ids and comment.parent_id:
            parent_cids.append(comment.parent_id)
    parent_comments = Comment._byID(parent_cids, data=True, return_dict=False)
    to_score.extend(parent_comments)

    comment_tree = CommentTree.by_link(link)
    cid_tree = comment_tree.tree

    # Fetch the comments in batch to avoid a bunch of separate calls down
    # the line.
    all_child_cids = []
    for comment in to_score:
        child_cids = cid_tree.get(comment._id, None)
        if child_cids:
            all_child_cids.extend(child_cids)
    all_child_comments = Comment._byID(all_child_cids, data=True)

    comment_sorter = {}
    for comment in to_score:
        child_cids = cid_tree.get(comment._id, ())
        child_comments = (all_child_comments[cid] for cid in child_cids)
        sort_value = comment._qa(child_comments, responder_ids)
        comment_sorter[comment._id36] = sort_value

    return comment_sorter
Exemple #26
0
def add_comments(comments):
    """Add/remove comments in each link's CommentTree and update scores.

    Comments are grouped by link_id. Per link:

    * the "_controversy", "_confidence", "_score" and "_qa" scores are
      written through CommentScoresByLink;
    * under the link's mutation lock, non-deleted comments are added to the
      tree and deleted comments are removed from it;
    * if that raises InconsistentCommentTreeError the tree is rebuilt, the
      link is scheduled for search reindexing and a
      'comment_tree_inconsistent' event is recorded;
    * still under the lock, the comment orders are rewritten.

    comments -- the comment(s) to process; passed through tup() before use.
    """
    from r2.models.builder import write_comment_orders

    comments = tup(comments)
    links = Link._byID([c.link_id for c in comments], data=True)

    by_link = defaultdict(list)
    for c in comments:
        by_link[c.link_id].append(c)

    for link_id, batch in by_link.iteritems():
        link = links[link_id]

        # split the batch into comments to insert and comments to remove
        to_add = []
        to_delete = []
        for c in batch:
            if c._deleted:
                to_delete.append(c)
            else:
                to_add.append(c)

        timer_name = 'comment_tree.add.%s' % link.comment_tree_version
        timer = g.stats.get_timer(timer_name)
        timer.start()

        # scores go in ahead of the tree update because they must exist for
        # all comments in the tree
        for sort in ("_controversy", "_confidence", "_score"):
            per_comment = dict((c._id36, getattr(c, sort)) for c in batch)
            CommentScoresByLink.set_scores(link, sort, per_comment)

        qa_scores = _get_qa_comment_scores(link, batch)
        CommentScoresByLink.set_scores(link, "_qa", qa_scores)
        timer.intermediate('scores')

        with CommentTree.mutation_context(link, timeout=180):
            try:
                timer.intermediate('lock')
                tree = CommentTree.by_link(link, timer)
                timer.intermediate('get')

                if to_add:
                    tree.add_comments(to_add)

                for c in to_delete:
                    tree.delete_comment(c, link)

                timer.intermediate('update')
            except InconsistentCommentTreeError:
                # raised when a comment is added whose parent isn't in the
                # tree yet; the tree has to be rebuilt from scratch
                failed_ids = [c._id for c in batch]
                g.log.exception(
                    'add_comments_nolock failed for link %s %s, recomputing',
                    link_id, failed_ids)

                tree = CommentTree.rebuild(link)
                timer.intermediate('rebuild')

                # the rebuild updated the link's comment count, so schedule
                # the link for search reindexing
                link.update_search_index()
                timer.intermediate('update_search_index')
                g.stats.simple_event('comment_tree_inconsistent')

            # kept under the same lock so the orders are computed from the
            # same version of the CommentTree that was just written
            write_comment_orders(link)
            timer.intermediate('write_order')

        timer.stop()
Exemple #27
0
def link_comments_and_sort(link, sort):
    """Return the comment tree data of `link` together with its sorter.

    Loads the link's comment tree via get_comment_tree() and the sorter for
    `sort` via _get_comment_sorter(). Comment ids missing from the sorter
    are filled in from _comment_sorter_from_cids(); unless
    g.disallow_db_writes is set, update_comment_votes() is also called for
    those comments. If the tree's parents mapping is None or does not cover
    every cid, the tree is reloaded under the link's mutation lock and its
    parents are recomputed with parent_dict_from_tree().

    Returns the tuple
        (cids, tree, depth, num_children, parents, sorter)
    where the first five values are read from the (possibly reloaded) tree
    object and `sorter` is the dict described in item 3 below.
    """
    from r2.models import CommentSortsCache

    # This has grown sort of organically over time. Right now the
    # cache of the comments tree consists in three keys:
    # 1. The comments_key: A tuple of
    #      (cids, comment_tree, depth, num_children)
    #    given:
    #      cids         =:= [comment_id]
    #      comment_tree =:= dict(comment_id -> [comment_id])
    #      depth        =:= dict(comment_id -> int depth)
    #      num_children =:= dict(comment_id -> int num_children)
    # 2. The parent_comments_key =:= dict(comment_id -> parent_id)
    # 3. The comments_sorts keys =:= dict(comment_id36 -> float).
    #    These are represented by a Cassandra model
    #    (CommentSortsCache) rather than a permacache key. One of
    #    these exists for each sort (hot, new, etc)

    timer = g.stats.get_timer('comment_tree.get.%s' %
                              link.comment_tree_version)
    timer.start()

    link_id = link._id
    cache = get_comment_tree(link, timer=timer)
    # NOTE: of these locals only `cids` and `parents` are used below; the
    # return statement reads every value from `cache` again.
    cids = cache.cids
    tree = cache.tree
    depth = cache.depth
    num_children = cache.num_children
    parents = cache.parents

    # load the sorter
    sorter = _get_comment_sorter(link_id, sort)

    sorter_needed = []
    if cids and not sorter:
        # complete miss: no sorter stored for this link/sort
        sorter_needed = cids
        g.log.debug("comment_tree.py: sorter (%s) cache miss for Link %s" %
                    (sort, link_id))
        sorter = {}

    # NOTE: this unconditionally replaces the value assigned just above with
    # the cids that have no entry in the sorter.
    sorter_needed = [x for x in cids if x not in sorter]
    if cids and sorter_needed:
        g.log.debug(
            "Error in comment_tree: sorter %r inconsistent (missing %d e.g. %r)"
            % (sort_comments_key(
                link_id, sort), len(sorter_needed), sorter_needed[:10]))
        if not g.disallow_db_writes:
            update_comment_votes(
                Comment._byID(sorter_needed, data=True, return_dict=False))

        # fill the gaps in the local sorter dict
        sorter.update(_comment_sorter_from_cids(sorter_needed, sort))
        timer.intermediate('sort')

    # An empty local `parents` is the signal for the reload step below.
    if parents is None:
        g.log.debug("comment_tree.py: parents cache miss for Link %s" %
                    link_id)
        parents = {}
    elif cids and not all(x in parents for x in cids):
        g.log.debug("Error in comment_tree: parents inconsistent for Link %s" %
                    link_id)
        parents = {}

    if not parents and len(cids) > 0:
        with CommentTree.mutation_context(link):
            # reload under lock so the sorter and parents are consistent
            timer.intermediate('lock')
            cache = get_comment_tree(link, timer=timer)
            cache.parents = cache.parent_dict_from_tree(cache.tree)

    timer.stop()

    return (cache.cids, cache.tree, cache.depth, cache.num_children,
            cache.parents, sorter)
Exemple #28
0
def add_comments(comments):
    """Record comments in their links' CommentTrees and store their scores.

    Accepts a single comment or a collection of comments. The comments are
    grouped by link and, for each link:

    * the "_controversy", "_confidence", "_score" and "_qa" scores of every
      comment in the batch are written to CommentScoresByLink;
    * under the CommentTree mutation lock, comments that are not deleted are
      added to the tree and deleted ones are removed from it;
    * if the tree turns out to be inconsistent it is rebuilt and the link is
      scheduled for search reindexing;
    * the comment orders are rewritten while the lock is still held.
    """
    from r2.models.builder import write_comment_orders

    comments = tup(comments)
    links = Link._byID([c.link_id for c in comments], data=True)

    by_link = defaultdict(list)
    for c in comments:
        by_link[c.link_id].append(c)

    for link_id, link_comments in by_link.iteritems():
        link = links[link_id]

        # split the batch into comments to insert and comments to remove
        to_add, to_delete = [], []
        for c in link_comments:
            if c._deleted:
                to_delete.append(c)
            else:
                to_add.append(c)

        timer = g.stats.get_timer(
            'comment_tree.add.%s' % link.comment_tree_version)
        timer.start()

        # every comment in the tree must already have its scores, so the
        # scores go in before the CommentTree itself is touched
        for sort in ("_controversy", "_confidence", "_score"):
            CommentScoresByLink.set_scores(
                link,
                sort,
                dict((c._id36, getattr(c, sort)) for c in link_comments),
            )

        qa_scores = _get_qa_comment_scores(link, link_comments)
        CommentScoresByLink.set_scores(link, "_qa", qa_scores)
        timer.intermediate('scores')

        with CommentTree.mutation_context(link, timeout=180):
            try:
                timer.intermediate('lock')
                tree = CommentTree.by_link(link, timer)
                timer.intermediate('get')

                if to_add:
                    tree.add_comments(to_add)

                for c in to_delete:
                    tree.delete_comment(c, link)

                timer.intermediate('update')
            except InconsistentCommentTreeError:
                # raised when a comment is added before its parent is in
                # the tree; the only remedy is rebuilding from scratch
                g.log.exception(
                    'add_comments_nolock failed for link %s %s, recomputing',
                    link_id, [c._id for c in link_comments])

                CommentTree.rebuild(link)
                timer.intermediate('rebuild')
                # rebuilding refreshed the link's comment count, so the
                # link has to be queued for search reindexing
                link.update_search_index()
                timer.intermediate('update_search_index')
                g.stats.simple_event('comment_tree_inconsistent')

            # still inside the lock: the orders must be computed from the
            # very CommentTree version that was just written
            write_comment_orders(link, timer)
            timer.intermediate('write_order')

        timer.stop()
Exemple #29
0
def get_comment_tree(link, timer=None):
    """Return the result of ``CommentTree.by_link`` for ``link``.

    If the caller does not supply a ``timer``, a new ``SimpleSillyStub``
    instance is handed to ``CommentTree.by_link`` in its place.
    """
    effective_timer = SimpleSillyStub() if timer is None else timer
    return CommentTree.by_link(link, effective_timer)
Exemple #30
0
def link_comments_and_sort(link, sort):
    """Fetch and sort the comments on a post.

    Arguments:

    * link -- the Link whose comments we want to sort.
    * sort -- a string indicating the attribute on the comments to use for
      generating sort values.

    Returns a tuple in the form (cids, cid_tree, depth, parents, sorter), where
    the values are as follows:

    * cids -- a list of the ids of all comments in the thread.
    * cid_tree -- a dictionary from parent cid to children cids.
    * depth -- a dictionary from cid to the depth that comment resides in the
      tree. A top-level comment has depth 0.
    * parents -- a dictionary from child cid to parent cid.
    * sorter -- a dictionary from cid to a numeric value to be used for
      sorting.

    The first four values are read from the comment tree cache object. When
    the cached parents are missing or do not cover every cid (and the thread
    has comments), the tree is reloaded under the CommentTree mutation lock
    and its parents are rebuilt from the tree before returning.
    """

    # This has grown sort of organically over time. Right now the
    # cache of the comments tree consists of three keys:
    # 1. The comments_key: A tuple of
    #      (cids, comment_tree, depth)
    #    given:
    #      cids         =:= [comment_id]
    #      comment_tree =:= dict(comment_id -> [comment_id])
    #      depth        =:= dict(comment_id -> int depth)
    # 2. The parent_comments_key =:= dict(comment_id -> parent_id)
    # 3. The comments_sorts keys =:= dict(comment_id36 -> float).
    #    These are represented by a Cassandra model
    #    (CommentScoresByLink) rather than a permacache key. One of
    #    these exists for each sort (hot, new, etc)

    timer = g.stats.get_timer('comment_tree.get.%s' %
                              link.comment_tree_version)
    timer.start()

    cache = get_comment_tree(link, timer=timer)
    cids = cache.cids
    tree = cache.tree
    parents = cache.parents

    # load the sorter
    sorter = _get_comment_sorter(link, sort)

    if cids and not sorter:
        g.log.debug("comment_tree.py: sorter %s cache miss for %s", sort, link)
        sorter = {}

    # find comments for which the sort values weren't in the cache
    sorter_needed = [x for x in cids if x not in sorter]
    if sorter_needed:
        g.log.debug(
            "Error in comment_tree: sorter %s/%s inconsistent (missing %d e.g. %r)",
            link, sort, len(sorter_needed), sorter_needed[:10])
        if not g.disallow_db_writes:
            update_comment_votes(
                Comment._byID(sorter_needed, data=True, return_dict=False))

        # The Q&A sort needs access to attributes the others don't, so save the
        # extra lookups if we can.
        data_needed = (sort == '_qa')
        comments = Comment._byID(sorter_needed,
                                 data=data_needed,
                                 return_dict=False)
        sorter.update(_comment_sorter_from_cids(comments, sort, link, tree))
        timer.intermediate('sort')

    # the local ``parents`` only decides whether a rebuild is needed; the
    # value handed back to the caller is always ``cache.parents``
    if parents is None:
        g.log.debug("comment_tree.py: parents cache miss for %s", link)
        parents = {}
    elif cids and not all(x in parents for x in cids):
        g.log.debug("Error in comment_tree: parents inconsistent for %s", link)
        parents = {}

    if not parents and cids:
        with CommentTree.mutation_context(link):
            # reload under lock so the sorter and parents are consistent
            timer.intermediate('lock')
            cache = get_comment_tree(link, timer=timer)
            cache.parents = cache.parent_dict_from_tree(cache.tree)

    timer.stop()

    return (cache.cids, cache.tree, cache.depth, cache.parents, sorter)
Exemple #31
0
def link_comments_and_sort(link, sort):
    """Fetch the comment tree of a link together with its sort values.

    Arguments:

    * link -- the Link whose comments we want to sort.
    * sort -- the sort whose values should be loaded for the comments.

    Returns a tuple in the form
    (cids, tree, depth, num_children, parents, sorter). The first five values
    are read from the comment tree cache object; ``sorter`` holds the sort
    values, with entries computed here for any cid that was missing.

    When the cached parents are missing or do not cover every cid (and the
    thread has comments), the tree is reloaded under the CommentTree mutation
    lock and its parents are rebuilt from the tree before returning.
    """
    # NOTE(review): CommentSortsCache is not referenced below; presumably the
    # import is kept for its side effects -- confirm before removing.
    from r2.models import CommentSortsCache

    # This has grown sort of organically over time. Right now the
    # cache of the comments tree consists of three keys:
    # 1. The comments_key: A tuple of
    #      (cids, comment_tree, depth, num_children)
    #    given:
    #      cids         =:= [comment_id]
    #      comment_tree =:= dict(comment_id -> [comment_id])
    #      depth        =:= dict(comment_id -> int depth)
    #      num_children =:= dict(comment_id -> int num_children)
    # 2. The parent_comments_key =:= dict(comment_id -> parent_id)
    # 3. The comments_sorts keys =:= dict(comment_id36 -> float).
    #    These are represented by a Cassandra model
    #    (CommentSortsCache) rather than a permacache key. One of
    #    these exists for each sort (hot, new, etc)

    timer = g.stats.get_timer('comment_tree.get.%s' % link.comment_tree_version)
    timer.start()

    link_id = link._id
    cache = get_comment_tree(link, timer=timer)
    cids = cache.cids
    parents = cache.parents

    # load the sorter
    sorter = _get_comment_sorter(link_id, sort)

    if cids and not sorter:
        g.log.debug("comment_tree.py: sorter (%s) cache miss for Link %s",
                    sort, link_id)
        sorter = {}

    # cids whose sort value was not found in the loaded sorter
    sorter_needed = [x for x in cids if x not in sorter]
    if sorter_needed:
        g.log.debug(
            "Error in comment_tree: sorter %r inconsistent (missing %d e.g. %r)",
            sort_comments_key(link_id, sort), len(sorter_needed),
            sorter_needed[:10])
        if not g.disallow_db_writes:
            update_comment_votes(
                Comment._byID(sorter_needed, data=True, return_dict=False))

        sorter.update(_comment_sorter_from_cids(sorter_needed, sort))
        timer.intermediate('sort')

    # the local ``parents`` only decides whether a rebuild is needed; the
    # value handed back to the caller is always ``cache.parents``
    if parents is None:
        g.log.debug("comment_tree.py: parents cache miss for Link %s",
                    link_id)
        parents = {}
    elif cids and not all(x in parents for x in cids):
        g.log.debug("Error in comment_tree: parents inconsistent for Link %s",
                    link_id)
        parents = {}

    if not parents and cids:
        with CommentTree.mutation_context(link):
            # reload under lock so the sorter and parents are consistent
            timer.intermediate('lock')
            cache = get_comment_tree(link, timer=timer)
            cache.parents = cache.parent_dict_from_tree(cache.tree)

    timer.stop()

    return (cache.cids, cache.tree, cache.depth, cache.num_children,
            cache.parents, sorter)
Exemple #32
0
def link_comments_and_sort(link, sort):
    """Fetch and sort the comments on a post.

    Arguments:

    * link -- the Link whose comments we want to sort.
    * sort -- a string indicating the attribute on the comments to use for
      generating sort values.

    Returns a tuple in the form (cids, cid_tree, depth, parents, sorter), where
    the values are as follows:

    * cids -- a list of the ids of all comments in the thread.
    * cid_tree -- a dictionary from parent cid to children cids.
    * depth -- a dictionary from cid to the depth that comment resides in the
      tree. A top-level comment has depth 0.
    * parents -- a dictionary from child cid to parent cid.
    * sorter -- a dictionary from cid to a numeric value to be used for
      sorting.

    The first four values are read from the comment tree cache object. When
    the cached parents are missing or do not cover every cid (and the thread
    has comments), the tree is reloaded under the CommentTree mutation lock
    and its parents are rebuilt from the tree before returning.
    """

    # This has grown sort of organically over time. Right now the
    # cache of the comments tree consists of three keys:
    # 1. The comments_key: A tuple of
    #      (cids, comment_tree, depth)
    #    given:
    #      cids         =:= [comment_id]
    #      comment_tree =:= dict(comment_id -> [comment_id])
    #      depth        =:= dict(comment_id -> int depth)
    # 2. The parent_comments_key =:= dict(comment_id -> parent_id)
    # 3. The comments_sorts keys =:= dict(comment_id36 -> float).
    #    These are represented by a Cassandra model
    #    (CommentScoresByLink) rather than a permacache key. One of
    #    these exists for each sort (hot, new, etc)

    timer = g.stats.get_timer('comment_tree.get.%s' % link.comment_tree_version)
    timer.start()

    cache = get_comment_tree(link, timer=timer)
    cids = cache.cids
    tree = cache.tree
    parents = cache.parents

    # load the sorter
    sorter = _get_comment_sorter(link, sort)

    if cids and not sorter:
        g.log.debug("comment_tree.py: sorter %s cache miss for %s", sort, link)
        sorter = {}

    # find comments for which the sort values weren't in the cache
    sorter_needed = [x for x in cids if x not in sorter]
    if sorter_needed:
        g.log.debug(
            "Error in comment_tree: sorter %s/%s inconsistent (missing %d e.g. %r)",
            link, sort, len(sorter_needed), sorter_needed[:10])
        g.stats.simple_event('comment_tree_bad_sorter')
        if not g.disallow_db_writes:
            update_comment_votes(
                Comment._byID(sorter_needed, data=True, return_dict=False))

        # The Q&A sort needs access to attributes the others don't, so save the
        # extra lookups if we can.
        data_needed = (sort == '_qa')
        comments = Comment._byID(
            sorter_needed, data=data_needed, return_dict=False)
        sorter.update(_comment_sorter_from_cids(comments, sort, link, tree))
        timer.intermediate('sort')

    # the local ``parents`` only decides whether a rebuild is needed; the
    # value handed back to the caller is always ``cache.parents``
    if parents is None:
        g.log.debug("comment_tree.py: parents cache miss for %s", link)
        parents = {}
    elif cids and not all(x in parents for x in cids):
        g.log.debug("Error in comment_tree: parents inconsistent for %s", link)
        parents = {}

    if not parents and cids:
        with CommentTree.mutation_context(link):
            # reload under lock so the sorter and parents are consistent
            timer.intermediate('lock')
            cache = get_comment_tree(link, timer=timer)
            cache.parents = cache.parent_dict_from_tree(cache.tree)

    timer.stop()

    return (cache.cids, cache.tree, cache.depth, cache.parents, sorter)
Exemple #33
0
def get_comment_tree(link, timer=None):
    """Look up the CommentTree of ``link`` through ``CommentTree.by_link``.

    ``timer`` is forwarded as-is; when it is omitted (``None``) a freshly
    created ``SimpleSillyStub`` is forwarded instead.
    """
    timer_arg = timer if timer is not None else SimpleSillyStub()
    return CommentTree.by_link(link, timer_arg)