Beispiel #1
3
def add_comments(comments):
    """Add comments to the CommentTree and update scores."""
    from r2.models.builder import write_comment_orders

    link_ids = [comment.link_id for comment in tup(comments)]
    links = Link._byID(link_ids, data=True)

    comments = tup(comments)
    comments_by_link_id = defaultdict(list)
    for comment in comments:
        comments_by_link_id[comment.link_id].append(comment)

    for link_id, link_comments in comments_by_link_id.iteritems():
        link = links[link_id]

        timer = g.stats.get_timer(
            'comment_tree.add.%s' % link.comment_tree_version)
        timer.start()

        # write scores before CommentTree because the scores must exist for all
        # comments in the tree
        for sort in ("_controversy", "_confidence", "_score"):
            scores_by_comment = {
                comment._id36: getattr(comment, sort)
                for comment in link_comments
            }
            CommentScoresByLink.set_scores(link, sort, scores_by_comment)

        scores_by_comment = _get_qa_comment_scores(link, link_comments)
        CommentScoresByLink.set_scores(link, "_qa", scores_by_comment)
        timer.intermediate('scores')

        with CommentTree.mutation_context(link, timeout=180):
            try:
                timer.intermediate('lock')
                comment_tree = CommentTree.by_link(link, timer)
                timer.intermediate('get')
                comment_tree.add_comments(link_comments)
                timer.intermediate('update')
            except InconsistentCommentTreeError:
                # failed to add a comment to the CommentTree because its parent
                # is missing from the tree. this comment will be lost forever
                # unless a rebuild is performed.
                comment_ids = [comment._id for comment in link_comments]
                g.log.error(
                    "comment_tree_inconsistent: %s %s" % (link, comment_ids))
                g.stats.simple_event('comment_tree_inconsistent')
                return

            # do this under the same lock because we want to ensure we are using
            # the same version of the CommentTree as was just written
            write_comment_orders(link)
            timer.intermediate('write_order')

        timer.stop()
Beispiel #2
2
def add_comments(comments):
    """Add comments to the CommentTree and update scores."""
    from r2.models.builder import write_comment_orders

    link_ids = [comment.link_id for comment in tup(comments)]
    links = Link._byID(link_ids, data=True)

    comments = tup(comments)
    comments_by_link_id = defaultdict(list)
    for comment in comments:
        comments_by_link_id[comment.link_id].append(comment)

    for link_id, link_comments in comments_by_link_id.iteritems():
        link = links[link_id]

        timer = g.stats.get_timer("comment_tree.add.1")
        timer.start()

        # write scores before CommentTree because the scores must exist for all
        # comments in the tree
        for sort in ("_controversy", "_confidence", "_score"):
            scores_by_comment = {comment._id36: getattr(comment, sort) for comment in link_comments}
            CommentScoresByLink.set_scores(link, sort, scores_by_comment)

        scores_by_comment = _get_qa_comment_scores(link, link_comments)
        CommentScoresByLink.set_scores(link, "_qa", scores_by_comment)
        timer.intermediate("scores")

        CommentTree.add_comments(link, link_comments)
        timer.intermediate("update")

        write_comment_orders(link)
        timer.intermediate("write_order")

        timer.stop()
Beispiel #3
0
def get_comment_scores(link, sort, comment_ids, timer):
    """Retrieve cached sort values for all comments on a post.

    Arguments:

    * link_id -- id of the Link containing the comments.
    * sort -- a string indicating the attribute on the comments to use for
      generating sort values.

    Returns a dictionary from cid to a numeric sort value.

    """

    from r2.lib.db import queries
    from r2.models import CommentScoresByLink

    if not comment_ids:
        # no comments means no scores
        return {}

    if sort == "_date":
        # comment ids are monotonically increasing, so we can use them as a
        # substitute for creation date
        scores_by_id = {comment_id: comment_id for comment_id in comment_ids}
    else:
        scores_by_id36 = CommentScoresByLink.get_scores(link, sort)

        # we store these id36ed, but there are still bits of the code that
        # want to deal in integer IDs
        scores_by_id = {int(id36, 36): score for id36, score in scores_by_id36.iteritems()}

        scores_needed = set(comment_ids) - set(scores_by_id.keys())
        if scores_needed:
            # some scores were missing from CommentScoresByLink--lookup the
            # comments and calculate the scores.
            g.stats.simple_event("comment_tree_bad_sorter")

            missing_comments = Comment._byID(scores_needed, data=True, return_dict=False)

            if sort == "_qa":
                scores_by_missing_id36 = _get_qa_comment_scores(link, missing_comments)

                scores_by_missing = {int(id36, 36): score for id36, score in scores_by_missing_id36.iteritems()}
            else:
                scores_by_missing_id36 = {comment._id36: getattr(comment, sort) for comment in missing_comments}

                scores_by_missing = {int(id36, 36): score for id36, score in scores_by_missing_id36.iteritems()}

            # up to once per minute write the scores to limit writes but
            # eventually return us to the correct state.
            if not g.disallow_db_writes:
                write_key = "lock:score_{link}{sort}".format(link=link._id36, sort=sort)
                should_write = g.lock_cache.add(write_key, "", time=60)
                if should_write:
                    CommentScoresByLink.set_scores(link, sort, scores_by_missing_id36)

            scores_by_id.update(scores_by_missing)
            timer.intermediate("sort")

    return scores_by_id
Beispiel #4
0
def add_comments(comments):
    """Add comments to the CommentTree and update scores."""
    from r2.models.builder import write_comment_orders

    link_ids = [comment.link_id for comment in tup(comments)]
    links = Link._byID(link_ids, data=True)

    comments = tup(comments)
    comments_by_link_id = defaultdict(list)
    for comment in comments:
        comments_by_link_id[comment.link_id].append(comment)

    for link_id, link_comments in comments_by_link_id.iteritems():
        link = links[link_id]

        timer = g.stats.get_timer('comment_tree.add.%s' %
                                  link.comment_tree_version)
        timer.start()

        # write scores before CommentTree because the scores must exist for all
        # comments in the tree
        for sort in ("_controversy", "_confidence", "_score"):
            scores_by_comment = {
                comment._id36: getattr(comment, sort)
                for comment in link_comments
            }
            CommentScoresByLink.set_scores(link, sort, scores_by_comment)

        scores_by_comment = _get_qa_comment_scores(link, link_comments)
        CommentScoresByLink.set_scores(link, "_qa", scores_by_comment)
        timer.intermediate('scores')

        with CommentTree.mutation_context(link, timeout=180):
            try:
                timer.intermediate('lock')
                comment_tree = CommentTree.by_link(link, timer)
                timer.intermediate('get')
                comment_tree.add_comments(link_comments)
                timer.intermediate('update')
            except InconsistentCommentTreeError:
                # failed to add a comment to the CommentTree because its parent
                # is missing from the tree. this comment will be lost forever
                # unless a rebuild is performed.
                comment_ids = [comment._id for comment in link_comments]
                g.log.error("comment_tree_inconsistent: %s %s" %
                            (link, comment_ids))
                g.stats.simple_event('comment_tree_inconsistent')
                return

            # do this under the same lock because we want to ensure we are using
            # the same version of the CommentTree as was just written
            write_comment_orders(link)
            timer.intermediate('write_order')

        timer.stop()
Beispiel #5
0
def add_comments(comments):
    """Add comments to the CommentTree and update scores."""
    from r2.models.builder import write_comment_orders

    link_ids = [comment.link_id for comment in tup(comments)]
    links = Link._byID(link_ids, data=True)

    comments = tup(comments)
    comments_by_link_id = defaultdict(list)
    for comment in comments:
        comments_by_link_id[comment.link_id].append(comment)

    for link_id, link_comments in comments_by_link_id.iteritems():
        link = links[link_id]

        timer = g.stats.get_timer('comment_tree.add.1')
        timer.start()

        # write scores before CommentTree because the scores must exist for all
        # comments in the tree
        for sort in ("_controversy", "_confidence", "_score"):
            scores_by_comment = {
                comment._id36: getattr(comment, sort)
                for comment in link_comments
            }
            CommentScoresByLink.set_scores(link, sort, scores_by_comment)

        scores_by_comment = _get_qa_comment_scores(link, link_comments)
        CommentScoresByLink.set_scores(link, "_qa", scores_by_comment)
        timer.intermediate('scores')

        CommentTree.add_comments(link, link_comments)
        timer.intermediate('update')

        write_comment_orders(link)
        timer.intermediate('write_order')

        timer.stop()
Beispiel #6
0
def add_comments(comments):
    """Add comments to the CommentTree and update scores."""
    from r2.models.builder import write_comment_orders

    link_ids = [comment.link_id for comment in tup(comments)]
    links = Link._byID(link_ids, data=True)

    comments = tup(comments)
    comments_by_link_id = defaultdict(list)
    for comment in comments:
        comments_by_link_id[comment.link_id].append(comment)

    for link_id, link_comments in comments_by_link_id.iteritems():
        link = links[link_id]

        new_comments = [
            comment for comment in link_comments if not comment._deleted
        ]
        deleted_comments = [
            comment for comment in link_comments if comment._deleted
        ]
        timer = g.stats.get_timer('comment_tree.add.%s' %
                                  link.comment_tree_version)
        timer.start()

        # write scores before CommentTree because the scores must exist for all
        # comments in the tree
        for sort in ("_controversy", "_confidence", "_score"):
            scores_by_comment = {
                comment._id36: getattr(comment, sort)
                for comment in link_comments
            }
            CommentScoresByLink.set_scores(link, sort, scores_by_comment)

        scores_by_comment = _get_qa_comment_scores(link, link_comments)
        CommentScoresByLink.set_scores(link, "_qa", scores_by_comment)
        timer.intermediate('scores')

        with CommentTree.mutation_context(link, timeout=180):
            try:
                timer.intermediate('lock')
                comment_tree = CommentTree.by_link(link, timer)
                timer.intermediate('get')

                if new_comments:
                    comment_tree.add_comments(new_comments)

                for comment in deleted_comments:
                    comment_tree.delete_comment(comment, link)

                timer.intermediate('update')
            except InconsistentCommentTreeError:
                # this exception occurs when we add a comment to the tree but
                # its parent isn't in the tree yet, need to rebuild the tree
                # from scratch

                comment_ids = [comment._id for comment in link_comments]
                g.log.exception(
                    'add_comments_nolock failed for link %s %s, recomputing',
                    link_id, comment_ids)

                comment_tree = CommentTree.rebuild(link)
                timer.intermediate('rebuild')
                # the tree rebuild updated the link's comment count, so schedule
                # it for search reindexing
                link.update_search_index()
                timer.intermediate('update_search_index')
                g.stats.simple_event('comment_tree_inconsistent')

            # do this under the same lock because we want to ensure we are using
            # the same version of the CommentTree as was just written
            write_comment_orders(link)
            timer.intermediate('write_order')

        timer.stop()
Beispiel #7
0
def get_comment_scores(link, sort, comment_ids, timer):
    """Retrieve cached sort values for all comments on a post.

    Arguments:

    * link_id -- id of the Link containing the comments.
    * sort -- a string indicating the attribute on the comments to use for
      generating sort values.

    Returns a dictionary from cid to a numeric sort value.

    """

    from r2.lib.db import queries
    from r2.models import CommentScoresByLink

    if not comment_ids:
        # no comments means no scores
        return {}

    if sort == "_date":
        # comment ids are monotonically increasing, so we can use them as a
        # substitute for creation date
        scores_by_id = {comment_id: comment_id for comment_id in comment_ids}
    else:
        scores_by_id36 = CommentScoresByLink.get_scores(link, sort)

        # we store these id36ed, but there are still bits of the code that
        # want to deal in integer IDs
        scores_by_id = {
            int(id36, 36): score
            for id36, score in scores_by_id36.iteritems()
        }

        scores_needed = set(comment_ids) - set(scores_by_id.keys())
        if scores_needed:
            g.stats.simple_event('comment_tree_bad_sorter')

            missing_comments = Comment._byID(scores_needed,
                                             data=True,
                                             return_dict=False)

            # queue the missing comments to be added to the comments tree, which
            # will trigger adding their scores
            for comment in missing_comments:
                queries.add_to_commentstree_q(comment)

            if sort == "_qa":
                scores_by_missing_id36 = _get_qa_comment_scores(
                    link, missing_comments)

                scores_by_missing = {
                    int(id36, 36): score
                    for id36, score in scores_by_missing_id36.iteritems()
                }
            else:
                scores_by_missing = {
                    comment._id: getattr(comment, sort)
                    for comment in missing_comments
                }

            scores_by_id.update(scores_by_missing)
            timer.intermediate('sort')

    return scores_by_id
Beispiel #8
0
def get_comment_scores(link, sort, comment_ids, timer):
    """Retrieve cached sort values for all comments on a post.

    Arguments:

    * link_id -- id of the Link containing the comments.
    * sort -- a string indicating the attribute on the comments to use for
      generating sort values.

    Returns a dictionary from cid to a numeric sort value.

    """

    from r2.lib.db import queries
    from r2.models import CommentScoresByLink

    if not comment_ids:
        # no comments means no scores
        return {}

    if sort == "_date":
        # comment ids are monotonically increasing, so we can use them as a
        # substitute for creation date
        scores_by_id = {comment_id: comment_id for comment_id in comment_ids}
    else:
        scores_by_id36 = CommentScoresByLink.get_scores(link, sort)

        # we store these id36ed, but there are still bits of the code that
        # want to deal in integer IDs
        scores_by_id = {
            int(id36, 36): score
            for id36, score in scores_by_id36.iteritems()
        }

        scores_needed = set(comment_ids) - set(scores_by_id.keys())
        if scores_needed:
            # some scores were missing from CommentScoresByLink--lookup the
            # comments and calculate the scores.
            g.stats.simple_event('comment_tree_bad_sorter')

            missing_comments = Comment._byID(scores_needed,
                                             data=True,
                                             return_dict=False)

            if sort == "_qa":
                scores_by_missing_id36 = _get_qa_comment_scores(
                    link, missing_comments)

                scores_by_missing = {
                    int(id36, 36): score
                    for id36, score in scores_by_missing_id36.iteritems()
                }
            else:
                scores_by_missing_id36 = {
                    comment._id36: getattr(comment, sort)
                    for comment in missing_comments
                }

                scores_by_missing = {
                    int(id36, 36): score
                    for id36, score in scores_by_missing_id36.iteritems()
                }

            # up to once per minute write the scores to limit writes but
            # eventually return us to the correct state.
            if not g.disallow_db_writes:
                write_key = "lock:score_{link}{sort}".format(
                    link=link._id36,
                    sort=sort,
                )
                should_write = g.lock_cache.add(write_key, "", time=60)
                if should_write:
                    CommentScoresByLink.set_scores(link, sort,
                                                   scores_by_missing_id36)

            scores_by_id.update(scores_by_missing)
            timer.intermediate('sort')

    return scores_by_id
Beispiel #9
0
def write_comment_scores(link, comments):
    for sort in ("_upvotes", "_controversy", "_confidence", "_score", "_qa"):
        scores = calculate_comment_scores(link, sort, comments)
        CommentScoresByLink.set_scores(link, sort, scores)
Beispiel #10
0
def add_comments(comments):
    """Add comments to the CommentTree and update scores."""
    from r2.models.builder import write_comment_orders

    link_ids = [comment.link_id for comment in tup(comments)]
    links = Link._byID(link_ids, data=True)

    comments = tup(comments)
    comments_by_link_id = defaultdict(list)
    for comment in comments:
        comments_by_link_id[comment.link_id].append(comment)

    for link_id, link_comments in comments_by_link_id.iteritems():
        link = links[link_id]

        new_comments = [
            comment for comment in link_comments if not comment._deleted]
        deleted_comments = [
            comment for comment in link_comments if comment._deleted]
        timer = g.stats.get_timer(
            'comment_tree.add.%s' % link.comment_tree_version)
        timer.start()

        # write scores before CommentTree because the scores must exist for all
        # comments in the tree
        for sort in ("_controversy", "_confidence", "_score"):
            scores_by_comment = {
                comment._id36: getattr(comment, sort)
                for comment in link_comments
            }
            CommentScoresByLink.set_scores(link, sort, scores_by_comment)

        scores_by_comment = _get_qa_comment_scores(link, link_comments)
        CommentScoresByLink.set_scores(link, "_qa", scores_by_comment)
        timer.intermediate('scores')

        with CommentTree.mutation_context(link, timeout=180):
            try:
                timer.intermediate('lock')
                comment_tree = CommentTree.by_link(link, timer)
                timer.intermediate('get')

                if new_comments:
                    comment_tree.add_comments(new_comments)

                for comment in deleted_comments:
                    comment_tree.delete_comment(comment, link)

                timer.intermediate('update')
            except InconsistentCommentTreeError:
                # this exception occurs when we add a comment to the tree but
                # its parent isn't in the tree yet, need to rebuild the tree
                # from scratch

                comment_ids = [comment._id for comment in link_comments]
                g.log.exception(
                    'add_comments_nolock failed for link %s %s, recomputing',
                    link_id, comment_ids)

                comment_tree = CommentTree.rebuild(link)
                timer.intermediate('rebuild')
                # the tree rebuild updated the link's comment count, so schedule
                # it for search reindexing
                link.update_search_index()
                timer.intermediate('update_search_index')
                g.stats.simple_event('comment_tree_inconsistent')

            # do this under the same lock because we want to ensure we are using
            # the same version of the CommentTree as was just written
            write_comment_orders(link, timer)
            timer.intermediate('write_order')

        timer.stop()
Beispiel #11
0
def get_comment_scores(link, sort, comment_ids, timer):
    """Retrieve cached sort values for all comments on a post.

    Arguments:

    * link_id -- id of the Link containing the comments.
    * sort -- a string indicating the attribute on the comments to use for
      generating sort values.

    Returns a dictionary from cid to a numeric sort value.

    """

    from r2.lib.db import queries
    from r2.models import CommentScoresByLink

    if not comment_ids:
        # no comments means no scores
        return {}

    if sort == "_date":
        # comment ids are monotonically increasing, so we can use them as a
        # substitute for creation date
        scores_by_id = {comment_id: comment_id for comment_id in comment_ids}
    else:
        scores_by_id36 = CommentScoresByLink.get_scores(link, sort)

        # we store these id36ed, but there are still bits of the code that
        # want to deal in integer IDs
        scores_by_id = {
            int(id36, 36): score
            for id36, score in scores_by_id36.iteritems()
        }

        scores_needed = set(comment_ids) - set(scores_by_id.keys())
        if scores_needed:
            g.stats.simple_event('comment_tree_bad_sorter')

            missing_comments = Comment._byID(
                scores_needed, data=True, return_dict=False)

            # queue the missing comments to be added to the comments tree, which
            # will trigger adding their scores
            for comment in missing_comments:
                queries.add_to_commentstree_q(comment)

            if sort == "_qa":
                scores_by_missing_id36 = _get_qa_comment_scores(
                    link, missing_comments)

                scores_by_missing = {
                    int(id36, 36): score
                    for id36, score in scores_by_missing_id36.iteritems()
                }
            else:
                scores_by_missing = {
                    comment._id: getattr(comment, sort)
                    for comment in missing_comments
                }

            scores_by_id.update(scores_by_missing)
            timer.intermediate('sort')

    return scores_by_id
Beispiel #12
0
def write_comment_scores(link, comments):
    for sort in ("_controversy", "_confidence", "_score", "_qa"):
        scores = calculate_comment_scores(link, sort, comments)
        CommentScoresByLink.set_scores(link, sort, scores)