def run():
    """Backfill descendant karma for every Link, walking ids in batches.

    For each comment on a link, the comment's net score (``_ups`` minus
    ``_downs``) is passed to ``incr_descendant_karma`` on its parent comment
    (when it has a truthy ``parent_id``) and accumulated into a per-link
    total, which is finally added to the link's ``_descendant_karma``.
    """
    batch_size = 100
    thing_cls = Link
    highest_id = max_thing_id(thing_cls)
    first_id = 0

    for batch_start in xrange(first_id, highest_id + 1, batch_size):
        batch_end = batch_start + batch_size
        print("Add desc karma for links %s to %s" % (batch_start, batch_end))

        batch = list(query_thing_id_range(thing_cls, batch_start, batch_end))

        for link in batch:
            if not link._loaded:
                link._load()

            link_comments = list(
                Comment._query(Comment.c.link_id == link._id, eager_load=True))

            link_total = 0
            for comment in link_comments:
                if not comment._loaded:
                    comment._load()

                # Only comments that reply to another comment have a parent
                # whose descendant karma needs bumping.
                has_parent = hasattr(comment, 'parent_id') and comment.parent_id
                if has_parent:
                    parent = Comment._byID(comment.parent_id)
                    parent.incr_descendant_karma(
                        [], comment._ups - comment._downs)

                link_total += (comment._ups - comment._downs)

            link._incr('_descendant_karma', link_total)
Example #2
0
def run():
    """Recompute descendant karma for all Links in id order.

    Links are fetched ``STEP`` ids at a time.  Every comment on a link adds
    its net score (ups minus downs) to the link's running total and, if the
    comment has a truthy ``parent_id``, to that parent comment through
    ``incr_descendant_karma``.  The running total is then applied to the
    link's ``_descendant_karma`` with ``_incr``.
    """
    STEP = 100
    kind = Link
    last_id = max_thing_id(kind)
    start_id = 0

    for low in xrange(start_id, last_id + 1, STEP):
        high = low + STEP
        print("Add desc karma for links %s to %s" % (low, high))

        fetched_links = list(query_thing_id_range(kind, low, high))

        for current_link in fetched_links:
            if not current_link._loaded:
                current_link._load()

            comment_query = Comment._query(
                Comment.c.link_id == current_link._id, eager_load=True)
            karma_for_link = 0

            for c in list(comment_query):
                if not c._loaded:
                    c._load()

                if hasattr(c, 'parent_id') and c.parent_id:
                    # propagate this comment's net score to its parent
                    parent_comment = Comment._byID(c.parent_id)
                    parent_comment.incr_descendant_karma(
                        [], c._ups - c._downs)

                karma_for_link += (c._ups - c._downs)

            current_link._incr('_descendant_karma', karma_for_link)
Example #3
0
def _calculate_qa_comment_scores(link, cid_tree, comments):
    """Return a dict of comment_id36 -> qa score.

    Arguments:

    * link -- the Link the comments belong to; its ``responder_ids`` are
      handed to each comment's ``_qa`` method.
    * cid_tree -- a mapping from parent comment id to child comment ids.
    * comments -- an iterable of Comments to score.  It is not modified.

    The parents of comments written by a responder are scored as well, so
    the result can contain entries for comments that were not passed in.
    """

    # Responder is usually the OP, but there could be support for adding
    # other answerers in the future.
    responder_ids = link.responder_ids

    # Work on a private copy: extending the caller's list in place would leak
    # the extra parent comments back to the caller, and would fail outright
    # for iterables that are not lists.
    to_score = list(comments)

    # An OP response will change the sort value for its parent, so we need
    # to process the parent, too.
    parent_cids = [
        comment.parent_id for comment in to_score
        if comment.author_id in responder_ids and comment.parent_id
    ]
    to_score.extend(Comment._byID(parent_cids, return_dict=False))

    # Fetch the comments in batch to avoid a bunch of separate calls down
    # the line.
    all_child_cids = []
    for comment in to_score:
        child_cids = cid_tree.get(comment._id, None)
        if child_cids:
            all_child_cids.extend(child_cids)
    all_child_comments = Comment._byID(all_child_cids)

    comment_sorter = {}
    for comment in to_score:
        child_cids = cid_tree.get(comment._id, ())
        child_comments = (all_child_comments[cid] for cid in child_cids)
        comment_sorter[comment._id36] = comment._qa(child_comments,
                                                    responder_ids)

    return comment_sorter
Example #4
0
def update_score(obj, up_change, down_change, new_valid_thing, old_valid_thing):
    """Apply a vote delta to ``obj`` and propagate it as descendant karma.

    Arguments:

    * obj -- the thing being voted on; ``_ups`` and ``_downs`` are
      incremented on it.
    * up_change -- amount to add to ``_ups``.
    * down_change -- amount to add to ``_downs``.
    * new_valid_thing, old_valid_thing -- accepted for interface
      compatibility; not used by this function.

    When ``obj`` is a Comment, the net change (``up_change - down_change``)
    is also added to the descendant karma of its parent comment, if it has
    one, and of the Link it belongs to.
    """
    obj._incr('_ups', up_change)
    obj._incr('_downs', down_change)

    if isinstance(obj, Comment):
        karma_change = up_change - down_change

        # Guard on the value as well as on the attribute's presence: a
        # comment without a parent may still carry a falsy parent_id, and
        # looking that up would not yield a parent comment.
        if hasattr(obj, 'parent_id') and obj.parent_id:
            parent = Comment._byID(obj.parent_id)
            parent.incr_descendant_karma([], karma_change)

        Link._byID(obj.link_id)._incr('_descendant_karma', karma_change)
Example #5
0
def _calculate_qa_comment_scores(link, cid_tree, comments):
    """Return a dict of comment_id36 -> qa score.

    Arguments:

    * link -- the Link the comments belong to; its ``responder_ids`` are
      handed to each comment's ``_qa`` method.
    * cid_tree -- a mapping from parent comment id to child comment ids.
    * comments -- an iterable of Comments to score.  It is not modified.

    The parents of comments written by a responder are scored as well, so
    the result can contain entries for comments that were not passed in.
    """

    # Responder is usually the OP, but there could be support for adding
    # other answerers in the future.
    responder_ids = link.responder_ids

    # Copy the input so the parent comments appended below never show up in
    # the caller's list (and so any iterable, not just a list, is accepted).
    scored_comments = list(comments)

    # An OP response will change the sort value for its parent, so we need
    # to process the parent, too.
    parent_cids = []
    for comment in scored_comments:
        if comment.author_id in responder_ids and comment.parent_id:
            parent_cids.append(comment.parent_id)
    parent_comments = Comment._byID(parent_cids, return_dict=False)
    scored_comments.extend(parent_comments)

    # Fetch the comments in batch to avoid a bunch of separate calls down
    # the line.
    all_child_cids = []
    for comment in scored_comments:
        child_cids = cid_tree.get(comment._id, None)
        if child_cids:
            all_child_cids.extend(child_cids)
    all_child_comments = Comment._byID(all_child_cids)

    comment_sorter = {}
    for comment in scored_comments:
        child_cids = cid_tree.get(comment._id, ())
        child_comments = (all_child_comments[cid] for cid in child_cids)
        sort_value = comment._qa(child_comments, responder_ids)
        comment_sorter[comment._id36] = sort_value

    return comment_sorter
Example #6
0
def _comment_sorter_from_cids(comments, sort, link, cid_tree, by_36=False):
    """Retrieve sort values for comments.

    Useful to fill in any gaps in CommentSortsCache.

    Arguments:

    * comments -- an iterable of Comments to retrieve sort values for.  It
      is not modified.
    * sort -- a string representing the type of sort to use.
    * link -- the Link the comments belong to; only consulted for the
      ``'_qa'`` sort.
    * cid_tree -- a mapping from parent id to children ids, as created by
      CommentTree.
    * by_36 -- a boolean indicating if the resultant map keys off of base 36
      ids instead of integer ids.

    Returns a dictionary from cid to a numeric sort value.  For the
    ``'_qa'`` sort the parents of responder comments are included as well.
    """
    # Copy so that the parents appended for the Q&A sort do not leak into the
    # caller's list, and so that any iterable is accepted as documented.
    comments = list(comments)
    is_qa = (sort == '_qa')

    # The Q&A sort requires extra information about surrounding comments.  It's
    # more efficient to gather it up here instead of in the guts of the comment
    # sort, but we don't want to do that for sort types that don't need it.
    if is_qa:
        # An OP response will change the sort value for its parent, so we need
        # to process the parent, too.
        responder_ids = link.responder_ids
        parent_cids = [
            c.parent_id for c in comments
            if c.author_id in responder_ids and c.parent_id
        ]
        parent_comments = Comment._byID(parent_cids,
                                        data=True,
                                        return_dict=False)
        comments.extend(parent_comments)

        # Fetch the comments in batch to avoid a bunch of separate calls down
        # the line.
        all_child_cids = []
        for c in comments:
            child_cids = cid_tree.get(c._id, None)
            if child_cids:
                all_child_cids.extend(child_cids)
        all_child_comments = Comment._byID(all_child_cids, data=True)

    comment_sorter = {}
    for comment in comments:
        if is_qa:
            child_cids = cid_tree.get(comment._id, ())
            child_comments = (all_child_comments[cid] for cid in child_cids)
            sort_value = _get_sort_value(comment, sort, link, child_comments)
        else:
            sort_value = _get_sort_value(comment, sort)

        # ``key`` rather than ``id`` so the builtin is not shadowed.
        key = comment._id36 if by_36 else comment._id
        comment_sorter[key] = sort_value

    return comment_sorter
Example #7
0
def _comment_sorter_from_cids(comments, sort, link, cid_tree, by_36=False):
    """Retrieve sort values for comments.

    Useful to fill in any gaps in CommentSortsCache.

    Arguments:

    * comments -- an iterable of Comments to retrieve sort values for.  It
      is not modified.
    * sort -- a string representing the type of sort to use.
    * link -- the Link the comments belong to; only consulted for the
      ``'_qa'`` sort.
    * cid_tree -- a mapping from parent id to children ids, as created by
      CommentTree.
    * by_36 -- a boolean indicating if the resultant map keys off of base 36
      ids instead of integer ids.

    Returns a dictionary from cid to a numeric sort value.  For the
    ``'_qa'`` sort the parents of responder comments are included as well.
    """
    # Take a private copy: the Q&A branch appends parent comments, and doing
    # that on the caller's list would be a surprising side effect (and would
    # break for non-list iterables).
    to_sort = list(comments)
    qa_sort = (sort == '_qa')

    # The Q&A sort requires extra information about surrounding comments.  It's
    # more efficient to gather it up here instead of in the guts of the comment
    # sort, but we don't want to do that for sort types that don't need it.
    if qa_sort:
        # An OP response will change the sort value for its parent, so we need
        # to process the parent, too.
        parent_cids = []
        responder_ids = link.responder_ids
        for c in to_sort:
            if c.author_id in responder_ids and c.parent_id:
                parent_cids.append(c.parent_id)
        to_sort.extend(
            Comment._byID(parent_cids, data=True, return_dict=False))

        # Fetch the comments in batch to avoid a bunch of separate calls down
        # the line.
        all_child_cids = []
        for c in to_sort:
            child_cids = cid_tree.get(c._id, None)
            if child_cids:
                all_child_cids.extend(child_cids)
        all_child_comments = Comment._byID(all_child_cids, data=True)

    comment_sorter = {}
    for comment in to_sort:
        if qa_sort:
            child_cids = cid_tree.get(comment._id, ())
            child_comments = (all_child_comments[cid] for cid in child_cids)
            sort_value = _get_sort_value(comment, sort, link, child_comments)
        else:
            sort_value = _get_sort_value(comment, sort)

        # Named ``sorter_key`` to avoid shadowing the builtin ``id``.
        if by_36:
            sorter_key = comment._id36
        else:
            sorter_key = comment._id
        comment_sorter[sorter_key] = sort_value

    return comment_sorter
Example #8
0
def get_comment_scores(link, sort, comment_ids, timer):
    """Return sort values for the comments on a post.

    Arguments:

    * link -- the Link containing the comments.
    * sort -- a string indicating the attribute on the comments to use for
      generating sort values.
    * comment_ids -- ids of the comments whose scores are required.
    * timer -- a timer; its ``intermediate("sort")`` is called after missing
      scores have been recalculated.

    Returns a dictionary from cid to a numeric sort value.  For sorts other
    than ``"_date"`` this holds every score stored in CommentScoresByLink for
    the link plus freshly calculated values for requested ids that were
    missing from it.

    """

    from r2.lib.db import queries
    from r2.models import CommentScoresByLink

    # no comments means no scores
    if not comment_ids:
        return {}

    # comment ids are monotonically increasing, so we can use them as a
    # substitute for creation date
    if sort == "_date":
        return dict((cid, cid) for cid in comment_ids)

    # we store these id36ed, but there are still bits of the code that
    # want to deal in integer IDs
    cached_by_id36 = CommentScoresByLink.get_scores(link, sort)
    scores_by_id = {}
    for id36, score in cached_by_id36.iteritems():
        scores_by_id[int(id36, 36)] = score

    scores_needed = set(comment_ids).difference(scores_by_id)
    if not scores_needed:
        return scores_by_id

    # some scores were missing from CommentScoresByLink--lookup the
    # comments and calculate the scores.
    g.stats.simple_event("comment_tree_bad_sorter")

    missing_comments = Comment._byID(
        scores_needed, data=True, return_dict=False)

    if sort == "_qa":
        recalculated_by_id36 = _get_qa_comment_scores(link, missing_comments)
    else:
        recalculated_by_id36 = {}
        for comment in missing_comments:
            recalculated_by_id36[comment._id36] = getattr(comment, sort)

    recalculated_by_id = dict(
        (int(id36, 36), score)
        for id36, score in recalculated_by_id36.iteritems())

    # up to once per minute write the scores to limit writes but
    # eventually return us to the correct state.
    if not g.disallow_db_writes:
        write_key = "lock:score_{link}{sort}".format(link=link._id36, sort=sort)
        if g.lock_cache.add(write_key, "", time=60):
            CommentScoresByLink.set_scores(link, sort, recalculated_by_id36)

    scores_by_id.update(recalculated_by_id)
    timer.intermediate("sort")

    return scores_by_id
Example #9
0
    def upgrade(cls, tree, link):
        """Fill in the ``parents`` attribute of every comment in ``tree``.

        Loads all child comments referenced by ``tree.tree`` in batches of
        100, walks the tree from the root (``None``) computing each comment's
        ``parents`` string as its parent's ``parents`` plus ``':'`` plus the
        parent's id36, commits the comments whose value changed, and finally
        passes all loaded comments to ``cls.add_comments``.
        """
        all_cids = []
        for _parent, child_ids in tree.tree.iteritems():
            all_cids += child_ids

        comments = {}
        for start in xrange(0, len(all_cids), 100):
            stop = start + 100
            g.log.debug('  loading comments %d..%d', start, stop)
            comments.update(Comment._byID(all_cids[start:stop], data=True))

        # need to fill in parents attr for each comment
        changed = []
        pending = [None]
        while pending:
            pid = pending.pop()
            if pid is not None:
                parent = comments[pid]
                expected = parent.parents + ':' + parent._id36
            else:
                # children of the root have an empty ancestor path
                expected = ''

            child_ids = tree.tree.get(pid, [])
            pending.extend(child_ids)
            for cid in child_ids:
                child = comments[cid]
                if child.parents != expected:
                    child.parents = expected
                    changed.append(child)

        for comment in changed:
            comment._commit()

        cls.add_comments(tree, comments.values())
Example #10
0
    def rebuild(cls, link):
        """Rebuild the comment tree for ``link`` from the comments query.

        Comments whose ``parent_id`` does not match any fetched comment are
        dropped, the rest are handed to the implementation selected by
        ``link.comment_tree_version``, and ``link.num_comments`` is reset to
        the number of remaining non-deleted comments before the link is
        committed.

        Returns the newly built tree.
        """
        # fetch all comments and sort by parent_id, so parents are added to the
        # tree before their children
        query = Comment._query(Comment.c.link_id == link._id,
                               Comment.c._deleted == (True, False),
                               Comment.c._spam == (True, False),
                               optimize_rules=True,
                               data=True)
        fetched = list(query)
        fetched.sort(key=lambda comment: comment.parent_id)

        # remove any comments with missing parents
        known_ids = set(comment._id for comment in fetched)
        comments = []
        for comment in fetched:
            if comment.parent_id and comment.parent_id not in known_ids:
                continue
            comments.append(comment)

        # build tree from scratch (for V2 results in double-counting in cass)
        tree = cls(link, cids=[], tree={}, depth={}, parents={})
        implementation = cls.IMPLEMENTATIONS[link.comment_tree_version]
        implementation.rebuild(tree, comments)

        link.num_comments = len([c for c in comments if not c._deleted])
        link._commit()

        return tree
Example #11
0
    def upgrade(cls, tree, link):
        """Recompute and store the ``parents`` path of each comment in ``tree``.

        All child ids listed in ``tree.tree`` are loaded 100 at a time.  The
        tree is then traversed from the root key ``None``; a comment's
        expected ``parents`` value is its parent's ``parents`` followed by
        ``':'`` and the parent's id36 (empty for root children).  Comments
        whose stored value differs are updated and committed, after which all
        loaded comments are given to ``cls.add_comments``.
        """
        cids = []
        for _pid, kids in tree.tree.iteritems():
            cids.extend(kids)

        loaded = {}
        for offset in xrange(0, len(cids), 100):
            g.log.debug('  loading comments %d..%d', offset, offset + 100)
            batch = Comment._byID(cids[offset:offset + 100], data=True)
            loaded.update(batch)

        # need to fill in parents attr for each comment
        to_commit = []
        todo = [None]
        while todo:
            parent_id = todo.pop()
            if parent_id is None:
                path = ''
            else:
                path = loaded[parent_id].parents + ':' + loaded[parent_id]._id36

            kids = tree.tree.get(parent_id, [])
            todo.extend(kids)
            for kid_id in kids:
                kid = loaded[kid_id]
                if kid.parents != path:
                    kid.parents = path
                    to_commit.append(kid)

        for comment in to_commit:
            comment._commit()

        cls.add_comments(tree, loaded.values())
Example #12
0
    def upgrade(cls, tree, link):
        """Load every child comment listed in ``tree.tree`` and re-add it.

        Comments are fetched in batches of 100 ids and then passed in one
        call to ``cls.add_comments``.
        """
        all_cids = []
        for _parent, child_ids in tree.tree.iteritems():
            all_cids += child_ids

        comments = {}
        for start in xrange(0, len(all_cids), 100):
            stop = start + 100
            g.log.debug('  loading comments %d..%d', start, stop)
            comments.update(Comment._byID(all_cids[start:stop], data=True))

        cls.add_comments(tree, comments.values())
Example #13
0
    def upgrade(cls, tree, link):
        """Re-add all comments referenced by ``tree.tree`` to ``tree``.

        The child ids of every entry in ``tree.tree`` are collected, the
        corresponding comments are loaded 100 ids at a time, and the loaded
        comments are handed to ``cls.add_comments``.
        """
        cids = []
        for _pid, kids in tree.tree.iteritems():
            cids.extend(kids)

        loaded = {}
        for offset in xrange(0, len(cids), 100):
            g.log.debug('  loading comments %d..%d', offset, offset + 100)
            batch = Comment._byID(cids[offset:offset + 100], data=True)
            loaded.update(batch)

        cls.add_comments(tree, loaded.values())
Example #14
0
def _populate(after_id=None, estimate=54301242):
    """Run ``update_comment_votes`` over all comments, newest first.

    Arguments:

    * after_id -- optional comment id; when given, the query is positioned
      after that comment via ``q._after``.
    * estimate -- value passed to ``utils.progress`` as its ``estimate``.

    Comments lacking a ``link_id`` attribute are skipped.
    """
    from r2.models import desc
    from r2.lib.db import tdb_cassandra
    from r2.lib import utils

    # larger has a chance to decrease the number of Cassandra writes,
    # but the probability is low
    chunk_size = 5000

    query = Comment._query(Comment.c._spam == (True, False),
                           Comment.c._deleted == (True, False),
                           sort=desc("_date"))

    if after_id is not None:
        query._after(Comment._byID(after_id))

    things = utils.fetch_things2(query, chunk_size=chunk_size)
    things = utils.progress(things, verbosity=chunk_size, estimate=estimate)

    for chunk in utils.in_chunks(things, chunk_size):
        with_link = [comment for comment in chunk
                     if hasattr(comment, "link_id")]
        update_comment_votes(with_link)
Example #15
0
def _populate(after_id=None, estimate=54301242):
    """Feed every comment, in descending ``_date`` order, to
    ``update_comment_votes``.

    Arguments:

    * after_id -- if not None, the id of a comment passed (as a Comment) to
      the query's ``_after``.
    * estimate -- forwarded to ``utils.progress``.

    Within each chunk only comments that have a ``link_id`` attribute are
    forwarded.
    """
    from r2.models import desc
    from r2.lib.db import tdb_cassandra
    from r2.lib import utils

    # larger has a chance to decrease the number of Cassandra writes,
    # but the probability is low
    chunk_size = 5000

    comment_query = Comment._query(
        Comment.c._spam == (True, False),
        Comment.c._deleted == (True, False),
        sort=desc('_date'),
    )

    if after_id is not None:
        start_after = Comment._byID(after_id)
        comment_query._after(start_after)

    stream = utils.progress(
        utils.fetch_things2(comment_query, chunk_size=chunk_size),
        verbosity=chunk_size,
        estimate=estimate)

    for chunk in utils.in_chunks(stream, chunk_size):
        usable = [thing for thing in chunk if hasattr(thing, 'link_id')]
        update_comment_votes(usable)
Example #16
0
def _comment_sorter_from_cids(cids, sort):
    """Retrieve sort values for comments.

    Useful to fill in any gaps in CommentSortsCache.

    Arguments:

    * cids -- the ids of the Comments to retrieve sort values for.
    * sort -- a string representing the type of sort to use.

    Returns a dictionary from cid to the value produced by
    ``_get_sort_value`` for that comment.
    """
    sorter = {}
    for comment in Comment._byID(cids, data=False, return_dict=False):
        sorter[comment._id] = _get_sort_value(comment, sort)
    return sorter
Example #17
0
def _comment_sorter_from_cids(cids, sort):
    """Compute sort values for the comments with the given ids.

    Useful to fill in any gaps in CommentSortsCache.

    Arguments:

    * cids -- ids of the Comments to look up.
    * sort -- a string representing the type of sort to use.

    Returns a dictionary mapping each comment's integer id to its sort value
    as computed by ``_get_sort_value``.
    """
    looked_up = Comment._byID(cids, data=False, return_dict=False)
    return {comment._id: _get_sort_value(comment, sort)
            for comment in looked_up}
Example #18
0
    def rebuild(cls, link):
        """Rebuild the permacached comment tree for ``link``.

        Fetches the link's comments, drops those whose ``parent_id`` does not
        match any fetched comment, passes the rest to
        ``CommentTreePermacache.rebuild`` and stores the count of remaining
        non-deleted comments in ``link.num_comments`` before committing.
        """
        # retrieve all the comments for the link
        query = Comment._query(
            Comment.c.link_id == link._id,
            Comment.c._deleted == (True, False),
            Comment.c._spam == (True, False),
            optimize_rules=True,
        )
        fetched = list(query)

        # remove any comments with missing parents
        known_ids = set(comment._id for comment in fetched)
        comments = []
        for comment in fetched:
            if comment.parent_id and comment.parent_id not in known_ids:
                continue
            comments.append(comment)

        CommentTreePermacache.rebuild(link, comments)

        link.num_comments = len([c for c in comments if not c._deleted])
        link._commit()
Example #19
0
    def rebuild(cls, link):
        """Regenerate the comment tree cache of ``link`` from its comments.

        Comments that reference a parent not present in the query result are
        discarded.  The survivors are given to
        ``CommentTreePermacache.rebuild``; ``link.num_comments`` is then set
        to how many of them are not deleted and the link is committed.
        """
        # retrieve all the comments for the link
        all_comments = list(Comment._query(
            Comment.c.link_id == link._id,
            Comment.c._deleted == (True, False),
            Comment.c._spam == (True, False),
            optimize_rules=True,
        ))

        # remove any comments with missing parents
        present = {c._id for c in all_comments}

        def has_known_parent(c):
            return not c.parent_id or c.parent_id in present

        comments = [c for c in all_comments if has_known_parent(c)]

        CommentTreePermacache.rebuild(link, comments)

        live_count = 0
        for c in comments:
            if not c._deleted:
                live_count += 1
        link.num_comments = live_count
        link._commit()
Example #20
0
def _comment_sorter_from_cids(cids, sort):
    """Return a dict mapping each comment id in ``cids`` to the value
    ``_get_sort_value`` computes for that comment and ``sort``."""
    sorter = {}
    for comment in Comment._byID(cids, data=False, return_dict=False):
        sorter[comment._id] = _get_sort_value(comment, sort)
    return sorter
Example #21
0
def link_comments_and_sort(link, sort):
    """Fetch the cached comment tree of a post together with sort values.

    Arguments:

    * link -- the Link whose comments we want to sort.
    * sort -- a string naming the sort; it is passed on to
      ``_get_comment_sorter`` and ``_comment_sorter_from_cids``.

    Returns a tuple in the form (cids, cid_tree, depth, num_children,
    parents, sorter).  The first five values are read from the comment tree
    object returned by ``get_comment_tree``; ``sorter`` is a dictionary keyed
    by comment id holding the sort values.
    """
    from r2.models import CommentSortsCache

    # This has grown sort of organically over time. Right now the
    # cache of the comments tree consists in three keys:
    # 1. The comments_key: A tuple of
    #      (cids, comment_tree, depth, num_children)
    #    given:
    #      cids         =:= [comment_id]
    #      comment_tree =:= dict(comment_id -> [comment_id])
    #      depth        =:= dict(comment_id -> int depth)
    #      num_children =:= dict(comment_id -> int num_children)
    # 2. The parent_comments_key =:= dict(comment_id -> parent_id)
    # 3. The comments_sorts keys =:= dict(comment_id36 -> float).
    #    These are represented by a Cassandra model
    #    (CommentSortsCache) rather than a permacache key. One of
    #    these exists for each sort (hot, new, etc)

    timer = g.stats.get_timer('comment_tree.get.%s' %
                              link.comment_tree_version)
    timer.start()

    link_id = link._id
    cache = get_comment_tree(link, timer=timer)
    # NOTE: of these locals only cids and parents are read again below; the
    # return statement reads the values from ``cache`` directly.
    cids = cache.cids
    tree = cache.tree
    depth = cache.depth
    num_children = cache.num_children
    parents = cache.parents

    # load the sorter
    sorter = _get_comment_sorter(link_id, sort)

    sorter_needed = []
    if cids and not sorter:
        # nothing cached for this sort at all; the assignment here is
        # superseded by the list comprehension right after this block
        sorter_needed = cids
        g.log.debug("comment_tree.py: sorter (%s) cache miss for Link %s" %
                    (sort, link_id))
        sorter = {}

    # find comments for which the sort values weren't in the cache
    sorter_needed = [x for x in cids if x not in sorter]
    if cids and sorter_needed:
        g.log.debug(
            "Error in comment_tree: sorter %r inconsistent (missing %d e.g. %r)"
            % (sort_comments_key(
                link_id, sort), len(sorter_needed), sorter_needed[:10]))
        # presumably this persists fresh sort values for the missing comments
        # -- TODO confirm against update_comment_votes
        if not g.disallow_db_writes:
            update_comment_votes(
                Comment._byID(sorter_needed, data=True, return_dict=False))

        # fill the gaps in the in-memory sorter regardless of whether writes
        # are allowed
        sorter.update(_comment_sorter_from_cids(sorter_needed, sort))
        timer.intermediate('sort')

    # An absent or incomplete parents mapping is reset to an empty dict so
    # that the rebuild below is triggered.
    if parents is None:
        g.log.debug("comment_tree.py: parents cache miss for Link %s" %
                    link_id)
        parents = {}
    elif cids and not all(x in parents for x in cids):
        g.log.debug("Error in comment_tree: parents inconsistent for Link %s" %
                    link_id)
        parents = {}

    if not parents and len(cids) > 0:
        with CommentTree.mutation_context(link):
            # reload under lock so the sorter and parents are consistent
            timer.intermediate('lock')
            cache = get_comment_tree(link, timer=timer)
            # derive the parents mapping from the freshly loaded tree
            cache.parents = cache.parent_dict_from_tree(cache.tree)

    timer.stop()

    # ``cache`` may have been replaced by the reload above, so the values are
    # taken from it rather than from the locals captured earlier.
    return (cache.cids, cache.tree, cache.depth, cache.num_children,
            cache.parents, sorter)
Example #22
0
def _comment_sorter_from_cids(cids, sort):
    """Look up the comments with ids ``cids`` and return a dict from each
    comment's id to its ``_get_sort_value`` result for ``sort``."""
    looked_up = Comment._byID(cids, data=False, return_dict=False)
    return {comment._id: _get_sort_value(comment, sort)
            for comment in looked_up}
Example #23
0
def link_comments_and_sort(link, sort):
    """Fetch and sort the comments on a post.

    Arguments:

    * link -- the Link whose comments we want to sort.
    * sort -- a string indicating the attribute on the comments to use for
      generating sort values.

    Returns a tuple in the form (cids, cid_tree, depth, parents, sorter), where
    the values are as follows:

    * cids -- a list of the ids of all comments in the thread.
    * cid_tree -- a dictionary from parent cid to children cids.
    * depth -- a dictionary from cid to the depth that comment resides in the
      tree. A top-level comment has depth 0.
    * parents -- a dictionary from child cid to parent cid.
    * sorter -- a dictionary from cid to a numeric value to be used for
      sorting.
    """

    # This has grown sort of organically over time. Right now the
    # cache of the comments tree consists in three keys:
    # 1. The comments_key: A tuple of
    #      (cids, comment_tree, depth)
    #    given:
    #      cids         =:= [comment_id]
    #      comment_tree =:= dict(comment_id -> [comment_id])
    #      depth        =:= dict(comment_id -> int depth)
    # 2. The parent_comments_key =:= dict(comment_id -> parent_id)
    # 3. The comments_sorts keys =:= dict(comment_id36 -> float).
    #    These are represented by a Cassandra model
    #    (CommentScoresByLink) rather than a permacache key. One of
    #    these exists for each sort (hot, new, etc)

    timer = g.stats.get_timer('comment_tree.get.%s' % link.comment_tree_version)
    timer.start()

    cache = get_comment_tree(link, timer=timer)
    # NOTE: depth and parents are not read again in this function; the return
    # statement takes all tree values from ``cache`` directly.
    cids = cache.cids
    tree = cache.tree
    depth = cache.depth
    parents = cache.parents

    # load the sorter
    sorter = _get_comment_sorter(link, sort)

    # find comments for which the sort values weren't in the cache
    sorter_needed = []
    if cids and not sorter:
        # this assignment is superseded by the list comprehension below
        sorter_needed = cids
        g.log.debug("comment_tree.py: sorter %s cache miss for %s", sort, link)
        sorter = {}

    sorter_needed = [x for x in cids if x not in sorter]
    if cids and sorter_needed:
        g.log.debug(
            "Error in comment_tree: sorter %s/%s inconsistent (missing %d e.g. %r)"
            % (link, sort, len(sorter_needed), sorter_needed[:10]))
        g.stats.simple_event('comment_tree_bad_sorter')
        # presumably this persists fresh sort values for the missing comments
        # -- TODO confirm against update_comment_votes
        if not g.disallow_db_writes:
            update_comment_votes(Comment._byID(sorter_needed, data=True, return_dict=False))

        # The Q&A sort needs access to attributes the others don't, so save the
        # extra lookups if we can.
        data_needed = (sort == '_qa')
        comments = Comment._byID(sorter_needed, data=data_needed, return_dict=False)
        # fill the gaps in the in-memory sorter
        sorter.update(_comment_sorter_from_cids(comments, sort, link, tree))
        timer.intermediate('sort')

    timer.stop()

    return (cache.cids, cache.tree, cache.depth, cache.parents, sorter)
Example #24
0
def link_comments_and_sort(link, sort):
    """Fetch and sort the comments on a post.

    Arguments:

    * link -- the Link whose comments we want to sort.
    * sort -- a string indicating the attribute on the comments to use for
      generating sort values.

    Returns a tuple in the form (cids, cid_tree, depth, parents, sorter), where
    the values are as follows:

    * cids -- a list of the ids of all comments in the thread.
    * cid_tree -- a dictionary from parent cid to children cids.
    * depth -- a dictionary from cid to the depth that comment resides in the
      tree. A top-level comment has depth 0.
    * parents -- a dictionary from child cid to parent cid.
    * sorter -- a dictionary from cid to a numeric value to be used for
      sorting.
    """
    from r2.models import CommentSortsCache

    # This has grown sort of organically over time. Right now the
    # cache of the comments tree consists in three keys:
    # 1. The comments_key: A tuple of
    #      (cids, comment_tree, depth)
    #    given:
    #      cids         =:= [comment_id]
    #      comment_tree =:= dict(comment_id -> [comment_id])
    #      depth        =:= dict(comment_id -> int depth)
    # 2. The parent_comments_key =:= dict(comment_id -> parent_id)
    # 3. The comments_sorts keys =:= dict(comment_id36 -> float).
    #    These are represented by a Cassandra model
    #    (CommentSortsCache) rather than a permacache key. One of
    #    these exists for each sort (hot, new, etc)

    timer = g.stats.get_timer('comment_tree.get.%s' % link.comment_tree_version)
    timer.start()

    link_id = link._id
    cache = get_comment_tree(link, timer=timer)
    # NOTE: depth is not read again in this function; the return statement
    # takes all tree values from ``cache`` directly.
    cids = cache.cids
    tree = cache.tree
    depth = cache.depth
    parents = cache.parents

    # load the sorter
    sorter = _get_comment_sorter(link_id, sort)

    # find comments for which the sort values weren't in the cache
    sorter_needed = []
    if cids and not sorter:
        # this assignment is superseded by the list comprehension below
        sorter_needed = cids
        g.log.debug("comment_tree.py: sorter (%s) cache miss for Link %s"
                    % (sort, link_id))
        sorter = {}

    sorter_needed = [x for x in cids if x not in sorter]
    if cids and sorter_needed:
        g.log.debug(
            "Error in comment_tree: sorter %r inconsistent (missing %d e.g. %r)"
            % (sort_comments_key(link_id, sort), len(sorter_needed), sorter_needed[:10]))
        # presumably this persists fresh sort values for the missing comments
        # -- TODO confirm against update_comment_votes
        if not g.disallow_db_writes:
            update_comment_votes(Comment._byID(sorter_needed, data=True, return_dict=False))

        # fill the gaps in the in-memory sorter; the comments are looked up
        # with data=False here
        comments = Comment._byID(sorter_needed, data = False, return_dict = False)
        sorter.update(_comment_sorter_from_cids(comments, sort, link, tree))
        timer.intermediate('sort')

    # An absent or incomplete parents mapping is reset to an empty dict so
    # that the rebuild below is triggered.
    if parents is None:
        g.log.debug("comment_tree.py: parents cache miss for Link %s"
                    % link_id)
        parents = {}
    elif cids and not all(x in parents for x in cids):
        g.log.debug("Error in comment_tree: parents inconsistent for Link %s"
                    % link_id)
        parents = {}

    if not parents and len(cids) > 0:
        with CommentTree.mutation_context(link):
            # reload under lock so the sorter and parents are consistent
            timer.intermediate('lock')
            cache = get_comment_tree(link, timer=timer)
            # derive the parents mapping from the freshly loaded tree
            cache.parents = cache.parent_dict_from_tree(cache.tree)

    timer.stop()

    # ``cache`` may have been replaced by the reload above, so the values are
    # taken from it rather than from the locals captured earlier.
    return (cache.cids, cache.tree, cache.depth, cache.parents, sorter)
Example #25
0
def link_comments_and_sort(link, sort):
    """Fetch and sort the comments on a post.

    Arguments:

    * link -- the Link whose comments we want to sort.
    * sort -- a string indicating the attribute on the comments to use for
      generating sort values.

    Returns a tuple in the form (cids, cid_tree, depth, parents, sorter), where
    the values are as follows:

    * cids -- a list of the ids of all comments in the thread.
    * cid_tree -- a dictionary from parent cid to children cids.
    * depth -- a dictionary from cid to the depth that comment resides in the
      tree. A top-level comment has depth 0.
    * parents -- a dictionary from child cid to parent cid.
    * sorter -- a dictionary from cid to a numeric value to be used for
      sorting.
    """

    # This has grown sort of organically over time. Right now the
    # cache of the comments tree consists in three keys:
    # 1. The comments_key: A tuple of
    #      (cids, comment_tree, depth)
    #    given:
    #      cids         =:= [comment_id]
    #      comment_tree =:= dict(comment_id -> [comment_id])
    #      depth        =:= dict(comment_id -> int depth)
    # 2. The parent_comments_key =:= dict(comment_id -> parent_id)
    # 3. The comments_sorts keys =:= dict(comment_id36 -> float).
    #    These are represented by a Cassandra model
    #    (CommentScoresByLink) rather than a permacache key. One of
    #    these exists for each sort (hot, new, etc)

    timer = g.stats.get_timer('comment_tree.get.%s' %
                              link.comment_tree_version)
    timer.start()

    cache = get_comment_tree(link, timer=timer)
    # NOTE: depth is not read again in this function; the return statement
    # takes all tree values from ``cache`` directly.
    cids = cache.cids
    tree = cache.tree
    depth = cache.depth
    parents = cache.parents

    # load the sorter
    sorter = _get_comment_sorter(link, sort)

    # find comments for which the sort values weren't in the cache
    sorter_needed = []
    if cids and not sorter:
        # this assignment is superseded by the list comprehension below
        sorter_needed = cids
        g.log.debug("comment_tree.py: sorter %s cache miss for %s", sort, link)
        sorter = {}

    sorter_needed = [x for x in cids if x not in sorter]
    if cids and sorter_needed:
        g.log.debug(
            "Error in comment_tree: sorter %s/%s inconsistent (missing %d e.g. %r)"
            % (link, sort, len(sorter_needed), sorter_needed[:10]))
        # presumably this persists fresh sort values for the missing comments
        # -- TODO confirm against update_comment_votes
        if not g.disallow_db_writes:
            update_comment_votes(
                Comment._byID(sorter_needed, data=True, return_dict=False))

        # The Q&A sort needs access to attributes the others don't, so save the
        # extra lookups if we can.
        data_needed = (sort == '_qa')
        comments = Comment._byID(sorter_needed,
                                 data=data_needed,
                                 return_dict=False)
        # fill the gaps in the in-memory sorter
        sorter.update(_comment_sorter_from_cids(comments, sort, link, tree))
        timer.intermediate('sort')

    # An absent or incomplete parents mapping is reset to an empty dict so
    # that the rebuild below is triggered.
    if parents is None:
        g.log.debug("comment_tree.py: parents cache miss for %s", link)
        parents = {}
    elif cids and not all(x in parents for x in cids):
        g.log.debug("Error in comment_tree: parents inconsistent for %s", link)
        parents = {}

    if not parents and len(cids) > 0:
        with CommentTree.mutation_context(link):
            # reload under lock so the sorter and parents are consistent
            timer.intermediate('lock')
            cache = get_comment_tree(link, timer=timer)
            # derive the parents mapping from the freshly loaded tree
            cache.parents = cache.parent_dict_from_tree(cache.tree)

    timer.stop()

    # ``cache`` may have been replaced by the reload above, so the values are
    # taken from it rather than from the locals captured earlier.
    return (cache.cids, cache.tree, cache.depth, cache.parents, sorter)
Example #26
0
def get_comment_scores(link, sort, comment_ids, timer):
    """Look up the sort value of the comments on a link.

    Arguments:

    * link -- the Link that contains the comments.
    * sort -- a string naming the Comment attribute used as the sort value
      ("_date" and "_qa" are handled specially).
    * comment_ids -- integer ids of the comments that must have a score.
    * timer -- timer that gets a 'sort' checkpoint whenever scores missing
      from the cache had to be computed.

    Returns a dictionary from integer comment id to sort value. Outside of
    the "_date" sort it may also hold cached entries for ids that were not
    requested.

    """

    from r2.lib.db import queries
    from r2.models import CommentScoresByLink

    # nothing to score
    if not comment_ids:
        return {}

    # ids only ever grow, which makes the id itself a usable stand-in for
    # the creation date
    if sort == "_date":
        return dict((cid, cid) for cid in comment_ids)

    # the stored keys are id36 strings, but callers still want integer ids
    cached_scores = CommentScoresByLink.get_scores(link, sort)
    scores_by_id = {}
    for id36, score in cached_scores.iteritems():
        scores_by_id[int(id36, 36)] = score

    scores_needed = set(comment_ids).difference(scores_by_id)
    if not scores_needed:
        return scores_by_id

    g.stats.simple_event('comment_tree_bad_sorter')

    missing_comments = Comment._byID(
        scores_needed, data=True, return_dict=False)

    # queue the missing comments to be added to the comments tree, which
    # will trigger adding their scores
    for comment in missing_comments:
        queries.add_to_commentstree_q(comment)

    if sort == "_qa":
        qa_scores = _get_qa_comment_scores(link, missing_comments)
        scores_by_missing = dict(
            (int(id36, 36), score)
            for id36, score in qa_scores.iteritems())
    else:
        scores_by_missing = dict(
            (comment._id, getattr(comment, sort))
            for comment in missing_comments)

    scores_by_id.update(scores_by_missing)
    timer.intermediate('sort')

    return scores_by_id
Example #27
0
def link_comments_and_sort(link, sort):
    """Load the cached comment tree of a link plus the values for one sort.

    Arguments:

    * link -- the Link whose comment tree is loaded.
    * sort -- the sort whose cached values are loaded.

    Sort values missing for any comment id in the tree are recomputed with
    _comment_sorter_from_cids. If the cached parents dict is missing or does
    not cover every comment id, the tree is reloaded under
    CommentTree.mutation_context and the parents are rebuilt from it.

    Returns the tuple (cids, tree, depth, num_children, parents, sorter).
    When the tree has no comment ids, the sorter and parents are returned
    exactly as they were loaded.

    """
    # This has grown sort of organically over time. Right now the
    # cache of the comments tree consists in three keys:
    # 1. The comments_key: A tuple of
    #      (cids, comment_tree, depth, num_children)
    #    given:
    #      cids         =:= [comment_id]
    #      comment_tree =:= dict(comment_id -> [comment_id])
    #      depth        =:= dict(comment_id -> int depth)
    #      num_children =:= dict(comment_id -> int num_children)
    # 2. The parent_comments_key =:= dict(comment_id -> parent_id)
    # 3. The comments_sorts keys =:= dict(comment_id36 -> float).
    #    These are represented by a Cassandra model
    #    (CommentSortsCache) rather than a permacache key. One of
    #    these exists for each sort (hot, new, etc)

    timer = g.stats.get_timer('comment_tree.get.%s' % link.comment_tree_version)
    timer.start()

    link_id = link._id
    cache = get_comment_tree(link, timer=timer)
    cids = cache.cids
    parents = cache.parents

    # load the sorter
    sorter = _get_comment_sorter(link_id, sort)
    if cids and not sorter:
        g.log.debug("comment_tree.py: sorter (%s) cache miss for Link %s",
                    sort, link_id)
        sorter = {}

    # find comments for which the sort values weren't in the cache
    sorter_needed = [x for x in cids if x not in sorter]
    if sorter_needed:
        g.log.debug(
            "Error in comment_tree: sorter %r inconsistent (missing %d e.g. %r)",
            sort_comments_key(link_id, sort), len(sorter_needed),
            sorter_needed[:10])
        if not g.disallow_db_writes:
            update_comment_votes(
                Comment._byID(sorter_needed, data=True, return_dict=False))

        sorter.update(_comment_sorter_from_cids(sorter_needed, sort))
        timer.intermediate('sort')

    # resetting parents to {} marks them as unusable for the check below
    if parents is None:
        g.log.debug("comment_tree.py: parents cache miss for Link %s",
                    link_id)
        parents = {}
    elif cids and not all(x in parents for x in cids):
        g.log.debug("Error in comment_tree: parents inconsistent for Link %s",
                    link_id)
        parents = {}

    if not parents and len(cids) > 0:
        with CommentTree.mutation_context(link):
            # reload under lock so the sorter and parents are consistent
            timer.intermediate('lock')
            cache = get_comment_tree(link, timer=timer)
            cache.parents = cache.parent_dict_from_tree(cache.tree)

    timer.stop()

    # read everything from `cache` here: it may have been replaced by the
    # reload above
    return (cache.cids, cache.tree, cache.depth, cache.num_children,
            cache.parents, sorter)
Example #28
0
def get_comment_scores(link, sort, comment_ids, timer):
    """Fetch the cached sort values for the comments on a post.

    Arguments:

    * link -- the Link containing the comments.
    * sort -- a string indicating the attribute on the comments to use for
      generating sort values ("_date" and "_qa" get special treatment).
    * comment_ids -- integer ids of the comments that need a sort value.
    * timer -- timer on which a 'sort' checkpoint is recorded when missing
      values had to be computed.

    Returns a dictionary from integer cid to sort value.

    """

    from r2.lib.db import queries
    from r2.models import CommentScoresByLink

    if not comment_ids:
        # no comments means no scores
        return {}

    if sort == "_date":
        # comment ids are monotonically increasing, so the id doubles as a
        # creation date for sorting purposes
        return {cid: cid for cid in comment_ids}

    def _keyed_by_int_id(scores_by_id36):
        # scores are stored under id36 keys, but there are still bits of
        # the code that want to deal in integer IDs
        return dict((int(key, 36), value)
                    for key, value in scores_by_id36.iteritems())

    scores_by_id = _keyed_by_int_id(CommentScoresByLink.get_scores(link, sort))

    scores_needed = set(comment_ids) - set(scores_by_id)
    if scores_needed:
        g.stats.simple_event('comment_tree_bad_sorter')

        missing_comments = Comment._byID(scores_needed,
                                         data=True,
                                         return_dict=False)

        # queue the missing comments to be added to the comments tree, which
        # will trigger adding their scores
        for missing in missing_comments:
            queries.add_to_commentstree_q(missing)

        if sort != "_qa":
            recovered = {
                missing._id: getattr(missing, sort)
                for missing in missing_comments
            }
        else:
            recovered = _keyed_by_int_id(
                _get_qa_comment_scores(link, missing_comments))

        scores_by_id.update(recovered)
        timer.intermediate('sort')

    return scores_by_id
Example #29
0
def get_comment_scores(link, sort, comment_ids, timer):
    """Retrieve cached sort values for all comments on a post.

    Arguments:

    * link -- the Link containing the comments.
    * sort -- a string indicating the attribute on the comments to use for
      generating sort values ("_date" and "_qa" are handled specially).
    * comment_ids -- integer ids of the comments that need a sort value.
    * timer -- timer on which a 'sort' checkpoint is recorded when missing
      values had to be computed.

    Scores missing from CommentScoresByLink are computed from the comments
    themselves and, unless DB writes are disallowed, written back at most
    once per minute per link and sort.

    Returns a dictionary from integer cid to sort value.

    """

    if not comment_ids:
        # no comments means no scores
        return {}

    if sort == "_date":
        # comment ids are monotonically increasing, so we can use them as a
        # substitute for creation date
        return {comment_id: comment_id for comment_id in comment_ids}

    # only the cache-backed path below needs the model
    from r2.models import CommentScoresByLink

    scores_by_id36 = CommentScoresByLink.get_scores(link, sort)

    # we store these id36ed, but there are still bits of the code that
    # want to deal in integer IDs
    scores_by_id = {
        int(id36, 36): score
        for id36, score in scores_by_id36.iteritems()
    }

    scores_needed = set(comment_ids).difference(scores_by_id)
    if not scores_needed:
        return scores_by_id

    # some scores were missing from CommentScoresByLink--lookup the
    # comments and calculate the scores.
    g.stats.simple_event('comment_tree_bad_sorter')

    missing_comments = Comment._byID(scores_needed,
                                     data=True,
                                     return_dict=False)

    if sort == "_qa":
        scores_by_missing_id36 = _get_qa_comment_scores(
            link, missing_comments)
    else:
        scores_by_missing_id36 = {
            comment._id36: getattr(comment, sort)
            for comment in missing_comments
        }

    # the id36-keyed dict is what gets written back; callers get int keys
    scores_by_missing = {
        int(id36, 36): score
        for id36, score in scores_by_missing_id36.iteritems()
    }

    # up to once per minute write the scores to limit writes but
    # eventually return us to the correct state.
    if not g.disallow_db_writes:
        write_key = "lock:score_{link}{sort}".format(
            link=link._id36,
            sort=sort,
        )
        # add() only succeeds for the first caller within the 60s window
        should_write = g.lock_cache.add(write_key, "", time=60)
        if should_write:
            CommentScoresByLink.set_scores(link, sort,
                                           scores_by_missing_id36)

    scores_by_id.update(scores_by_missing)
    timer.intermediate('sort')

    return scores_by_id