Example #1
0
    def process_message(msgs, chan):
        """Update get_links(), the Links by Subreddit precomputed query.

        get_links() is a CachedResult which is stored in permacache. To
        update these objects we need to do a read-modify-write which requires
        obtaining a lock. Sharding these updates by subreddit allows us to run
        multiple consumers (but ideally just one per shard) to avoid lock
        contention.

        """

        from r2.lib.db.queries import add_queries, get_links

        link_names = {msg.body for msg in msgs}
        links = Link._by_fullname(link_names, return_dict=False)
        print 'Processing %r' % (links, )

        links_by_sr_id = defaultdict(list)
        for link in links:
            links_by_sr_id[link.sr_id].append(link)

        srs_by_id = Subreddit._byID(links_by_sr_id.keys(), stale=True)

        for sr_id, links in links_by_sr_id.iteritems():
            with g.stats.get_timer("link_vote_processor.subreddit_queries"):
                sr = srs_by_id[sr_id]
                add_queries(
                    queries=[get_links(sr, sort, "all") for sort in SORTS],
                    insert_items=links,
                )
Example #2
0
    def process_message(msgs, chan):
        """Update get_domain_links(), the Links by domain precomputed query.

        get_domain_links() is a CachedResult which is stored in permacache. To
        update these objects we need to do a read-modify-write which requires
        obtaining a lock. Sharding these updates by domain allows us to run
        multiple consumers (but ideally just one per shard) to avoid lock
        contention.

        """

        from r2.lib.db.queries import add_queries, get_domain_links

        link_names = {msg.body for msg in msgs}
        links = Link._by_fullname(link_names, return_dict=False)
        print 'Processing %r' % (links, )

        links_by_domain = defaultdict(list)
        for link in links:
            parsed = UrlParser(link.url)

            # update the listings for all permutations of the link's domain
            for domain in parsed.domain_permutations():
                links_by_domain[domain].append(link)

        for d, links in links_by_domain.iteritems():
            with g.stats.get_timer("link_vote_processor.domain_queries"):
                add_queries(
                    queries=[
                        get_domain_links(d, sort, "all") for sort in SORTS
                    ],
                    insert_items=links,
                )
Example #3
0
    def process_message(msg):
        from r2.lib.db.queries import (
            add_queries,
            add_to_commentstree_q,
            get_comments,
        )

        vote_data = json.loads(msg.body)
        hook = hooks.get_hook('vote.validate_vote_data')
        if hook.call_until_return(msg=msg, vote_data=vote_data) is False:
            # Corrupt records in the queue. Ignore them.
            print "Ignoring invalid vote by %s on %s %s" % (
                vote_data.get('user_id', '<unknown>'),
                vote_data.get('thing_fullname', '<unknown>'), vote_data)
            return

        timer = g.stats.get_timer("comment_vote_processor")
        timer.start()

        user = Account._byID(vote_data.pop("user_id"))
        comment = Comment._by_fullname(vote_data.pop("thing_fullname"))

        print "Processing vote by %s on %s %s" % (user, comment, vote_data)

        try:
            vote = Vote(
                user,
                comment,
                direction=vote_data["direction"],
                date=datetime.utcfromtimestamp(vote_data["date"]),
                data=vote_data["data"],
                event_data=vote_data.get("event_data"),
            )
        except TypeError as e:
            # a vote on an invalid type got in the queue, just skip it
            g.log.exception("Invalid type: %r", e.message)
            return

        vote.commit()
        timer.intermediate("create_vote_object")

        vote_valid = vote.is_automatic_initial_vote or vote.effects.affects_score
        comment_valid = not (comment._spam or comment._deleted)
        if vote_valid and comment_valid:
            author = Account._byID(comment.author_id)
            add_queries(
                queries=[get_comments(author, sort, 'all') for sort in SORTS],
                insert_items=comment,
            )
            timer.intermediate("author_queries")

            # update the score periodically when a comment has many votes
            update_threshold = g.live_config['comment_vote_update_threshold']
            update_period = g.live_config['comment_vote_update_period']
            num_votes = comment.num_votes
            if num_votes <= update_threshold or num_votes % update_period == 0:
                add_to_commentstree_q(comment)

        timer.stop()
        timer.flush()
Example #4
0
def set_status(l, status, onchange=None):
    # keep this out here.  Useful for updating the queue if there is a bug
    # and for initial migration
    add_queries(
        [
            _sponsored_link_query(None, l.author_id),
            _sponsored_link_query(None),
            _sponsored_link_query(status, l.author_id),
            _sponsored_link_query(status),
        ],
        insert_items=[l],
    )

    # no need to delete or commit of the status is unchanged
    if status != getattr(l, "promote_status", None):
        # new links won't even have a promote_status yet
        if hasattr(l, "promote_status"):
            add_queries(
                [_sponsored_link_query(l.promote_status, l.author_id), _sponsored_link_query(l.promote_status)],
                delete_items=[l],
            )
        l.promote_status = status
        l._commit()
        if onchange:
            onchange()
Example #5
0
    def process_message(msgs, chan):
        """Update get_domain_links(), the Links by domain precomputed query.

        get_domain_links() is a CachedResult which is stored in permacache. To
        update these objects we need to do a read-modify-write which requires
        obtaining a lock. Sharding these updates by domain allows us to run
        multiple consumers (but ideally just one per shard) to avoid lock
        contention.

        """

        from r2.lib.db.queries import add_queries, get_domain_links

        link_names = {msg.body for msg in msgs}
        links = Link._by_fullname(link_names, return_dict=False)
        print 'Processing %r' % (links,)

        links_by_domain = defaultdict(list)
        for link in links:
            parsed = UrlParser(link.url)

            # update the listings for all permutations of the link's domain
            for domain in parsed.domain_permutations():
                links_by_domain[domain].append(link)

        for d, links in links_by_domain.iteritems():
            with g.stats.get_timer("link_vote_processor.domain_queries"):
                add_queries(
                    queries=[
                        get_domain_links(d, sort, "all") for sort in SORTS],
                    insert_items=links,
                )
Example #6
0
    def process_message(msgs, chan):
        """Update get_links(), the Links by Subreddit precomputed query.

        get_links() is a CachedResult which is stored in permacache. To
        update these objects we need to do a read-modify-write which requires
        obtaining a lock. Sharding these updates by subreddit allows us to run
        multiple consumers (but ideally just one per shard) to avoid lock
        contention.

        """

        from r2.lib.db.queries import add_queries, get_links

        link_names = {msg.body for msg in msgs}
        links = Link._by_fullname(link_names, return_dict=False)
        print 'Processing %r' % (links,)

        links_by_sr_id = defaultdict(list)
        for link in links:
            links_by_sr_id[link.sr_id].append(link)

        srs_by_id = Subreddit._byID(links_by_sr_id.keys(), stale=True)

        for sr_id, links in links_by_sr_id.iteritems():
            with g.stats.get_timer("link_vote_processor.subreddit_queries"):
                sr = srs_by_id[sr_id]
                add_queries(
                    queries=[get_links(sr, sort, "all") for sort in SORTS],
                    insert_items=links,
                )
Example #7
0
def set_status(l, status, onchange=None):
    # keep this out here.  Useful for updating the queue if there is a bug
    # and for initial migration
    add_queries([
        _sponsored_link_query(None, l.author_id),
        _sponsored_link_query(None),
        _sponsored_link_query(status, l.author_id),
        _sponsored_link_query(status)
    ],
                insert_items=[l])

    # no need to delete or commit of the status is unchanged
    if status != getattr(l, "promote_status", None):
        # new links won't even have a promote_status yet
        if hasattr(l, "promote_status"):
            add_queries([
                _sponsored_link_query(l.promote_status, l.author_id),
                _sponsored_link_query(l.promote_status)
            ],
                        delete_items=[l])
        l.promote_status = status
        l._commit()
        if onchange:
            onchange()
Example #8
0
    def process_message(msg):
        from r2.lib.comment_tree import write_comment_scores
        from r2.lib.db.queries import (
            add_queries,
            add_to_commentstree_q,
            get_comments,
        )
        from r2.models.builder import get_active_sort_orders_for_link

        vote_data = json.loads(msg.body)
        hook = hooks.get_hook('vote.validate_vote_data')
        if hook.call_until_return(msg=msg, vote_data=vote_data) is False:
            # Corrupt records in the queue. Ignore them.
            print "Ignoring invalid vote by %s on %s %s" % (
                vote_data.get('user_id', '<unknown>'),
                vote_data.get('thing_fullname', '<unknown>'), vote_data)
            return

        timer = g.stats.get_timer("comment_vote_processor")
        timer.start()

        user = Account._byID(vote_data.pop("user_id"))
        comment = Comment._by_fullname(vote_data.pop("thing_fullname"))

        print "Processing vote by %s on %s %s" % (user, comment, vote_data)

        try:
            vote = Vote(
                user,
                comment,
                direction=vote_data["direction"],
                date=datetime.utcfromtimestamp(vote_data["date"]),
                data=vote_data["data"],
                event_data=vote_data.get("event_data"),
            )
        except TypeError as e:
            # a vote on an invalid type got in the queue, just skip it
            g.log.exception("Invalid type: %r", e.message)
            return

        vote.commit()
        timer.intermediate("create_vote_object")

        vote_invalid = (not vote.effects.affects_score
                        and not vote.is_automatic_initial_vote)
        comment_invalid = comment._spam or comment._deleted
        if vote_invalid or comment_invalid:
            timer.stop()
            timer.flush()
            return

        author = Account._byID(comment.author_id)
        add_queries(
            queries=[get_comments(author, sort, 'all') for sort in SORTS],
            insert_items=comment,
        )
        timer.intermediate("author_queries")

        update_threshold = g.live_config['comment_vote_update_threshold']
        update_period = g.live_config['comment_vote_update_period']
        skip_score_update = (comment.num_votes > update_threshold
                             and comment.num_votes % update_period != 0)

        # skip updating scores if this was the automatic initial vote. those
        # updates will be handled by new_comment. Also only update scores
        # periodically once a comment has many votes.
        if not vote.is_automatic_initial_vote and not skip_score_update:
            # check whether this link is using precomputed sorts, if it is
            # we'll need to push an update to commentstree_q
            link = Link._byID(comment.link_id)
            if get_active_sort_orders_for_link(link):
                # send this comment to commentstree_q where we will update
                # CommentScoresByLink, CommentTree (noop), and CommentOrderer
                add_to_commentstree_q(comment)
            else:
                # the link isn't using precomputed sorts, so just update the
                # scores
                write_comment_scores(link, [comment])
                timer.intermediate("update_scores")

        timer.stop()
        timer.flush()
Example #9
0
    def process_message(msg):
        from r2.lib.db.queries import (
            add_queries,
            add_to_commentstree_q,
            get_comments,
        )

        vote_data = json.loads(msg.body)
        hook = hooks.get_hook('vote.validate_vote_data')
        if hook.call_until_return(msg=msg, vote_data=vote_data) is False:
            # Corrupt records in the queue. Ignore them.
            print "Ignoring invalid vote by %s on %s %s" % (
                    vote_data.get('user_id', '<unknown>'),
                    vote_data.get('thing_fullname', '<unknown>'),
                    vote_data)
            return

        timer = g.stats.get_timer("comment_vote_processor")
        timer.start()

        user = Account._byID(vote_data.pop("user_id"))
        comment = Comment._by_fullname(vote_data.pop("thing_fullname"))

        print "Processing vote by %s on %s %s" % (user, comment, vote_data)

        try:
            vote = Vote(
                user,
                comment,
                direction=vote_data["direction"],
                date=datetime.utcfromtimestamp(vote_data["date"]),
                data=vote_data["data"],
                event_data=vote_data.get("event_data"),
            )
        except TypeError as e:
            # a vote on an invalid type got in the queue, just skip it
            g.log.exception("Invalid type: %r", e.message)
            return

        vote.commit()
        timer.intermediate("create_vote_object")

        vote_valid = vote.is_automatic_initial_vote or vote.effects.affects_score
        comment_valid = not (comment._spam or comment._deleted)
        if vote_valid and comment_valid:
            author = Account._byID(comment.author_id)
            add_queries(
                queries=[get_comments(author, sort, 'all') for sort in SORTS],
                insert_items=comment,
            )
            timer.intermediate("author_queries")

            # update the score periodically when a comment has many votes
            update_threshold = g.live_config['comment_vote_update_threshold']
            update_period = g.live_config['comment_vote_update_period']
            num_votes = comment.num_votes
            if num_votes <= update_threshold or num_votes % update_period == 0:
                add_to_commentstree_q(comment)

        timer.stop()
        timer.flush()
Example #10
0
    def process_message(msg):
        from r2.lib.comment_tree import write_comment_scores
        from r2.lib.db.queries import (
            add_queries,
            add_to_commentstree_q,
            get_comments,
        )
        from r2.models.builder import get_active_sort_orders_for_link

        vote_data = json.loads(msg.body)
        hook = hooks.get_hook('vote.validate_vote_data')
        if hook.call_until_return(msg=msg, vote_data=vote_data) is False:
            # Corrupt records in the queue. Ignore them.
            print "Ignoring invalid vote by %s on %s %s" % (
                    vote_data.get('user_id', '<unknown>'),
                    vote_data.get('thing_fullname', '<unknown>'),
                    vote_data)
            return

        timer = g.stats.get_timer("comment_vote_processor")
        timer.start()

        user = Account._byID(vote_data.pop("user_id"))
        comment = Comment._by_fullname(vote_data.pop("thing_fullname"))

        print "Processing vote by %s on %s %s" % (user, comment, vote_data)

        try:
            vote = Vote(
                user,
                comment,
                direction=vote_data["direction"],
                date=datetime.utcfromtimestamp(vote_data["date"]),
                data=vote_data["data"],
                event_data=vote_data.get("event_data"),
            )
        except TypeError as e:
            # a vote on an invalid type got in the queue, just skip it
            g.log.exception("Invalid type: %r", e.message)
            return

        vote.commit()
        timer.intermediate("create_vote_object")

        vote_invalid = (not vote.effects.affects_score and
            not vote.is_automatic_initial_vote)
        comment_invalid = comment._spam or comment._deleted
        if vote_invalid or comment_invalid:
            timer.stop()
            timer.flush()
            return

        author = Account._byID(comment.author_id)
        add_queries(
            queries=[get_comments(author, sort, 'all') for sort in SORTS],
            insert_items=comment,
        )
        timer.intermediate("author_queries")

        update_threshold = g.live_config['comment_vote_update_threshold']
        update_period = g.live_config['comment_vote_update_period']
        skip_score_update = (comment.num_votes > update_threshold and
            comment.num_votes % update_period != 0)

        # skip updating scores if this was the automatic initial vote. those
        # updates will be handled by new_comment. Also only update scores
        # periodically once a comment has many votes.
        if not vote.is_automatic_initial_vote and not skip_score_update:
            # check whether this link is using precomputed sorts, if it is
            # we'll need to push an update to commentstree_q
            link = Link._byID(comment.link_id)
            if get_active_sort_orders_for_link(link):
                # send this comment to commentstree_q where we will update
                # CommentScoresByLink, CommentTree (noop), and CommentOrderer
                add_to_commentstree_q(comment)
            else:
                # the link isn't using precomputed sorts, so just update the
                # scores
                write_comment_scores(link, [comment])
                timer.intermediate("update_scores")

        timer.stop()
        timer.flush()