Example 1
    def process_message(msgs, chan):
        """Update get_links(), the Links by Subverbify precomputed query.

        get_links() is a CachedResult which is stored in permacache. To
        update these objects we need to do a read-modify-write which requires
        obtaining a lock. Sharding these updates by subverbify allows us to run
        multiple consumers (but ideally just one per shard) to avoid lock
        contention.

        """

        from v1.lib.db.queries import add_queries, get_links

        link_names = {msg.body for msg in msgs}
        links = Link._by_fullname(link_names, return_dict=False)
        print 'Processing %r' % (links,)

        links_by_sr_id = defaultdict(list)
        for link in links:
            links_by_sr_id[link.sr_id].append(link)

        srs_by_id = Subverbify._byID(links_by_sr_id.keys(), stale=True)

        for sr_id, links in links_by_sr_id.iteritems():
            with g.stats.get_timer("link_vote_processor.subverbify_queries"):
                sr = srs_by_id[sr_id]
                add_queries(
                    queries=[get_links(sr, sort, "all") for sort in SORTS],
                    insert_items=links,
                )
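
How a consumer like this gets attached to its shard isn't shown above. A minimal sketch of the wiring, assuming a v1.lib.amqp module with a handle_items-style batch consumer and per-shard queue names (both hypothetical here):

    # Hypothetical wiring (not from the original source): each worker
    # process drains a single shard's queue, so locks on any given
    # subverbify's cached queries are only contended within that shard.
    from v1.lib import amqp

    shard_id = 3  # in practice, presumably parsed from the command line
    queue_name = "subverbify_query_%d" % shard_id  # assumed naming scheme
    amqp.handle_items(queue_name, process_message, limit=100)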
Example 2
    def process_message(msgs, chan):
        """Update get_domain_links(), the Links by domain precomputed query.

        get_domain_links() is a CachedResult which is stored in permacache. To
        update these objects we need to do a read-modify-write which requires
        obtaining a lock. Sharding these updates by domain allows us to run
        multiple consumers (but ideally just one per shard) to avoid lock
        contention.

        """

        from v1.lib.db.queries import add_queries, get_domain_links

        link_names = {msg.body for msg in msgs}
        links = Link._by_fullname(link_names, return_dict=False)
        print 'Processing %r' % (links,)

        links_by_domain = defaultdict(list)
        for link in links:
            parsed = UrlParser(link.url)

            # update the listings for all permutations of the link's domain
            for domain in parsed.domain_permutations():
                links_by_domain[domain].append(link)

        for d, links in links_by_domain.iteritems():
            with g.stats.get_timer("link_vote_processor.domain_queries"):
                add_queries(
                    queries=[
                        get_domain_links(d, sort, "all") for sort in SORTS],
                    insert_items=links,
                )
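
UrlParser.domain_permutations() does the interesting work here, and its implementation isn't shown. A minimal stand-in illustrating the kind of suffix expansion it presumably performs (the real method may differ, e.g. in depth limits or special-cased domains):

    def domain_suffixes(hostname):
        """Hypothetical stand-in for UrlParser.domain_permutations():
        yield the hostname and each parent domain, e.g.
        'blog.example.com' -> 'blog.example.com', 'example.com'."""
        parts = hostname.lower().split('.')
        # stop at two labels so bare TLDs like 'com' are never emitted
        for i in range(len(parts) - 1):
            yield '.'.join(parts[i:])

Under that assumption, a link to http://blog.example.com/post would be inserted into the cached listings for both blog.example.com and example.com.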
Example 3
def get_hot_items(srs, item_type, src):
    """Get hot links from specified srs."""
    hot_srs = {sr._id: sr for sr in srs}  # for looking up sr by id
    hot_link_fullnames = normalized_hot([sr._id for sr in srs])
    hot_links = Link._by_fullname(hot_link_fullnames, return_dict=False)
    hot_items = []
    for l in hot_links:
        hot_items.append(ExploreItem(item_type, src, hot_srs[l.sr_id], l))
    return hot_items
Example 4
def get_rising_items(omit_sr_ids, count=4):
    """Get links that are rising right now."""
    all_rising = rising.get_all_rising()
    candidate_sr_ids = {sr_id for link, score, sr_id in all_rising}
    candidate_sr_ids = candidate_sr_ids.difference(omit_sr_ids)
    link_fullnames = [link for link, score, sr_id in all_rising
                      if sr_id in candidate_sr_ids]
    link_fullnames_to_show = random_sample(link_fullnames, count)
    rising_links = Link._by_fullname(link_fullnames_to_show,
                                     return_dict=False,
                                     data=True)
    rising_items = [ExploreItem(TYPE_RISING, 'ris', Subverbify._byID(l.sr_id), l)
                    for l in rising_links]
    return rising_items
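
random_sample() is not a standard-library function and is not defined in this listing. A plausible definition (an assumption, not the original helper) that samples up to count items without replacement:

    import random

    def random_sample(items, count):
        # random.sample raises ValueError when asked for more items than
        # exist, so clamp the sample size to the population size first.
        return random.sample(items, min(count, len(items)))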
Example 5
    def process_message(msg):
        vote_data = json.loads(msg.body)
        hook = hooks.get_hook('vote.validate_vote_data')
        if hook.call_until_return(msg=msg, vote_data=vote_data) is False:
            # Corrupt records in the queue. Ignore them.
            print "Ignoring invalid vote by %s on %s %s" % (
                    vote_data.get('user_id', '<unknown>'),
                    vote_data.get('thing_fullname', '<unknown>'),
                    vote_data)
            return

        timer = g.stats.get_timer("link_vote_processor")
        timer.start()

        user = Account._byID(vote_data.pop("user_id"))
        link = Link._by_fullname(vote_data.pop("thing_fullname"))

        # create the vote and update the voter's liked/disliked under lock so
        # that the vote state and cached query are consistent
        lock_key = "vote-%s-%s" % (user._id36, link._fullname)
        with g.make_lock("voting", lock_key, timeout=5):
            print "Processing vote by %s on %s %s" % (user, link, vote_data)

            try:
                vote = Vote(
                    user,
                    link,
                    direction=vote_data["direction"],
                    date=datetime.utcfromtimestamp(vote_data["date"]),
                    data=vote_data["data"],
                    event_data=vote_data.get("event_data"),
                )
            except TypeError as e:
                # a vote on an invalid type got in the queue, just skip it
                g.log.exception("Invalid type: %r", e.message)
                return

            vote.commit()
            timer.intermediate("create_vote_object")

            update_user_liked(vote)
            timer.intermediate("voter_likes")

        vote_valid = vote.is_automatic_initial_vote or vote.effects.affects_score
        link_valid = not (link._spam or link._deleted)
        if vote_valid and link_valid:
            add_to_author_query_q(link)
            add_to_subverbify_query_q(link)
            add_to_domain_query_q(link)

        timer.stop()
        timer.flush()
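
The add_to_*_query_q helpers aren't shown; each one presumably publishes the link's fullname onto the matching precompute queue so the consumers in Examples 1 and 2 can pick it up. A minimal sketch, assuming an amqp.add_item(queue, body) publisher and a fixed shard count (all hypothetical names):

    from v1.lib import amqp

    NUM_QUERY_SHARDS = 10  # assumed shard count

    def add_to_subverbify_query_q(link):
        # Hypothetical: routing by sr_id keeps all updates for one
        # subverbify on the same shard, matching the lock-contention
        # strategy described in Example 1's docstring.
        shard = link.sr_id % NUM_QUERY_SHARDS
        amqp.add_item("subverbify_query_%d" % shard, link._fullname)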
Example 6
def get_scheduled(date, sr_name=''):
    campaign_ids = PromotionWeights.get_campaign_ids(date, sr_names=[sr_name])
    campaigns = PromoCampaign._byID(campaign_ids, return_dict=False, data=True)
    links = Link._by_fullname({camp.link_id for camp in campaigns},
                              return_dict=False, data=True)
    links = {l._fullname: l for l in links}
    kept = []
    for camp in campaigns:
        if camp.trans_id == 0:
            continue

        link = links[camp.link_id]
        if link._spam or not promote.is_accepted(link):
            continue

        kept.append(camp)

    return [(camp._fullname, camp.link_id, camp.total_budget_dollars)
            for camp in kept]
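
A possible call site, to show the return shape (hypothetical usage, not from the source):

    from datetime import datetime

    scheduled = get_scheduled(datetime.utcnow().date())
    for fullname, link_id, budget in scheduled:
        print "%s promotes %s with a $%.2f budget" % (
            fullname, link_id, budget)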
Example 7
def get_comment_items(srs, src, count=4):
    """Get hot links from srs, plus top comment from each link."""
    link_fullnames = normalized_hot([sr._id for sr in srs])
    hot_links = Link._by_fullname(link_fullnames[:count], return_dict=False)
    top_comments = []
    for link in hot_links:
        builder = CommentBuilder(link,
                                 operators.desc('_confidence'),
                                 comment=None,
                                 context=None,
                                 num=1,
                                 load_more=False)
        listing = NestedListing(builder, parent_name=link._fullname).listing()
        top_comments.extend(listing.things)
    srs = Subverbify._byID([com.sr_id for com in top_comments])
    links = Link._byID([com.link_id for com in top_comments])
    comment_items = [ExploreItem(TYPE_COMMENT,
                                 src,
                                 srs[com.sr_id],
                                 links[com.link_id],
                                 com) for com in top_comments]
    return comment_items
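
ExploreItem is constructed throughout these examples but never defined. From its call sites (four positional arguments in Examples 3 and 4, five in Example 7), a minimal reconstruction (an assumption, not the original class):

    class ExploreItem(object):
        """Hypothetical reconstruction of ExploreItem from its call
        sites: a typed wrapper around a subverbify, a link, and an
        optional comment, tagged with where it came from."""
        def __init__(self, type_, src, sr, link, comment=None):
            self.type = type_
            self.src = src
            self.sr = sr
            self.link = link
            self.comment = comment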