Example #1
0
    def process_message(msgs, chan):
        """Update get_domain_links(), the Links by domain precomputed query.

        get_domain_links() is a CachedResult which is stored in permacache. To
        update these objects we need to do a read-modify-write which requires
        obtaining a lock. Sharding these updates by domain allows us to run
        multiple consumers (but ideally just one per shard) to avoid lock
        contention.

        """

        from r2.lib.db.queries import add_queries, get_domain_links

        link_names = {msg.body for msg in msgs}
        links = Link._by_fullname(link_names, return_dict=False)
        print 'Processing %r' % (links,)

        links_by_domain = defaultdict(list)
        for link in links:
            parsed = UrlParser(link.url)

            # update the listings for all permutations of the link's domain
            for domain in parsed.domain_permutations():
                links_by_domain[domain].append(link)

        for d, links in links_by_domain.iteritems():
            with g.stats.get_timer("link_vote_processor.domain_queries"):
                add_queries(
                    queries=[
                        get_domain_links(d, sort, "all") for sort in SORTS],
                    insert_items=links,
                )
Example #2
0
    def process_message(msgs, chan):
        """Update get_domain_links(), the Links by domain precomputed query.

        get_domain_links() is a CachedResult which is stored in permacache. To
        update these objects we need to do a read-modify-write which requires
        obtaining a lock. Sharding these updates by domain allows us to run
        multiple consumers (but ideally just one per shard) to avoid lock
        contention.

        """

        from r2.lib.db.queries import add_queries, get_domain_links

        link_names = {msg.body for msg in msgs}
        links = Link._by_fullname(link_names, return_dict=False)
        print 'Processing %r' % (links, )

        links_by_domain = defaultdict(list)
        for link in links:
            parsed = UrlParser(link.url)

            # update the listings for all permutations of the link's domain
            for domain in parsed.domain_permutations():
                links_by_domain[domain].append(link)

        for d, links in links_by_domain.iteritems():
            with g.stats.get_timer("link_vote_processor.domain_queries"):
                add_queries(
                    queries=[
                        get_domain_links(d, sort, "all") for sort in SORTS
                    ],
                    insert_items=links,
                )
Example #3
0
def add_to_domain_query_q(link):
    """Enqueue *link* for by-domain listing reprocessing.

    Links whose URL produces no domain permutations are skipped.
    When sharded queues are enabled, the destination queue is chosen
    by hashing the link's hostname into one of ten shards.
    """
    parsed = UrlParser(link.url)
    if not parsed.domain_permutations():
        # no valid domains found
        return

    if not g.shard_domain_query_queues:
        queue_name = "domain_query_q"
    else:
        queue_name = "domain_query_%s_q" % (hash(parsed.hostname) % 10)
    amqp.add_item(queue_name, link._fullname)
Example #4
0
def add_to_domain_query_q(link):
    """Queue *link* so its domain listings get recomputed.

    Does nothing when the URL yields no domain permutations. With
    sharding enabled, the hostname hash picks one of ten shard queues;
    otherwise everything goes through the single shared queue.
    """
    url_parts = UrlParser(link.url)
    domains = url_parts.domain_permutations()
    if not domains:
        # nothing worth indexing for this URL
        return

    queue = "domain_query_q"
    if g.shard_domain_query_queues:
        shard = hash(url_parts.hostname) % 10
        queue = "domain_query_%s_q" % shard
    amqp.add_item(queue, link._fullname)
Example #5
0
    def process(thing):
        """Yield (key, value, timestamp, fullname) listing entries for thing.

        For every interval whose cutoff the thing's timestamp clears,
        entries are emitted for the author's user listings; non-spam
        links additionally get subreddit and per-domain listing entries,
        each under the top / upvote / controversial sorts.
        """
        if thing.deleted:
            return

        thing_cls = thingcls_by_name[thing.thing_type]
        fname = make_fullname(thing_cls, thing.thing_id)
        thing_score = score(thing.ups, thing.downs)
        thing_upvotes = upvotes(thing.ups)
        thing_controversy = controversy(thing.ups, thing.downs)

        for interval, cutoff in cutoff_by_interval.iteritems():
            # the thing is too old for this interval
            if thing.timestamp < cutoff:
                continue

            user_key = "user/%s/top/%s/%d" % (
                thing.thing_type, interval, thing.author_id)
            yield (user_key, thing_score, thing.timestamp, fname)
            user_key = "user/%s/%s/%s/%d" % (
                thing.thing_type, g.voting_upvote_path, interval,
                thing.author_id)
            yield (user_key, thing_upvotes, thing.timestamp, fname)
            user_key = "user/%s/%s/%s/%d" % (
                thing.thing_type, g.voting_controversial_path, interval,
                thing.author_id)
            yield (user_key, thing_controversy, thing.timestamp, fname)

            if thing.spam:
                continue
            if thing.thing_type != "link":
                continue

            sr_key = "sr/link/top/%s/%d" % (interval, thing.sr_id)
            yield (sr_key, thing_score, thing.timestamp, fname)
            sr_key = "sr/link/%s/%s/%d" % (
                g.voting_upvote_path, interval, thing.sr_id)
            yield (sr_key, thing_upvotes, thing.timestamp, fname)
            sr_key = "sr/link/%s/%s/%d" % (
                g.voting_controversial_path, interval, thing.sr_id)
            yield (sr_key, thing_controversy, thing.timestamp, fname)

            if not thing.url:
                continue
            try:
                parsed = UrlParser(thing.url)
            except ValueError:
                continue

            for domain in parsed.domain_permutations():
                dom_key = "domain/link/top/%s/%s" % (interval, domain)
                yield (dom_key, thing_score, thing.timestamp, fname)
                dom_key = "domain/link/%s/%s/%s" % (
                    g.voting_upvote_path, interval, domain)
                yield (dom_key, thing_upvotes, thing.timestamp, fname)
                dom_key = "domain/link/%s/%s/%s" % (
                    g.voting_controversial_path, interval, domain)
                yield (dom_key, thing_controversy, thing.timestamp, fname)
Example #6
0
    def process(thing):
        """Yield (key, value, timestamp, fullname) listing entries for thing.

        Every qualifying interval gets user-listing entries for the
        author under the top and controversial sorts; non-spam links
        also get subreddit and per-domain entries.
        """
        if thing.deleted:
            return

        thing_cls = thingcls_by_name[thing.thing_type]
        fname = make_fullname(thing_cls, thing.thing_id)
        thing_score = score(thing.ups, thing.downs)
        thing_controversy = controversy(thing.ups, thing.downs)

        def entry(key, value):
            # every emitted tuple shares the timestamp and fullname
            return (key, value, thing.timestamp, fname)

        for interval, cutoff in cutoff_by_interval.iteritems():
            if thing.timestamp < cutoff:
                continue

            yield entry("user/%s/top/%s/%d" % (
                thing.thing_type, interval, thing.author_id), thing_score)
            yield entry("user/%s/controversial/%s/%d" % (
                thing.thing_type, interval, thing.author_id),
                thing_controversy)

            # spam and non-link things get no sr/domain entries
            if thing.spam or thing.thing_type != "link":
                continue

            yield entry("sr/link/top/%s/%d" % (interval, thing.sr_id),
                        thing_score)
            yield entry("sr/link/controversial/%s/%d" % (
                interval, thing.sr_id), thing_controversy)

            if not thing.url:
                continue
            try:
                parsed = UrlParser(thing.url)
            except ValueError:
                continue

            for domain in parsed.domain_permutations():
                yield entry("domain/link/top/%s/%s" % (interval, domain),
                            thing_score)
                yield entry("domain/link/controversial/%s/%s" % (
                    interval, domain), thing_controversy)
Example #7
0
    def time_listing_iter(self, thing, cutoff_by_interval):
        """Yield (key, value, timestamp, fullname) entries for *thing*.

        For each interval the thing's timestamp clears, one entry per
        LISTING_SORTS sort is produced for the author's user listings;
        non-spam links also get subreddit and per-domain entries.
        """
        if thing.deleted:
            return

        fname = make_fullname(self.thing_cls, thing.thing_id)
        ts = thing.timestamp
        sort_values = {name: fn(thing)
                       for name, fn in self.LISTING_SORTS.iteritems()}

        for interval, cutoff in cutoff_by_interval.iteritems():
            if ts < cutoff:
                continue

            for sort_name, val in sort_values.iteritems():
                yield (self.make_key("user", sort_name, interval,
                                     thing.author_id),
                       val, ts, fname)

            # spam and non-link things only appear in user listings
            if thing.spam or thing.thing_type != "link":
                continue

            for sort_name, val in sort_values.iteritems():
                yield (self.make_key("sr", sort_name, interval,
                                     thing.sr_id),
                       val, ts, fname)

            if not thing.url:
                continue
            try:
                parsed = UrlParser(thing.url)
            except ValueError:
                continue

            for domain in parsed.domain_permutations():
                for sort_name, val in sort_values.iteritems():
                    yield (self.make_key("domain", sort_name, interval,
                                         domain),
                           val, ts, fname)
Example #8
0
    def time_listing_iter(self, thing, cutoff_by_interval):
        """Yield listing entries (key, value, timestamp, fullname) for thing.

        Emits one entry per configured sort for the author's user
        listings in every interval the timestamp falls inside; links
        that are not spam additionally get subreddit and domain entries.
        """
        if thing.deleted:
            return

        fullname = make_fullname(self.thing_cls, thing.thing_id)
        scores = {k: func(thing) for k, func in self.LISTING_SORTS.iteritems()}

        for interval, cutoff in cutoff_by_interval.iteritems():
            if thing.timestamp < cutoff:
                continue

            # author listings always get entries
            for sort in scores:
                key = self.make_key("user", sort, interval, thing.author_id)
                yield (key, scores[sort], thing.timestamp, fullname)

            if thing.spam:
                continue
            if thing.thing_type != "link":
                continue

            for sort in scores:
                key = self.make_key("sr", sort, interval, thing.sr_id)
                yield (key, scores[sort], thing.timestamp, fullname)

            if not thing.url:
                continue
            try:
                parsed = UrlParser(thing.url)
            except ValueError:
                continue

            for dom in parsed.domain_permutations():
                for sort in scores:
                    key = self.make_key("domain", sort, interval, dom)
                    yield (key, scores[sort], thing.timestamp, fullname)