Example #1
0
    def store_keys(cls, key, listing):
        """Resolve the cached query named by ``key`` and replace its contents.

        The key is decomposed with ``cls.split_key`` into
        ``(category, thing_cls, sort, time, uid)``; the ``(category,
        thing_cls)`` pair selects which function in ``queries`` builds the
        query object.

        :param str key: key generated by :py:method:`make_key`
        :param list listing: sorted listing generated by
            `mr_reduce_max_per_key`, generally by
            :py:method:`write_permacache`; each entry unpacks as
            ``(value, timestamp, thing_fullname)``
        :raises AssertionError: if the key does not map to a known query
        """
        category, thing_cls, sort, time, uid = cls.split_key(key)
        kind = (category, thing_cls)

        # user and subreddit ids are converted to int; a domain is passed
        # through as the string it already is
        if kind == ("user", "link"):
            query = queries._get_submitted(int(uid), sort, time)
        elif kind == ("user", "comment"):
            query = queries._get_comments(int(uid), sort, time)
        elif kind == ("sr", "link"):
            query = queries._get_links(int(uid), sort, time)
        elif kind == ("domain", "link"):
            query = queries.get_domain_links(uid, sort, time)
        else:
            query = None

        assert query, 'unknown query type for {}'.format(key)

        # reorder each entry so the fullname leads, followed by the two
        # numeric fields as floats
        item_tuples = []
        for value, timestamp, thing_fullname in listing:
            item_tuples.append(
                (thing_fullname, float(value), float(timestamp)))

        # we only need locking updates for non-time-based listings, since for
        # time-based ones we're the only ones that ever update it
        query._replace(item_tuples, lock=(time == 'all'))
Example #2
0
def store_keys(key, maxes):
    """Resolve the cached query named by ``key`` and replace its contents.

    ``key`` must be five ``/``-separated fields:
    ``category/thing_cls/sort/time/id``. The ``(category, thing_cls)`` pair
    selects which function in ``queries`` builds the query object.

    Each entry of ``maxes`` is a sequence whose last element is moved to the
    front and whose preceding elements are converted to ``float``.

    Raises AssertionError if the key does not map to a known query.
    """
    category, thing_cls, sort, time, ident = key.split("/")
    kind = (category, thing_cls)

    # user and subreddit ids are converted to int; a domain is passed through
    # as the string it already is
    if kind == ("user", "link"):
        query = queries._get_submitted(int(ident), sort, time)
    elif kind == ("user", "comment"):
        query = queries._get_comments(int(ident), sort, time)
    elif kind == ("sr", "link"):
        query = queries._get_links(int(ident), sort, time)
    elif kind == ("domain", "link"):
        query = queries.get_domain_links(ident, sort, time)
    else:
        query = None

    assert query, 'unknown query type for %s' % (key,)

    item_tuples = []
    for item in maxes:
        trailing = item[-1]
        leading = [float(x) for x in item[:-1]]
        item_tuples.append(tuple([trailing] + leading))

    # we only need locking updates for non-time-based listings, since for time-
    # based ones we're the only ones that ever update it
    query._replace(item_tuples, lock=(time == 'all'))
Example #3
0
    def process_message(msgs, chan):
        """Update get_domain_links(), the Links by domain precomputed query.

        get_domain_links() is a CachedResult which is stored in permacache. To
        update these objects we need to do a read-modify-write which requires
        obtaining a lock. Sharding these updates by domain allows us to run
        multiple consumers (but ideally just one per shard) to avoid lock
        contention.

        :param msgs: iterable of messages; each ``msg.body`` is used as a
            Link fullname
        :param chan: unused by this function

        """

        from r2.lib.db.queries import add_queries, get_domain_links

        # a set, so a link named by several messages is only looked up once
        link_names = {msg.body for msg in msgs}
        links = Link._by_fullname(link_names, return_dict=False)
        # parenthesised single argument: prints the same text whether print is
        # a statement (Python 2) or a function (Python 3)
        print('Processing %r' % (links,))

        links_by_domain = defaultdict(list)
        for link in links:
            parsed = UrlParser(link.url)

            # update the listings for all permutations of the link's domain
            for domain in parsed.domain_permutations():
                links_by_domain[domain].append(link)

        # .items() rather than .iteritems() so this also runs on Python 3; the
        # per-domain list gets its own name instead of rebinding `links`
        for domain, domain_links in links_by_domain.items():
            with g.stats.get_timer("link_vote_processor.domain_queries"):
                add_queries(
                    queries=[
                        get_domain_links(domain, sort, "all")
                        for sort in SORTS],
                    insert_items=domain_links,
                )
Example #4
0
def store_keys(key, maxes):
    """Insert the reduced rows in ``maxes`` into the query named by ``key``.

    Recognised key shapes:

    * ``user-<submitted|other>-<account_id>``: an account's submissions
      (``submitted``) or comments (anything else), sorted 'new' over 'all'.
      Rows must be ``(timestamp, fullname)`` pairs.
    * ``sr-<sort>-<time>-<sr_id>``: a subreddit's link listing.
    * ``domain/<sort>/<time>/<domain>``: a domain's link listing.
    * ``<liked|disliked|saved|hidden>-<account_id>``: a per-account
      relation listing.

    For every shape except ``user-``, each row's last element is moved to
    the front and the preceding elements are converted to ``float``.

    A key matching none of these shapes is ignored.
    """
    # we're building queries using queries.py, but we could make the
    # queries ourselves if we wanted to avoid the individual lookups
    # for accounts and subreddits.

    # Note that we're only generating the 'sr-' type queries here, but
    # we're also able to process the other listings generated by the
    # old migrate.mr_permacache for convenience

    userrel_fns = dict(liked=queries.get_liked,
                       disliked=queries.get_disliked,
                       saved=queries.get_saved,
                       hidden=queries.get_hidden)

    def fullname_first(rows):
        # A list comprehension rather than `[...] + map(float, ...)`: on
        # Python 3 map() returns an iterator, which cannot be concatenated
        # to a list. Kept as a function so rows are only converted once a
        # branch has actually matched.
        return [tuple([row[-1]] + [float(x) for x in row[:-1]])
                for row in rows]

    if key.startswith('user-'):
        _, keytype, account_id = key.split('-')
        account_id = int(account_id)
        if keytype == 'submitted':
            fn = queries.get_submitted
        else:
            fn = queries.get_comments
        q = fn(Account._byID(account_id), 'new', 'all')
        q._insert_tuples([(fname, float(timestamp))
                          for (timestamp, fname) in maxes])

    elif key.startswith('sr-'):
        _, sort, time, sr_id = key.split('-')
        sr_id = int(sr_id)

        if sort == 'controversy':
            # the mapper emitted the wrong name for this sort and it is too
            # late to fix it there, so translate it here
            sort = 'controversial'

        q = queries.get_links(Subreddit._byID(sr_id), sort, time)
        q._insert_tuples(fullname_first(maxes))

    elif key.startswith('domain/'):
        _, sort, time, domain = key.split('/')
        q = queries.get_domain_links(domain, sort, time)
        q._insert_tuples(fullname_first(maxes))

    elif key.split('-')[0] in userrel_fns:
        key_type, account_id = key.split('-')
        account_id = int(account_id)
        fn = userrel_fns[key_type]
        q = fn(Account._byID(account_id))
        q._insert_tuples(fullname_first(maxes))
Example #5
0
def store_keys(key, maxes):
    """Resolve the cached query named by ``key`` and replace its contents.

    ``key`` must be five ``/``-separated fields:
    ``category/thing_cls/sort/time/id``. The ``(category, thing_cls)`` pair
    selects which function in ``queries`` builds the query object.

    Each entry of ``maxes`` is a sequence whose last element is moved to the
    front and whose preceding elements are converted to ``float``.

    Raises AssertionError if the key does not map to a known query.
    """
    category, thing_cls, sort, time, ident = key.split("/")
    kind = (category, thing_cls)

    # user and subreddit ids are converted to int; a domain is passed through
    # as the string it already is
    if kind == ("user", "link"):
        query = queries._get_submitted(int(ident), sort, time)
    elif kind == ("user", "comment"):
        query = queries._get_comments(int(ident), sort, time)
    elif kind == ("sr", "link"):
        query = queries._get_links(int(ident), sort, time)
    elif kind == ("domain", "link"):
        query = queries.get_domain_links(ident, sort, time)
    else:
        query = None
    assert query

    item_tuples = []
    for item in maxes:
        trailing = item[-1]
        leading = [float(x) for x in item[:-1]]
        item_tuples.append(tuple([trailing] + leading))
    query._replace(item_tuples)
Example #6
0
    def get_links(self, sort, time):
        """Return ``queries.get_domain_links`` for ``self.domain``.

        ``sort`` and ``time`` are forwarded unchanged.
        """
        # imported at call time rather than at module load (presumably to
        # avoid a circular import -- TODO confirm)
        from r2.lib.db import queries

        domain_listing = queries.get_domain_links(self.domain, sort, time)
        return domain_listing
Example #7
0
    def get_links(self, sort, time):
        """Return ``queries.get_domain_links`` for ``self.domain``.

        ``sort`` and ``time`` are forwarded unchanged.
        """
        # imported at call time rather than at module load (presumably to
        # avoid a circular import -- TODO confirm)
        from r2.lib.db import queries

        # TODO: once the lists are precomputed properly, this can be
        # switched over to use the non-_old variety.
        # NOTE(review): the call below already uses get_domain_links, not an
        # `_old` variant, so the TODO above looks stale -- confirm and remove.
        domain_listing = queries.get_domain_links(self.domain, sort, time)
        return domain_listing
Example #8
0
 def get_links(self, sort, time):
     """Return ``queries.get_domain_links`` for ``self.domain``.

     ``sort`` and ``time`` are forwarded unchanged.
     """
     # imported at call time rather than at module load (presumably to
     # avoid a circular import -- TODO confirm)
     from r2.lib.db import queries

     domain_listing = queries.get_domain_links(self.domain, sort, time)
     return domain_listing
Example #9
0
 def get_links(self, sort, time):
     """Return ``queries.get_domain_links`` for ``self.domain``.

     ``sort`` and ``time`` are forwarded unchanged.
     """
     # imported at call time rather than at module load (presumably to
     # avoid a circular import -- TODO confirm)
     from r2.lib.db import queries

     # TODO: once the lists are precomputed properly, this can be
     # switched over to use the non-_old variety.
     # NOTE(review): the call below already uses get_domain_links, not an
     # `_old` variant, so the TODO above looks stale -- confirm and remove.
     domain_listing = queries.get_domain_links(self.domain, sort, time)
     return domain_listing