Beispiel #1
0
def prime_url_cache(f, verbosity = 10000):
    import gzip, time
    from pylons import g
    handle = gzip.open(f, 'rb')
    counter = 0
    start_time = time.time()
    for line in handle:
        try:
            tid, key, url, kind = line.split('|')
            tid = int(tid)
            if url.lower() != "self":
                key = Link.by_url_key(url)
                link_ids = g.urlcache.get(key) or []
                if tid not in link_ids:
                    link_ids.append(tid)
                    g.urlcache.set(key, link_ids)
        except ValueError:
            print "FAIL: %s" % line
        counter += 1
        if counter % verbosity == 0:
            print "%6d: %s" % (counter, line)
            print "--> doing %5.2f / s" % (float(counter) / (time.time() - start_time))
Beispiel #2
0
def prime_url_cache(f, verbosity=10000):
    import gzip, time
    from pylons import g
    handle = gzip.open(f, 'rb')
    counter = 0
    start_time = time.time()
    for line in handle:
        try:
            tid, key, url, kind = line.split('|')
            tid = int(tid)
            if url.lower() != "self":
                key = Link.by_url_key(url)
                link_ids = g.urlcache.get(key) or []
                if tid not in link_ids:
                    link_ids.append(tid)
                    g.urlcache.set(key, link_ids)
        except ValueError:
            print "FAIL: %s" % line
        counter += 1
        if counter % verbosity == 0:
            print "%6d: %s" % (counter, line)
            print "--> doing %5.2f / s" % (float(counter) /
                                           (time.time() - start_time))
Beispiel #3
0
    def gen_keys():
        yield promoted_memo_key

        # just let this one do its own writing
        load_all_reddits()

        yield queries.get_all_comments().iden

        l_q = Link._query(
            Link.c._spam == (True, False),
            Link.c._deleted == (True, False),
            sort=desc('_date'),
            data=True,
        )
        for link in fetch_things2(l_q, verbosity):
            yield comments_key(link._id)
            yield last_modified_key(link, 'comments')
            if not getattr(link, 'is_self', False) and hasattr(link, 'url'):
                yield Link.by_url_key(link.url)

        a_q = Account._query(
            Account.c._spam == (True, False),
            sort=desc('_date'),
        )
        for account in fetch_things2(a_q, verbosity):
            yield messages_key(account._id)
            yield last_modified_key(account, 'overview')
            yield last_modified_key(account, 'commented')
            yield last_modified_key(account, 'submitted')
            yield last_modified_key(account, 'liked')
            yield last_modified_key(account, 'disliked')
            yield queries.get_comments(account, 'new', 'all').iden
            yield queries.get_submitted(account, 'new', 'all').iden
            yield queries.get_liked(account).iden
            yield queries.get_disliked(account).iden
            yield queries.get_hidden(account).iden
            yield queries.get_saved(account).iden
            yield queries.get_inbox_messages(account).iden
            yield queries.get_unread_messages(account).iden
            yield queries.get_inbox_comments(account).iden
            yield queries.get_unread_comments(account).iden
            yield queries.get_inbox_selfreply(account).iden
            yield queries.get_unread_selfreply(account).iden
            yield queries.get_sent(account).iden

        sr_q = Subreddit._query(
            Subreddit.c._spam == (True, False),
            sort=desc('_date'),
        )
        for sr in fetch_things2(sr_q, verbosity):
            yield last_modified_key(sr, 'stylesheet_contents')
            yield queries.get_links(sr, 'hot', 'all').iden
            yield queries.get_links(sr, 'new', 'all').iden

            for sort in 'top', 'controversial':
                for time in 'hour', 'day', 'week', 'month', 'year', 'all':
                    yield queries.get_links(sr,
                                            sort,
                                            time,
                                            merge_batched=False).iden
            yield queries.get_spam_links(sr).iden
            yield queries.get_spam_comments(sr).iden
            yield queries.get_reported_links(sr).iden
            yield queries.get_reported_comments(sr).iden
            yield queries.get_subreddit_messages(sr).iden
            yield queries.get_unread_subreddit_messages(sr).iden
Beispiel #4
0
    def gen_keys():
        yield promoted_memo_key

        # just let this one do its own writing
        load_all_reddits()

        yield queries.get_all_comments().iden

        l_q = Link._query(Link.c._spam == (True, False),
                          Link.c._deleted == (True, False),
                          sort=desc('_date'),
                          data=True,
                          )
        for link in fetch_things2(l_q, verbosity):
            yield comments_key(link._id)
            yield last_modified_key(link, 'comments')
            if not getattr(link, 'is_self', False) and hasattr(link, 'url'):
                yield Link.by_url_key(link.url)

        a_q = Account._query(Account.c._spam == (True, False),
                             sort=desc('_date'),
                             )
        for account in fetch_things2(a_q, verbosity):
            yield messages_key(account._id)
            yield last_modified_key(account, 'overview')
            yield last_modified_key(account, 'commented')
            yield last_modified_key(account, 'submitted')
            yield last_modified_key(account, 'liked')
            yield last_modified_key(account, 'disliked')
            yield queries.get_comments(account, 'new', 'all').iden
            yield queries.get_submitted(account, 'new', 'all').iden
            yield queries.get_liked(account).iden
            yield queries.get_disliked(account).iden
            yield queries.get_hidden(account).iden
            yield queries.get_saved(account).iden
            yield queries.get_inbox_messages(account).iden
            yield queries.get_unread_messages(account).iden
            yield queries.get_inbox_comments(account).iden
            yield queries.get_unread_comments(account).iden
            yield queries.get_inbox_selfreply(account).iden
            yield queries.get_unread_selfreply(account).iden
            yield queries.get_sent(account).iden

        sr_q = Subreddit._query(Subreddit.c._spam == (True, False),
                                sort=desc('_date'),
                                )
        for sr in fetch_things2(sr_q, verbosity):
            yield last_modified_key(sr, 'stylesheet_contents')
            yield queries.get_links(sr, 'hot', 'all').iden
            yield queries.get_links(sr, 'new', 'all').iden

            for sort in 'top', 'controversial':
                for time in 'hour', 'day', 'week', 'month', 'year', 'all':
                    yield queries.get_links(sr, sort, time,
                                            merge_batched=False).iden
            yield queries.get_spam_links(sr).iden
            yield queries.get_spam_comments(sr).iden
            yield queries.get_reported_links(sr).iden
            yield queries.get_reported_comments(sr).iden
            yield queries.get_subreddit_messages(sr).iden
            yield queries.get_unread_subreddit_messages(sr).iden