Example #1
def get_links(sr, sort, time, merge_batched=True):
    """General link query for a subreddit."""
    q = Link._query(Link.c.sr_id == sr._id,
                    sort = db_sort(sort),
                    data = True)

    if time != 'all':
        q._filter(db_times[time])

    res = make_results(q)

    # see the discussion above batched_time_times
    if (merge_batched
        and g.use_query_cache
        and sort in batched_time_sorts
        and time in batched_time_times):

        byday = Link._query(Link.c.sr_id == sr._id,
                            sort = db_sort(sort), data=True)
        byday._filter(db_times['day'])

        res = merge_results(res,
                            make_results(byday))

    return res
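A hedged caller sketch for the helper above; the subreddit name and the 'top'/'week' values are illustrative assumptions, not part of the original example:

# hypothetical caller; Subreddit._by_name is assumed to be available here,
# and the name/sort/time values are made up
from r2.models import Subreddit

sr = Subreddit._by_name('programming')
week_top = get_links(sr, 'top', 'week', merge_batched=True)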
Example #2
    def get_links(self, sort, time):
        from r2.lib.db import queries
        from r2.models import Link
        from r2.controllers.errors import UserRequiredException

        if not c.user_is_loggedin:
            raise UserRequiredException

        friends = self.get_important_friends(c.user._id)

        if not friends:
            return []

        if g.use_query_cache:
            # with the precomputer enabled, this Subreddit only supports
            # being sorted by 'new'. it would be nice to have a
            # cleaner UI than just blatantly ignoring their sort,
            # though
            sort = 'new'
            time = 'all'

            friends = Account._byID(friends, return_dict=False)

            crs = [
                queries.get_submitted(friend, sort, time) for friend in friends
            ]
            return queries.MergedCachedResults(crs)

        else:
            q = Link._query(Link.c.author_id == friends,
                            sort=queries.db_sort(sort),
                            data=True)
            if time != 'all':
                q._filter(queries.db_times[time])
            return q
Example #3
def get_spam_filtered_links(sr_id):
    """ NOTE: This query will never run unless someone does an "update" on it,
        but that will probably timeout. Use insert_spam_filtered_links."""
    return Link._query(Link.c.sr_id == sr_id,
                       Link.c._spam == True,
                       Link.c.verdict != 'mod-removed',
                       sort=db_sort('new'))
Example #4
def make_daily_promotions():
    # charge campaigns so they can go live
    charge_pending(offset=0)
    charge_pending(offset=1)

    # promote links and record ids of promoted links
    link_ids = set()
    for campaign, link in get_scheduled_promos(offset=0):
        link_ids.add(link._id)
        promote_link(link, campaign)

    # expire finished links
    q = Link._query(Link.c.promote_status == PROMOTE_STATUS.promoted,
                    data=True)
    q = q._filter(not_(Link.c._id.in_(link_ids)))
    for link in q:
        update_promote_status(link, PROMOTE_STATUS.finished)
        emailer.finished_promo(link)

    # update subreddits with promos
    all_live_promo_srnames(_update=True)

    _mark_promos_updated()
    finalize_completed_campaigns(daysago=1)
    hooks.get_hook('promote.make_daily_promotions').call(offset=0)
Example #5
def test_cassasavehide():
    from r2.models import Account, Link, CassandraSave, SavesByAccount
    from r2.lib.db import tdb_cassandra

    a = list(Account._query(sort=desc('_date'), limit=1))[0]
    l = list(Link._query(sort=desc('_date'), limit=1))[0]

    try:
        csh = CassandraSave._fast_query(a._id36, l._id36)
        print "Warning! Deleting!", csh
        CassandraSave._fast_query(a._id36, l._id36)._destroy()
    except tdb_cassandra.NotFound:
        pass

    csh = CassandraSave._save(a, l)
    csh._commit()
    assert CassandraSave._fast_query(a._id36, l._id36) == csh

    # check for the SavesByAccount object too
    assert SavesByAccount._byID(a._id36)[csh._id] == csh._id

    csh._destroy()

    try:
        CassandraSave._fast_query(a._id36, l._id36) == csh
        raise Exception("shouldn't exist after destroying")
    except tdb_cassandra.NotFound:
        pass

    try:
        assert csh._id not in SavesByAccount._byID(
            a._id36, properties=csh._id)._values()
    except tdb_cassandra.NotFound:
        pass
Example #6
def run(verbose=True, sleep_time = 60, num_items = 1):
    key = "indextank_cursor"
    cursor = g.cache.get(key)
    if cursor is None:
        raise ValueError("%s is not set!" % key)
    cursor = int(cursor)

    while True:
        if verbose:
            print "Looking for %d items with _id < %d" % (num_items, cursor)
        q = Link._query(sort = desc('_id'),
                        limit = num_items)
        q._after(Link._byID(cursor))
        last_date = None
        for item in q:
            cursor = item._id
            last_date = item._date
            amqp.add_item('indextank_changes', item._fullname,
                      message_id = item._fullname,
                      delivery_mode = amqp.DELIVERY_TRANSIENT)
        g.cache.set(key, cursor)

        if verbose:
            if last_date:
                last_date = last_date.strftime("%Y-%m-%d")
            print ("Just enqueued %d items. New cursor=%s (%s). Sleeping %d seconds."
                   % (num_items, cursor, last_date, sleep_time))

        sleep(sleep_time)
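The loop above raises ValueError until a cursor has been stored, so a one-off bootstrap roughly like the sketch below may be needed first; starting from the newest link is an assumption, not part of the example:

# bootstrap sketch: seed the cursor with the newest link id so run() can start
newest = list(Link._query(sort=desc('_id'), limit=1))[0]
g.cache.set("indextank_cursor", newest._id)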
Example #7
def test_cassasavehide():
    from r2.models import Account, Link, CassandraSave, SavesByAccount
    from r2.lib.db import tdb_cassandra

    a = list(Account._query(sort=desc('_date'),
                            limit=1))[0]
    l = list(Link._query(sort=desc('_date'),
                         limit=1))[0]

    try:
        csh = CassandraSave._fast_query(a._id36, l._id36)
        print "Warning! Deleting!", csh
        CassandraSave._fast_query(a._id36, l._id36)._destroy()
    except tdb_cassandra.NotFound:
        pass

    csh = CassandraSave._save(a, l)
    csh._commit()
    assert CassandraSave._fast_query(a._id36, l._id36) == csh

    # check for the SavesByAccount object too
    assert SavesByAccount._byID(a._id36)[csh._id] == csh._id

    csh._destroy()

    try:
        CassandraSave._fast_query(a._id36, l._id36) == csh
        raise Exception("shouldn't exist after destroying")
    except tdb_cassandra.NotFound:
        pass

    try:
        assert csh._id not in SavesByAccount._byID(a._id36, properties = csh._id)._values()
    except tdb_cassandra.NotFound:
        pass
Example #8
def run(verbose=True, sleep_time=60, num_items=1):
    key = "indextank_cursor"
    cursor = g.cache.get(key)
    if cursor is None:
        raise ValueError("%s is not set!" % key)
    cursor = int(cursor)

    while True:
        if verbose:
            print "Looking for %d items with _id < %d" % (num_items, cursor)
        q = Link._query(sort=desc('_id'), limit=num_items)
        q._after(Link._byID(cursor))
        last_date = None
        for item in q:
            cursor = item._id
            last_date = item._date
            amqp.add_item('indextank_changes',
                          item._fullname,
                          message_id=item._fullname,
                          delivery_mode=amqp.DELIVERY_TRANSIENT)
        g.cache.set(key, cursor)

        if verbose:
            if last_date:
                last_date = last_date.strftime("%Y-%m-%d")
            print(
                "Just enqueued %d items. New cursor=%s (%s). Sleeping %d seconds."
                % (num_items, cursor, last_date, sleep_time))

        sleep(sleep_time)
Example #9
def get_hot(sr):
    q = Link._query(Link.c.sr_id == sr._id,
                    sort = desc('_hot'),
                    write_cache = True,
                    limit = 150)

    iden = q._iden()

    read_cache = True
    #if query is in the cache, the expire flag is true, and the access
    #time is old, set read_cache = False
    if cache.get(iden) is not None:
        if cache.get(expire_key(sr)):
            access_time = cache.get(access_key(sr))
            if not access_time or datetime.now() > access_time + expire_delta:
                cache.delete(expire_key(sr))
                read_cache = False
    #if the query isn't in the cache, set read_cache to false so we
    #record the access time
    else:
        read_cache = False

    if not read_cache:
        cache.set(access_key(sr), datetime.now())
    
    q._read_cache = read_cache
    res = list(q)
    
    #set the #1 link so we can ignore it later. expire after TOP_CACHE
    #just in case something happens and that sr doesn't update
    if res:
        cache.set(top_key(sr), res[0]._fullname, TOP_CACHE)

    return res
Example #10
 def get_links(self, sort, time):
     from r2.models import Link
     from r2.lib.db import queries
     q = Link._query(sort = queries.db_sort(sort))
     if time != 'all':
         q._filter(queries.db_times[time])
     return q
Example #11
def get_spam_filtered_links(sr_id):
    """ NOTE: This query will never run unless someone does an "update" on it,
        but that will probably timeout. Use insert_spam_filtered_links."""
    return Link._query(Link.c.sr_id == sr_id,
                       Link.c._spam == True,
                       Link.c.verdict != 'mod-removed',
                       sort = db_sort('new'))
Example #12
    def get_links(self, sort, time):
        from r2.lib.db import queries
        from r2.models import Link
        from r2.controllers.errors import UserRequiredException

        if not c.user_is_loggedin:
            raise UserRequiredException

        friends = self.get_important_friends(c.user._id)

        if not friends:
            return []

        if g.use_query_cache:
            # with the precomputer enabled, this Subreddit only supports
            # being sorted by 'new'. it would be nice to have a
            # cleaner UI than just blatantly ignoring their sort,
            # though
            sort = "new"
            time = "all"

            friends = Account._byID(friends, return_dict=False)

            crs = [queries.get_submitted(friend, sort, time) for friend in friends]
            return queries.MergedCachedResults(crs)

        else:
            q = Link._query(Link.c.author_id == friends, sort=queries.db_sort(sort), data=True)
            if time != "all":
                q._filter(queries.db_times[time])
            return q
Example #13
def import_missing_comments(filename, apply_changes=False):
    """Imports the comments from the supplied YAML"""
    missing_comments = yaml.load(open(filename), Loader=yaml.CLoader)
    global dryrun
    dryrun = not apply_changes

    total_posts = len(missing_comments)
    post_count = 0
    for post in missing_comments:
        if post['author'] != 'Eliezer Yudkowsky':
            # print "Skipping non-EY post (%s): %s" % (post['author'], post['permalink'])
            continue

        ob_permalink = adjust_permalink(post['permalink'])

        # Attempt to retrieve the post that was imported into Less Wrong
        imported_post = list(Link._query(Link.c.ob_permalink == ob_permalink, data=True))
        if len(imported_post) < 1:
            print "Unable to retrieve imported post: %s" % ob_permalink
            continue
        elif len(imported_post) > 1:
            print "Got more than one result for: %s" % ob_permalink
            raise Exception
        else:
            imported_post = imported_post[0]

        post_count += 1
        try:
            print "Importing (%d of %d) comments on: %s" % (post_count, total_posts, imported_post.canonical_url)
        except UnicodeError:
            print "Importing comments on post (%d of %d)"
        process_comments_on_post(imported_post, post['comments'])
Example #14
    def gen_keys():
        yield promoted_memo_key

        # just let this one do its own writing
        load_all_reddits()

        yield queries.get_all_comments().iden

        l_q = Link._query(Link.c._spam == (True, False),
                          Link.c._deleted == (True, False),
                          sort=desc('_date'),
                          data=True,
                          )
        for link in fetch_things2(l_q, verbosity):
            yield comments_key(link._id)
            yield last_modified_key(link, 'comments')

        a_q = Account._query(Account.c._spam == (True, False),
                             sort=desc('_date'),
                             )
        for account in fetch_things2(a_q, verbosity):
            yield messages_key(account._id)
            yield last_modified_key(account, 'overview')
            yield last_modified_key(account, 'commented')
            yield last_modified_key(account, 'submitted')
            yield last_modified_key(account, 'liked')
            yield last_modified_key(account, 'disliked')
            yield queries.get_comments(account, 'new', 'all').iden
            yield queries.get_submitted(account, 'new', 'all').iden
            yield queries.get_liked(account).iden
            yield queries.get_disliked(account).iden
            yield queries.get_hidden(account).iden
            yield queries.get_saved(account).iden
            yield queries.get_inbox_messages(account).iden
            yield queries.get_unread_messages(account).iden
            yield queries.get_inbox_comments(account).iden
            yield queries.get_unread_comments(account).iden
            yield queries.get_inbox_selfreply(account).iden
            yield queries.get_unread_selfreply(account).iden
            yield queries.get_sent(account).iden

        sr_q = Subreddit._query(Subreddit.c._spam == (True, False),
                                sort=desc('_date'),
                                )
        for sr in fetch_things2(sr_q, verbosity):
            yield last_modified_key(sr, 'stylesheet_contents')
            yield queries.get_links(sr, 'hot', 'all').iden
            yield queries.get_links(sr, 'new', 'all').iden

            for sort in 'top', 'controversial':
                for time in 'hour', 'day', 'week', 'month', 'year', 'all':
                    yield queries.get_links(sr, sort, time,
                                            merge_batched=False).iden
            yield queries.get_spam_links(sr).iden
            yield queries.get_spam_comments(sr).iden
            yield queries.get_reported_links(sr).iden
            yield queries.get_reported_comments(sr).iden
            yield queries.get_subreddit_messages(sr).iden
            yield queries.get_unread_subreddit_messages(sr).iden
Example #15
    def gen_keys():
        yield promoted_memo_key

        # just let this one do its own writing
        load_all_reddits()

        yield queries.get_all_comments().iden

        l_q = Link._query(Link.c._spam == (True, False),
                          Link.c._deleted == (True, False),
                          sort=desc('_date'),
                          data=True,
                          )
        for link in fetch_things2(l_q, verbosity):
            yield comments_key(link._id)
            yield last_modified_key(link, 'comments')

        a_q = Account._query(Account.c._spam == (True, False),
                             sort=desc('_date'),
                             )
        for account in fetch_things2(a_q, verbosity):
            yield messages_key(account._id)
            yield last_modified_key(account, 'overview')
            yield last_modified_key(account, 'commented')
            yield last_modified_key(account, 'submitted')
            yield last_modified_key(account, 'liked')
            yield last_modified_key(account, 'disliked')
            yield queries.get_comments(account, 'new', 'all').iden
            yield queries.get_submitted(account, 'new', 'all').iden
            yield queries.get_liked(account).iden
            yield queries.get_disliked(account).iden
            yield queries.get_hidden(account).iden
            yield queries.get_saved(account).iden
            yield queries.get_inbox_messages(account).iden
            yield queries.get_unread_messages(account).iden
            yield queries.get_inbox_comments(account).iden
            yield queries.get_unread_comments(account).iden
            yield queries.get_inbox_selfreply(account).iden
            yield queries.get_unread_selfreply(account).iden
            yield queries.get_sent(account).iden

        sr_q = Subreddit._query(Subreddit.c._spam == (True, False),
                                sort=desc('_date'),
                                )
        for sr in fetch_things2(sr_q, verbosity):
            yield last_modified_key(sr, 'stylesheet_contents')
            yield queries.get_links(sr, 'hot', 'all').iden
            yield queries.get_links(sr, 'new', 'all').iden

            for sort in 'top', 'controversial':
                for time in 'hour', 'day', 'week', 'month', 'year', 'all':
                    yield queries.get_links(sr, sort, time,
                                            merge_batched=False).iden
            yield queries.get_spam_links(sr).iden
            yield queries.get_spam_comments(sr).iden
            yield queries.get_reported_links(sr).iden
            yield queries.get_reported_comments(sr).iden
            yield queries.get_subreddit_messages(sr).iden
            yield queries.get_unread_subreddit_messages(sr).iden
Example #16
 def _query_post(self, *args):
     post = None
     kwargs = {'data': True}
     q = Link._query(*args, **kwargs)
     posts = list(q)
     if posts:
         post = posts[0]
     return post
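A hedged usage sketch for the helper above: it accepts arbitrary Link.c filters, as in the hypothetical call below (ob_permalink appears as a Link attribute elsewhere on this page; the concrete URL is made up):

# hypothetical call inside the same importer class: fetch the post that was
# imported from a given old permalink
post = self._query_post(Link.c.ob_permalink == 'http://example.com/old-post')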
Example #17
 def _query_post(self, *args):
     post = None
     kwargs = {'data': True}
     q = Link._query(*args, **kwargs)
     posts = list(q)
     if posts:
         post = posts[0]
     return post
Example #18
def get_domain_links(domain, sort, time):
    from r2.lib.db import operators

    q = Link._query(operators.domain(Link.c.url) == filters._force_utf8(domain), sort=db_sort(sort), data=True)
    if time != "all":
        q._filter(db_times[time])

    return make_results(q)
Example #19
def shorten_byurl_keys():
    """We changed by_url keys from a format like
           byurl_google.com...
       to:
           byurl(1d5920f4b44b27a802bd77c4f0536f5a, google.com...)
       so that they would fit in memcache's 251-char limit
    """

    from datetime import datetime
    from hashlib import md5
    from r2.models import Link
    from r2.lib.filters import _force_utf8
    from pylons import g
    from r2.lib.utils import fetch_things2, in_chunks
    from r2.lib.db.operators import desc
    from r2.lib.utils import base_url, progress

    # from link.py
    def old_by_url_key(url):
        prefix = 'byurl_'
        s = _force_utf8(base_url(url.lower()))
        return '%s%s' % (prefix, s)

    def new_by_url_key(url):
        maxlen = 250
        template = 'byurl(%s,%s)'
        keyurl = _force_utf8(base_url(url.lower()))
        hexdigest = md5(keyurl).hexdigest()
        usable_len = maxlen - len(template) - len(hexdigest)
        return template % (hexdigest, keyurl[:usable_len])

    verbosity = 1000

    l_q = Link._query(Link.c._spam == (True, False),
                      data=True,
                      sort=desc('_date'))
    for links in (in_chunks(
            progress(
                fetch_things2(l_q, verbosity),
                key=lambda link: link._date,
                verbosity=verbosity,
                estimate=int(9.9e6),
                persec=True,
            ), verbosity)):
        # only links with actual URLs
        links = filter(
            lambda link:
            (not getattr(link, 'is_self', False) and getattr(link, 'url', '')),
            links)

        # old key -> new key
        translate = dict((old_by_url_key(link.url), new_by_url_key(link.url))
                         for link in links)

        old = g.permacache.get_multi(translate.keys())
        new = dict((translate[old_key], value)
                   for (old_key, value) in old.iteritems())
        g.permacache.set_multi(new)
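For reference, a standalone sketch of the key change described in the docstring above; the sample string stands in for the already-normalized output of _force_utf8(base_url(url.lower())):

from hashlib import md5

def sketch_new_by_url_key(keyurl, maxlen=250):
    # mirrors new_by_url_key above; keyurl is assumed to be normalized already
    template = 'byurl(%s,%s)'
    hexdigest = md5(keyurl).hexdigest()
    usable_len = maxlen - len(template) - len(hexdigest)
    return template % (hexdigest, keyurl[:usable_len])

print 'byurl_' + 'google.com/some/path'               # old-style key
print sketch_new_by_url_key('google.com/some/path')   # new-style key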
Example #20
def get_domain_links(domain, sort, time):
    from r2.lib.db import operators
    q = Link._query(operators.domain(Link.c.url) == filters._force_utf8(domain),
                    sort = db_sort(sort),
                    data = True)
    if time != "all":
        q._filter(db_times[time])

    return make_results(q)
Example #21
    def get_links(self, sort, time):
        from r2.lib import promote
        from r2.models import Link
        from r2.lib.db import queries

        q = Link._query(sort=queries.db_sort(sort), read_cache=True, write_cache=True, cache_time=60, data=True)
        if time != "all":
            q._filter(queries.db_times[time])
        return q
Example #22
def get_unmoderated_links(sr_id):
    q = Link._query(Link.c.sr_id == sr_id,
                    Link.c._spam == (True, False),
                    sort = db_sort('new'))

    # Doesn't really work because will not return Links with no verdict
    q._filter(or_(and_(Link.c._spam == True, Link.c.verdict != 'mod-removed'),
                  and_(Link.c._spam == False, Link.c.verdict != 'mod-approved')))
    return q
Example #23
def get_unmoderated_links(sr_id):
    q = Link._query(Link.c.sr_id == sr_id,
                    Link.c._spam == (True, False),
                    sort=db_sort('new'))

    # Doesn't really work because will not return Links with no verdict
    q._filter(
        or_(and_(Link.c._spam == True, Link.c.verdict != 'mod-removed'),
            and_(Link.c._spam == False, Link.c.verdict != 'mod-approved')))
    return q
Example #24
def _get_links(sr_id, sort, time):
    """General link query for a subreddit."""
    q = Link._query(Link.c.sr_id == sr_id, sort=db_sort(sort), data=True)

    if time != 'all':
        q._filter(db_times[time])

    res = make_results(q)

    return res
Example #25
def _get_links(sr_id, sort, time):
    """General link query for a subreddit."""
    q = Link._query(Link.c.sr_id == sr_id, sort=db_sort(sort), data=True)

    if time != "all":
        q._filter(db_times[time])

    res = make_results(q)

    return res
Example #26
def get_links(sr, sort, time, merge_batched=True):
    """General link query for a subreddit."""
    q = Link._query(Link.c.sr_id == sr._id, sort=db_sort(sort))

    if time != 'all':
        q._filter(db_times[time])

    res = make_results(q)

    # see the discussion above batched_time_times
    if (merge_batched and g.use_query_cache and sort in batched_time_sorts
            and time in batched_time_times):

        byday = Link._query(Link.c.sr_id == sr._id, sort=db_sort(sort))
        byday._filter(db_times['day'])

        res = merge_results(res, make_results(byday))

    return res
Example #27
def get_links(sr, sort, time):
    """General link query for a subreddit."""
    q = Link._query(Link.c.sr_id == sr._id, sort=db_sort(sort))

    if sort == "toplinks":
        q._filter(Link.c.top_link == True)

    if time != "all":
        q._filter(db_times[time])
    return make_results(q)
Example #28
 def get_links(self, sort, time):
     from r2.lib import promote
     from r2.models import Link
     from r2.lib.db import queries
     q = Link._query(sort = queries.db_sort(sort),
                     read_cache = True,
                     write_cache = True,
                     cache_time = 60)
     if time != 'all':
         q._filter(queries.db_times[time])
     return q
Example #29
def get_links(sr, sort, time, merge_batched=True):
    """General link query for a subreddit."""
    q = Link._query(Link.c.sr_id == sr._id,
                    sort = db_sort(sort),
                    data = True)

    if time != 'all':
        q._filter(db_times[time])

    res = make_results(q)

    return res
Example #30
    def get_links(self, sort, time):
        from r2.lib.db import queries
        from r2.models import Link
        from r2.controllers.errors import UserRequiredException

        if not c.user_is_loggedin:
            raise UserRequiredException

        q = Link._query(Link.c.author_id == c.user.friends, sort=queries.db_sort(sort))
        if time != "all":
            q._filter(queries.db_times[time])
        return q
Example #31
    def gen_keys():
        yield promoted_memo_key

        # just let this one do its own writing
        load_all_reddits()

        yield queries.get_all_comments().iden

        l_q = Link._query(
            Link.c._spam == (True, False), Link.c._deleted == (True, False), sort=desc("_date"), data=True
        )
        for link in fetch_things2(l_q, verbosity):
            yield comments_key(link._id)
            yield last_modified_key(link, "comments")

        a_q = Account._query(Account.c._spam == (True, False), sort=desc("_date"))
        for account in fetch_things2(a_q, verbosity):
            yield messages_key(account._id)
            yield last_modified_key(account, "overview")
            yield last_modified_key(account, "commented")
            yield last_modified_key(account, "submitted")
            yield last_modified_key(account, "liked")
            yield last_modified_key(account, "disliked")
            yield queries.get_comments(account, "new", "all").iden
            yield queries.get_submitted(account, "new", "all").iden
            yield queries.get_liked(account).iden
            yield queries.get_disliked(account).iden
            yield queries.get_hidden(account).iden
            yield queries.get_saved(account).iden
            yield queries.get_inbox_messages(account).iden
            yield queries.get_unread_messages(account).iden
            yield queries.get_inbox_comments(account).iden
            yield queries.get_unread_comments(account).iden
            yield queries.get_inbox_selfreply(account).iden
            yield queries.get_unread_selfreply(account).iden
            yield queries.get_sent(account).iden

        sr_q = Subreddit._query(Subreddit.c._spam == (True, False), sort=desc("_date"))
        for sr in fetch_things2(sr_q, verbosity):
            yield last_modified_key(sr, "stylesheet_contents")
            yield queries.get_links(sr, "hot", "all").iden
            yield queries.get_links(sr, "new", "all").iden

            for sort in "top", "controversial":
                for time in "hour", "day", "week", "month", "year", "all":
                    yield queries.get_links(sr, sort, time, merge_batched=False).iden
            yield queries.get_spam_links(sr).iden
            yield queries.get_spam_comments(sr).iden
            yield queries.get_reported_links(sr).iden
            yield queries.get_reported_comments(sr).iden
            yield queries.get_subreddit_messages(sr).iden
            yield queries.get_unread_subreddit_messages(sr).iden
Example #32
 def get_links(self, sort, time):
     from r2.lib import promote
     from r2.models import Link
     from r2.lib.db import queries
     q = Link._query(Link.c.sr_id > 0,
                     sort = queries.db_sort(sort),
                     read_cache = True,
                     write_cache = True,
                     cache_time = 60,
                     data = True,
                     filter_primary_sort_only=True)
     if time != 'all':
         q._filter(queries.db_times[time])
     return q
Example #33
 def get_links(self, sort, time):
     from r2.lib import promote
     from r2.models import Link
     from r2.lib.db import queries
     q = Link._query(Link.c.sr_id > 0,
                     sort=queries.db_sort(sort),
                     read_cache=True,
                     write_cache=True,
                     cache_time=60,
                     data=True,
                     filter_primary_sort_only=True)
     if time != 'all':
         q._filter(queries.db_times[time])
     return q
Example #34
def write_all_hot_cache():
    from r2.models.link import Link
    from r2.lib.db import queries

    q = Link._query(
        sort=queries.db_sort('hot'),
        limit=NUM_LINKS,
    )

    top_links = resort_links(list(q))
    link_ids = [link._fullname for link in top_links]

    g.gencache.set(CACHE_KEY, link_ids)

    return link_ids
Example #35
def default_queries():
    from r2.models import Link, Subreddit
    from r2.lib.db.operators import desc
    from copy import deepcopy

    queries = []

    q = Link._query(Link.c.sr_id == Subreddit.user_subreddits(None), sort=desc("_hot"), limit=37)

    queries.append(q)
    # add a higher limit one too
    q = deepcopy(q)
    q._limit = 75
    queries.append(q)

    return queries
Example #36
def port_deleted_links(after_id=None):
    from r2.models import Link
    from r2.lib.db.operators import desc
    from r2.models.query_cache import CachedQueryMutator
    from r2.lib.db.queries import get_deleted_links
    from r2.lib.utils import fetch_things2, in_chunks, progress

    q = Link._query(Link.c._deleted == True, Link.c._spam == (True, False), sort=desc("_date"), data=True)
    q = fetch_things2(q, chunk_size=500)
    q = progress(q, verbosity=1000)

    for chunk in in_chunks(q):
        with CachedQueryMutator() as m:
            for link in chunk:
                query = get_deleted_links(link.author_id)
                m.insert(query, [link])
Example #37
    def import_into_subreddit(self, sr, data, rewrite_map_file):
        posts = list(Link._query())
        for post in posts:
            post._delete_from_db()

        comments = self._query_comments()
        for comment in comments:
            comment._delete_from_db()

        for post_data in data:
            try:
                print post_data['title']
                self.process_post(post_data, sr)
            except Exception, e:
                print 'Unable to create post:\n%s\n%s\n%s' % (type(e), e, post_data)
                raise
Example #38
    def get_links_sr_ids(self, sr_ids, sort, time):
        from r2.lib.db import queries
        from r2.models import Link

        if not sr_ids:
            return []
        else:
            srs = Subreddit._byID(sr_ids, data=True, return_dict=False)

        if g.use_query_cache:
            results = [queries.get_links(sr, sort, time) for sr in srs]
            return queries.merge_results(*results)
        else:
            q = Link._query(Link.c.sr_id == sr_ids, sort=queries.db_sort(sort), data=True)
            if time != "all":
                q._filter(queries.db_times[time])
            return q
Example #39
    def import_into_subreddit(self, sr, data, rewrite_map_file):
        posts = list(Link._query())
        for post in posts:
            post._delete_from_db()

        comments = self._query_comments()
        for comment in comments:
            comment._delete_from_db()

        for post_data in data:
            try:
                print post_data['title']
                self.process_post(post_data, sr)
            except Exception, e:
                print 'Unable to create post:\n%s\n%s\n%s' % (type(e), e,
                                                              post_data)
                raise
Example #40
def default_queries():
    from r2.models import Link, Subreddit
    from r2.lib.db.operators import desc
    from copy import deepcopy
    queries = []

    q = Link._query(Link.c.sr_id == Subreddit.user_subreddits(None),
                    sort=desc('_hot'),
                    limit=37)

    queries.append(q)
    #add a higher limit one too
    q = deepcopy(q)
    q._limit = 75
    queries.append(q)

    return queries
Example #41
    def get_links_sr_ids(self, sr_ids, sort, time):
        from r2.lib.db import queries
        from r2.models import Link

        if not sr_ids:
            return []
        else:
            srs = Subreddit._byID(sr_ids, return_dict=False)

        if g.use_query_cache:
            results = [queries.get_links(sr, sort, time) for sr in srs]
            return queries.merge_results(*results)
        else:
            q = Link._query(Link.c.sr_id == sr_ids, sort=queries.db_sort(sort))
            if time != 'all':
                q._filter(queries.db_times[time])
            return q
Example #42
def add_byurl_prefix():
    """Run one before the byurl prefix is set, and once after (killing
       it after it gets when it started the first time"""

    from datetime import datetime
    from r2.models import Link
    from r2.lib.filters import _force_utf8
    from pylons import g
    from r2.lib.utils import fetch_things2
    from r2.lib.db.operators import desc
    from r2.lib.utils import base_url

    now = datetime.now(g.tz)
    print 'started at %s' % (now,)

    l_q = Link._query(
        Link.c._date < now,
        data=True,
        sort=desc('_date'))

    # from link.py
    def by_url_key(url, prefix=''):
        s = _force_utf8(base_url(url.lower()))
        return '%s%s' % (prefix, s)

    done = 0
    for links in fetch_things2(l_q, 1000, chunks=True):
        done += len(links)
        print 'Doing: %r, %s..%s' % (done, links[-1]._date, links[0]._date)

        # only links with actual URLs
        links = filter(lambda link: (not getattr(link, 'is_self', False)
                                     and getattr(link, 'url', '')),
                       links)

        # old key -> new key
        translate = dict((by_url_key(link.url),
                          by_url_key(link.url, prefix='byurl_'))
                         for link in links)

        old = g.permacache.get_multi(translate.keys())
        new = dict((translate[old_key], value)
                   for (old_key, value)
                   in old.iteritems())
        g.permacache.set_multi(new)
Example #43
def port_deleted_links(after_id=None):
    from r2.models import Link
    from r2.lib.db.operators import desc
    from r2.models.query_cache import CachedQueryMutator
    from r2.lib.db.queries import get_deleted_links
    from r2.lib.utils import fetch_things2, in_chunks, progress

    q = Link._query(Link.c._deleted == True,
                    Link.c._spam == (True, False),
                    sort=desc('_date'), data=True)
    q = fetch_things2(q, chunk_size=500)
    q = progress(q, verbosity=1000)

    for chunk in in_chunks(q):
        with CachedQueryMutator() as m:
            for link in chunk:
                query = get_deleted_links(link.author_id)
                m.insert(query, [link])
Example #44
def _promoted_link_query(user_id, status):
    STATUS_CODES = {
        "unpaid": PROMOTE_STATUS.unpaid,
        "unapproved": PROMOTE_STATUS.unseen,
        "rejected": PROMOTE_STATUS.rejected,
        "live": PROMOTE_STATUS.promoted,
        "accepted": (PROMOTE_STATUS.accepted, PROMOTE_STATUS.pending, PROMOTE_STATUS.finished),
    }

    q = Link._query(
        Link.c.sr_id == get_promote_srid(),
        Link.c._spam == (True, False),
        Link.c._deleted == (True, False),
        Link.c.promote_status == STATUS_CODES[status],
        sort=db_sort("new"),
    )
    if user_id:
        q._filter(Link.c.author_id == user_id)
    return q
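A hedged usage note for the query builder above: a falsy user_id skips the author filter, so a hypothetical call like the one below iterates over every link in the requested status:

# hypothetical call: all currently live promoted links, regardless of author
for link in _promoted_link_query(None, "live"):
    print link._fullname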
Example #45
def fix_images(dryrun=True):
    from r2.models import Link, Comment

    links = Link._query(Link.c.ob_permalink != None, data=True)
    for link in links:
        ob_url = link.ob_permalink.strip()
        print "Processing %s" % ob_url

        new_content = process_content(link.article)
        if not dryrun:
            link.article = new_content
            link._commit()

        comments = Comment._query(Comment.c.link_id == link._id, data=True)
        for comment in comments:
            new_content = process_content(comment.body)
            if not dryrun:
                comment.body = new_content
                comment._commit()
Example #46
def fix_images(dryrun=True):
    from r2.models import Link, Comment

    links = Link._query(Link.c.ob_permalink != None, data = True)
    for link in links:
        ob_url = link.ob_permalink.strip()
        print "Processing %s" % ob_url

        new_content = process_content(link.article)
        if not dryrun:
            link.article = new_content
            link._commit()

        comments = Comment._query(Comment.c.link_id == link._id, data = True)
        for comment in comments:
            new_content = process_content(comment.body)
            if not dryrun:
                comment.body = new_content
                comment._commit()
Example #47
def get_all_query(sort, time):
    """ Return a Query for r/all links sorted by anything other than Hot, which
    has special treatment."""
    from r2.models import Link
    from r2.lib.db import queries

    q = Link._query(
        sort=queries.db_sort(sort),
        read_cache=True,
        write_cache=True,
        cache_time=60,
        data=True,
        filter_primary_sort_only=True,
    )

    if time != 'all':
        q._filter(queries.db_times[time])

    return q
Example #48
def _get_links(sr_id, sort, time, no_children=False):
    """General link query for a subsciteit."""
    # Get the children if there are any...
    from r2.lib.normalized_hot import expand_children
    # Are we building a lot of them?
    if not no_children:
        srs = expand_children(sr_id, byID=True)
        results = [_get_links(sr_id, sort, time, no_children=True)
                   for sr_id in srs]
        return merge_results(*results)
    q = Link._query(Link.c.sr_id == sr_id,
                    sort = db_sort(sort),
                    data = True)

    if time != 'all':
        q._filter(db_times[time])

    res = make_results(q)

    return res
Example #49
def port_cassaurls(after_id=None, estimate=15231317):
    from r2.models import Link, LinksByUrl
    from r2.lib.db import tdb_cassandra
    from r2.lib.db.operators import desc
    from r2.lib.db.tdb_cassandra import CL
    from r2.lib.utils import fetch_things2, in_chunks, progress

    q = Link._query(Link.c._spam == (True, False), sort=desc("_date"), data=True)
    if after_id:
        q._after(Link._byID(after_id, data=True))
    q = fetch_things2(q, chunk_size=500)
    q = progress(q, estimate=estimate)
    q = (l for l in q if getattr(l, "url", "self") != "self" and not getattr(l, "is_self", False))
    chunks = in_chunks(q, 500)

    for chunk in chunks:
        with LinksByUrl._cf.batch(write_consistency_level=CL.ONE) as b:
            for l in chunk:
                k = LinksByUrl._key_from_url(l.url)
                if k:
                    b.insert(k, {l._id36: l._id36})
Example #50
    def spam_account_links(self, account, query_limit=10000, spam_limit=500):
        from r2.lib.db.operators import asc, desc, timeago

        q = Link._query(Link.c.author_id == account._id,
            Link.c._spam == False,
            sort=desc('_date'),
            data=False)
        q._limit = query_limit
        things = list(q)

        processed = 0
        for item in things:
            if processed < spam_limit:
                verdict = getattr(item, "verdict", None)
                if not verdict or not verdict.endswith("-approved"):
                    processed += 1
                    admintools.spam(item,
                        auto=False,
                        moderator_banned=False,
                        banner=None,
                        train_spam=True)
Example #51
def port_cassaurls(after_id=None, estimate=15231317):
    from r2.models import Link, LinksByUrlAndSubreddit
    from r2.lib.db import tdb_cassandra
    from r2.lib.db.operators import desc
    from r2.lib.db.tdb_cassandra import CL
    from r2.lib.utils import fetch_things2, in_chunks, progress

    q = Link._query(Link.c._spam == (True, False),
                    sort=desc('_date'),
                    data=True)
    if after_id:
        q._after(Link._byID(after_id, data=True))
    q = fetch_things2(q, chunk_size=500)
    q = progress(q, estimate=estimate)
    q = (l for l in q if getattr(l, 'url', 'self') != 'self'
         and not getattr(l, 'is_self', False))
    chunks = in_chunks(q, 500)

    for chunk in chunks:
        for l in chunk:
            LinksByUrlAndSubreddit.add_link(l)
Example #52
    def _post_process(self, rewrite_map_file):
        def unicode_safe(text):
            if isinstance(text, unicode):
                return text.encode('utf-8')
            else:
                return text

        posts = list(Link._query(Link.c.ob_permalink != None, data = True))

        # Generate a mapping between ob permalinks and imported posts
        self.post_mapping = {}
        for post in posts:
            self.post_mapping[post.ob_permalink] = post

        # Write out the rewrite map
        for old_url, post in self.post_mapping.iteritems():
            ob_url = urlparse.urlparse(old_url)
            new_url = post.canonical_url
            try:
                rewrite_map_file.write("%s %s\n" % (unicode_safe(ob_url.path), unicode_safe(new_url)))
            except UnicodeEncodeError, uee:
                print "Unable to write to rewrite map file:"
                print unicode_safe(ob_url.path)
                print unicode_safe(new_url)
Example #53
def port_cassaurls(after_id=None, estimate=15231317):
    from r2.models import Link, LinksByUrl
    from r2.lib.db import tdb_cassandra
    from r2.lib.db.operators import desc
    from r2.lib.db.tdb_cassandra import CL
    from r2.lib.utils import fetch_things2, in_chunks, progress

    q = Link._query(Link.c._spam == (True, False),
                    sort=desc('_date'),
                    data=True)
    if after_id:
        q._after(Link._byID(after_id, data=True))
    q = fetch_things2(q, chunk_size=500)
    q = progress(q, estimate=estimate)
    q = (l for l in q if getattr(l, 'url', 'self') != 'self'
         and not getattr(l, 'is_self', False))
    chunks = in_chunks(q, 500)

    for chunk in chunks:
        with LinksByUrl._cf.batch(write_consistency_level=CL.ONE) as b:
            for l in chunk:
                k = LinksByUrl._key_from_url(l.url)
                if k:
                    b.insert(k, {l._id36: l._id36})
Example #54
from r2.lib.db.operators import desc
from r2.lib.utils import fetch_things2
from r2.models import (
    calculate_server_seconds,
    Comment,
    Link,
    Subreddit,
)

LINK_GILDING_START = datetime(2014, 2, 1, 0, 0, tzinfo=g.tz)
COMMENT_GILDING_START = datetime(2012, 10, 1, 0, 0, tzinfo=g.tz)

queries = [
    Link._query(
        Link.c.gildings != 0,
        Link.c._date > LINK_GILDING_START,
        data=True,
        sort=desc('_date'),
    ),
    Comment._query(
        Comment.c.gildings != 0,
        Comment.c._date > COMMENT_GILDING_START,
        data=True,
        sort=desc('_date'),
    ),
]

seconds_by_srid = defaultdict(int)
gilding_price = g.gold_month_price.pennies

for q in queries:
    for things in fetch_things2(q, chunks=True, chunk_size=100):
Example #55
def get_reported_links(sr):
    q_l = Link._query(Link.c.reported != 0,
                      Link.c.sr_id == sr._id,
                      Link.c._spam == False,
                      sort=db_sort('new'))
    return make_results(q_l)
Example #56
def get_spam_links(sr):
    q_l = Link._query(Link.c.sr_id == sr._id,
                      Link.c._spam == True,
                      sort=db_sort('new'))
    return make_results(q_l)