Example #1
def write_permacache_from_dir(dirname):
    # we want the whole list so that we can display accurate progress
    # information. If we're operating on more than tens of millions of
    # files, we should either bail out or tweak this to not need the
    # whole list at once
    allfiles = []
    for root, dirs, files in os.walk(dirname):
        for f in files:
            allfiles.append(os.path.join(root, f))

    for fname in progress(allfiles, persec=True):
        try:
            write_permacache_from_file(fname)
            os.unlink(fname)
        except:
            mr_tools.status("failed on %r" % fname)
            raise

    mr_tools.status("Removing empty directories")
    for root, dirs, files in os.walk(dirname, topdown=False):
        for d in dirs:
            dname = os.path.join(root, d)
            try:
                os.rmdir(dname)
            except OSError as e:
                if e.errno == errno.ENOTEMPTY:
                    mr_tools.status("%s not empty" % (dname,))
                else:
                    raise
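
The comment at the top of this example notes that the full file list is only collected to make the progress display accurate, and that a directory with tens of millions of files would need a different approach. A minimal sketch of that alternative, assuming progress() accepts an estimate keyword for unsized iterables the same way the later examples use it (write_permacache_from_dir_streaming and count_estimate are hypothetical names):

def write_permacache_from_dir_streaming(dirname, count_estimate=None):
    # hypothetical variant: walk the tree lazily instead of building the
    # whole list, trading exact progress for an approximate estimate
    def iter_files():
        for root, dirs, files in os.walk(dirname):
            for f in files:
                yield os.path.join(root, f)

    for fname in progress(iter_files(), persec=True, estimate=count_estimate):
        try:
            write_permacache_from_file(fname)
            os.unlink(fname)
        except:
            mr_tools.status("failed on %r" % fname)
            raise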
Example #2
def rebuild_index(start_at=None, sleeptime=1, cls=Link, estimate=50000000,
                  chunk_size=1000):
    if start_at is _REBUILD_INDEX_CACHE_KEY:
        start_at = g.cache.get(start_at)
        if not start_at:
            raise ValueError("Told me to use '%s' key, but it's not set" %
                             _REBUILD_INDEX_CACHE_KEY)
    
    q = cls._query(cls.c._deleted == (True, False),
                   sort=desc('_date'), data=True)
    if start_at:
        after = cls._by_fullname(start_at)
        assert isinstance(after, cls)
        q._after(after)
    q = r2utils.fetch_things2(q, chunk_size=chunk_size)
    q = r2utils.progress(q, verbosity=1000, estimate=estimate, persec=True,
                         key=_progress_key)
    for chunk in r2utils.in_chunks(q, size=chunk_size):
        for x in range(5):
            try:
                inject(chunk)
            except httplib.HTTPException as err:
                print "Got  %s, sleeping %s secs" % (err, x)
                time.sleep(x)
                continue
            else:
                break
        else:
            raise err
        last_update = chunk[-1]
        g.cache.set(_REBUILD_INDEX_CACHE_KEY, last_update._fullname)
        time.sleep(sleeptime)
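
The five-attempt for/else retry loop above recurs unchanged in several later examples. A hedged sketch of the same pattern factored into a helper (retry_inject and max_tries are made-up names, not part of the reddit codebase):

def retry_inject(do_inject, chunk, max_tries=5):
    # retry transient HTTP failures with a linearly growing sleep,
    # mirroring the inline for/else loop above
    for x in range(max_tries):
        try:
            do_inject(chunk)
        except httplib.HTTPException as err:
            print "Got %s, sleeping %s secs" % (err, x)
            time.sleep(x)
        else:
            return
    # all attempts failed; re-raise the last HTTP error
    raise err

With that helper, the loop body in the example would reduce to retry_inject(inject, chunk).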
Example #3
def rebuild_link_index(start_at=None, sleeptime=1, cls=Link,
                       uploader=LinkUploader, doc_api='CLOUDSEARCH_DOC_API',
                       estimate=50000000, chunk_size=1000):
    doc_api = getattr(g, doc_api)
    uploader = uploader(doc_api)

    q = cls._query(cls.c._deleted == (True, False), sort=desc('_date'))

    if start_at:
        after = cls._by_fullname(start_at)
        assert isinstance(after, cls)
        q._after(after)

    q = r2utils.fetch_things2(q, chunk_size=chunk_size)
    q = r2utils.progress(q, verbosity=1000, estimate=estimate, persec=True,
                         key=_progress_key)
    for chunk in r2utils.in_chunks(q, size=chunk_size):
        uploader.things = chunk
        for x in range(5):
            try:
                uploader.inject()
            except httplib.HTTPException as err:
                print "Got %s, sleeping %s secs" % (err, x)
                time.sleep(x)
                continue
            else:
                break
        else:
            raise err
        last_update = chunk[-1]
        print "last updated %s" % last_update._fullname
        time.sleep(sleeptime)
Example #4
File: migrate.py Project: MatsT/reddit
def port_cassavotes():
    from r2.models import Vote, Account, Link, Comment
    from r2.models.vote import CassandraVote, CassandraLinkVote, CassandraCommentVote
    from r2.lib.db.tdb_cassandra import CL
    from r2.lib.utils import fetch_things2, to36, progress

    ts = [(Vote.rel(Account, Link), CassandraLinkVote),
          (Vote.rel(Account, Comment), CassandraCommentVote)]

    dataattrs = set(['valid_user', 'valid_thing', 'ip', 'organic'])

    for prel, crel in ts:
        vq = prel._query(sort=desc('_date'),
                         data=True,
                         eager_load=False)
        vq = fetch_things2(vq)
        vq = progress(vq, persec=True)
        for v in vq:
            t1 = to36(v._thing1_id)
            t2 = to36(v._thing2_id)
            cv = crel(thing1_id = t1,
                      thing2_id = t2,
                      date=v._date,
                      name=v._name)
            for dkey, dval in v._t.iteritems():
                if dkey in dataattrs:
                    setattr(cv, dkey, dval)

            cv._commit(write_consistency_level=CL.ONE)
Example #5
def get_participated():
    users = {}

    q = Account._query(Account.c.f2p != "", sort=asc("_date"), data=True)
    for user in progress(fetch_things2(q)):
        users[user._fullname] = user.f2p

    return users
Example #6
def write_permacache_from_dir(dirname):
    for fname in progress(os.listdir(dirname), persec=True):
        try:
            fpath = os.path.join(dirname, fname)
            write_permacache_from_file(fpath)
            os.unlink(fpath)
        except:
            mr_tools.status('failed on %r' % fname)
            raise
Example #7
File: migrate.py Project: ketralnis/reddit
def port_cassahides():
    from r2.models import SaveHide, CassandraHide
    from r2.lib.db.tdb_cassandra import CL
    from r2.lib.db.operators import desc
    from r2.lib.utils import fetch_things2, timeago, progress

    q = SaveHide._query(SaveHide.c._date > timeago("1 week"), SaveHide.c._name == "hide", sort=desc("_date"))
    q = fetch_things2(q)
    q = progress(q, estimate=1953374)

    for sh in q:
        CassandraHide._hide(sh._thing1, sh._thing2, write_consistency_level=CL.ONE)
Example #8
def rebuild_index(after_id = None):
    cls = Link

    # don't pull spam/deleted
    q = cls._query(sort=desc('_date'), data=True)

    if after_id:
        q._after(cls._byID(after_id))

    q = fetch_things2(q)

    q = progress(q, verbosity=1000, estimate=10000000, persec=True)
    for chunk in in_chunks(q):
        inject(chunk)
Example #9
File: migrate.py Project: ketralnis/reddit
def port_cassasaves(after_id=None, estimate=12489897):
    from r2.models import SaveHide, CassandraSave
    from r2.lib.db.operators import desc
    from r2.lib.db.tdb_cassandra import CL
    from r2.lib.utils import fetch_things2, to36, progress

    q = SaveHide._query(SaveHide.c._name == "save", sort=desc("_date"), data=False, eager_load=False)

    if after_id is not None:
        q._after(SaveHide._byID(after_id))

    for sh in progress(fetch_things2(q), estimate=estimate):

        csh = CassandraSave(thing1_id=to36(sh._thing1_id), thing2_id=to36(sh._thing2_id), date=sh._date)
        csh._commit(write_consistency_level=CL.ONE)
Example #10
def give_trophies(users):
    for fullnames in in_chunks(progress(users, verbosity=50), size=50):
        users = Account._by_fullname(fullnames, return_dict=False)

        for user in users:
            team = get_user_team(user)

            trophy = Award.give_if_needed(
                codename="f2p_orangered" if team == "red" else "f2p_periwinkle",
                user=user,
            )
            if trophy:
                trophy._commit()

        sleep(.5)
Example #11
File: migrate.py Project: Shilohtd/reddit
def port_deleted_links(after_id=None):
    from r2.models import Link
    from r2.lib.db.operators import desc
    from r2.models.query_cache import CachedQueryMutator
    from r2.lib.db.queries import get_deleted_links
    from r2.lib.utils import fetch_things2, in_chunks, progress

    q = Link._query(Link.c._deleted == True, Link.c._spam == (True, False), sort=desc("_date"), data=True)
    q = fetch_things2(q, chunk_size=500)
    q = progress(q, verbosity=1000)

    for chunk in in_chunks(q):
        with CachedQueryMutator() as m:
            for link in chunk:
                query = get_deleted_links(link.author_id)
                m.insert(query, [link])
Example #12
def backfill_deleted_accounts(resume_id=None):
    del_accts = Account._query(Account.c._deleted == True, sort=desc('_date'))
    if resume_id:
        del_accts._filter(Account.c._id < resume_id)

    for i, account in enumerate(progress(fetch_things2(del_accts))):
        # Don't kill the rabbit! Wait for the relevant queues to calm down.
        if i % 1000 == 0:
            del_len = get_queue_length('del_account_q')
            cs_len = get_queue_length('cloudsearch_changes')
            while (del_len > 1000 or
                    cs_len > 10000):
                sys.stderr.write(("CS: %d, DEL: %d" % (cs_len, del_len)) + "\n")
                sys.stderr.flush()
                time.sleep(1)
                del_len = get_queue_length('del_account_q')
                cs_len = get_queue_length('cloudsearch_changes')
        amqp.add_item('account_deleted', account._fullname)
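
The inline backpressure loop above (checking queue lengths every 1,000 accounts) could also be expressed as a small helper; a hedged sketch reusing the get_queue_length call from this example (wait_for_queue is a hypothetical name):

def wait_for_queue(name, limit, poll=1):
    # block until the named queue drains below the given limit,
    # logging its length while we wait
    length = get_queue_length(name)
    while length > limit:
        sys.stderr.write("%s: %d\n" % (name, length))
        sys.stderr.flush()
        time.sleep(poll)
        length = get_queue_length(name)

The i % 1000 branch above would then reduce to wait_for_queue('del_account_q', 1000) followed by wait_for_queue('cloudsearch_changes', 10000).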
Example #13
def rebuild_index(after_id = None, estimate=10000000):
    cls = Link

    # don't pull spam/deleted
    q = cls._query(sort=desc('_date'), data=True)

    if after_id:
        q._after(cls._byID(after_id))

    q = fetch_things2(q)

    def key(link):
        # we're going back in time, so this will give us a good idea
        # of how far we've gone
        return "%s/%s" % (link._id, link._date)

    q = progress(q, verbosity=1000, estimate=estimate, persec=True, key=key)
    for chunk in in_chunks(q):
        inject(chunk)
Example #14
def _populate(after_id=None, estimate=54301242):
    from r2.models import desc
    from r2.lib.db import tdb_cassandra
    from r2.lib import utils

    # a larger chunk size has a chance to decrease the number of Cassandra
    # writes, but the probability is low
    chunk_size = 5000

    q = Comment._query(Comment.c._spam == (True, False), Comment.c._deleted == (True, False), sort=desc("_date"))

    if after_id is not None:
        q._after(Comment._byID(after_id))

    q = utils.fetch_things2(q, chunk_size=chunk_size)
    q = utils.progress(q, verbosity=chunk_size, estimate=estimate)

    for chunk in utils.in_chunks(q, chunk_size):
        chunk = filter(lambda x: hasattr(x, "link_id"), chunk)
        update_comment_votes(chunk)
Example #15
def rebuild_link_index(
    start_at=None,
    sleeptime=1,
    cls=Link,
    uploader=LinkUploader,
    doc_api="CLOUDSEARCH_DOC_API",
    estimate=50000000,
    chunk_size=1000,
):
    cache_key = _REBUILD_INDEX_CACHE_KEY % uploader.__name__.lower()
    doc_api = getattr(g, doc_api)
    uploader = uploader(doc_api)

    if start_at is _REBUILD_INDEX_CACHE_KEY:
        start_at = g.cache.get(cache_key)
        if not start_at:
            raise ValueError("Told me to use '%s' key, but it's not set" % cache_key)

    q = cls._query(cls.c._deleted == (True, False), sort=desc("_date"), data=True)
    if start_at:
        after = cls._by_fullname(start_at)
        assert isinstance(after, cls)
        q._after(after)
    q = r2utils.fetch_things2(q, chunk_size=chunk_size)
    q = r2utils.progress(q, verbosity=1000, estimate=estimate, persec=True, key=_progress_key)
    for chunk in r2utils.in_chunks(q, size=chunk_size):
        uploader.things = chunk
        for x in range(5):
            try:
                uploader.inject()
            except httplib.HTTPException as err:
                print "Got %s, sleeping %s secs" % (err, x)
                time.sleep(x)
                continue
            else:
                break
        else:
            raise err
        last_update = chunk[-1]
        g.cache.set(cache_key, last_update._fullname)
        time.sleep(sleeptime)
Example #16
File: migrate.py Project: AHAMED750/reddit
def port_cassaurls(after_id=None, estimate=15231317):
    from r2.models import Link, LinksByUrlAndSubreddit
    from r2.lib.db import tdb_cassandra
    from r2.lib.db.operators import desc
    from r2.lib.db.tdb_cassandra import CL
    from r2.lib.utils import fetch_things2, in_chunks, progress

    q = Link._query(Link.c._spam == (True, False),
                    sort=desc('_date'), data=True)
    if after_id:
        q._after(Link._byID(after_id, data=True))
    q = fetch_things2(q, chunk_size=500)
    q = progress(q, estimate=estimate)
    q = (l for l in q
         if getattr(l, 'url', 'self') != 'self'
         and not getattr(l, 'is_self', False))
    chunks = in_chunks(q, 500)

    for chunk in chunks:
        for l in chunk:
            LinksByUrlAndSubreddit.add_link(l)
Example #17
File: migrate.py Project: Shilohtd/reddit
def port_cassaurls(after_id=None, estimate=15231317):
    from r2.models import Link, LinksByUrl
    from r2.lib.db import tdb_cassandra
    from r2.lib.db.operators import desc
    from r2.lib.db.tdb_cassandra import CL
    from r2.lib.utils import fetch_things2, in_chunks, progress

    q = Link._query(Link.c._spam == (True, False), sort=desc("_date"), data=True)
    if after_id:
        q._after(Link._byID(after_id, data=True))
    q = fetch_things2(q, chunk_size=500)
    q = progress(q, estimate=estimate)
    q = (l for l in q if getattr(l, "url", "self") != "self" and not getattr(l, "is_self", False))
    chunks = in_chunks(q, 500)

    for chunk in chunks:
        with LinksByUrl._cf.batch(write_consistency_level=CL.ONE) as b:
            for l in chunk:
                k = LinksByUrl._key_from_url(l.url)
                if k:
                    b.insert(k, {l._id36: l._id36})
Example #18
def rebuild_link_index(start_at=None,
                       sleeptime=1,
                       cls=Link,
                       uploader=LinkUploader,
                       doc_api='CLOUDSEARCH_DOC_API',
                       estimate=50000000,
                       chunk_size=1000):
    doc_api = getattr(g, doc_api)
    uploader = uploader(doc_api)

    q = cls._query(cls.c._deleted == (True, False), sort=desc('_date'))

    if start_at:
        after = cls._by_fullname(start_at)
        assert isinstance(after, cls)
        q._after(after)

    q = r2utils.fetch_things2(q, chunk_size=chunk_size)
    q = r2utils.progress(q,
                         verbosity=1000,
                         estimate=estimate,
                         persec=True,
                         key=_progress_key)
    for chunk in r2utils.in_chunks(q, size=chunk_size):
        uploader.things = chunk
        for x in range(5):
            try:
                uploader.inject()
            except httplib.HTTPException as err:
                print "Got %s, sleeping %s secs" % (err, x)
                time.sleep(x)
                continue
            else:
                break
        else:
            raise err
        last_update = chunk[-1]
        print "last updated %s" % last_update._fullname
        time.sleep(sleeptime)
Example #19
def _populate(after_id=None, estimate=54301242):
    from r2.models import desc
    from r2.lib.db import tdb_cassandra
    from r2.lib import utils

    # a larger chunk size has a chance to decrease the number of Cassandra
    # writes, but the probability is low
    chunk_size = 5000

    q = Comment._query(Comment.c._spam == (True, False),
                       Comment.c._deleted == (True, False),
                       sort=desc('_date'))

    if after_id is not None:
        q._after(Comment._byID(after_id))

    q = utils.fetch_things2(q, chunk_size=chunk_size)
    q = utils.progress(q, verbosity=chunk_size, estimate=estimate)

    for chunk in utils.in_chunks(q, chunk_size):
        chunk = filter(lambda x: hasattr(x, 'link_id'), chunk)
        add_comments(chunk)
Example #20
def rebuild_index(start_at=None,
                  sleeptime=1,
                  cls=Link,
                  estimate=50000000,
                  chunk_size=1000):
    if start_at is _REBUILD_INDEX_CACHE_KEY:
        start_at = g.cache.get(start_at)
        if not start_at:
            raise ValueError("Told me to use '%s' key, but it's not set" %
                             _REBUILD_INDEX_CACHE_KEY)

    q = cls._query(cls.c._deleted == (True, False),
                   sort=desc('_date'),
                   data=True)
    if start_at:
        after = cls._by_fullname(start_at)
        assert isinstance(after, cls)
        q._after(after)
    q = r2utils.fetch_things2(q, chunk_size=chunk_size)
    q = r2utils.progress(q,
                         verbosity=1000,
                         estimate=estimate,
                         persec=True,
                         key=_progress_key)
    for chunk in r2utils.in_chunks(q, size=chunk_size):
        for x in range(5):
            try:
                inject(chunk)
            except httplib.HTTPException as err:
                print "Got  %s, sleeping %s secs" % (err, x)
                time.sleep(x)
                continue
            else:
                break
        else:
            raise err
        last_update = chunk[-1]
        g.cache.set(_REBUILD_INDEX_CACHE_KEY, last_update._fullname)
        time.sleep(sleeptime)
Example #21
def rebuild_link_index(start_at=None, sleeptime=1, cls=Link,
                       uploader=LinkUploader, doc_api='CLOUDSEARCH_DOC_API',
                       estimate=50000000, chunk_size=1000):
    cache_key = _REBUILD_INDEX_CACHE_KEY % uploader.__name__.lower()
    doc_api = getattr(g, doc_api)
    uploader = uploader(doc_api)

    if start_at is _REBUILD_INDEX_CACHE_KEY:
        start_at = g.cache.get(cache_key)
        if not start_at:
            raise ValueError("Told me to use '%s' key, but it's not set" %
                             cache_key)

    q = cls._query(cls.c._deleted == (True, False),
                   sort=desc('_date'), data=True)
    if start_at:
        after = cls._by_fullname(start_at)
        assert isinstance(after, cls)
        q._after(after)
    q = r2utils.fetch_things2(q, chunk_size=chunk_size)
    q = r2utils.progress(q, verbosity=1000, estimate=estimate, persec=True,
                         key=_progress_key)
    for chunk in r2utils.in_chunks(q, size=chunk_size):
        uploader.things = chunk
        for x in range(5):
            try:
                uploader.inject()
            except httplib.HTTPException as err:
                print "Got %s, sleeping %s secs" % (err, x)
                time.sleep(x)
                continue
            else:
                break
        else:
            raise err
        last_update = chunk[-1]
        g.cache.set(cache_key, last_update._fullname)
        time.sleep(sleeptime)
Example #22
def port_cassavotes():
    from r2.models import Vote, Account, Link, Comment
    from r2.models.vote import CassandraVote, CassandraLinkVote, CassandraCommentVote
    from r2.lib.db.tdb_cassandra import CL
    from r2.lib.utils import fetch_things2, to36, progress

    ts = [(Vote.rel(Account, Link), CassandraLinkVote),
          (Vote.rel(Account, Comment), CassandraCommentVote)]

    dataattrs = set(['valid_user', 'valid_thing', 'ip', 'organic'])

    for prel, crel in ts:
        vq = prel._query(sort=desc('_date'), data=True, eager_load=False)
        vq = fetch_things2(vq)
        vq = progress(vq, persec=True)
        for v in vq:
            t1 = to36(v._thing1_id)
            t2 = to36(v._thing2_id)
            cv = crel(thing1_id=t1, thing2_id=t2, date=v._date, name=v._name)
            for dkey, dval in v._t.iteritems():
                if dkey in dataattrs:
                    setattr(cv, dkey, dval)

            cv._commit(write_consistency_level=CL.ONE)
Example #23
File: migrate.py Project: wqx081/reddit
def port_cassaurls(after_id=None, estimate=15231317):
    from r2.models import Link, LinksByUrl
    from r2.lib.db import tdb_cassandra
    from r2.lib.db.operators import desc
    from r2.lib.db.tdb_cassandra import CL
    from r2.lib.utils import fetch_things2, in_chunks, progress

    q = Link._query(Link.c._spam == (True, False),
                    sort=desc('_date'),
                    data=True)
    if after_id:
        q._after(Link._byID(after_id, data=True))
    q = fetch_things2(q, chunk_size=500)
    q = progress(q, estimate=estimate)
    q = (l for l in q if getattr(l, 'url', 'self') != 'self'
         and not getattr(l, 'is_self', False))
    chunks = in_chunks(q, 500)

    for chunk in chunks:
        with LinksByUrl._cf.batch(write_consistency_level=CL.ONE) as b:
            for l in chunk:
                k = LinksByUrl._key_from_url(l.url)
                if k:
                    b.insert(k, {l._id36: l._id36})
Example #24
    # don't show user their own unread stuff
    if msg.author_id == account._id:
        return False

    return True


resume_id = long(sys.argv[1]) if len(sys.argv) > 1 else None

msg_accounts = Account._query(sort=desc("_date"), data=True)

if resume_id:
    msg_accounts._filter(Account.c._id < resume_id)

for account in progress(fetch_things2(msg_accounts), estimate=resume_id):
    current_inbox_count = account.inbox_count
    unread_messages = list(queries.get_unread_inbox(account))

    if account._id % 100000 == 0:
        g.reset_caches()

    if not len(unread_messages):
        if current_inbox_count:
            account._incr('inbox_count', -current_inbox_count)
    else:
        msgs = Message._by_fullname(
            unread_messages,
            data=True,
            return_dict=False,
            ignore_missing=True,
Example #25
"""Ensure modmsgtime is properly set on all accounts.

See the comment in Account.is_moderator_somewhere for possible values of this
attribute now.

"""

from r2.lib.db.operators import desc
from r2.lib.utils import fetch_things2, progress
from r2.models import Account, Subreddit

all_accounts = Account._query(sort=desc("_date"))
for account in progress(fetch_things2(all_accounts)):
    is_moderator_somewhere = bool(Subreddit.reverse_moderator_ids(account))
    if is_moderator_somewhere:
        if not account.modmsgtime:
            account.modmsgtime = False
        else:
            # the account already has a date for modmsgtime meaning unread mail
            pass
    else:
        account.modmsgtime = None
    account._commit()
Example #26
        return False

    # don't show user their own unread stuff
    if msg.author_id == account._id:
        return False

    return True

resume_id = long(sys.argv[1]) if len(sys.argv) > 1 else None

msg_accounts = Account._query(sort=desc("_date"), data=True)

if resume_id:
    msg_accounts._filter(Account.c._id < resume_id)

for account in progress(fetch_things2(msg_accounts), estimate=resume_id):
    current_inbox_count = account.inbox_count
    unread_messages = list(queries.get_unread_inbox(account))

    if account._id % 100000 == 0:
        g.reset_caches()

    if not len(unread_messages):
        if current_inbox_count:
            account._incr('inbox_count', -current_inbox_count)
    else:
        msgs = Message._by_fullname(
            unread_messages,
            data=True,
            return_dict=False,
            ignore_missing=True,
Example #27
File: migrate.py Project: aguamar/reddit
def shorten_byurl_keys():
    """We changed by_url keys from a format like
           byurl_google.com...
       to:
           byurl(1d5920f4b44b27a802bd77c4f0536f5a, google.com...)
       so that they would fit in memcache's 251-char limit
    """

    from datetime import datetime
    from hashlib import md5
    from r2.models import Link
    from r2.lib.filters import _force_utf8
    from pylons import g
    from r2.lib.utils import fetch_things2, in_chunks
    from r2.lib.db.operators import desc
    from r2.lib.utils import base_url, progress

    # from link.py
    def old_by_url_key(url):
        prefix='byurl_'
        s = _force_utf8(base_url(url.lower()))
        return '%s%s' % (prefix, s)
    def new_by_url_key(url):
        maxlen = 250
        template = 'byurl(%s,%s)'
        keyurl = _force_utf8(base_url(url.lower()))
        hexdigest = md5(keyurl).hexdigest()
        usable_len = maxlen-len(template)-len(hexdigest)
        return template % (hexdigest, keyurl[:usable_len])

    verbosity = 1000

    l_q = Link._query(
        Link.c._spam == (True, False),
        data=True,
        sort=desc('_date'))
    for links in (
        in_chunks(
            progress(
                fetch_things2(l_q, verbosity),
                key = lambda link: link._date,
                verbosity=verbosity,
                estimate=int(9.9e6),
                persec=True,
                ),
            verbosity)):
        # only links with actual URLs
        links = filter(lambda link: (not getattr(link, 'is_self', False)
                                     and getattr(link, 'url', '')),
                       links)

        # old key -> new key
        translate = dict((old_by_url_key(link.url),
                          new_by_url_key(link.url))
                         for link in links)

        old = g.permacache.get_multi(translate.keys())
        new = dict((translate[old_key], value)
                   for (old_key, value)
                   in old.iteritems())
        g.permacache.set_multi(new)
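
The docstring above says the new key format exists so that keys fit within memcache's 251-character limit. A quick standalone check of that property, with base_url and _force_utf8 simplified away (new_by_url_key_standalone and the sample URL are made up for illustration):

from hashlib import md5

def new_by_url_key_standalone(url):
    # same construction as new_by_url_key above, minus the reddit helpers,
    # so the snippet runs on its own
    maxlen = 250
    template = 'byurl(%s,%s)'
    keyurl = url.lower().encode('utf-8')
    hexdigest = md5(keyurl).hexdigest()
    usable_len = maxlen - len(template) - len(hexdigest)
    return template % (hexdigest, keyurl[:usable_len])

# even a very long URL produces a key under memcache's limit
key = new_by_url_key_standalone('http://example.com/' + 'x' * 500)
assert len(key) <= 250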
Example #28
"""Ensure modmsgtime is properly set on all accounts.

See the comment in Account.is_moderator_somewhere for possible values of this
attribute now.

"""

from r2.lib.db.operators import desc
from r2.lib.utils import fetch_things2, progress
from r2.models import Account, Subreddit


all_accounts = Account._query(sort=desc("_date"))
for account in progress(fetch_things2(all_accounts)):
    is_moderator_somewhere = bool(Subreddit.reverse_moderator_ids(account))
    if is_moderator_somewhere:
        if not account.modmsgtime:
            account.modmsgtime = False
        else:
            # the account already has a date for modmsgtime meaning unread mail
            pass
    else:
        account.modmsgtime = None
    account._commit()