Example #1
def time_listings(times = ('year','month','week','day','hour')):
    oldests = dict((t, epoch_seconds(timeago('1 %s' % t)))
                   for t in times)

    @mr_tools.dataspec_m_thing(("url", str),('sr_id', int),)
    def process(link):
        assert link.thing_type == 'link'

        timestamp = link.timestamp
        fname = make_fullname(Link, link.thing_id)

        if not link.spam and not link.deleted:
            sr_id = link.sr_id
            if link.url:
                domains = UrlParser(link.url).domain_permutations()
            else:
                domains = []
            ups, downs = link.ups, link.downs

            for tkey, oldest in oldests.iteritems():
                if timestamp > oldest:
                    sc = score(ups, downs)
                    contr = controversy(ups, downs)
                    yield ('sr-top-%s-%d' % (tkey, sr_id),
                           sc, timestamp, fname)
                    yield ('sr-controversial-%s-%d' % (tkey, sr_id),
                           contr, timestamp, fname)
                    for domain in domains:
                        yield ('domain/top/%s/%s' % (tkey, domain),
                               sc, timestamp, fname)
                        yield ('domain/controversial/%s/%s' % (tkey, domain),
                               contr, timestamp, fname)

    mr_tools.mr_map(process)
Example #2
def normalized_hot_cached(sr_ids):
    results = []
    srs = Subreddit._byID(sr_ids, data = True, return_dict = False)
    for sr in srs:
        #items = get_hot(sr)
        items = filter(lambda l: l._date > utils.timeago('%d day' % g.HOT_PAGE_AGE),
                       get_hot(sr))

        if not items:
            continue

        top_score = max(items[0]._hot, 1)
        
        top, rest = items[:2], items[2:]

        if top:
            normals = [l._hot / top_score for l in top]
            results.extend((l, random.choice(normals)) for l in top)
            #random.shuffle(normals)
            #results.extend((l, normals.pop()) for l in top)
        
        if rest:
            results.extend((l, l._hot / top_score) for l in rest)

    results.sort(key = lambda x: (x[1], x[0]._hot), reverse = True)
    return [l[0]._fullname for l in results]
Example #3
def process_new_links(period=media_period, force=False):
    """Fetches links from the last period and sets their media
    properities. If force is True, it will fetch properities for links
    even if the properties already exist"""
    links = Link._query(Link.c._date > timeago(period),
                        sort=desc('_date'),
                        data=True)
    results = {}
    jobs = []
    for link in fetch_things2(links):
        if link.is_self or link.promoted:
            continue
        elif not force and (link.has_thumbnail or link.media_object):
            continue

        jobs.append(make_link_info_job(results, link, g.useragent))

    #send links to a queue
    wq = WorkQueue(jobs, num_workers=20, timeout=30)
    wq.start()
    wq.jobs.join()

    #when the queue is finished, do the db writes in this thread
    for link, info in results.items():
        update_link(link, info[0], info[1])
Example #4
def user_vote_change_links(period = '1 day'):
    rel = Vote.rel(Account, Link)
    type = tdb.rel_types_id[rel._type_id]
    # rt = rel table
    # dt = data table
    rt, account_tt, link_tt, dt = type.rel_table

    aliases = tdb.alias_generator()
    author_dt = dt.alias(aliases.next())

    link_dt = tdb.types_id[Link._type_id].data_table[0].alias(aliases.next())

    # Create an SQL CASE statement for the subreddit vote multiplier
    cases = []
    for subreddit in subreddits_with_custom_karma_multiplier():
        cases.append( (sa.cast(link_dt.c.value,sa.Integer) == subreddit._id,
                      subreddit.post_karma_multiplier) )
    cases.append( (True, g.post_karma_multiplier) )       # The default article multiplier


    date = utils.timeago(period)
    
    s = sa.select([author_dt.c.value, sa.func.sum(sa.cast(rt.c.name, sa.Integer) * sa.case(cases))],
                  sa.and_(rt.c.date >= date,
                          author_dt.c.thing_id == rt.c.rel_id,
                          author_dt.c.key == 'author_id',
                          link_tt.c.thing_id == rt.c.thing2_id,
                          link_tt.c.date >= date,
                          link_dt.c.key == 'sr_id',
                          link_dt.c.thing_id == rt.c.thing2_id),
                  group_by = author_dt.c.value)

    rows = s.execute().fetchall()
    return [(int(r.value), r.sum) for r in rows]
Example #5
def time_listings(times = ('year','month','week','day','hour', 'all')):
    oldests = dict((t, epoch_seconds(timeago('1 %s' % t)))
                   for t in times if t != 'all')
    if 'all' in times:
        oldests['all'] = 0

    @mr_tools.dataspec_m_thing(('author_id', int),)
    def process(link):
        assert link.thing_type == 'link'

        timestamp = link.timestamp
        fname = make_fullname(Link, link.thing_id)

        if not link.spam and not link.deleted:
            author_id = link.author_id
            ups, downs = link.ups, link.downs

            sc = score(ups, downs)
            contr = controversy(ups, downs)
            h = _hot(ups, downs, timestamp)

            for tkey, oldest in oldests.iteritems():
                if timestamp > oldest:
                    yield ('user-top-%s-%d' % (tkey, author_id),
                           sc, timestamp, fname)
                    yield ('user-controversial-%s-%d' % (tkey, author_id),
                           contr, timestamp, fname)
                    if tkey == 'all':
                        yield ('user-new-%s-%d' % (tkey, author_id),
                               timestamp, timestamp, fname)
                        yield ('user-hot-%s-%d' % (tkey, author_id),
                               h, timestamp, fname)


    mr_tools.mr_map(process)
Example #7
def get_hot(srs, only_fullnames=False):
    """Get the (fullname, hotness, epoch_seconds) for the hottest
       links in a subreddit. Use the query-cache to avoid some lookups
       if we can."""
    from r2.lib.db.thing import Query
    from r2.lib.db.queries import CachedResults

    ret = []
    queries = [sr.get_links("hot", "all") for sr in srs]

    # fetch these all in one go
    cachedresults = filter(lambda q: isinstance(q, CachedResults), queries)
    CachedResults.fetch_multi(cachedresults)

    # pair each query with its subreddit; cached_query() below needs the sr
    for sr, q in zip(srs, queries):
        if isinstance(q, Query):
            links = cached_query(q, sr)
            res = [(link._fullname, link._hot, epoch_seconds(link._date)) for link in links]
        elif isinstance(q, CachedResults):
            # we're relying on an implementation detail of
            # CachedResults here, where it's storing tuples that look
            # exactly like the return-type we want, to make our
            # sorting a bit cheaper
            res = list(q.data)

        # remove any that are too old
        age_limit = epoch_seconds(utils.timeago("%d days" % g.HOT_PAGE_AGE))
        res = [(fname if only_fullnames else (fname, hot, date)) for (fname, hot, date) in res if date > age_limit]
        ret.append(res)

    return ret
Example #8
def time_listings(times=('year', 'month', 'week', 'day', 'hour', 'all')):
    oldests = dict(
        (t, epoch_seconds(timeago('1 %s' % t))) for t in times if t != 'all')
    if 'all' in times:
        oldests['all'] = 0

    @mr_tools.dataspec_m_thing(
        ('author_id', int), )
    def process(link):
        assert link.thing_type == 'link'

        timestamp = link.timestamp
        fname = make_fullname(Link, link.thing_id)

        if not link.spam and not link.deleted:
            author_id = link.author_id
            ups, downs = link.ups, link.downs

            sc = score(ups, downs)
            contr = controversy(ups, downs)
            h = _hot(ups, downs, timestamp)

            for tkey, oldest in oldests.iteritems():
                if timestamp > oldest:
                    yield ('user-top-%s-%d' % (tkey, author_id), sc, timestamp,
                           fname)
                    yield ('user-controversial-%s-%d' % (tkey, author_id),
                           contr, timestamp, fname)
                    if tkey == 'all':
                        yield ('user-new-%s-%d' % (tkey, author_id), timestamp,
                               timestamp, fname)
                        yield ('user-hot-%s-%d' % (tkey, author_id), h,
                               timestamp, fname)

    mr_tools.mr_map(process)
Example #9
def get_hot(srs, only_fullnames=False):
    """Get the (fullname, hotness, epoch_seconds) for the hottest
       links in a subreddit. Use the query-cache to avoid some lookups
       if we can."""
    from r2.lib.db.thing import Query
    from r2.lib.db.queries import CachedResults

    ret = []
    queries = [sr.get_links('hot', 'all') for sr in srs]

    # fetch these all in one go
    cachedresults = filter(lambda q: isinstance(q, CachedResults), queries)
    CachedResults.fetch_multi(cachedresults)

    # pair each query with its subreddit; cached_query() below needs the sr
    for sr, q in zip(srs, queries):
        if isinstance(q, Query):
            links = cached_query(q, sr)
            res = [(link._fullname, link._hot, epoch_seconds(link._date))
                   for link in links]
        elif isinstance(q, CachedResults):
            # we're relying on an implementation detail of
            # CachedResults here, where it's storing tuples that look
            # exactly like the return-type we want, to make our
            # sorting a bit cheaper
            res = list(q.data)

        # remove any that are too old
        age_limit = epoch_seconds(utils.timeago('%d days' % g.HOT_PAGE_AGE))
        res = [(fname if only_fullnames else (fname, hot, date))
               for (fname, hot, date) in res if date > age_limit]
        ret.append(res)

    return ret
Example #10
File: link.py Project: vin/reddit
    def add_props(cls, user, wrapped):
        from r2.lib.count import incr_counts
        from r2.lib.media import thumbnail_url
        from r2.lib.utils import timeago

        saved = Link._saved(user, wrapped) if user else {}
        hidden = Link._hidden(user, wrapped) if user else {}
        #clicked = Link._clicked(user, wrapped) if user else {}
        clicked = {}

        for item in wrapped:
            show_media = (c.user.pref_media == 'on' or
                          (item.promoted and item.has_thumbnail
                           and c.user.pref_media != 'off') or
                          (c.user.pref_media == 'subreddit' and
                           item.subreddit.show_media))

            if not show_media:
                item.thumbnail = ""
            elif item.has_thumbnail:
                item.thumbnail = thumbnail_url(item)
            else:
                item.thumbnail = g.default_thumb
            
            item.score = max(0, item.score)

            item.domain = (domain(item.url) if not item.is_self
                          else 'self.' + item.subreddit.name)
            if not hasattr(item,'top_link'):
                item.top_link = False
            item.urlprefix = ''
            item.saved = bool(saved.get((user, item, 'save')))
            item.hidden = bool(hidden.get((user, item, 'hide')))
            item.clicked = bool(clicked.get((user, item, 'click')))
            item.num = None
            item.score_fmt = Score.number_only
            item.permalink = item.make_permalink(item.subreddit)
            if item.is_self:
                item.url = item.make_permalink(item.subreddit, force_domain = True)

            if c.user_is_admin:
                item.hide_score = False
            elif item.promoted:
                item.hide_score = True
            elif c.user == item.author:
                item.hide_score = False
            elif item._date > timeago("2 hours"):
                item.hide_score = True
            else:
                item.hide_score = False

            if c.user_is_loggedin and item.author._id == c.user._id:
                item.nofollow = False
            elif item.score <= 1 or item._spam or item.author._spam:
                item.nofollow = True
            else:
                item.nofollow = False
        if c.user_is_loggedin:
            incr_counts(wrapped)
Example #11
def share(link, emails, from_name = "", reply_to = "", body = ""):
    """Queues a 'share link' email."""
    now = datetime.datetime.now(g.tz)
    ival = now - timeago(g.new_link_share_delay)
    date = max(now,link._date + ival)
    Email.handler.add_to_queue(c.user, link, emails, from_name, g.share_reply,
                               date, request.ip, Email.Kind.SHARE,
                               body = body, reply_to = reply_to)
Example #12
def simplified_timesince(date, include_tense=True):
    if date > timeago("1 minute"):
        return _("just now")

    since = []
    since.append(timesince(date))
    if include_tense:
        since.append(_("ago"))
    return " ".join(since)
Example #13
def simplified_timesince(date, include_tense=True):
    if date > timeago("1 minute"):
        return _("just now")

    since = timesince(date)
    if include_tense:
        return _("%s ago") % since
    else:
        return since
Example #14
def fix_all_broken_things(delete=False):
    from r2.models import Link, Comment

    # 2009-07-21 is the first broken thing at the time of writing.
    from_time = datetime.datetime(2009, 7, 21, tzinfo=g.tz)
    to_time = utils.timeago("60 seconds")

    for (cls, attrs) in ((Link, ("author_id", "sr_id")), (Comment, ("author_id", "sr_id", "body", "link_id"))):
        utils.find_broken_things(cls, attrs, from_time, to_time, delete=delete)
Example #17
def share(link, emails, from_name = "", reply_to = "", body = ""):
    """Queues a 'share link' email."""
    now = datetime.datetime.now(g.tz)
    ival = now - timeago(g.new_link_share_delay)
    date = max(now,link._date + ival)
    Email.handler.add_to_queue(c.user, emails, from_name, g.share_reply,
                               Email.Kind.SHARE, date = date,
                               body = body, reply_to = reply_to,
                               thing = link)
Example #18
def _get_cutoffs(intervals):
    cutoffs = {}
    for interval in intervals:
        if interval == "all":
            cutoffs["all"] = 0.0
        else:
            cutoffs[interval] = epoch_seconds(timeago("1 %s" % interval))

    return cutoffs
Example #19
def fix_all_broken_things(delete=False):
    from r2.models import Link, Comment

    # 2009-07-21 is the first broken thing at the time of writing.
    from_time = datetime.datetime(2009, 7, 21, tzinfo=g.tz)
    to_time = utils.timeago('60 seconds')

    for (cls, attrs) in ((Link, ('author_id', 'sr_id')),
                         (Comment, ('author_id', 'sr_id', 'body', 'link_id'))):
        utils.find_broken_things(cls, attrs, from_time, to_time, delete=delete)
Example #20
 def keep_fn(self):
     """For merged time-listings, don't show items that are too old
        (this can happen when mr_top hasn't run in a while)"""
     if self.time != 'all' and c.default_sr:
         oldest = timeago('1 %s' % (str(self.time),))
         def keep(item):
             return item._date > oldest and item.keep_item(item)
         return keep
     else:
         return ListingController.keep_fn(self)
Example #22
def time_listings(times=('all', )):
    oldests = dict(
        (t, epoch_seconds(timeago('1 %s' % t))) for t in times if t != "all")
    oldests['all'] = epoch_seconds(timeago('10 years'))

    @mr_tools.dataspec_m_thing(
        ("url", str), )
    def process(link):
        assert link.thing_type == 'link'

        timestamp = link.timestamp
        fname = make_fullname(Link, link.thing_id)

        if not link.spam and not link.deleted:
            if link.url:
                domains = UrlParser(link.url).domain_permutations()
            else:
                domains = []
            ups, downs = link.ups, link.downs

            for tkey, oldest in oldests.iteritems():
                if timestamp > oldest:
                    sc = score(ups, downs)
                    contr = controversy(ups, downs)
                    h = _hot(ups, downs, timestamp)
                    # use a distinct name so the upvotes() helper is not shadowed
                    upvote_count = upvotes(ups)
                    for domain in domains:
                        yield ('domain/top/%s/%s' % (tkey, domain), sc,
                               timestamp, fname)
                        yield ('domain/%s/%s/%s' %
                               (g.voting_upvote_path, tkey, domain), upvote_count,
                               timestamp, fname)
                        yield ('domain/%s/%s/%s' %
                               (g.voting_controversial_path, tkey, domain),
                               contr, timestamp, fname)
                        if tkey == "all":
                            yield ('domain/hot/%s/%s' % (tkey, domain), h,
                                   timestamp, fname)
                            yield ('domain/new/%s/%s' % (tkey, domain),
                                   timestamp, timestamp, fname)

    mr_tools.mr_map(process)
Example #23
def port_cassahides():
    from r2.models import SaveHide, CassandraHide
    from r2.lib.db.tdb_cassandra import CL
    from r2.lib.db.operators import desc
    from r2.lib.utils import fetch_things2, timeago, progress

    q = SaveHide._query(SaveHide.c._date > timeago("1 week"), SaveHide.c._name == "hide", sort=desc("_date"))
    q = fetch_things2(q)
    q = progress(q, estimate=1953374)

    for sh in q:
        CassandraHide._hide(sh._thing1, sh._thing2, write_consistency_level=CL.ONE)
Example #24
def all_comments():
    q = Comment._query(Comment.c._score > 2,
                       Comment.c.sr_id != 6,
                       Comment.c._date > timeago('1 weeks'),
                       sort = desc('_date'),
                       limit = 200,
                       data = True)
    comments = list(q)
    while comments:
        for l in comments:
            yield l
        comments = list(q._after(l))
Example #25
 def append_date_clause(self, table, select, all_time=None):
     """Create the date portion of a where clause based on the time
        period specified."""
     if all_time:
         return select
     if self.period and not self.date:
         select.append_whereclause(table.c.date > timeago(self.period))
     if self.date:
         seconds = 24 * 60 * 60
         wheredate = dt.datetime.strptime(self.date, "%Y%m%d")
         select.append_whereclause(table.c.date >= wheredate)
         select.append_whereclause((table.c.date < wheredate + dt.timedelta(0, seconds)))
     return select
Example #26
def vote_stats(config):
    stats = {}

    link_votes = Vote.rel(Account, Link)
    comment_votes = Vote.rel(Account, Comment)

    for name, rel in (('link', link_votes), ('comment', comment_votes)):
        table = get_rel_table(rel._type_id)[0]
        q = table.count(table.c.date > timeago('1 day'))
        stats[name+'_vote_count_past_day'] = q.execute().fetchone()[0]

    stats['vote_count_past_day'] = stats['link_vote_count_past_day'] + stats['comment_vote_count_past_day']
    return stats
Example #27
 def append_date_clause(self, table, select, all_time=None):
     """Create the date portion of a where clause based on the time
        period specified."""
     if all_time:
         return select
     if self.period and not self.date:
         select.append_whereclause(table.c.date > timeago(self.period))
     if self.date:
         seconds = 24 * 60 * 60
         wheredate = dt.datetime.strptime(self.date, "%Y%m%d")
         select.append_whereclause(table.c.date >= wheredate)
         select.append_whereclause(
             (table.c.date < wheredate + dt.timedelta(0, seconds)))
     return select
Example #28
def reindex_all(types = None, delete_all_first=False):
    """
        Called from `paster run` to totally re-index everything in the
        database. Spawns a thread to connect to Solr, and sends it
        tokenised Things
    """
    global indexed_types

    start_t = datetime.now()

    if not types:
        types = indexed_types

    # We don't want the default thread-local cache (which is just a
    # dict) to grow un-bounded (normally, we'd use
    # utils.set_emptying_cache, except that that preserves memcached,
    # and we don't even want to get memcached for total indexing,
    # because it would dump out more recent stuff)
    g.cache.caches = (SelfEmptyingCache(),) # + g.cache.caches[1:]

    count = 0
    q=Queue(100)
    indexer=Thread(target=indexer_worker,
                   args=(q,delete_all_first))
    indexer.start()

    try:
        for cls in types:
            for batch in fetch_batches(cls,1000,
                                       timeago("50 years"),
                                       start_t):
                r = tokenize_things([ x for x in batch
                                      if not x._spam and not x._deleted ])

                count += len(r)
                print ("Processing %s #%d(%s): %s"
                       % (cls.__name__, count, q.qsize(), r[0]['contents']))

                if indexer.isAlive():
                    q.put(r)
                else:
                    raise Exception("'tis a shame that I have but one thread to give")
        q.put("done")
        indexer.join()

    except Exception, e:
        if indexer.isAlive():
            q.put(e,timeout=30)
        raise e
Example #30
def time_listings(intervals):
    cutoff_by_interval = {
        interval: epoch_seconds(timeago("1 %s" % interval))
        for interval in intervals
    }

    @mr_tools.dataspec_m_thing(
        ("url", str),
        ("sr_id", int),
        ("author_id", int),
    )
    def process(thing):
        if thing.deleted:
            return

        thing_cls = thingcls_by_name[thing.thing_type]
        fname = make_fullname(thing_cls, thing.thing_id)
        thing_score = score(thing.ups, thing.downs)
        thing_controversy = controversy(thing.ups, thing.downs)

        for interval, cutoff in cutoff_by_interval.iteritems():
            if thing.timestamp < cutoff:
                continue

            yield ("user/%s/top/%s/%d" %
                   (thing.thing_type, interval, thing.author_id), thing_score,
                   thing.timestamp, fname)
            yield ("user/%s/controversial/%s/%d" %
                   (thing.thing_type, interval, thing.author_id),
                   thing_controversy, thing.timestamp, fname)

            if thing.spam:
                continue

            if thing.thing_type == "link":
                yield ("sr/link/top/%s/%d" % (interval, thing.sr_id),
                       thing_score, thing.timestamp, fname)
                yield ("sr/link/controversial/%s/%d" % (interval, thing.sr_id),
                       thing_controversy, thing.timestamp, fname)

                if thing.url:
                    for domain in UrlParser(thing.url).domain_permutations():
                        yield ("domain/link/top/%s/%s" % (interval, domain),
                               thing_score, thing.timestamp, fname)
                        yield ("domain/link/controversial/%s/%s" %
                               (interval, domain), thing_controversy,
                               thing.timestamp, fname)

    mr_tools.mr_map(process)
Example #31
def time_listings(times = ('all',)):
    oldests = dict((t, epoch_seconds(timeago('1 %s' % t)))
                   for t in times if t != "all")
    oldests['all'] = epoch_seconds(timeago('10 years'))

    @mr_tools.dataspec_m_thing(("url", str),)
    def process(link):
        assert link.thing_type == 'link'

        timestamp = link.timestamp
        fname = make_fullname(Link, link.thing_id)

        if not link.spam and not link.deleted:
            if link.url:
                domains = UrlParser(link.url).domain_permutations()
            else:
                domains = []
            ups, downs = link.ups, link.downs

            for tkey, oldest in oldests.iteritems():
                if timestamp > oldest:
                    sc = score(ups, downs)
                    contr = controversy(ups, downs)
                    h = _hot(ups, downs, timestamp)
                    for domain in domains:
                        yield ('domain/top/%s/%s' % (tkey, domain),
                               sc, timestamp, fname)
                        yield ('domain/controversial/%s/%s' % (tkey, domain),
                               contr, timestamp, fname)
                        if tkey == "all":
                            yield ('domain/hot/%s/%s' % (tkey, domain),
                                   h, timestamp, fname)
                            yield ('domain/new/%s/%s' % (tkey, domain),
                                   timestamp, timestamp, fname)

    mr_tools.mr_map(process)
Example #32
def port_cassahides():
    from r2.models import SaveHide, CassandraHide
    from r2.lib.db.tdb_cassandra import CL
    from r2.lib.db.operators import desc
    from r2.lib.utils import fetch_things2, timeago, progress

    q = SaveHide._query(SaveHide.c._date > timeago('1 week'),
                        SaveHide.c._name == 'hide',
                        sort=desc('_date'))
    q = fetch_things2(q)
    q = progress(q, estimate=1953374)

    for sh in q:
        CassandraHide._hide(sh._thing1,
                            sh._thing2,
                            write_consistency_level=CL.ONE)
Example #33
    def delete_old(cls, age="3 days", limit=10000):
        cutoff = timeago(age)
        q = cls._query(cls.c._date < cutoff)
        q._limit = limit

        accounts = set()
        defendants = set()
        for j in q:
            accounts.add(j._thing1)
            defendants.add(j._thing2)
            j._delete()

        for a in accounts:
            Jury.by_account(a, _update=True)

        for d in defendants:
            Jury.by_defendant(d, _update=True)
Example #35
    def preflight_check(self):
        if getattr(self, 'force_run', False):
            return True

        sr_id = getattr(self, 'batched_time_srid', None)
        if not sr_id:
            return True

        # this is a special query that tries to run less often, see
        # the discussion about batched_time_times
        sr = Subreddit._byID(sr_id, data=True)

        if (self.iden in getattr(sr, 'last_batch_query', {}) 
            and sr.last_batch_query[self.iden] > utils.timeago('1 day')):
            # this has been done in the last 24 hours, so we should skip it
            return False

        return True
Example #36
    def __init__(self, q, sort, fields = [], subreddits = [], authors = [], 
                 types = [], timerange = None, spam = False, deleted = False):

        self.q = q
        self.fields = fields
        self.sort = sort
        self.subreddits = subreddits
        self.authors = authors
        self.types = types
        self.spam = spam
        self.deleted = deleted

        if timerange in ['hour','week','day','month','year']:
            self.timerange = (timeago("1 %s" % timerange),"NOW")
        elif timerange == 'all' or timerange is None:
            self.timerange = None
        else:
            self.timerange = timerange
Example #37
 def keep(item):
     """Avoid showing links that are too young, to give time
     for things like the spam filter and thumbnail fetcher to
     act on them before releasing them into the wild"""
     wouldkeep = item.keep_item(item)
     if c.user_is_loggedin and (c.user_is_admin or item.subreddit.is_moderator(c.user)):
         # let admins and moderators see them regardless
         return wouldkeep
     elif wouldkeep and c.user_is_loggedin and c.user._id == item.author_id:
         # also let the author of the link see them
         return True
     elif item._date > timeago(g.new_incubation):
         # it's too young to show yet
         return False
     else:
         # otherwise, fall back to the regular logic (don't
         # show hidden links, etc)
         return wouldkeep
Example #38
    def preflight_check(self):
        if getattr(self, 'force_run', False):
            return True

        sr_id = getattr(self, 'batched_time_srid', None)
        if not sr_id:
            return True

        # this is a special query that tries to run less often, see
        # the discussion about batched_time_times
        sr = Subreddit._byID(sr_id, data=True)

        if (self.iden in getattr(sr, 'last_batch_query', {})
                and sr.last_batch_query[self.iden] > utils.timeago('1 day')):
            # this has been done in the last 24 hours, so we should skip it
            return False

        return True
Example #40
def time_listings(intervals):
    cutoff_by_interval = {interval: epoch_seconds(timeago("1 %s" % interval))
                          for interval in intervals}

    @mr_tools.dataspec_m_thing(
        ("url", str),
        ("sr_id", int),
        ("author_id", int),
    )
    def process(thing):
        if thing.deleted:
            return

        thing_cls = thingcls_by_name[thing.thing_type]
        fname = make_fullname(thing_cls, thing.thing_id)
        thing_score = score(thing.ups, thing.downs)
        thing_controversy = controversy(thing.ups, thing.downs)

        for interval, cutoff in cutoff_by_interval.iteritems():
            if thing.timestamp < cutoff:
                continue

            yield ("user/%s/top/%s/%d" % (thing.thing_type, interval, thing.author_id),
                   thing_score, thing.timestamp, fname)
            yield ("user/%s/controversial/%s/%d" % (thing.thing_type, interval, thing.author_id),
                   thing_controversy, thing.timestamp, fname)

            if thing.spam:
                continue

            if thing.thing_type == "link":
                yield ("sr/link/top/%s/%d" % (interval, thing.sr_id),
                       thing_score, thing.timestamp, fname)
                yield ("sr/link/controversial/%s/%d" % (interval, thing.sr_id),
                       thing_controversy, thing.timestamp, fname)

                if thing.url:
                    for domain in UrlParser(thing.url).domain_permutations():
                        yield ("domain/link/top/%s/%s" % (interval, domain),
                               thing_score, thing.timestamp, fname)
                        yield ("domain/link/controversial/%s/%s" % (interval, domain),
                               thing_controversy, thing.timestamp, fname)

    mr_tools.mr_map(process)
Example #41
    def delete_old(cls, age="3 days", limit=500, verbose=False):
        cutoff = timeago(age)
        q = cls._query(cls.c._date < cutoff)
        q._limit = limit

        accounts = set()
        defendants = set()
        for j in q:
            accounts.add(j._thing1)
            defendants.add(j._thing2)
            j._delete()

        for a in accounts:
            Jury.by_account(a, _update=True)

        for d in defendants:
            if verbose:
                print "Deleting juries for defendant %s" % d._fullname
            Jury.by_defendant(d, _update=True)
Example #43
def top_user_change(period = '1 day'):
    rel = Vote.rel(Account, Link)
    rt, account, link, dt = tdb.get_rel_table(rel._type_id)

    author = dt.alias()

    date = utils.timeago(period)
    
    s = sa.select([author.c.value, sa.func.sum(sa.cast(rt.c.name, sa.Integer))],
                  sa.and_(rt.c.date > date,
                          author.c.thing_id == rt.c.rel_id,
                          author.c.key == 'author_id'),
                  group_by = author.c.value,
                  order_by = sa.desc(sa.func.sum(sa.cast(rt.c.name, sa.Integer))),
                  limit = 10)

    rows = s.execute().fetchall()
    
    return [(int(r.value), r.sum) for r in rows]
Example #44
    def query(self):
        q = SubscriptionStorage._query(SubscriptionStorage.c._thing1_id == c.user._id,
                                       SubscriptionStorage.c._t2_deleted == False,
                                       SubscriptionStorage.c._name == 'subscriptionstorage',
                                       sort = desc('_t2_interestingness'),
                                       eager_load = True,
                                       thing_data = not g.use_query_cache
                                       )
        if not c.user_is_admin:
            q._filter(SubscriptionStorage.c._t2_spam == False)

        q.prewrap_fn = lambda x: x._thing2

        if self.time == 'last':
            q._filter(SubscriptionStorage.c._date >= last_dashboard_visit())
        elif self.time != 'all':
            q._filter(SubscriptionStorage.c._date >= timeago(queries.relation_db_times[self.time]))

        return q
Example #45
    def query(self):
        q = SubscriptionStorage._query(
            SubscriptionStorage.c._thing1_id == c.user._id,
            SubscriptionStorage.c._t2_deleted == False,
            SubscriptionStorage.c._name == 'subscriptionstorage',
            sort=desc('_t2_interestingness'),
            eager_load=True,
            thing_data=not g.use_query_cache)
        if not c.user_is_admin:
            q._filter(SubscriptionStorage.c._t2_spam == False)

        q.prewrap_fn = lambda x: x._thing2

        if self.time == 'last':
            q._filter(SubscriptionStorage.c._date >= last_dashboard_visit())
        elif self.time != 'all':
            q._filter(SubscriptionStorage.c._date >= timeago(
                queries.relation_db_times[self.time]))

        return q
Example #46
def user_vote_change_comments(period = '1 day'):
    rel = Vote.rel(Account, Comment)
    type = tdb.rel_types_id[rel._type_id]
    # rt = rel table
    # dt = data table
    rt, account_tt, comment_tt, dt = type.rel_table

    aliases = tdb.alias_generator()
    author_dt = dt.alias(aliases.next())

    date = utils.timeago(period)
    
    s = sa.select([author_dt.c.value, sa.func.sum(sa.cast(rt.c.name, sa.Integer))],
                  sa.and_(rt.c.date > date,
                          author_dt.c.thing_id == rt.c.rel_id,
                          author_dt.c.key == 'author_id'),
                  group_by = author_dt.c.value)

    rows = s.execute().fetchall()

    return [(int(r.value), r.sum) for r in rows]
Example #47
def subreddit_stats(config):
    sr_counts = defaultdict(int)
    for kind in (Link, Comment):
        thing_table, data_table = get_thing_table(kind._type_id)
        first_id = list(kind._query(kind.c._date > timeago('1 day'), sort=asc('_date'), limit=1))
        if not first_id:
            continue
        else:
            first_id = first_id[0]._id

        q = sa.select([data_table.c.value, sa.func.count(data_table.c.value)],
                (data_table.c.thing_id > first_id)
                    & (data_table.c.key == 'sr_id')
                    & (thing_table.c.thing_id == data_table.c.thing_id)
                    & (thing_table.c.spam == False),
                group_by=data_table.c.value)

        for sr_id, count in q.execute():
            sr_counts[sr_id] += count

    return {'subreddits_active_past_day': len(list(count for count in sr_counts.itervalues() if count > 5))}
Example #48
def top_user_change(period="1 day"):
    rel = Vote.rel(Account, Link)
    type = tdb.rel_types_id[rel._type_id]
    # rt = rel table
    # dt = data table
    rt, account, link, dt = type.rel_table

    aliases = tdb.alias_generator()
    author = dt.alias(aliases.next())

    date = utils.timeago(period)

    s = sa.select(
        [author.c.value, sa.func.sum(sa.cast(rt.c.name, sa.Integer))],
        sa.and_(rt.c.date > date, author.c.thing_id == rt.c.rel_id, author.c.key == "author_id"),
        group_by=author.c.value,
        order_by=sa.desc(sa.func.sum(sa.cast(rt.c.name, sa.Integer))),
        limit=10,
    )

    rows = s.execute().fetchall()

    return [(int(r.value), r.sum) for r in rows]
Example #49
def catch_up_batch_queries():
    # Catch up on batched_time_times queries that should have been run but
    # haven't. This should be cronned to run about once an hour. The more
    # often it runs, the more the work of rerunning the actual queries is
    # spread out, but every run has a fixed cost of looking at every
    # single subreddit.
    sr_q = Subreddit._query(sort=desc('_downs'), data=True)
    dayago = utils.timeago('1 day')
    for sr in fetch_things2(sr_q):
        if hasattr(sr, 'last_valid_vote') and sr.last_valid_vote > dayago:
            # only worth rerunning if there has been a vote in the last day
            # (if we don't know when the last vote was, it couldn't have
            # been today)
            for sort in batched_time_sorts:
                for time in batched_time_times:
                    q = make_batched_time_query(sr, sort, time)
                    if q.preflight_check():
                        # we haven't run the batched_time_times in the
                        # last day
                        add_queries([q])

    # make sure that all of the jobs have been completed or processed
    # by the time we return
    worker.join()
Example #50
def catch_up_batch_queries():
    # Catch up on batched_time_times queries that should have been run but
    # haven't. This should be cronned to run about once an hour. The more
    # often it runs, the more the work of rerunning the actual queries is
    # spread out, but every run has a fixed cost of looking at every
    # single subreddit.
    sr_q = Subreddit._query(sort=desc("_downs"), data=True)
    dayago = utils.timeago("1 day")
    for sr in fetch_things2(sr_q):
        if hasattr(sr, "last_valid_vote") and sr.last_valid_vote > dayago:
            # if we don't know when the last vote was, it couldn't
            # have been today
            for sort in batched_time_sorts:
                for time in batched_time_times:
                    q = make_batched_time_query(sr, sort, time)
                    if q.preflight_check():
                        # we haven't run the batched_time_times in the
                        # last day
                        add_queries([q])

    # make sure that all of the jobs have been completed or processed
    # by the time we return
    worker.join()
Example #51
def user_vote_change_links(period='1 day'):
    rel = Vote.rel(Account, Link)
    type = tdb.rel_types_id[rel._type_id]
    # rt = rel table
    # dt = data table
    rt, account_tt, link_tt, dt = type.rel_table

    aliases = tdb.alias_generator()
    author_dt = dt.alias(aliases.next())

    link_dt = tdb.types_id[Link._type_id].data_table[0].alias(aliases.next())

    # Create an SQL CASE statement for the subreddit vote multiplier
    cases = []
    for subreddit in subreddits_with_custom_karma_multiplier():
        cases.append((sa.cast(link_dt.c.value, sa.Integer) == subreddit._id,
                      subreddit.post_karma_multiplier))
    cases.append(
        (True, g.post_karma_multiplier))  # The default article multiplier

    date = utils.timeago(period)

    s = sa.select([
        author_dt.c.value,
        sa.func.sum(sa.cast(rt.c.name, sa.Integer) * sa.case(cases))
    ],
                  sa.and_(rt.c.date >= date,
                          author_dt.c.thing_id == rt.c.rel_id,
                          author_dt.c.key == 'author_id',
                          link_tt.c.thing_id == rt.c.thing2_id,
                          link_tt.c.date >= date, link_dt.c.key == 'sr_id',
                          link_dt.c.thing_id == rt.c.thing2_id),
                  group_by=author_dt.c.value)

    rows = s.execute().fetchall()
    return [(int(r.value), r.sum) for r in rows]
Example #52
def short_timesince(date):
    # returns string in the format of '%{number}{unit}'
    # examples: '2d', '45m', '65d'
    if date > timeago("1 minute"):
        return _("just now")

    now = datetime.datetime.now(g.tz)
    diff = int((now - date).total_seconds())

    days = diff / 86400  # 86400 = 24 hours * 60 minutes * 60 seconds per day
    hours = diff % 86400 / 3600  # 3600 = 60 minutes * 60 seconds per hour
    minutes = diff % 3600 / 60  # 60 = 60 seconds per minute
    years = diff / 31536000  # 31536000 = 86400 * 365 days

    if years > 0:
        return "%sy" % years

    if days > 0:
        return "%sd" % days

    if hours > 0:
        return "%sh" % hours

    return "%sm" % minutes
Example #53
def user_vote_change_comments(period='1 day'):
    rel = Vote.rel(Account, Comment)
    type = tdb.rel_types_id[rel._type_id]
    # rt = rel table
    # dt = data table
    rt, account_tt, comment_tt, dt = type.rel_table

    aliases = tdb.alias_generator()
    author_dt = dt.alias(aliases.next())

    date = utils.timeago(period)

    s = sa.select(
        [author_dt.c.value,
         sa.func.sum(sa.cast(rt.c.name, sa.Integer))],
        sa.and_(rt.c.date >= date, author_dt.c.thing_id == rt.c.rel_id,
                author_dt.c.key == 'author_id',
                comment_tt.c.thing_id == rt.c.thing2_id,
                comment_tt.c.date >= date),
        group_by=author_dt.c.value)

    rows = s.execute().fetchall()

    return [(int(r.value), r.sum) for r in rows]
Example #54
def top_user_change(period='1 day'):
    rel = Vote.rel(Account, Link)
    type = tdb.rel_types_id[rel._type_id]
    # rt = rel table
    # dt = data table
    rt, account, link, dt = type.rel_table

    aliases = tdb.alias_generator()
    author = dt.alias(aliases.next())

    date = utils.timeago(period)

    s = sa.select(
        [author.c.value,
         sa.func.sum(sa.cast(rt.c.name, sa.Integer))],
        sa.and_(rt.c.date > date, author.c.thing_id == rt.c.rel_id,
                author.c.key == 'author_id'),
        group_by=author.c.value,
        order_by=sa.desc(sa.func.sum(sa.cast(rt.c.name, sa.Integer))),
        limit=10)

    rows = s.execute().fetchall()

    return [(int(r.value), r.sum) for r in rows]
Example #55
 def operator(self, time):
     from r2.models import Link
     if time != 'all':
         return Link.c._date >= timeago(time)
Example #56
    def add_props(cls, user, wrapped):
        from r2.lib.count import incr_counts
        from r2.lib.media import thumbnail_url
        from r2.lib.utils import timeago

        saved = Link._saved(user, wrapped) if user else {}
        hidden = Link._hidden(user, wrapped) if user else {}
        clicked = Link._clicked(user, wrapped) if user else {}
        #clicked = {}

        for item in wrapped:
            show_media = False
            if c.user.pref_compress:
                pass
            elif c.user.pref_media == 'on':
                show_media = True
            elif c.user.pref_media == 'subreddit' and item.subreddit.show_media:
                show_media = True
            elif (item.promoted and item.has_thumbnail
                  and c.user.pref_media != 'off'):
                show_media = True

            if not show_media:
                item.thumbnail = ""
            elif item.has_thumbnail:
                item.thumbnail = thumbnail_url(item)
            else:
                item.thumbnail = g.default_thumb

            item.domain = (domain(item.url) if not item.is_self else 'self.' +
                           item.subreddit.name)
            if not hasattr(item, 'top_link'):
                item.top_link = False
            item.urlprefix = ''
            item.saved = bool(saved.get((user, item, 'save')))
            item.hidden = bool(hidden.get((user, item, 'hide')))
            item.clicked = clicked.get((user, item, 'click'))
            item.num = None
            item.score_fmt = Score.signed_number
            item.permalink = item.make_permalink(item.subreddit)
            if item.is_self:
                item.url = item.make_permalink(item.subreddit,
                                               force_domain=True)

            if c.user_is_admin:
                item.hide_score = False
            elif item.promoted:
                item.hide_score = True
            elif c.user == item.author:
                item.hide_score = False
            elif item._date > timeago("2 hours"):
                item.hide_score = True
            else:
                item.hide_score = False

            # Don't allow users to vote on their own posts and don't
            # allow users to vote on collapsed posts shown when
            # viewing comment permalinks.
            item.votable = bool(
                c.user != item.author
                and not getattr(item, 'for_comment_permalink', False))

            if c.user_is_loggedin and item.author._id == c.user._id:
                item.nofollow = False
            elif item.score <= 1 or item._spam or item.author._spam:
                item.nofollow = True
            else:
                item.nofollow = False

            if c.user_is_loggedin and item.subreddit.name == c.user.draft_sr_name:
                item.draft = True
            else:
                item.draft = False

        if c.user_is_loggedin:
            incr_counts(wrapped)
Example #57
def get_link_counts(period = count_period):
    links = Link._query(Link.c._date >= utils.timeago(period),
                        limit=50, data = True)
    return dict((l._fullname, (0, l.sr_id)) for l in links)
Example #58
def only_recent(items):
    return filter(lambda l: l._date > utils.timeago('%d day' % g.HOT_PAGE_AGE),
                  items)
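Note: every example above calls the r2 helpers timeago() and epoch_seconds() without defining them. The sketch below is a minimal, self-contained approximation of the behavior the call sites appear to assume (a '1 day'-style string in, a datetime that far in the past out; a datetime in, seconds since the Unix epoch out). It is illustrative only and not the actual reddit implementation; the unit table and use of naive UTC datetimes are assumptions made to keep the sketch runnable on its own.

import datetime

# Rough seconds-per-unit table; illustrative values, not the real r2 code.
_SECONDS_PER_UNIT = {
    'second': 1, 'minute': 60, 'hour': 3600,
    'day': 86400, 'week': 604800, 'month': 2592000, 'year': 31536000,
}

def timeago(interval):
    """Return the datetime `interval` in the past, e.g. timeago('1 day'),
    timeago('2 hours'), timeago('60 seconds').  (The real helper returns a
    timezone-aware datetime in g.tz; naive UTC is used here to stay
    self-contained.)"""
    count, unit = interval.split()
    seconds = int(count) * _SECONDS_PER_UNIT[unit.rstrip('s')]
    return datetime.datetime.utcnow() - datetime.timedelta(seconds=seconds)

def epoch_seconds(date):
    """Return seconds from the Unix epoch to `date`, as a float."""
    return (date - datetime.datetime(1970, 1, 1)).total_seconds()

Under these assumptions, epoch_seconds(timeago('1 week')) yields the one-week-back cutoff that the time_listings examples compare against link.timestamp.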