def populate_spam_filtered():
    from r2.lib.db.queries import get_spam_links, get_spam_comments
    from r2.lib.db.queries import get_spam_filtered_links, get_spam_filtered_comments
    from r2.models.query_cache import CachedQueryMutator

    def was_filtered(thing):
        if thing._spam and not thing._deleted and \
           getattr(thing, 'verdict', None) != 'mod-removed':
            return True
        else:
            return False

    q = Subreddit._query(sort=asc('_date'))
    for sr in fetch_things2(q):
        print 'Processing %s' % sr.name

        links = Thing._by_fullname(get_spam_links(sr), data=True,
                                   return_dict=False)
        comments = Thing._by_fullname(get_spam_comments(sr), data=True,
                                      return_dict=False)
        insert_links = [l for l in links if was_filtered(l)]
        insert_comments = [c for c in comments if was_filtered(c)]

        with CachedQueryMutator() as m:
            m.insert(get_spam_filtered_links(sr), insert_links)
            m.insert(get_spam_filtered_comments(sr), insert_comments)
def add_all_srs():
    """Adds every listing query for every subreddit to the queue."""
    q = Subreddit._query(sort=asc('_date'))
    for sr in fetch_things2(q):
        add_queries(all_queries(get_links, sr, ('hot', 'new', 'old'), ['all']))
        add_queries(all_queries(get_links, sr, ('top', 'controversial'),
                                db_times.keys()))
        add_queries([get_links(sr, 'toplinks', 'all')])
def gen_keys():
    yield promoted_memo_key

    # just let this one do its own writing
    load_all_reddits()

    yield queries.get_all_comments().iden

    l_q = Link._query(Link.c._spam == (True, False),
                      Link.c._deleted == (True, False),
                      sort=desc('_date'),
                      data=True,
                      )
    for link in fetch_things2(l_q, verbosity):
        yield comments_key(link._id)
        yield last_modified_key(link, 'comments')

    a_q = Account._query(Account.c._spam == (True, False),
                         sort=desc('_date'),
                         )
    for account in fetch_things2(a_q, verbosity):
        yield messages_key(account._id)
        yield last_modified_key(account, 'overview')
        yield last_modified_key(account, 'commented')
        yield last_modified_key(account, 'submitted')
        yield last_modified_key(account, 'liked')
        yield last_modified_key(account, 'disliked')
        yield queries.get_comments(account, 'new', 'all').iden
        yield queries.get_submitted(account, 'new', 'all').iden
        yield queries.get_liked(account).iden
        yield queries.get_disliked(account).iden
        yield queries.get_hidden(account).iden
        yield queries.get_saved(account).iden
        yield queries.get_inbox_messages(account).iden
        yield queries.get_unread_messages(account).iden
        yield queries.get_inbox_comments(account).iden
        yield queries.get_unread_comments(account).iden
        yield queries.get_inbox_selfreply(account).iden
        yield queries.get_unread_selfreply(account).iden
        yield queries.get_sent(account).iden

    sr_q = Subreddit._query(Subreddit.c._spam == (True, False),
                            sort=desc('_date'),
                            )
    for sr in fetch_things2(sr_q, verbosity):
        yield last_modified_key(sr, 'stylesheet_contents')
        yield queries.get_links(sr, 'hot', 'all').iden
        yield queries.get_links(sr, 'new', 'all').iden
        for sort in 'top', 'controversial':
            for time in 'hour', 'day', 'week', 'month', 'year', 'all':
                yield queries.get_links(sr, sort, time,
                                        merge_batched=False).iden
        yield queries.get_spam_links(sr).iden
        yield queries.get_spam_comments(sr).iden
        yield queries.get_reported_links(sr).iden
        yield queries.get_reported_comments(sr).iden
        yield queries.get_subreddit_messages(sr).iden
        yield queries.get_unread_subreddit_messages(sr).iden
def backfill(after=None):
    q = Subreddit._query(sort=asc('_date'))
    if after:
        sr = Subreddit._by_name(after)
        q = q._after(sr)

    for sr in fetch_things2(q):
        backfill_sr(sr)
def send_account_summary_email(account_thing_id, verbose=False,
                               send_email=send_email):
    account = Account._byID(account_thing_id, data=True)
    if not should_send_activity_summary_email(account):
        return

    # if we've never sent an email, only tell about the last 24 hours
    a_day_ago = datetime.datetime.now(pytz.utc) - datetime.timedelta(hours=24)
    if getattr(account, 'last_email_sent_at', None) is None:
        account.last_email_sent_at = a_day_ago

    c.content_langs = 'en-US'

    # Find all the "active" links for this user. Frontpage uses the c.user
    # global to find the right subreddits for the current user
    c.user = account
    c.user_is_loggedin = True
    thing_ids = []
    for link in Frontpage.get_links('active', 'all'):
        thing_ids.append(link)
    active_links_hash = Link._by_fullname(thing_ids, data=True)

    active_links = [active_links_hash[t_id] for t_id in thing_ids
                    if active_links_hash[t_id]._active > account.last_email_sent_at]

    idx = 0
    for ll in active_links:
        idx += 1
        ll.num = idx

    # Find all new spaces created since we last sent the user an email
    new_spaces = list(fetch_things2(Subreddit._query(
        Subreddit.c._date > account.last_email_sent_at,
        sort=asc('_date'))))

    # don't bother sending email if there's nothing to report.
    if len(new_spaces) == 0 and len(active_links) == 0:
        return

    # Get the date and time
    now = datetime.datetime.now(pytz.timezone('US/Eastern'))
    date_string = now.strftime("%A %B %d, %Y")
    time_string = now.strftime("%I:%M %p")

    # Render the template
    html_email_template = g.mako_lookup.get_template('summary_email.html')
    html_body = html_email_template.render(
        last_email_sent_at=account.last_email_sent_at,
        new_spaces=new_spaces,
        active_links=active_links,
        date_string=date_string,
        time_string=time_string)

    # with open('out.html', 'w') as ff:
    #     ff.write(html_body)

    if verbose:
        print "sending email to %s" % (account.email,)

    send_email(account.email, html_body, date_string)

    account.last_email_sent_at = datetime.datetime.now(pytz.utc)
    account._commit()
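# Hedged usage sketch (not part of the original source): one way the helper
# above could be driven for every account. Account, fetch_things2, and desc
# are assumed to be importable as in the surrounding functions; the query and
# its filter here are illustrative only, not the project's actual cron job.
def send_all_account_summary_emails(verbose=False):
    q = Account._query(Account.c._spam == False, sort=desc('_date'), data=True)
    for account in fetch_things2(q):
        # should_send_activity_summary_email() is re-checked inside the
        # helper, so unqualified accounts are skipped there.
        send_account_summary_email(account._id, verbose=verbose)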
def add_all_ban_report_srs():
    """Adds the initial spam/reported pages to the report queue"""
    q = Subreddit._query(sort=asc('_date'))
    for sr in fetch_things2(q):
        add_queries([get_spam_links(sr),
                     get_spam_comments(sr),
                     get_reported_links(sr),
                     get_reported_comments(sr),
                     ])
def add_allow_top_to_srs():
    "Add the allow_top property to all stored subreddits"
    from r2.models import Subreddit
    from r2.lib.db.operators import desc
    from r2.lib.utils import fetch_things2

    q = Subreddit._query(Subreddit.c._spam == (True, False),
                         sort=desc('_date'))
    for sr in fetch_things2(q):
        sr.allow_top = True
        sr._commit()
def add_all_srs():
    """Recalculates every listing query for every subreddit. Very, very slow."""
    q = Subreddit._query(sort=asc("_date"))
    for sr in fetch_things2(q):
        for q in all_queries(get_links, sr, ("hot", "new"), ["all"]):
            q.update()
        for q in all_queries(get_links, sr, time_filtered_sorts,
                             db_times.keys()):
            q.update()
        get_spam_links(sr).update()
        # get_spam_comments(sr).update()
        get_reported_links(sr).update()
def gen_keys():
    yield promoted_memo_key

    # just let this one do its own writing
    load_all_reddits()

    yield queries.get_all_comments().iden

    l_q = Link._query(
        Link.c._spam == (True, False),
        Link.c._deleted == (True, False),
        sort=desc("_date"),
        data=True,
    )
    for link in fetch_things2(l_q, verbosity):
        yield comments_key(link._id)
        yield last_modified_key(link, "comments")

    a_q = Account._query(Account.c._spam == (True, False), sort=desc("_date"))
    for account in fetch_things2(a_q, verbosity):
        yield messages_key(account._id)
        yield last_modified_key(account, "overview")
        yield last_modified_key(account, "commented")
        yield last_modified_key(account, "submitted")
        yield last_modified_key(account, "liked")
        yield last_modified_key(account, "disliked")
        yield queries.get_comments(account, "new", "all").iden
        yield queries.get_submitted(account, "new", "all").iden
        yield queries.get_liked(account).iden
        yield queries.get_disliked(account).iden
        yield queries.get_hidden(account).iden
        yield queries.get_saved(account).iden
        yield queries.get_inbox_messages(account).iden
        yield queries.get_unread_messages(account).iden
        yield queries.get_inbox_comments(account).iden
        yield queries.get_unread_comments(account).iden
        yield queries.get_inbox_selfreply(account).iden
        yield queries.get_unread_selfreply(account).iden
        yield queries.get_sent(account).iden

    sr_q = Subreddit._query(Subreddit.c._spam == (True, False), sort=desc("_date"))
    for sr in fetch_things2(sr_q, verbosity):
        yield last_modified_key(sr, "stylesheet_contents")
        yield queries.get_links(sr, "hot", "all").iden
        yield queries.get_links(sr, "new", "all").iden
        for sort in "top", "controversial":
            for time in "hour", "day", "week", "month", "year", "all":
                yield queries.get_links(sr, sort, time,
                                        merge_batched=False).iden
        yield queries.get_spam_links(sr).iden
        yield queries.get_spam_comments(sr).iden
        yield queries.get_reported_links(sr).iden
        yield queries.get_reported_comments(sr).iden
        yield queries.get_subreddit_messages(sr).iden
        yield queries.get_unread_subreddit_messages(sr).iden
def popular_searches():
    top_reddits = Subreddit._query(Subreddit.c.type == 'public',
                                   sort=desc('_downs'),
                                   limit=100,
                                   data=True)
    top_searches = {}
    for sr in top_reddits:
        name = sr.name.lower()
        for i in xrange(min(len(name), 3)):
            query = name[:i + 1]
            r = search_reddits(query)
            top_searches[query] = r
    return top_searches
def popular_searches():
    top_reddits = Subreddit._query(Subreddit.c.type == 'public',
                                   sort=desc('_downs'),
                                   limit=100,
                                   data=True)
    top_searches = {}
    for sr in top_reddits:
        name = sr.name.lower()
        for i in xrange(min(len(name), 3)):
            query = name[:i + 1]
            r = search_reddits(query)
            top_searches[query] = r
    return top_searches
def add_all_srs():
    """Recalculates every listing query for every subreddit. Very, very slow."""
    q = Subreddit._query(sort=asc('_date'))
    for sr in fetch_things2(q):
        for q in all_queries(get_links, sr, ('hot', 'new'), ['all']):
            q.update()
        for q in all_queries(get_links, sr, time_filtered_sorts,
                             db_times.keys()):
            q.update()
        get_spam_links(sr).update()
        get_spam_comments(sr).update()
        get_reported_links(sr).update()
        get_reported_comments(sr).update()
def add_all_srs():
    """Adds every listing query for every subreddit to the queue."""
    q = Subreddit._query(sort=asc("_date"))
    for sr in fetch_things2(q):
        add_queries(all_queries(get_links, sr, ("hot", "new"), ["all"]))
        add_queries(all_queries(get_links, sr, ("top", "controversial"),
                                db_times.keys()))
        add_queries(
            [
                get_spam_links(sr),
                # get_spam_comments(sr),
                get_reported_links(sr),
                # get_reported_comments(sr),
            ]
        )
def popular_searches(include_over_18=True):
    top_reddits = Subreddit._query(Subreddit.c.type == 'public',
                                   sort=desc('_downs'),
                                   limit=100,
                                   data=True)
    top_searches = {}
    for sr in top_reddits:
        if sr.over_18 and not include_over_18:
            continue
        name = sr.name.lower()
        for i in xrange(min(len(name), 3)):
            query = name[:i + 1]
            r = search_reddits(query, include_over_18)
            top_searches[query] = r
    return top_searches
def popular_searches(include_over_18=True):
    top_reddits = Subreddit._query(Subreddit.c.type == 'public',
                                   sort=desc('_downs'),
                                   limit=100,
                                   data=True)
    top_searches = {}
    for sr in top_reddits:
        if sr.over_18 and not include_over_18:
            continue
        name = sr.name.lower()
        for i in xrange(min(len(name), 3)):
            query = name[:i + 1]
            r = search_reddits(query, include_over_18)
            top_searches[query] = r
    return top_searches
def cache_lists():
    def _chop(srs):
        srs.sort(key=lambda s: s._downs, reverse=True)
        return srs[:limit]

    # bylang    =:= dict((lang, over18_state) -> [Subreddit])
    # lang      =:= all | lang()
    # nsfwstate =:= no_over18 | allow_over18 | only_over18
    bylang = {}

    for sr in fetch_things2(Subreddit._query(sort=desc('_date'),
                                             data=True)):
        aid = getattr(sr, 'author_id', None)
        if aid is not None and aid < 0:
            # skip special system reddits like promos
            continue

        if sr.type not in ('public', 'restricted'):
            # skips reddits that can't appear in the default list
            # because of permissions
            continue

        g.log.debug(sr.name)

        for lang in 'all', sr.lang:
            over18s = ['allow_over18']
            if sr.over_18:
                over18s.append('only_over18')
            else:
                over18s.append('no_over18')

            for over18 in over18s:
                k = (lang, over18)
                bylang.setdefault(k, []).append(sr)

                # keep the lists small while we work
                if len(bylang[k]) > limit*2:
                    g.log.debug('Shrinking %s' % (k,))
                    bylang[k] = _chop(bylang[k])

    for (lang, over18), srs in bylang.iteritems():
        srs = _chop(srs)
        sr_tuples = map(lambda sr: (sr._downs, sr.allow_top, sr._id), srs)

        g.log.debug("For %s/%s setting %s" % (lang, over18,
                                              map(lambda sr: sr.name, srs)))
        g.permacache.set(cached_srs_key(lang, over18), sr_tuples)
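# Hedged read-back sketch (not part of the original source): fetching the
# (downs, allow_top, id) tuples that cache_lists() above stores under
# cached_srs_key(lang, over18). Assumes the same cached_srs_key helper and
# g.permacache object used by cache_lists; the function name is illustrative.
def get_cached_srs(lang='all', over18='no_over18'):
    return g.permacache.get(cached_srs_key(lang, over18)) or []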
def load_all_reddits():
    query_cache = {}

    q = Subreddit._query(Subreddit.c.type == 'public',
                         Subreddit.c._downs > 1,
                         sort=(desc('_downs'), desc('_ups')),
                         data=True)
    for sr in utils.fetch_things2(q):
        name = sr.name.lower()
        for i in xrange(len(name)):
            prefix = name[:i + 1]
            names = query_cache.setdefault(prefix, [])
            if len(names) < 10:
                names.append(sr.name)

    for name_prefix, subreddits in query_cache.iteritems():
        SubredditsByPartialName._set_values(name_prefix, {'srs': subreddits})
def load_all_reddits():
    query_cache = {}

    q = Subreddit._query(Subreddit.c.type == 'public',
                         Subreddit.c._downs > 1,
                         sort=(desc('_downs'), desc('_ups')),
                         data=True)
    for sr in utils.fetch_things2(q):
        name = sr.name.lower()
        for i in xrange(len(name)):
            prefix = name[:i + 1]
            names = query_cache.setdefault(prefix, [])
            if len(names) < 10:
                names.append((sr.name, sr.over_18))

    for name_prefix, subreddits in query_cache.iteritems():
        SubredditsByPartialName._set_values(name_prefix, {'tups': subreddits})
def cache_lists():
    def _chop(srs):
        srs.sort(key=lambda s: s._downs, reverse=True)
        return srs[:limit]

    # bylang    =:= dict((lang, over18_state) -> [Subreddit])
    # lang      =:= all | lang()
    # nsfwstate =:= no_over18 | allow_over18 | only_over18
    bylang = {}

    for sr in fetch_things2(Subreddit._query(sort=desc('_date'),
                                             data=True)):
        aid = getattr(sr, 'author_id', None)
        if aid is not None and aid < 0:
            # skip special system reddits like promos
            continue

        type = getattr(sr, 'type', 'private')
        if type not in ('public', 'restricted'):
            # skips reddits that can't appear in the default list
            # because of permissions
            continue

        for lang in 'all', sr.lang:
            over18s = ['allow_over18']
            if sr.over_18:
                over18s.append('only_over18')
            else:
                over18s.append('no_over18')

            for over18 in over18s:
                k = (lang, over18)
                bylang.setdefault(k, []).append(sr)

                # keep the lists small while we work
                if len(bylang[k]) > limit*2:
                    bylang[k] = _chop(bylang[k])

    for (lang, over18), srs in bylang.iteritems():
        srs = _chop(srs)
        sr_tuples = map(lambda sr: (sr._downs, sr.allow_top, sr._id), srs)

        print "For %s/%s setting %s" % (lang, over18,
                                        map(lambda sr: sr.name, srs[:50]))
        SubredditPopularityByLanguage._set_values(lang, {over18: sr_tuples})
def set_from_weights(cls, all_weights):
    weights = all_weights.copy()

    all_ads = itertools.chain.from_iterable(all_weights.itervalues())
    weights[cls.ALL_ADS_ID] = all_ads

    if '' in weights:
        weights[cls.FRONT_PAGE] = weights.pop('')

    timeslot = datetime.datetime.now(g.tz)
    while weights:
        srid, promos = weights.popitem()
        weight_refs = [WeightingRef.from_promo(*promo) for promo in promos]
        sbw = cls(srid, timeslot, weight_refs, is_srid=True)
        sbw.set_as_latest()

    # Clear out expired ads
    query = Subreddit._query(sort=db_ops.desc('_date'), data=False)
    for subreddit in fetch_things2(query):
        if subreddit._id not in all_weights:
            cls.clear(subreddit, timeslot=timeslot)
def populate_spam_filtered():
    from r2.lib.db.queries import get_spam_links, get_spam_comments
    from r2.lib.db.queries import get_spam_filtered_links, get_spam_filtered_comments
    from r2.models.query_cache import CachedQueryMutator

    def was_filtered(thing):
        if (thing._spam and not thing._deleted and
                getattr(thing, "verdict", None) != "mod-removed"):
            return True
        else:
            return False

    q = Subreddit._query(sort=asc("_date"))
    for sr in fetch_things2(q):
        print "Processing %s" % sr.name

        links = Thing._by_fullname(get_spam_links(sr), data=True,
                                   return_dict=False)
        comments = Thing._by_fullname(get_spam_comments(sr), data=True,
                                      return_dict=False)
        insert_links = [l for l in links if was_filtered(l)]
        insert_comments = [c for c in comments if was_filtered(c)]

        with CachedQueryMutator() as m:
            m.insert(get_spam_filtered_links(sr), insert_links)
            m.insert(get_spam_filtered_comments(sr), insert_comments)
def catch_up_batch_queries():
    # Catch up on batched_time_times queries that should have been run but
    # haven't. This should be cronned to run about once an hour. The more
    # often it runs, the more the work of rerunning the actual queries is
    # spread out, but every run has a fixed cost of looking at every single
    # subreddit.
    sr_q = Subreddit._query(sort=desc("_downs"), data=True)
    dayago = utils.timeago("1 day")
    for sr in fetch_things2(sr_q):
        if hasattr(sr, "last_valid_vote") and sr.last_valid_vote > dayago:
            # if we don't know when the last vote was, it couldn't
            # have been today
            for sort in batched_time_sorts:
                for time in batched_time_times:
                    q = make_batched_time_query(sr, sort, time)
                    if q.preflight_check():
                        # we haven't run the batched_time_times in the
                        # last day
                        add_queries([q])

    # make sure that all of the jobs have been completed or processed
    # by the time we return
    worker.join()
def catch_up_batch_queries():
    # Catch up on batched_time_times queries that should have been run but
    # haven't. This should be cronned to run about once an hour. The more
    # often it runs, the more the work of rerunning the actual queries is
    # spread out, but every run has a fixed cost of looking at every single
    # subreddit.
    sr_q = Subreddit._query(sort=desc('_downs'), data=True)
    dayago = utils.timeago('1 day')
    for sr in fetch_things2(sr_q):
        if hasattr(sr, 'last_valid_vote') and sr.last_valid_vote > dayago:
            # if we don't know when the last vote was, it couldn't
            # have been today
            for sort in batched_time_sorts:
                for time in batched_time_times:
                    q = make_batched_time_query(sr, sort, time)
                    if q.preflight_check():
                        # we haven't run the batched_time_times in the
                        # last day
                        add_queries([q])

    # make sure that all of the jobs have been completed or processed
    # by the time we return
    worker.join()
def load_all_reddits():
    query_cache = {}

    q = Subreddit._query(Subreddit.c.type == 'public',
                         Subreddit.c._spam == False,
                         Subreddit.c._downs > 1,
                         sort=(desc('_downs'), desc('_ups')),
                         data=True)
    for sr in utils.fetch_things2(q):
        if sr.quarantine:
            continue
        name = sr.name.lower()
        for i in xrange(len(name)):
            prefix = name[:i + 1]
            names = query_cache.setdefault(prefix, [])
            if len(names) < 10:
                unadvertisable = (
                    sr.hide_ads or
                    sr.name in g.live_config['anti_ads_subreddits']
                )
                names.append((sr.name, sr.over_18, unadvertisable))

    for name_prefix, subreddits in query_cache.iteritems():
        SubredditsByPartialName._set_values(name_prefix, {'tups': subreddits})
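# Hedged lookup sketch (not part of the original source): reading back the
# prefix -> tuples mapping that the load_all_reddits() variants above write,
# e.g. for subreddit-name autocomplete. Assumes SubredditsByPartialName follows
# the usual tdb_cassandra accessor pattern (_byID raising tdb_cassandra.NotFound
# for missing rows); the function name and limit are illustrative.
def lookup_subreddit_prefix(prefix, limit=10):
    from r2.lib.db import tdb_cassandra
    try:
        row = SubredditsByPartialName._byID(prefix.lower())
    except tdb_cassandra.NotFound:
        return []
    return list(getattr(row, 'tups', []))[:limit]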
def get_sr_counts(period=count_period):
    # The original body iterated over an undefined `links` variable and a
    # nonexistent `sr_id` attribute, apparently copied from a link-counting
    # helper; iterate the queried subreddits instead. `period` is accepted for
    # interface compatibility but is unused here, as in the original.
    srs = Subreddit._query()
    return dict((sr._fullname, (0, sr._id)) for sr in srs)
def get_live_subreddit_stylesheets():
    """List all currently visible subreddit stylesheet files."""
    subreddits = Subreddit._query(sort=desc("_date"))
    for sr in fetch_things2(subreddits):
        if sr.stylesheet_is_static:
            yield sr.static_stylesheet_name
def get_sr_counts():
    srs = utils.fetch_things2(Subreddit._query(sort=desc("_date")))
    return dict((sr._fullname, sr._ups) for sr in srs)