def subreddit_stats(config, ranges):
    def get_id(*args, **kwargs):
        kwargs.setdefault('limit', 1)
        results = list(kind._query(*args, **kwargs))
        if not results:
            return None
        else:
            return results[0]._id

    sr_counts = defaultdict(int)
    for kind in (Link, Comment):
        thing_table, data_table = get_thing_table(kind._type_id)
        first_id = get_id(kind.c._date > ranges['yesterday'][0],
                          sort=asc('_date'))
        last_id = get_id(kind.c._date < ranges['yesterday'][1],
                         sort=desc('_date'))
        if not first_id or not last_id:
            continue

        q = sa.select([data_table.c.value,
                       sa.func.count(data_table.c.value)],
                      (data_table.c.thing_id > first_id) &
                      (data_table.c.thing_id < last_id) &
                      (data_table.c.key == 'sr_id') &
                      (thing_table.c.thing_id == data_table.c.thing_id) &
                      (thing_table.c.spam == False),
                      group_by=data_table.c.value)

        for sr_id, count in q.execute():
            sr_counts[sr_id] += count

    return {
        'subreddits_active_yesterday':
            len(list(count for count in sr_counts.itervalues() if count > 5)),
    }

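# Hedged sketch, not from the original source: the sa.select above is roughly
# equivalent to the following SQL against the thing/data table pair returned
# by get_thing_table (the actual table names are per-type, e.g.
# reddit_thing_link / reddit_data_rel_link):
#
#   SELECT data.value, COUNT(data.value)
#     FROM data, thing
#    WHERE data.thing_id > :first_id
#      AND data.thing_id < :last_id
#      AND data.key = 'sr_id'
#      AND thing.thing_id = data.thing_id
#      AND thing.spam = FALSE
#    GROUP BY data.value
#
# A subreddit counts as "active" once it has more than five non-spam things
# for the day:
#
#   >>> sr_counts = {1: 12, 2: 3, 3: 6}
#   >>> len(list(count for count in sr_counts.itervalues() if count > 5))
#   2
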
def operator(self, sort):
    if sort == 'new':
        return operators.desc('_t1_date')
    elif sort == 'old':
        return operators.asc('_t1_date')
    elif sort == 'top':
        return operators.desc('_t1_score')

def fetch_things(t_class, since, until, batch_fn=None,
                 *query_params, **extra_query_dict):
    """
    Simple utility function to fetch all Things of class t_class
    (spam or not, but not deleted) that were created from 'since'
    to 'until'.
    """
    from copy import deepcopy
    from r2.lib.db.operators import asc

    if not batch_fn:
        batch_fn = lambda x: x

    query_params = ([t_class.c._date >= since,
                     t_class.c._date < until,
                     t_class.c._spam == (True, False)] +
                    list(query_params))
    query_dict = {'sort': asc('_date'),
                  'limit': 100,
                  'data': True}
    query_dict.update(extra_query_dict)

    q = t_class._query(*query_params, **query_dict)
    orig_rules = deepcopy(q._rules)

    things = list(q)
    while things:
        things = batch_fn(things)
        for t in things:
            yield t
        # reset the query and page past the last thing we yielded
        q._rules = deepcopy(orig_rules)
        q._after(t)
        things = list(q)

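# Hedged usage sketch, not part of the original module: batch_fn sees each
# 100-row batch before its items are yielded, which makes it a natural hook
# for bulk preloading. `Comment`, `Account`, `timeago`, and `g.tz` are assumed
# to be the usual r2 imports/globals; `process` is a hypothetical per-item
# handler.
#
#   def preload_authors(comments):
#       # one bulk lookup per batch instead of one per comment
#       Account._byID([com.author_id for com in comments], data=True)
#       return comments
#
#   for comment in fetch_things(Comment, timeago('1 day'),
#                               datetime.datetime.now(g.tz),
#                               batch_fn=preload_authors):
#       process(comment)
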
def add_all_srs():
    """Adds every listing query for every subreddit to the queue."""
    q = Subreddit._query(sort=asc('_date'))
    for sr in fetch_things2(q):
        add_queries(all_queries(get_links, sr, ('hot', 'new', 'old'), ['all']))
        add_queries(all_queries(get_links, sr, ('top', 'controversial'),
                                db_times.keys()))
        add_queries([get_links(sr, 'toplinks', 'all')])

def reset_last_email_sent_at_for_all_accounts():
    start_of_epoc = pytz.utc.localize(datetime.datetime.utcfromtimestamp(0))
    accounts = fetch_things2(Account._query(Account.c.email != None,
                                            sort=asc('_date')))
    for account in accounts:
        account.last_email_sent_at = start_of_epoc
        account._commit()

def test_send_summary_emails():
    accounts = fetch_things2(Account._query(Account.c.email != None,
                                            sort=asc('_date')))
    for account in accounts:
        a_day_ago = (datetime.datetime.now(pytz.utc) -
                     datetime.timedelta(hours=24))
        account.last_email_sent_at = a_day_ago
        account._commit()
        send_account_summary_email(account._id, verbose=True)

def test_comment_order_full_asc(self):
    sort = operators.asc("_confidence")
    builder = CommentBuilder(self.link, sort, num=1500)
    builder._get_comments()
    self.assertEqual(
        builder.comment_order,
        [109, 108, 107, 100, 103, 102, 106, 105, 101, 104, 110])

def backfill(after=None):
    q = Subreddit._query(sort=asc('_date'))
    if after:
        sr = Subreddit._by_name(after)
        q = q._after(sr)
    for sr in fetch_things2(q):
        backfill_sr(sr)

def send_account_summary_email(account_thing_id, verbose=False,
                               send_email=send_email):
    account = Account._byID(account_thing_id, data=True)
    if not should_send_activity_summary_email(account):
        return

    # if we've never sent an email, only tell about the last 24 hours
    a_day_ago = datetime.datetime.now(pytz.utc) - datetime.timedelta(hours=24)
    if getattr(account, 'last_email_sent_at', None) is None:
        account.last_email_sent_at = a_day_ago

    c.content_langs = 'en-US'

    # Find all the "active" links for this user. Frontpage uses the c.user
    # global to find the right subreddits for the current user
    c.user = account
    c.user_is_loggedin = True
    thing_ids = []
    for link in Frontpage.get_links('active', 'all'):
        thing_ids.append(link)
    active_links_hash = Link._by_fullname(thing_ids, data=True)

    active_links = [active_links_hash[t_id] for t_id in thing_ids
                    if active_links_hash[t_id]._active > account.last_email_sent_at]
    idx = 0
    for ll in active_links:
        idx += 1
        ll.num = idx

    # Find all new spaces created since we last sent the user an email
    new_spaces = list(fetch_things2(Subreddit._query(
        Subreddit.c._date > account.last_email_sent_at,
        sort=asc('_date'))))

    # don't bother sending email if there's nothing to report.
    if len(new_spaces) == 0 and len(active_links) == 0:
        return

    # Get the date and time
    now = datetime.datetime.now(pytz.timezone('US/Eastern'))
    date_string = now.strftime("%A %B %d, %Y")
    time_string = now.strftime("%I:%M %p")

    # Render the template
    html_email_template = g.mako_lookup.get_template('summary_email.html')
    html_body = html_email_template.render(
        last_email_sent_at=account.last_email_sent_at,
        new_spaces=new_spaces,
        active_links=active_links,
        date_string=date_string,
        time_string=time_string)

    # with open('out.html', 'w') as ff:
    #     ff.write(html_body)

    if verbose:
        print "sending email to %s" % (account.email,)

    send_email(account.email, html_body, date_string)

    account.last_email_sent_at = datetime.datetime.now(pytz.utc)
    account._commit()

def add_all_ban_report_srs():
    """Adds the initial spam/reported pages to the report queue"""
    q = Subreddit._query(sort=asc('_date'))
    for sr in fetch_things2(q):
        add_queries([get_spam_links(sr),
                     get_spam_comments(sr),
                     get_reported_links(sr),
                     get_reported_comments(sr),
                     ])

def operator(self, sort):
    if sort == 'hot':
        return operators.desc('_hot')
    elif sort == 'new':
        return operators.desc('_date')
    elif sort == 'old':
        return operators.asc('_date')
    elif sort == 'top':
        return operators.desc('_score')
    elif sort == 'controversial':
        return operators.desc('_controversy')

def add_all_srs():
    """Recalculates every listing query for every subreddit. Very, very slow."""
    q = Subreddit._query(sort=asc("_date"))
    for sr in fetch_things2(q):
        for q in all_queries(get_links, sr, ("hot", "new"), ["all"]):
            q.update()
        for q in all_queries(get_links, sr, time_filtered_sorts,
                             db_times.keys()):
            q.update()
        get_spam_links(sr).update()
        # get_spam_comments(sr).update()
        get_reported_links(sr).update()

def test_comment_order_full_asc(self):
    sort = operators.asc("_confidence")
    builder = CommentBuilder(self.link, sort, num=1500)
    builder.load_comment_order()
    comment_order = [
        comment_tuple.comment_id
        for comment_tuple in builder.ordered_comment_tuples
    ]
    self.assertEqual(comment_order,
                     [109, 108, 107, 100, 103, 102, 106, 105, 101, 104, 110])
    self.assertEqual(builder.missing_root_comments, set())
    self.assertEqual(builder.missing_root_count, 0)

class SortMenu(NavMenu):
    name = 'sort'
    hidden_options = []
    button_cls = QueryButton

    # these are _ prefixed to avoid colliding with NavMenu attributes
    _default = 'hot'
    _options = ('hot', 'new', 'top', 'old',
                g.voting_upvote_path, g.voting_controversial_path)
    _type = 'lightdrop'
    _title = N_("sorted by")

    def __init__(self, default=None, title='', base_path='', separator='|',
                 _id='', css_class=''):
        options = self.make_buttons()
        default = default or self._default
        base_path = base_path or request.path
        title = title or _(self._title)
        NavMenu.__init__(self, options, default=default, title=title,
                         type=self._type, base_path=base_path,
                         separator=separator, _id=_id, css_class=css_class)

    def make_buttons(self):
        buttons = []
        for name in self._options:
            css_class = 'hidden' if name in self.hidden_options else ''
            button = self.button_cls(self.make_title(name), name, self.name,
                                     css_class=css_class)
            buttons.append(button)
        return buttons

    def make_title(self, attr):
        return menu[attr]

    _mapping = {
        "hot": operators.desc('_hot'),
        "new": operators.desc('_date'),
        "old": operators.asc('_date'),
        "top": operators.desc('_score'),
        g.voting_upvote_path: operators.desc('_upvotes'),
        g.voting_controversial_path: operators.desc('_controversy'),
        "confidence": operators.desc('_confidence'),
        "random": operators.shuffled('_confidence'),
        "qa": operators.desc('_qa'),
    }
    _reverse_mapping = {v: k for k, v in _mapping.iteritems()}

    @classmethod
    def operator(cls, sort):
        return cls._mapping.get(sort)

    @classmethod
    def sort(cls, operator):
        return cls._reverse_mapping.get(operator)

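# Hedged usage sketch (assumes a request context so request.path and the
# menu/translation globals resolve); the classmethod lookups are plain dict
# gets on _mapping:
#
#   menu = SortMenu(default='new')
#   SortMenu.operator('old')       # -> operators.asc('_date')
#   SortMenu.operator('random')    # -> operators.shuffled('_confidence')
#   SortMenu.operator('nonsense')  # -> None; unknown names fall off _mapping
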
def add_all_srs():
    """Recalculates every listing query for every subreddit. Very, very slow."""
    q = Subreddit._query(sort=asc('_date'))
    for sr in fetch_things2(q):
        for q in all_queries(get_links, sr, ('hot', 'new'), ['all']):
            q.update()
        for q in all_queries(get_links, sr, time_filtered_sorts,
                             db_times.keys()):
            q.update()
        get_spam_links(sr).update()
        get_spam_comments(sr).update()
        get_reported_links(sr).update()
        get_reported_comments(sr).update()

def add_all_srs():
    """Recalculates every listing query for every subsciteit. Very, very slow."""
    q = Subsciteit._query(sort=asc('_date'))
    for sr in fetch_things2(q):
        for q in all_queries(get_links, sr, ('hot', 'new'), ['all'],
                             no_children=True):
            q.update()
        for q in all_queries(get_links, sr, time_filtered_sorts,
                             db_times.keys(), no_children=True):
            q.update()
        get_spam_links(sr).update()
        get_spam_comments(sr).update()
        get_reported_links(sr).update()
        get_reported_comments(sr).update()

def add_all_srs():
    """Adds every listing query for every subreddit to the queue."""
    q = Subreddit._query(sort=asc("_date"))
    for sr in fetch_things2(q):
        add_queries(all_queries(get_links, sr, ("hot", "new"), ["all"]))
        add_queries(all_queries(get_links, sr, ("top", "controversial"),
                                db_times.keys()))
        add_queries(
            [
                get_spam_links(sr),
                # get_spam_comments(sr),
                get_reported_links(sr),
                # get_reported_comments(sr),
            ]
        )

def operator(self, sort):
    if sort == "hot":
        return operators.desc("_hot")
    elif sort == "new":
        return operators.desc("_date")
    elif sort == "old":
        return operators.asc("_date")
    elif sort == "top":
        return operators.desc("_score")
    elif sort == "controversial":
        return operators.desc("_controversy")
    elif sort == "confidence":
        return operators.desc("_confidence")
    elif sort == "interestingness":
        return operators.desc("_interestingness")

def operator(self, sort):
    if sort == 'hot':
        return operators.desc('_hot')
    elif sort == 'new':
        return operators.desc('_date')
    elif sort == 'old':
        return operators.asc('_date')
    elif sort == 'top':
        return operators.desc('_score')
    elif sort == 'controversial':
        return operators.desc('_controversy')
    elif sort == 'confidence':
        return operators.desc('_confidence')
    elif sort == 'random':
        return operators.shuffled('_confidence')

def _setsort(self, sorts):
    sorts = tup(sorts)
    # make sure sorts are wrapped in a Sort obj
    have_date = False
    op_sorts = []
    for s in sorts:
        if not isinstance(s, operators.sort):
            s = operators.asc(s)
        op_sorts.append(s)
        if s.col.endswith("_date"):
            have_date = True
    if op_sorts and not have_date:
        op_sorts.append(operators.desc("_date"))

    self._sort_param = op_sorts
    return self

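# Hedged illustration, not from the source: a bare column name is wrapped in
# asc(), and since it is not a date column a descending _date tiebreaker is
# appended:
#
#   q._setsort('_score')
#   q._sort_param  # [operators.asc('_score'), operators.desc('_date')]
#
# whereas _setsort(operators.desc('_date')) is left as the sole sort.
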
def queue_summary_emails():
    start = datetime.datetime.now()

    # find all accounts that should get an email
    # this implementation is slow, as it iterates over all accounts that have
    # an email address. One idea to make it faster is to turn the
    # "last_email_sent_at" data attribute into an actual sql column you can
    # query
    accounts = fetch_things2(Account._query(Account.c.email != None,
                                            sort=asc('_date')))
    for account in accounts:
        if should_send_activity_summary_email(account):
            # using _add_item over add_item as that skips using a daemon
            # thread to talk to the amqp server that might not finish its
            # job before the process exits
            amqp._add_item('summary_email_q', str(account._id))
            print "Queued summary email for %r" % (account.email,)

    end = datetime.datetime.now()
    print "Time to scan accounts to queue emails: %s" % (end - start)

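# Hedged sketch of a matching consumer, not from the source (assumes
# r2.lib.amqp's consume_items helper; the message body is the stringified
# account id queued above):
#
#   def process_summary_email(msg):
#       account_thing_id = int(msg.body)
#       send_account_summary_email(account_thing_id)
#
#   amqp.consume_items('summary_email_q', process_summary_email)
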
def subreddit_stats(config):
    sr_counts = defaultdict(int)
    for kind in (Link, Comment):
        thing_table, data_table = get_thing_table(kind._type_id)
        first_id = list(kind._query(kind.c._date > timeago('1 day'),
                                    sort=asc('_date'), limit=1))

        if not first_id:
            continue
        else:
            first_id = first_id[0]._id

        q = sa.select([data_table.c.value,
                       sa.func.count(data_table.c.value)],
                      (data_table.c.thing_id > first_id) &
                      (data_table.c.key == 'sr_id') &
                      (thing_table.c.thing_id == data_table.c.thing_id) &
                      (thing_table.c.spam == False),
                      group_by=data_table.c.value)

        for sr_id, count in q.execute():
            sr_counts[sr_id] += count

    return {
        'subreddits_active_past_day':
            len(list(count for count in sr_counts.itervalues() if count > 5)),
    }

def _all_awards_cache(cls):
    return [a._id for a in Award._query(sort=asc('_date'), limit=100)]

def next_link(self):
    q = self._link_nav_query(sort=operators.asc('_date'))
    return self._link_for_query(q)

def next_in_top(self):
    q = self._link_nav_query(Link.c.top_link == True,
                             operators.asc('_date'))
    return self._link_for_query(q)

def next_by_author(self):
    q = self._link_nav_query(Link.c.author_id == self.author_id,
                             operators.asc('_date'))
    return self._link_for_query(q)

def next_by_tag(self, tag):
    return self._next_link_for_tag(tag, operators.asc('_t1_date'))

def add_all_users():
    q = Account._query(sort=asc('_date'))
    for user in fetch_things2(q):
        update_user(user)

def next_in_promoted(self):
    q = self._link_nav_query(Link.c.blessed == True,
                             operators.asc('_date'))
    return self._link_for_query(q)

def find_all_subreddits():
    iterator = rate_limit_query(
        Subreddit._query(
            *[Subreddit.c.type != type_ for type_ in Subreddit.private_types],
            sort=asc('_date')))
    return itertools.ifilter(is_subreddit_to_crawl, iterator)
