def subreddit_stats(config, ranges):
    """Count subreddits with more than five non-spam things in yesterday's id range.

    For Link and Comment in turn, the ids of the first and last thing whose
    `_date` falls inside ranges['yesterday'] bound a scan of that kind's data
    table, grouped by its 'sr_id' value.  Counts are summed per subreddit
    across both kinds.  `config` is accepted but not used here.

    Returns a dict with the single key 'subreddits_active_yesterday'.
    """
    def boundary_id(thing_cls, rule, ordering):
        # _id of the first thing matching `rule` under `ordering`, else None
        matches = list(thing_cls._query(rule, sort=ordering, limit=1))
        return matches[0]._id if matches else None

    per_subreddit = defaultdict(int)
    for kind in (Link, Comment):
        thing_table, data_table = get_thing_table(kind._type_id)
        first_id = boundary_id(kind, kind.c._date > ranges['yesterday'][0],
                               asc('_date'))
        last_id = boundary_id(kind, kind.c._date < ranges['yesterday'][1],
                              desc('_date'))
        if not (first_id and last_id):
            continue

        columns = [data_table.c.value, sa.func.count(data_table.c.value)]
        conditions = (
            (data_table.c.thing_id > first_id)
            & (data_table.c.thing_id < last_id)
            & (data_table.c.key == 'sr_id')
            & (thing_table.c.thing_id == data_table.c.thing_id)
            & (thing_table.c.spam == False)
        )
        query = sa.select(columns, conditions, group_by=data_table.c.value)

        for sr_id, count in query.execute():
            per_subreddit[sr_id] += count

    active = sum(1 for count in per_subreddit.itervalues() if count > 5)
    return {'subreddits_active_yesterday': active}
Example #2
0
def subreddit_stats(config, ranges):
    """Report how many subreddits had more than five counted things yesterday.

    Link and Comment are handled one after the other: the first and last
    thing ids dated within ranges['yesterday'] delimit the rows of the
    kind's data table that get counted per 'sr_id' value (spam rows are
    excluded).  `config` is not read by this function.

    Returns {'subreddits_active_yesterday': <int>}.
    """
    def edge_id(*rules, **options):
        # `kind` is looked up in the enclosing loop at call time.
        options.setdefault('limit', 1)
        found = list(kind._query(*rules, **options))
        return found[0]._id if found else None

    tallies = defaultdict(int)
    for kind in (Link, Comment):
        thing_table, data_table = get_thing_table(kind._type_id)
        earliest = edge_id(kind.c._date > ranges['yesterday'][0],
                           sort=asc('_date'))
        latest = edge_id(kind.c._date < ranges['yesterday'][1],
                         sort=desc('_date'))
        if earliest and latest:
            selected = [data_table.c.value,
                        sa.func.count(data_table.c.value)]
            in_window = ((data_table.c.thing_id > earliest)
                         & (data_table.c.thing_id < latest))
            is_sr_row = data_table.c.key == 'sr_id'
            same_thing = thing_table.c.thing_id == data_table.c.thing_id
            not_spam = thing_table.c.spam == False
            q = sa.select(selected,
                          in_window & is_sr_row & same_thing & not_spam,
                          group_by=data_table.c.value)

            for sr_id, count in q.execute():
                tallies[sr_id] += count

    busy = [count for count in tallies.itervalues() if count > 5]
    return {'subreddits_active_yesterday': len(busy)}
Example #3
0
 def operator(self, sort):
     """Translate a sort name into the matching `operators` sort object.

     Returns None when `sort` is not one of 'new', 'old' or 'top'.
     """
     # (sort name, lazily-built operator), checked in order
     known_sorts = (
         ('new', lambda: operators.desc('_t1_date')),
         ('old', lambda: operators.asc('_t1_date')),
         ('top', lambda: operators.desc('_t1_score')),
     )
     for name, build in known_sorts:
         if sort == name:
             return build()
     return None
Example #4
0
 def operator(self, sort):
     """Return the `operators` sort for `sort`, or None if it is unrecognised.

     Recognised names are "new", "old" and "top".
     """
     # (sort name, factory attribute on `operators`, column)
     table = (
         ("new", "desc", "_t1_date"),
         ("old", "asc", "_t1_date"),
         ("top", "desc", "_t1_score"),
     )
     for name, factory, column in table:
         if sort == name:
             return getattr(operators, factory)(column)
     return None
Example #5
0
def fetch_things(t_class,since,until,batch_fn=None,
                 *query_params, **extra_query_dict):
    """
        Simple utility function to fetch all Things of class t_class
        (spam or not) whose _date satisfies since <= _date < until.

        The query is run in pages (limit 100 unless overridden through
        extra_query_dict), sorted ascending by _date.  Each page is
        passed through batch_fn, if one is given, and the items it
        returns are yielded one by one.  Extra positional arguments are
        appended to the query rules; extra keyword arguments override
        the default query options.

        The paging cursor is taken from the raw page, before batch_fn
        sees it, so a batch_fn that filters a page down to nothing (or
        reorders it) can no longer break or stall the paging.
    """

    from r2.lib.db.operators import asc

    query_params = ([t_class.c._date >= since,
                     t_class.c._date <  until,
                     t_class.c._spam == (True,False)]
                    + list(query_params))
    query_dict   = {'sort':  asc('_date'),
                    'limit': 100,
                    'data':  True}
    query_dict.update(extra_query_dict)

    q = t_class._query(*query_params,
                        **query_dict)

    # _after() changes the query's rules, so restore a pristine copy
    # before applying each new cursor.
    orig_rules = deepcopy(q._rules)

    things = list(q)
    while things:
        # Previously the cursor was the loop variable left over from
        # iterating batch_fn's output: unbound if the first processed
        # batch was empty, stale (re-fetching the same page forever) if
        # a later one was.
        last_fetched = things[-1]
        processed = batch_fn(things) if batch_fn else things
        for t in processed:
            yield t
        q._rules = deepcopy(orig_rules)
        q._after(last_fetched)
        things = list(q)
Example #6
0
 def operator(self, sort):
     """Look up the `operators` sort object for the given sort name.

     'new' and 'old' order on '_t1_date' (descending / ascending) and
     'top' orders on '_t1_score' descending; anything else gives None.
     """
     choices = (
         ('new', lambda: operators.desc('_t1_date')),
         ('old', lambda: operators.asc('_t1_date')),
         ('top', lambda: operators.desc('_t1_score')),
     )
     for label, make_operator in choices:
         if sort == label:
             return make_operator()
     return None
Example #7
0
def add_all_srs():
    """Queue the listing queries of every subreddit.

    Subreddits are walked in ascending `_date` order.  For each one the
    'hot'/'new'/'old' queries (time 'all'), the 'top'/'controversial'
    queries for every key of db_times, and the 'toplinks' query are
    handed to add_queries.
    """
    by_date = Subreddit._query(sort=asc('_date'))
    for subreddit in fetch_things2(by_date):
        untimed = all_queries(get_links, subreddit,
                              ('hot', 'new', 'old'), ['all'])
        add_queries(untimed)
        timed = all_queries(get_links, subreddit,
                            ('top', 'controversial'), db_times.keys())
        add_queries(timed)
        add_queries([get_links(subreddit, 'toplinks', 'all')])
def reset_last_email_sent_at_for_all_accounts():
    """Set last_email_sent_at to the Unix epoch (UTC) and commit.

    Applies to every account returned by the `Account.c.email != None`
    query, walked in ascending `_date` order.
    """
    epoch_utc = pytz.utc.localize(datetime.datetime.utcfromtimestamp(0))
    with_email = Account._query(Account.c.email != None, sort=asc('_date'))
    for account in fetch_things2(with_email):
        account.last_email_sent_at = epoch_utc
        account._commit()
def test_send_summary_emails():
    """Send a verbose summary email to each account matching `email != None`.

    Before sending, the account's last_email_sent_at is moved to 24 hours
    before the current UTC time and committed.
    """
    one_day = datetime.timedelta(hours=24)
    with_email = Account._query(Account.c.email != None, sort=asc('_date'))
    for account in fetch_things2(with_email):
        account.last_email_sent_at = datetime.datetime.now(pytz.utc) - one_day
        account._commit()
        send_account_summary_email(account._id, verbose=True)
Example #10
0
 def test_comment_order_full_asc(self):
     """Check the comment order built with an ascending "_confidence" sort."""
     expected_order = [109, 108, 107, 100, 103, 102, 106, 105, 101, 104, 110]
     builder = CommentBuilder(
         self.link, operators.asc("_confidence"), num=1500)
     builder._get_comments()
     self.assertEqual(builder.comment_order, expected_order)
Example #11
0
def add_all_srs():
    """Add each subreddit's listing queries to the queue.

    Iterates subreddits sorted ascending on `_date` and, for each, calls
    add_queries three times: for the 'hot'/'new'/'old' listings over
    'all', for 'top'/'controversial' over every db_times key, and for
    the single 'toplinks' listing.
    """
    all_subreddits = fetch_things2(Subreddit._query(sort=asc('_date')))
    for sr in all_subreddits:
        plain_listings = all_queries(
            get_links, sr, ('hot', 'new', 'old'), ['all'])
        add_queries(plain_listings)
        timed_listings = all_queries(
            get_links, sr, ('top', 'controversial'), db_times.keys())
        add_queries(timed_listings)
        toplinks = get_links(sr, 'toplinks', 'all')
        add_queries([toplinks])
Example #12
0
def fetch_things(t_class,since,until,batch_fn=None,
                 *query_params, **extra_query_dict):
    """
        Simple utility function to fetch all Things of class t_class
        (spam or not) that were created from 'since' (inclusive) to
        'until' (exclusive), judged by their _date.

        Results are fetched page by page (100 per page by default,
        ascending _date); extra_query_dict may override those query
        options and query_params adds further rules.  When batch_fn is
        given it is applied to every page and its result is what gets
        yielded.

        The cursor used to request the next page is the last item of
        the page as fetched, not of batch_fn's output, so an empty or
        reordered batch_fn result cannot derail the paging.
    """

    from r2.lib.db.operators import asc

    query_params = ([t_class.c._date >= since,
                     t_class.c._date <  until,
                     t_class.c._spam == (True,False)]
                    + list(query_params))
    query_dict   = {'sort':  asc('_date'),
                    'limit': 100,
                    'data':  True}
    query_dict.update(extra_query_dict)

    q = t_class._query(*query_params,
                        **query_dict)

    # Keep the untouched rules so each page starts from them again.
    orig_rules = deepcopy(q._rules)

    things = list(q)
    while things:
        # Capture the cursor first: relying on the loop variable below
        # raised NameError when batch_fn emptied the first page and
        # looped forever on a stale cursor when it emptied a later one.
        page_end = things[-1]
        if batch_fn:
            things = batch_fn(things)
        for t in things:
            yield t
        q._rules = deepcopy(orig_rules)
        q._after(page_end)
        things = list(q)
Example #13
0
def backfill(after=None):
    """Run backfill_sr over subreddits in ascending `_date` order.

    When `after` is given it is resolved with Subreddit._by_name and the
    query is positioned after that subreddit.
    """
    query = Subreddit._query(sort=asc('_date'))
    if after:
        resume_point = Subreddit._by_name(after)
        query = query._after(resume_point)

    for subreddit in fetch_things2(query):
        backfill_sr(subreddit)
Example #14
0
def backfill(after=None):
    """Call backfill_sr for each subreddit, sorted ascending on `_date`.

    `after` is an optional subreddit name; if truthy, iteration starts
    after the subreddit that Subreddit._by_name returns for it.
    """
    subreddits = Subreddit._query(sort=asc('_date'))
    if after:
        start_after = Subreddit._by_name(after)
        subreddits = subreddits._after(start_after)

    for sr in fetch_things2(subreddits):
        backfill_sr(sr)
def send_account_summary_email(account_thing_id, verbose=False, send_email=send_email):
    account = Account._byID(account_thing_id, data=True)
    if not should_send_activity_summary_email(account):
        return

    # if we've never sent an email, only tell about the last 24 hours
    a_day_ago = datetime.datetime.now(pytz.utc) - datetime.timedelta(hours=24)
    if getattr(account, 'last_email_sent_at', None) is None:
        account.last_email_sent_at = a_day_ago

    c.content_langs = 'en-US'

    # Find all the "active" links for this user.  Frontpage uses the c.user global
    # to find the right subreddits for the current user
    c.user = account
    c.user_is_loggedin = True
    thing_ids = []
    for link in Frontpage.get_links('active', 'all'):
        thing_ids.append(link)
    active_links_hash = Link._by_fullname(thing_ids, data=True)

    active_links = [active_links_hash[t_id] for t_id in thing_ids if active_links_hash[t_id]._active > account.last_email_sent_at]
    idx = 0
    for ll in active_links:
        idx += 1
        ll.num = idx 

    # Find all new spaces created since we last sent the user an email
    new_spaces = list(fetch_things2(Subreddit._query(
        Subreddit.c._date > account.last_email_sent_at,
        sort=asc('_date'))))

    # don't bother sending email if there's noting to report.
    if len(new_spaces) == 0 and len(active_links) == 0:
        return

    # Get the date and time
    now = datetime.datetime.now(pytz.timezone('US/Eastern'))
    date_string = now.strftime("%A %B %d, %Y")
    time_string = now.strftime("%I:%M %p")

    # Render the template
    html_email_template = g.mako_lookup.get_template('summary_email.html')
    html_body = html_email_template.render(
        last_email_sent_at=account.last_email_sent_at,
        new_spaces=new_spaces, 
        active_links=active_links,
        date_string=date_string,
        time_string=time_string)

    # with open('out.html', 'w') as ff:
    #     ff.write(html_body)
    if verbose:
        print "sending email to %s" % (account.email,)
    send_email(account.email, html_body, date_string)

    account.last_email_sent_at = datetime.datetime.now(pytz.utc)
    account._commit()
Example #16
0
def add_all_ban_report_srs():
    """Queue the spam and reported listings of every subreddit.

    For each subreddit (ascending `_date`), the spam-links, spam-comments,
    reported-links and reported-comments queries are passed to
    add_queries together.
    """
    by_date = Subreddit._query(sort=asc('_date'))
    for subreddit in fetch_things2(by_date):
        report_queries = [
            get_spam_links(subreddit),
            get_spam_comments(subreddit),
            get_reported_links(subreddit),
            get_reported_comments(subreddit),
        ]
        add_queries(report_queries)
Example #17
0
def add_all_ban_report_srs():
    """Add each subreddit's spam/reported pages to the queue.

    Subreddits are fetched sorted ascending on `_date`; four queries per
    subreddit are built in a fixed order and handed to add_queries.
    """
    for sr in fetch_things2(Subreddit._query(sort=asc('_date'))):
        spam_links = get_spam_links(sr)
        spam_comments = get_spam_comments(sr)
        reported_links = get_reported_links(sr)
        reported_comments = get_reported_comments(sr)
        add_queries([spam_links, spam_comments,
                     reported_links, reported_comments])
Example #18
0
 def operator(self, sort):
     """Translate a sort name into the matching `operators` sort object.

     Handles 'hot', 'new', 'old', 'top' and 'controversial'; any other
     value yields None.
     """
     # (sort name, lazily-built operator), checked in order
     known_sorts = (
         ('hot', lambda: operators.desc('_hot')),
         ('new', lambda: operators.desc('_date')),
         ('old', lambda: operators.asc('_date')),
         ('top', lambda: operators.desc('_score')),
         ('controversial', lambda: operators.desc('_controversy')),
     )
     for name, build in known_sorts:
         if sort == name:
             return build()
     return None
Example #19
0
 def operator(self, sort):
     """Return the `operators` sort for `sort`, or None if it is unrecognised.

     Recognised names: "hot", "new", "old", "top", "controversial".
     """
     # (sort name, factory attribute on `operators`, column)
     table = (
         ("hot", "desc", "_hot"),
         ("new", "desc", "_date"),
         ("old", "asc", "_date"),
         ("top", "desc", "_score"),
         ("controversial", "desc", "_controversy"),
     )
     for name, factory, column in table:
         if sort == name:
             return getattr(operators, factory)(column)
     return None
Example #20
0
 def operator(self, sort):
     """Look up the `operators` sort object for the given sort name.

     Only 'old' sorts ascending (on '_date'); 'hot', 'new', 'top' and
     'controversial' sort descending on their own columns.  Unknown
     names give None.
     """
     choices = (
         ('hot', lambda: operators.desc('_hot')),
         ('new', lambda: operators.desc('_date')),
         ('old', lambda: operators.asc('_date')),
         ('top', lambda: operators.desc('_score')),
         ('controversial', lambda: operators.desc('_controversy')),
     )
     for label, make_operator in choices:
         if sort == label:
             return make_operator()
     return None
Example #21
0
def add_all_srs():
    """Recalculates every listing query for every subreddit. Very,
       very slow.

       Per subreddit (ascending "_date"): updates the "hot"/"new"
       listings over "all", the time_filtered_sorts listings over every
       db_times key, then the spam-links and reported-links listings."""
    subreddits = Subreddit._query(sort=asc("_date"))
    for sr in fetch_things2(subreddits):
        untimed = all_queries(get_links, sr, ("hot", "new"), ["all"])
        for listing in untimed:
            listing.update()
        timed = all_queries(get_links, sr, time_filtered_sorts, db_times.keys())
        for listing in timed:
            listing.update()
        get_spam_links(sr).update()
        # get_spam_comments(sr).update()
        get_reported_links(sr).update()
Example #22
0
 def test_comment_order_full_asc(self):
     """Check ordering and missing-root bookkeeping for asc("_confidence")."""
     expected_order = [109, 108, 107, 100, 103, 102, 106, 105, 101, 104, 110]
     builder = CommentBuilder(
         self.link, operators.asc("_confidence"), num=1500)
     builder.load_comment_order()
     actual_order = []
     for comment_tuple in builder.ordered_comment_tuples:
         actual_order.append(comment_tuple.comment_id)
     self.assertEqual(actual_order, expected_order)
     self.assertEqual(builder.missing_root_comments, set())
     self.assertEqual(builder.missing_root_count, 0)
Example #23
0
class SortMenu(NavMenu):
    """NavMenu offering the available sort orders.

    Also maps sort names to `operators` sort objects and back through
    the `operator` and `sort` classmethods.
    """
    name = 'sort'
    hidden_options = []
    button_cls = QueryButton

    # these are _ prefixed to avoid colliding with NavMenu attributes
    _default = 'hot'
    _options = ('hot', 'new', 'top', 'old', g.voting_upvote_path, g.voting_controversial_path)
    _type = 'lightdrop'
    _title = N_("sorted by")

    def __init__(self, default=None, title='', base_path='', separator='|',
                 _id='', css_class=''):
        """Build the menu; falsy default/base_path/title fall back to the
        class default, request.path and the translated class title."""
        buttons = self.make_buttons()
        if not default:
            default = self._default
        if not base_path:
            base_path = request.path
        if not title:
            title = _(self._title)
        NavMenu.__init__(self, buttons, default=default, title=title,
                         type=self._type, base_path=base_path,
                         separator=separator, _id=_id, css_class=css_class)

    def make_buttons(self):
        """Return one button per entry of _options, in order.

        Options listed in hidden_options get the 'hidden' css class.
        """
        def build(option):
            css_class = 'hidden' if option in self.hidden_options else ''
            return self.button_cls(self.make_title(option), option,
                                   self.name, css_class=css_class)

        return [build(option) for option in self._options]

    def make_title(self, attr):
        """Return the `menu` entry used as the title for option `attr`."""
        return menu[attr]

    # sort name -> sort operator
    _mapping = {
        "hot": operators.desc('_hot'),
        "new": operators.desc('_date'),
        "old": operators.asc('_date'),
        "top": operators.desc('_score'),
        g.voting_upvote_path: operators.desc('_upvotes'),
        g.voting_controversial_path: operators.desc('_controversy'),
        "confidence": operators.desc('_confidence'),
        "random": operators.shuffled('_confidence'),
        "qa": operators.desc('_qa'),
    }
    # sort operator -> sort name
    _reverse_mapping = dict(
        (sort_operator, sort_name)
        for sort_name, sort_operator in _mapping.iteritems())

    @classmethod
    def operator(cls, sort):
        """Return the operator mapped to sort name `sort`, or None."""
        return cls._mapping.get(sort)

    @classmethod
    def sort(cls, operator):
        """Return the sort name mapped to `operator`, or None."""
        return cls._reverse_mapping.get(operator)
Example #24
0
 def test_comment_order_full_asc(self):
     """Ascending "_confidence" sort: expected order, no missing roots."""
     ascending = operators.asc("_confidence")
     builder = CommentBuilder(self.link, ascending, num=1500)
     builder.load_comment_order()
     loaded_ids = [entry.comment_id
                   for entry in builder.ordered_comment_tuples]
     expected_ids = [109, 108, 107, 100, 103, 102, 106, 105, 101, 104, 110]
     self.assertEqual(loaded_ids, expected_ids)
     self.assertEqual(builder.missing_root_comments, set())
     self.assertEqual(builder.missing_root_count, 0)
Example #25
0
def add_all_srs():
    """Recalculates every listing query for every subreddit. Very,
       very slow.

       For each subreddit (ascending '_date') this updates the
       'hot'/'new' listings, the time_filtered_sorts listings for every
       db_times key, and the spam/reported link and comment listings."""
    subreddits = Subreddit._query(sort=asc('_date'))
    for sr in fetch_things2(subreddits):
        for listing in all_queries(get_links, sr, ('hot', 'new'), ['all']):
            listing.update()
        for listing in all_queries(get_links, sr, time_filtered_sorts,
                                   db_times.keys()):
            listing.update()
        # Same order as before: spam first, then reported.
        for build_query in (get_spam_links, get_spam_comments,
                            get_reported_links, get_reported_comments):
            build_query(sr).update()
Example #26
0
def add_all_srs():
    """Recalculates every listing query for every subsciteit. Very,
       very slow.

       The listing queries are built with no_children=True; the
       spam/reported link and comment listings are updated afterwards."""
    subsciteits = Subsciteit._query(sort=asc('_date'))
    for sr in fetch_things2(subsciteits):
        untimed = all_queries(get_links, sr, ('hot', 'new'), ['all'],
                              no_children=True)
        for listing in untimed:
            listing.update()
        timed = all_queries(get_links, sr, time_filtered_sorts,
                            db_times.keys(), no_children=True)
        for listing in timed:
            listing.update()
        get_spam_links(sr).update()
        get_spam_comments(sr).update()
        get_reported_links(sr).update()
        get_reported_comments(sr).update()
Example #27
0
def add_all_srs():
    """Adds every listing query for every subreddit to the queue.

    Per subreddit (ascending "_date"): the "hot"/"new" listings over
    "all", the "top"/"controversial" listings over every db_times key,
    and the spam-links and reported-links queries.
    """
    by_date = Subreddit._query(sort=asc("_date"))
    for subreddit in fetch_things2(by_date):
        untimed = all_queries(get_links, subreddit, ("hot", "new"), ["all"])
        add_queries(untimed)
        timed = all_queries(
            get_links, subreddit, ("top", "controversial"), db_times.keys())
        add_queries(timed)
        moderation = [
            get_spam_links(subreddit),
            # get_spam_comments(subreddit),
            get_reported_links(subreddit),
            # get_reported_comments(subreddit),
        ]
        add_queries(moderation)
Example #28
0
 def operator(self, sort):
     """Return the `operators` sort for `sort`, or None if it is unrecognised.

     Recognised names: "hot", "new", "old", "top", "controversial",
     "confidence", "interestingness".
     """
     # (sort name, factory attribute on `operators`, column)
     table = (
         ("hot", "desc", "_hot"),
         ("new", "desc", "_date"),
         ("old", "asc", "_date"),
         ("top", "desc", "_score"),
         ("controversial", "desc", "_controversy"),
         ("confidence", "desc", "_confidence"),
         ("interestingness", "desc", "_interestingness"),
     )
     for name, factory, column in table:
         if sort == name:
             return getattr(operators, factory)(column)
     return None
Example #29
0
 def operator(self, sort):
     """Translate a sort name into the matching `operators` sort object.

     Handles 'hot', 'new', 'old', 'top', 'controversial', 'confidence'
     and 'interestingness'; any other value yields None.
     """
     # (sort name, lazily-built operator), checked in order
     known_sorts = (
         ('hot', lambda: operators.desc('_hot')),
         ('new', lambda: operators.desc('_date')),
         ('old', lambda: operators.asc('_date')),
         ('top', lambda: operators.desc('_score')),
         ('controversial', lambda: operators.desc('_controversy')),
         ('confidence', lambda: operators.desc('_confidence')),
         ('interestingness', lambda: operators.desc('_interestingness')),
     )
     for name, build in known_sorts:
         if sort == name:
             return build()
     return None
Example #30
0
 def operator(self, sort):
     """Look up the `operators` sort object for the given sort name.

     Every known name sorts descending on its own column except 'old',
     which sorts ascending on '_date'.  Unknown names give None.
     """
     descending_columns = (
         ('hot', '_hot'),
         ('new', '_date'),
         ('top', '_score'),
         ('controversial', '_controversy'),
         ('confidence', '_confidence'),
         ('interestingness', '_interestingness'),
     )
     if sort == 'old':
         return operators.asc('_date')
     for label, column in descending_columns:
         if sort == label:
             return operators.desc(column)
     return None
Example #31
0
 def operator(self, sort):
     """Translate a sort name into the matching `operators` sort object.

     Handles 'hot', 'new', 'old', 'top', 'controversial', 'confidence'
     and 'random' (a shuffled sort on '_confidence'); any other value
     yields None.
     """
     # (sort name, lazily-built operator), checked in order
     known_sorts = (
         ('hot', lambda: operators.desc('_hot')),
         ('new', lambda: operators.desc('_date')),
         ('old', lambda: operators.asc('_date')),
         ('top', lambda: operators.desc('_score')),
         ('controversial', lambda: operators.desc('_controversy')),
         ('confidence', lambda: operators.desc('_confidence')),
         ('random', lambda: operators.shuffled('_confidence')),
     )
     for name, build in known_sorts:
         if sort == name:
             return build()
     return None
Example #32
0
 def operator(self, sort):
     """Return the `operators` sort for `sort`, or None if it is unrecognised.

     Recognised names: 'hot', 'new', 'old', 'top', 'controversial',
     'confidence' and 'random'.
     """
     # (sort name, factory attribute on `operators`, column)
     table = (
         ('hot', 'desc', '_hot'),
         ('new', 'desc', '_date'),
         ('old', 'asc', '_date'),
         ('top', 'desc', '_score'),
         ('controversial', 'desc', '_controversy'),
         ('confidence', 'desc', '_confidence'),
         ('random', 'shuffled', '_confidence'),
     )
     for name, factory, column in table:
         if sort == name:
             return getattr(operators, factory)(column)
     return None
Example #33
0
    def _setsort(self, sorts):
        """Store `sorts` as a list of sort operators and return self.

        Entries that are not already `operators.sort` instances are
        wrapped in `operators.asc`.  If at least one sort is given and
        none is on a column ending in "_date", a descending "_date"
        sort is appended.
        """
        op_sorts = []
        needs_date_sort = True
        for item in tup(sorts):
            # make sure sorts are wrapped in a Sort obj
            wrapped = item if isinstance(item, operators.sort) else operators.asc(item)
            op_sorts.append(wrapped)
            if wrapped.col.endswith("_date"):
                needs_date_sort = False

        if op_sorts and needs_date_sort:
            op_sorts.append(operators.desc("_date"))

        self._sort_param = op_sorts
        return self
Example #34
0
    def _setsort(self, sorts):
        """Normalise `sorts` into sort operators, store them, return self.

        Plain values are wrapped with `operators.asc`; existing
        `operators.sort` instances are kept as they are.  A descending
        '_date' sort is added at the end unless the list is empty or one
        of the sorts already targets a column ending in '_date'.
        """
        normalised = []
        date_sorts_seen = 0
        for entry in tup(sorts):
            if isinstance(entry, operators.sort):
                sort_op = entry
            else:
                sort_op = operators.asc(entry)
            normalised.append(sort_op)
            if sort_op.col.endswith('_date'):
                date_sorts_seen += 1

        if normalised and date_sorts_seen == 0:
            normalised.append(operators.desc('_date'))

        self._sort_param = normalised
        return self
def queue_summary_emails():
    start = datetime.datetime.now()
    # find all accounts that should get an email

    # this implementation is slow, as it iterates over all accounts that have an email
    # address.  One idea to make it faster is to turn the "last_email_sent_at" data 
    # attribute into an actual sql column you can query

    accounts = fetch_things2(Account._query(Account.c.email != None, sort=asc('_date')))
    for account in accounts:
        if should_send_activity_summary_email(account):
            # using _add_item over add_item as that skips using a daemon thread to talk
            # to the amqp server that might not finish it's job before the process exits
            amqp._add_item('summary_email_q', str(account._id))
            print "Queued summary email for %r" % (account.email,)
    end = datetime.datetime.now()
    print "Time to scan accounts to queue emails: %s" % (end - start)
def subreddit_stats(config):
    """Count subreddits with more than five counted things in the past day.

    For Link and Comment in turn, the id of the first thing dated after
    timeago('1 day') is the lower bound for a scan of that kind's data
    table, grouped by its 'sr_id' value with spam rows excluded.  Counts
    are summed per subreddit across both kinds.  `config` is not used.

    Returns a dict with the single key 'subreddits_active_past_day'.
    """
    totals = defaultdict(int)
    for kind in (Link, Comment):
        thing_table, data_table = get_thing_table(kind._type_id)
        earliest = list(kind._query(kind.c._date > timeago('1 day'),
                                    sort=asc('_date'), limit=1))
        if not earliest:
            continue
        first_id = earliest[0]._id

        columns = [data_table.c.value, sa.func.count(data_table.c.value)]
        conditions = (
            (data_table.c.thing_id > first_id)
            & (data_table.c.key == 'sr_id')
            & (thing_table.c.thing_id == data_table.c.thing_id)
            & (thing_table.c.spam == False)
        )
        query = sa.select(columns, conditions, group_by=data_table.c.value)

        for sr_id, count in query.execute():
            totals[sr_id] += count

    active = sum(1 for count in totals.itervalues() if count > 5)
    return {'subreddits_active_past_day': active}
Example #37
0
 def _all_awards_cache(cls):
     """Return the ids of at most 100 awards, sorted ascending on '_date'."""
     awards = Award._query(sort=asc('_date'), limit=100)
     return [award._id for award in awards]
Example #38
0
 def next_link(self):
     """Return the link for the navigation query sorted ascending on '_date'."""
     ascending_by_date = operators.asc('_date')
     nav_query = self._link_nav_query(sort=ascending_by_date)
     return self._link_for_query(nav_query)
Example #39
0
 def next_link(self):
   """Resolve the navigation query ordered by ascending '_date' to a link."""
   nav_query = self._link_nav_query(sort=operators.asc('_date'))
   found = self._link_for_query(nav_query)
   return found
Example #40
0
 def next_in_top(self):
   """Return the link for the nav query limited to `top_link == True`."""
   is_top_link = Link.c.top_link == True
   nav_query = self._link_nav_query(is_top_link, operators.asc('_date'))
   return self._link_for_query(nav_query)
Example #41
0
 def next_by_author(self):
     """Return the link for the nav query limited to this object's author_id."""
     same_author = Link.c.author_id == self.author_id
     nav_query = self._link_nav_query(same_author, operators.asc('_date'))
     return self._link_for_query(nav_query)
Example #42
0
 def next_in_top(self):
     """Resolve the ascending-'_date' nav query over top links to a link."""
     top_only = Link.c.top_link == True
     ascending_by_date = operators.asc('_date')
     nav_query = self._link_nav_query(top_only, ascending_by_date)
     return self._link_for_query(nav_query)
Example #43
0
 def test_comment_order_full_asc(self):
     """Verify comment_order after _get_comments with asc("_confidence")."""
     ascending = operators.asc("_confidence")
     builder = CommentBuilder(self.link, ascending, num=1500)
     builder._get_comments()
     expected = [109, 108, 107, 100, 103, 102, 106, 105, 101, 104, 110]
     self.assertEqual(builder.comment_order, expected)
Example #44
0
 def next_by_tag(self, tag):
   """Delegate to _next_link_for_tag with an ascending '_t1_date' sort."""
   found = self._next_link_for_tag(tag, operators.asc('_t1_date'))
   return found
Example #45
0
 def next_by_author(self):
   """Resolve the ascending-'_date' nav query for this author_id to a link."""
   by_this_author = Link.c.author_id == self.author_id
   nav_query = self._link_nav_query(by_this_author, operators.asc('_date'))
   return self._link_for_query(nav_query)
Example #46
0
def add_all_users():
    """Call update_user for every account, in ascending `_date` order."""
    accounts_by_date = Account._query(sort=asc('_date'))
    for account in fetch_things2(accounts_by_date):
        update_user(account)
Example #47
0
 def next_in_promoted(self):
   """Return the link for the nav query limited to `blessed == True`."""
   is_blessed = Link.c.blessed == True
   nav_query = self._link_nav_query(is_blessed, operators.asc('_date'))
   return self._link_for_query(nav_query)
Example #48
0
 def _all_awards_cache(cls):
     """Collect the `_id` of each award from a 100-row, ascending-'_date' query."""
     award_ids = []
     for award in Award._query(sort=asc('_date'), limit=100):
         award_ids.append(award._id)
     return award_ids
Example #49
0
def find_all_subreddits():
    """Return an iterator over the subreddits accepted by is_subreddit_to_crawl.

    The underlying query excludes every type in Subreddit.private_types,
    is sorted ascending on '_date', and is wrapped by rate_limit_query
    before filtering.
    """
    not_private = [Subreddit.c.type != type_
                   for type_ in Subreddit.private_types]
    query = Subreddit._query(*not_private, sort=asc('_date'))
    candidates = rate_limit_query(query)
    return itertools.ifilter(is_subreddit_to_crawl, candidates)
Example #50
0
 def next_in_promoted(self):
     """Resolve the ascending-'_date' nav query over blessed links to a link."""
     blessed_only = Link.c.blessed == True
     ascending_by_date = operators.asc('_date')
     nav_query = self._link_nav_query(blessed_only, ascending_by_date)
     return self._link_for_query(nav_query)
Example #51
0
def add_all_users():
    """Run update_user over all accounts, sorted ascending on '_date'."""
    every_account = fetch_things2(Account._query(sort=asc('_date')))
    for user in every_account:
        update_user(user)
Example #52
0
 def next_by_tag(self, tag):
     """Return what _next_link_for_tag gives for `tag`, ascending '_t1_date'."""
     next_for_tag = self._next_link_for_tag(tag, operators.asc('_t1_date'))
     return next_for_tag