Example 1
def _to_fn(cls, id_):
    '''Convert id_ to a fullname (equivalent to "link._fullname", but doesn't
    require an instance of the class)
    
    '''
    return (cls._type_prefix + r2utils.to36(cls._type_id) + '_' +
            r2utils.to36(id_))
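Every example in this collection leans on to36, which renders an integer ID in lowercase base 36 (so the fullname built above comes out like "t3_3h" for id 125, assuming the class's _type_id is 3). As a minimal sketch of such a helper, under the assumption that r2utils.to36 is a plain base-36 encoder:

import string

BASE36_DIGITS = string.digits + string.ascii_lowercase  # '0'-'9' then 'a'-'z'

def to36(n):
    # Hypothetical encoder matching the behavior these examples rely on.
    if n < 0:
        raise ValueError("expected a non-negative id")
    digits = []
    while True:
        n, rem = divmod(n, 36)
        digits.append(BASE36_DIGITS[rem])
        if n == 0:
            break
    return ''.join(reversed(digits))

assert to36(125) == '3h'
assert int('3h', 36) == 125  # Python's int() already decodes base 36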
Example 2
def port_cassavotes():
    from r2.models import Vote, Account, Link, Comment
    from r2.models.vote import CassandraVote, CassandraLinkVote, CassandraCommentVote
    from r2.lib.db.tdb_cassandra import CL
    from r2.lib.db.operators import desc
    from r2.lib.utils import fetch_things2, to36, progress

    ts = [(Vote.rel(Account, Link), CassandraLinkVote),
          (Vote.rel(Account, Comment), CassandraCommentVote)]

    dataattrs = set(['valid_user', 'valid_thing', 'ip', 'organic'])

    for prel, crel in ts:
        vq = prel._query(sort=desc('_date'),
                         data=True,
                         eager_load=False)
        vq = fetch_things2(vq)
        vq = progress(vq, persec=True)
        for v in vq:
            t1 = to36(v._thing1_id)
            t2 = to36(v._thing2_id)
            cv = crel(thing1_id=t1,
                      thing2_id=t2,
                      date=v._date,
                      name=v._name)
            for dkey, dval in v._t.iteritems():
                if dkey in dataattrs:
                    setattr(cv, dkey, dval)

            cv._commit(write_consistency_level=CL.ONE)
Example 3
    def __init__(self, link, depth, parent_id=None):
        if parent_id is not None:
            id36 = utils.to36(parent_id)
            self.parent_id = parent_id
            self.parent_name = "t%s_%s" % (utils.to36(Comment._type_id), id36)
            self.parent_permalink = link.make_permalink_slow() + id36
        self.link_name = link._fullname
        self.link_id = link._id
        self.depth = depth
        self.children = []
        self.count = 0
Example 4
def get_recommended(userid, age=2, sort='relevance', num_users=10):
    u = get_users_for_user(userid)[:num_users]
    if not u: return []

    voter = Vote.rels[(Account, Link)]

    tables = tdb.get_rel_type_table(voter._type_id)
    votertable = tables[0]
    acct_col = votertable.c.thing1_id
    link_col = votertable.c.thing2_id
    date_col = votertable.c.date
    count = sa.func.count(acct_col)

    linktable = tables[2]
#    dlinktable, linktable = tdb.types_id[Link._type_id].data_table
    link_id_col = linktable.c.thing_id

    query = [sa.or_(*[acct_col == x for x in u]),
             date_col > datetime.now(g.tz)-timedelta(age)]
    cols = [link_col, count]

    if sort == 'new':
        sort = 'date'
    elif sort == 'top':
        sort = 'score'

    if sort and sort != 'relevance':
        query.append(link_id_col == link_col)
        s = tdb.translate_sort(linktable, sort)
        order = [sa.desc(s), sa.desc(link_id_col)]
        cols = [link_id_col, count]
        group_by = [link_id_col, s]
    else:
        order = [sa.desc(count), sa.desc(link_col)]
        group_by = link_col

#    #TODO: wish I could just use query_rules
#    if c.user and c.user.subreddits:
#        query.append(dlinktable.c.thing_id == linktable.c.thing_id)
#        q = sa.and_(dlinktable.c.key == 'sr_id',
#                    sa.or_(*[dlinktable.c.value == x
#                             for x in c.user.subreddits]))
#        query.append(q)

    res = sa.select(cols, sa.and_(*query),
                    group_by=group_by,
                    order_by=order).execute()


    prefix = "t%s" % to36(Link._type_id)
    return ["%s_%s" % (prefix, to36(x)) for x, y in res.fetchall()]
Example 5
    def _process(t):
        thing_id = t.thing_id
        id36 = to36(thing_id)

        link_id = t.link_id
        link_id36 = to36(link_id)

        ups, downs, timestamp = t.ups, t.downs, t.timestamp

        yield link_id36+'_controversy', id36, sorts.controversy(ups, downs)
        yield link_id36+'_hot',         id36, sorts._hot(ups, downs, timestamp)
        yield link_id36+'_confidence',  id36, sorts.confidence(ups, downs)
        yield link_id36+'_score',       id36, sorts.score(ups, downs)
        yield link_id36+'_date',        id36, timestamp
Example 6
    def __init__(self, link, depth, parent_id=None):
        from r2.lib.wrapped import CachedVariable

        if parent_id is not None:
            id36 = utils.to36(parent_id)
            self.parent_id = parent_id
            self.parent_name = "t%s_%s" % (utils.to36(Comment._type_id), id36)
            self.parent_permalink = link.make_permalink_slow() + id36
        self.link_name = link._fullname
        self.link_id = link._id
        self.depth = depth
        self.children = []
        self.count = 0
        self.previous_visits_hex = CachedVariable("previous_visits_hex")
Example 7
def port_cassasaves(after_id=None, estimate=12489897):
    from r2.models import SaveHide, CassandraSave
    from r2.lib.db.operators import desc
    from r2.lib.db.tdb_cassandra import CL
    from r2.lib.utils import fetch_things2, to36, progress

    q = SaveHide._query(SaveHide.c._name == "save", sort=desc("_date"), data=False, eager_load=False)

    if after_id is not None:
        q._after(SaveHide._byID(after_id))

    for sh in progress(fetch_things2(q), estimate=estimate):

        csh = CassandraSave(thing1_id=to36(sh._thing1_id), thing2_id=to36(sh._thing2_id), date=sh._date)
        csh._commit(write_consistency_level=CL.ONE)
Example 8
    def add_target_fields(self, target):
        if not target:
            return
        from r2.models import Comment, Link, Message

        self.add("target_id", target._id)
        self.add("target_fullname", target._fullname)
        self.add("target_type", target.__class__.__name__.lower())

        # If the target is an Account or Subreddit (or has a "name" attr),
        # add the target_name
        if hasattr(target, "name"):
            self.add("target_name", target.name)
        # Pass in the author of the target for comments, links, & messages
        elif isinstance(target, (Comment, Link, Message)):
            author = target.author_slow
            if target._deleted or author._deleted:
                self.add("target_author_id", 0)
                self.add("target_author_name", "[deleted]")
            else:
                self.add("target_author_id", author._id)
                self.add("target_author_name", author.name)
            if isinstance(target, Link) and not target.is_self:
                self.add("target_url", target.url)
                self.add("target_url_domain", target.link_domain())
            elif isinstance(target, Comment):
                link_fullname = Link._fullname_from_id36(to36(target.link_id))
                self.add("link_id", target.link_id)
                self.add("link_fullname", link_fullname)
Example 9
def queue_vote(user, thing, dir, ip, organic=False,
               cheater=False, store=True):
    # set the vote in memcached so the UI gets updated immediately
    key = prequeued_vote_key(user, thing)
    g.cache.set(key, '1' if dir is True else '0' if dir is None else '-1')
    # queue the vote to be stored unless told not to
    if store:
        if g.amqp_host:
            if isinstance(thing, Link):
                if thing._id36 in g.live_config["fastlane_links"]:
                    qname = vote_fastlane_q
                else:
                    qname = vote_link_q

            elif isinstance(thing, Comment):
                if utils.to36(thing.link_id) in g.live_config["fastlane_links"]:
                    qname = vote_fastlane_q
                else:
                    qname = vote_comment_q
            else:
                log.warning("%s tried to vote on %r. that's not a link or comment!",
                            user, thing)
                return

            amqp.add_item(qname,
                          pickle.dumps((user._id, thing._fullname,
                                        dir, ip, organic, cheater)))
        else:
            handle_vote(user, thing, dir, ip, organic)
Example 10
    def query(self):
        if c.user_is_sponsor:
            if self.sort == "future_promos":
                return queries.get_all_unapproved_links()
            elif self.sort == "pending_promos":
                return queries.get_all_accepted_links()
            elif self.sort == "unpaid_promos":
                return queries.get_all_unpaid_links()
            elif self.sort == "rejected_promos":
                return queries.get_all_rejected_links()
            elif self.sort == "live_promos" and self.sr:
                return self.live_by_subreddit(self.sr)
            elif self.sort == 'live_promos':
                return queries.get_all_live_links()
            elif self.sort == 'underdelivered':
                q = queries.get_underdelivered_campaigns()
                campaigns = PromoCampaign._by_fullname(list(q), data=True,
                                                       return_dict=False)
                link_ids = [camp.link_id for camp in campaigns]
                return [Link._fullname_from_id36(to36(id)) for id in link_ids]
            elif self.sort == 'reported':
                return queries.get_reported_links(get_promote_srid())
            return queries.get_all_promoted_links()
        else:
            if self.sort == "future_promos":
                return queries.get_unapproved_links(c.user._id)
            elif self.sort == "pending_promos":
                return queries.get_accepted_links(c.user._id)
            elif self.sort == "unpaid_promos":
                return queries.get_unpaid_links(c.user._id)
            elif self.sort == "rejected_promos":
                return queries.get_rejected_links(c.user._id)
            elif self.sort == "live_promos":
                return queries.get_live_links(c.user._id)
            return queries.get_promoted_links(c.user._id)
Example 11
        def cached_query_wrapper(*args):
            # build the row key from the function name and arguments
            assert fn.__name__.startswith("get_")
            row_key_components = [fn.__name__[len('get_'):]]

            if len(args) > 0:
                # we want to accept either a Thing or a thing's ID at this
                # layer, but the query itself should always get just an ID
                if isinstance(args[0], Thing):
                    args = list(args)
                    args[0] = args[0]._id

                thing_id = to36(args[0])
                row_key_components.append(thing_id)

            row_key_components.extend(str(x) for x in args[1:])
            row_key = '.'.join(row_key_components)

            query = fn(*args)

            if query:
                # sql-backed query
                query_sort = query._sort
                is_precomputed = _is_query_precomputed(query)
            else:
                # pure-cassandra query
                assert sort
                query_sort = sort
                is_precomputed = False

            return CachedQuery(model, row_key, query_sort, filter_fn,
                               is_precomputed)
Example 12
def sup_json_cached(period, last_time):
    #we need to re-add MIN_PERIOD because we moved back that far with
    #the call to make_last_time
    target_time = last_time + MIN_PERIOD - period

    updates = ''
    #loop backwards adding MIN_PERIOD chunks until last_time is as old
    #as target time
    while last_time >= target_time:
        updates += g.cache.get(cache_key(last_time)) or ''
        last_time -= MIN_PERIOD

    supdates = []
    if updates:
        for u in ifilter(None, updates.split(',')):
            sup_id, time = u.split(':')
            time = int(time)
            if time >= target_time:
                supdates.append([sup_id, to36(time)])

    update_time = datetime.utcnow()
    since_time = datetime.utcfromtimestamp(target_time)
    json = simplejson.dumps({'updated_time' : rfc3339_date_str(update_time),
                             'since_time' : rfc3339_date_str(since_time),
                             'period' : period,
                             'available_periods' : period_urls(),
                             'updates' : supdates})

    #undo json escaping
    json = json.replace('\/', '/')
    return json
Example 13
    def _get_sr_restriction(sr):
        '''Return a solr-appropriate query string that restricts
        results to only contain results from self.sr

        '''
        bq = []
        if (not sr) or sr == All or isinstance(sr, DefaultSR):
            return None
        elif isinstance(sr, MultiReddit):
            for sr_id in sr.sr_ids:
                bq.append("sr_id:%s" % sr_id)
        elif isinstance(sr, DomainSR):
            bq = ["site:'%s'" % sr.domain]
        elif sr == Friends:
            if not c.user_is_loggedin or not c.user.friends:
                return None
            friend_ids = c.user.friends
            friends = ["author_fullname:'%s'" %
                       Account._fullname_from_id36(r2utils.to36(id_))
                       for id_ in friend_ids]
            bq.extend(friends)
        elif isinstance(sr, ModContribSR):
            for sr_id in sr.sr_ids:
                bq.append("sr_id:%s" % sr_id)
        elif not isinstance(sr, FakeSubreddit):
            bq = ["sr_id:%s" % sr._id]
        return ' OR '.join(bq)
Example 14
    def add_props(cls, user, wrapped):
        #fetch parent links
        links = Link._byID(set(l.link_id for l in wrapped), True)
        

        #get srs for comments that don't have them (old comments)
        for cm in wrapped:
            if not hasattr(cm, 'sr_id'):
                cm.sr_id = links[cm.link_id].sr_id
        
        subreddits = Subreddit._byID(set(cm.sr_id for cm in wrapped),
                                     data=True, return_dict=False)
        can_reply_srs = set(s._id for s in subreddits if s.can_comment(user))

        min_score = c.user.pref_min_comment_score

        cids = dict((w._id, w) for w in wrapped)

        for item in wrapped:
            item.link = links.get(item.link_id)
            if not hasattr(item, 'subreddit'):
                item.subreddit = item.subreddit_slow
            if hasattr(item, 'parent_id'):
                parent = Comment._byID(item.parent_id, data=True)
                parent_author = Account._byID(parent.author_id, data=True)
                item.parent_author = parent_author

                if not c.full_comment_listing and cids.has_key(item.parent_id):
                    item.parent_permalink = '#' + utils.to36(item.parent_id)
                else:
                    item.parent_permalink = parent.make_anchored_permalink(item.link, item.subreddit)
            else:
                item.parent_permalink = None
                item.parent_author = None

            item.can_reply = (item.sr_id in can_reply_srs)

            # Don't allow users to vote on their own comments
            item.votable = bool(c.user != item.author)

            # not deleted on profile pages,
            # deleted if spam and not author or admin
            item.deleted = (not c.profilepage and
                           (item._deleted or
                            (item._spam and
                             item.author != c.user and
                             not item.show_spam)))

            # don't collapse for admins, on profile pages, or if deleted
            item.collapsed = ((item.score < min_score) and
                             not (c.profilepage or
                                  item.deleted or
                                  c.user_is_admin))
                
            if not hasattr(item,'editted'):
                item.editted = False
            #will get updated in builder
            item.num_children = 0
            item.score_fmt = Score.points
            item.permalink = item.make_permalink(item.link, item.subreddit)
Example 15
    def GET_oldinfo(self, article, type, dest, rest=None, comment=''):
        """Legacy: supporting permalink pages from '06,
           and non-search-engine-friendly links"""
        if dest not in ('comments', 'related', 'details'):
            dest = 'comments'
        if type == 'ancient':
            #this could go in config, but it should never change
            max_link_id = 10000000
            new_id = max_link_id - int(article._id)
            return self.redirect('/info/' + to36(new_id) + '/' + rest)
        if type == 'old':
            new_url = "/%s/%s/%s" % \
                      (dest, article._id36, 
                       quote_plus(title_to_url(article.title).encode('utf-8')))
            if not c.default_sr:
                new_url = "/r/%s%s" % (c.site.name, new_url)
            if comment:
                new_url = new_url + "/%s" % comment._id36
            if c.extension:
                new_url = new_url + "/.%s" % c.extension

            new_url = new_url + query_string(request.get)

            # redirect should be smarter and handle extensions, etc.
            return self.redirect(new_url, code=301)
Example 16
File: flair.py Project: 1900/reddit
    def by_sr(cls, sr_id, create=False):
        try:
            return cls._byID(to36(sr_id))
        except tdb_cassandra.NotFound:
            if create:
                return cls._new(sr_id)
            raise
Example 17
        def cached_query_wrapper(*args):
            # build the row key from the function name and arguments
            assert fn.__name__.startswith("get_")
            row_key_components = [fn.__name__[len('get_'):]]

            if len(args) > 0:
                # we want to accept either a Thing or a thing's ID at this
                # layer, but the query itself should always get just an ID
                if isinstance(args[0], Thing):
                    args = list(args)
                    args[0] = args[0]._id

                if isinstance(args[0], (int, long)):
                    serialized = to36(args[0])
                else:
                    serialized = str(args[0])
                row_key_components.append(serialized)

            row_key_components.extend(str(x) for x in args[1:])
            row_key = '.'.join(row_key_components)

            query = fn(*args)

            query_sort = query._sort
            try:
                is_precomputed = query.precomputed
            except AttributeError:
                is_precomputed = _is_query_precomputed(query)

            return CachedQuery(model, row_key, query_sort, filter_fn,
                               is_precomputed)
Example 18
    def _get_sr_restriction(sr):
        """Return a cloudsearch appropriate query string that restricts
        results to only contain results from self.sr
        
        """
        bq = []
        if (not sr) or sr == All or isinstance(sr, DefaultSR):
            return None
        elif isinstance(sr, MultiReddit):
            bq = ["(or"]
            for sr_id in sr.sr_ids:
                bq.append("sr_id:%s" % sr_id)
            bq.append(")")
        elif isinstance(sr, DomainSR):
            bq = ["site:'%s'" % sr.domain]
        elif sr == Friends:
            if not c.user_is_loggedin or not c.user.friends:
                return None
            bq = ["(or"]
            # The query limit is roughly 8k bytes. Limit to 200 friends to
            # avoid getting too close to that limit
            friend_ids = c.user.friends[:200]
            friends = ["author_fullname:'%s'" % Account._fullname_from_id36(r2utils.to36(id_)) for id_ in friend_ids]
            bq.extend(friends)
            bq.append(")")
        elif isinstance(sr, ModContribSR):
            bq = ["(or"]
            for sr_id in sr.sr_ids:
                bq.append("sr_id:%s" % sr_id)
            bq.append(")")
        elif not isinstance(sr, FakeSubreddit):
            bq = ["sr_id:%s" % sr._id]

        return " ".join(bq)
Example 19
    def _restrict_sr(sr):
        '''Return a cloudsearch appropriate query string that restricts
        results to only contain results from self.sr
        
        '''
        if isinstance(sr, MultiReddit):
            if not sr.sr_ids:
                raise InvalidQuery
            srs = ["sr_id:%s" % sr_id for sr_id in sr.sr_ids]
            return "(or %s)" % ' '.join(srs)
        elif isinstance(sr, DomainSR):
            return "site:'%s'" % sr.domain
        elif isinstance(sr, FriendsSR):
            if not c.user_is_loggedin or not c.user.friends:
                raise InvalidQuery
            # The query limit is roughly 8k bytes. Limit to 200 friends to
            # avoid getting too close to that limit
            friend_ids = c.user.friends[:200]
            friends = ["author_fullname:'%s'" %
                       Account._fullname_from_id36(r2utils.to36(id_))
                       for id_ in friend_ids]
            return "(or %s)" % ' '.join(friends)
        elif not isinstance(sr, FakeSubreddit):
            return "sr_id:%s" % sr._id

        return None
Example 20
def _comment_page_links(comment_page_data):
    for comment_info in comment_page_data:
        path = u"/r/{0}/comments/{1}/{2}/".format(
            comment_info.subreddit,
            to36(int(comment_info.thing_id)),
            urllib.quote(title_to_url(comment_info.title).encode("utf-8")),
        )
        yield _absolute_url(path)
Example 21
    def get_house_link_names(cls):
        now = promote.promo_datetime_now()
        pws = PromotionWeights.get_campaigns(now)
        campaign_ids = {pw.promo_idx for pw in pws}
        q = PromoCampaign._query(PromoCampaign.c._id.in_(campaign_ids),
                                 PromoCampaign.c.priority_name == 'house',
                                 data=True)
        return [Link._fullname_from_id36(to36(camp.link_id)) for camp in q]
Example 22
    def get_house_link_names(cls):
        now = promote.promo_datetime_now()
        campaign_ids = PromotionWeights.get_campaign_ids(now)
        q = PromoCampaign._query(PromoCampaign.c._id.in_(campaign_ids),
                                 PromoCampaign.c.priority_name == 'house',
                                 data=True)
        link_names = {Link._fullname_from_id36(to36(camp.link_id))
                      for camp in q}
        return sorted(link_names, reverse=True)
Example 23
def _desired_things(items, types):
    '''Pull fullnames that represent instances of 'types' out of items'''
    # This breaks if any _type_id is >= 36: to36(_type_id) would then be
    # longer than one character, and item['fullname'][1] would not match it
    fullnames = set()
    type_ids = [r2utils.to36(type_._type_id) for type_ in types]
    for item in items:
        if item['fullname'][1] in type_ids:
            fullnames.add(item['fullname'])
    return fullnames
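A quick illustration of that single-character assumption, using a hypothetical fullname:

assert 't3_1a2b'[1] == '3'  # works only because to36(3) == '3', one character
assert int('10', 36) == 36  # to36(36) would be '10', so indexing [1] breaks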
Example 24
    def to_serializable(self, sr, author, current_user=None):

        return {
            'id': to36(self.id),
            'date': self.date.isoformat(),
            'author': to_serializable_author(author, sr, current_user,
                                             self.is_author_hidden),
            'body': safemarkdown(self.body),
            'isInternal': self.is_internal
        }
Example 25
def fullname_regex(thing_cls=None, multiple=False):
    pattern = "[%s%s]" % (Relation._type_prefix, Thing._type_prefix)
    if thing_cls:
        pattern += utils.to36(thing_cls._type_id)
    else:
        pattern += r"[0-9a-z]+"
    pattern += r"_[0-9a-z]+"
    if multiple:
        pattern = r"(%s *,? *)+" % pattern
    return re.compile(r"\A" + pattern + r"\Z")
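As a usage sketch, assuming Thing._type_prefix is 't' and Relation._type_prefix is 'r' (consistent with the "t%s_%s" formatting used throughout these examples), the compiled patterns behave like this:

import re

single = re.compile(r"\A[rt][0-9a-z]+_[0-9a-z]+\Z")             # thing_cls=None
multiple = re.compile(r"\A([rt][0-9a-z]+_[0-9a-z]+ *,? *)+\Z")  # multiple=True

assert single.match("t3_1a2b")
assert multiple.match("t3_1a2b, t3_1a2c")
assert not single.match("t3_1a2b, t3_1a2c")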
Example 26
def new_comment(comment, inbox_rels):
    author = Account._byID(comment.author_id)
    job = [
        get_comments(author, "new", "all"),
        get_comments(author, "top", "all"),
        get_comments(author, "controversial", "all"),
    ]

    sr = Subreddit._byID(comment.sr_id)

    with CachedQueryMutator() as m:
        if comment._deleted:
            job_key = "delete_items"
            job.append(get_sr_comments(sr))
            m.delete(get_all_comments(), [comment])
        else:
            job_key = "insert_items"
            if comment._spam:
                m.insert(get_spam_comments(sr), [comment])
            if was_spam_filtered(comment):
                m.insert(get_spam_filtered_comments(sr), [comment])

            if utils.to36(comment.link_id) in g.live_config["fastlane_links"]:
                amqp.add_item("new_fastlane_comment", comment._fullname)
            else:
                amqp.add_item("new_comment", comment._fullname)

            if not g.amqp_host:
                add_comment_tree([comment])

        job_dict = {job_key: comment}
        add_queries(job, **job_dict)

        # note that get_all_comments() is updated by the amqp process
        # r2.lib.db.queries.run_new_comments (to minimise lock contention)

        if inbox_rels:
            for inbox_rel in tup(inbox_rels):
                inbox_owner = inbox_rel._thing1
                if inbox_rel._name == "inbox":
                    query = get_inbox_comments(inbox_owner)
                elif inbox_rel._name == "selfreply":
                    query = get_inbox_selfreply(inbox_owner)
                else:
                    raise ValueError("wtf is " + inbox_rel._name)

                if not comment._deleted:
                    m.insert(query, [inbox_rel])
                else:
                    m.delete(query, [inbox_rel])

                set_unread(comment, inbox_owner, unread=not comment._deleted, mutator=m)
Example 27
    def add_target_fields(self, target):
        if not target:
            return
        from r2.models import Comment, Link, Message

        self.add("target_id", target._id)
        self.add("target_fullname", target._fullname)
        self.add("target_age_seconds", target._age.total_seconds())

        target_type = target.__class__.__name__.lower()
        if target_type == "link" and target.is_self:
            target_type = "self"
        self.add("target_type", target_type)

        # If the target is an Account or Subreddit (or has a "name" attr),
        # add the target_name
        if hasattr(target, "name"):
            self.add("target_name", target.name)

        # Add info about the target's author for comments, links, & messages
        if isinstance(target, (Comment, Link, Message)):
            author = target.author_slow
            if target._deleted or author._deleted:
                self.add("target_author_id", 0)
                self.add("target_author_name", "[deleted]")
            else:
                self.add("target_author_id", author._id)
                self.add("target_author_name", author.name)

        # Add info about the url being linked to for link posts
        if isinstance(target, Link):
            self.add("target_title", target.title)
            if not target.is_self:
                self.add("target_url", target.url)
                self.add("target_url_domain", target.link_domain())

        # Add info about the link being commented on for comments
        if isinstance(target, Comment):
            link_fullname = Link._fullname_from_id36(to36(target.link_id))
            self.add("link_id", target.link_id)
            self.add("link_fullname", link_fullname)

        # Add info about when target was originally posted for links/comments
        if isinstance(target, (Comment, Link)):
            self.add("target_created_ts", _datetime_to_millis(target._date))

        hooks.get_hook("eventcollector.add_target_fields").call(
            event=self,
            target=target,
        )
Example 28
def migrate_srmember_subscribers(after_user_id=39566712):
    columns = {}
    rowkey = None
    proc_time = time.time()

    for i, rel in enumerate(get_srmembers(after_user_id)):
        sr_id = rel._thing1_id
        user_id = rel._thing2_id
        action_date = rel._date
        new_rowkey = to36(user_id)

        if new_rowkey != rowkey and columns:
            SubscribedSubredditsByAccount._cf.insert(
                rowkey, columns, timestamp=1434403336829573)
            columns = {}

        columns[to36(sr_id)] = action_date
        rowkey = new_rowkey

        if i % 1000 == 0:
            new_proc_time = time.time()
            duration = new_proc_time - proc_time
            print "%s (%.3f): %s - %s" % (i, duration, user_id, action_date)
            proc_time = new_proc_time
Example 29
    def ordered_msg_and_action_ids(self):
        order_elements = self.messages + self.mod_actions
        ordered_elements = sorted(order_elements, key=lambda x: x.date)

        ordered_id_array = []
        for element in ordered_elements:
            key = 'messages'
            if isinstance(element, ModmailConversationAction):
                key = 'modActions'

            ordered_id_array.append({
                'key': key,
                'id': to36(element.id)
            })

        return ordered_id_array
Example 30
def get_scheduled(date, sr_name=""):
    all_promotions = PromotionWeights.get_campaigns(date)
    fp_promotions = [p for p in all_promotions if p.sr_name == sr_name]
    campaigns = PromoCampaign._byID([i.promo_idx for i in fp_promotions], return_dict=False, data=True)
    links = Link._by_fullname([i.thing_name for i in fp_promotions], return_dict=False, data=True)
    links = {l._id: l for l in links}
    kept = []
    for camp in campaigns:
        if camp.trans_id == 0:
            continue

        link = links[camp.link_id]
        if link._spam or not promote.is_accepted(link):
            continue

        kept.append(camp._id)

    return [
        ("%s_%s" % (PC_PREFIX, to36(p.promo_idx)), p.thing_name, p.bid) for p in fp_promotions if p.promo_idx in kept
    ]
Example 31
    def _fullname(self):
        return "t%s_%s" % (utils.to36(Comment._type_id), self._id36)
Example 32
    def _id36(self):
        return utils.to36(self.children[0]) if self.children else '_'
Example 33
    def make_message_fullname(mid):
        return "t%s_%s" % (utils.to36(Message._type_id), utils.to36(mid))
Example 34
def compare_pageviews(daysago=0, verbose=False):
    """Evaluate past delivery for promoted links.

    Check frontpage promoted links for their actual delivery compared to what
    would be expected based on their bids.

    """

    date = (datetime.datetime.now(g.tz) -
            datetime.timedelta(days=daysago)).date()

    scheduled = get_scheduled(date)
    pageviews_by_camp = get_campaign_pageviews(date)
    campaigns = filter_campaigns(date, pageviews_by_camp.keys())
    actual = []
    for camp in campaigns:
        link_fullname = '%s_%s' % (LINK_PREFIX, to36(camp.link_id))
        i = (camp._fullname, link_fullname, pageviews_by_camp[camp._fullname])
        actual.append(i)

    scheduled_links = {link for camp, link, pageviews in scheduled}
    actual_links = {link for camp, link, pageviews in actual}

    bid_by_link = defaultdict(int)
    total_bid = 0

    pageviews_by_link = defaultdict(int)
    total_pageviews = 0

    for camp, link, bid in scheduled:
        if link not in actual_links:
            if verbose:
                print '%s not found in actual, skipping' % link
            continue

        bid_by_link[link] += bid
        total_bid += bid

    for camp, link, pageviews in actual:
        # not ideal: links shouldn't be here
        if link not in scheduled_links:
            if verbose:
                print '%s not found in schedule, skipping' % link
            continue

        pageviews_by_link[link] += pageviews
        total_pageviews += pageviews

    errors = []
    for link, bid in sorted(bid_by_link.items(), key=lambda t: t[1]):
        pageviews = pageviews_by_link.get(link, 0)
        expected = bid / total_bid
        realized = float(pageviews) / total_pageviews
        difference = (realized - expected) / expected
        errors.append(difference)
        if verbose:
            print '%s - %s - %s - %s' % (link, expected, realized, difference)

    mean_error, min_error, max_error, stdev_error = error_statistics(errors)

    print '%s' % date
    print('error %s max, %s min, %s +- %s' %
          (max_error, min_error, mean_error, stdev_error))
    print 'total bid %s' % total_bid
    print('pageviews for promoted links targeted only to frontpage %s' %
          total_pageviews)
    print('frontpage pageviews for all promoted links %s' %
          sum(pageviews_by_camp.values()))
    print 'promoted eligible pageviews %s' % get_frontpage_pageviews(date)
Example 35
    def _key(link):
        return utils.to36(link._id)
Example 36
    def _new(cls, sr_id, flair_type=USER_FLAIR):
        idx = cls(_id=to36(sr_id), sr_id=sr_id)
        idx._commit()
        return idx
Example 37
def get_recommended_content(prefs, src, settings):
    """Get a mix of content from subreddits recommended for someone with
    the given preferences (likes and dislikes.)

    Returns a list of ExploreItems.

    """
    # numbers chosen empirically to give enough results for explore page
    num_liked = 10  # how many liked srs to use when generating the recs
    num_recs = 20  # how many recommended srs to ask for
    num_discovery = 2  # how many discovery-related subreddits to mix in
    num_rising = 4  # how many rising links to mix in
    num_items = 20  # total items to return
    rising_items = discovery_items = comment_items = hot_items = []

    # make a list of srs that shouldn't be recommended
    default_srid36s = [to36(srid) for srid in Subreddit.default_subreddits()]
    omit_srid36s = list(
        prefs.likes.union(prefs.dislikes, prefs.recent_views, default_srid36s))
    # pick random subset of the user's liked srs
    liked_srid36s = random_sample(prefs.likes,
                                  num_liked) if settings.personalized else []
    # pick random subset of discovery srs
    candidates = set(get_discovery_srid36s()).difference(prefs.dislikes)
    discovery_srid36s = random_sample(candidates, num_discovery)
    # multiget subreddits
    to_fetch = liked_srid36s + discovery_srid36s
    srs = Subreddit._byID36(to_fetch)
    liked_srs = [srs[sr_id36] for sr_id36 in liked_srid36s]
    discovery_srs = [srs[sr_id36] for sr_id36 in discovery_srid36s]
    if settings.personalized:
        # generate recs from srs we know the user likes
        recommended_srs = get_recommendations(liked_srs,
                                              count=num_recs,
                                              to_omit=omit_srid36s,
                                              source=src,
                                              match_set=False,
                                              over18=settings.nsfw)
        random.shuffle(recommended_srs)
        # split list of recommended srs in half
        midpoint = len(recommended_srs) / 2
        srs_slice1 = recommended_srs[:midpoint]
        srs_slice2 = recommended_srs[midpoint:]
        # get hot links plus top comments from one half
        comment_items = get_comment_items(srs_slice1, src)
        # just get hot links from the other half
        hot_items = get_hot_items(srs_slice2, TYPE_HOT, src)
    if settings.discovery:
        # get links from subreddits dedicated to discovery
        discovery_items = get_hot_items(discovery_srs, TYPE_DISCOVERY, 'disc')
    if settings.rising:
        # grab some (non-personalized) rising items
        omit_sr_ids = set(int(id36, 36) for id36 in omit_srid36s)
        rising_items = get_rising_items(omit_sr_ids, count=num_rising)
    # combine all items and randomize order to get a mix of types
    all_recs = list(
        chain(rising_items, comment_items, discovery_items, hot_items))
    random.shuffle(all_recs)
    # make sure subreddits aren't repeated
    seen_srs = set()
    recs = []
    for r in all_recs:
        if not settings.nsfw and r.is_over18():
            continue
        if not is_visible(r.sr):  # could happen in rising items
            continue
        if r.sr._id not in seen_srs:
            recs.append(r)
            seen_srs.add(r.sr._id)
        if len(recs) >= num_items:
            break
    return recs
Example 38
def bulk_upsert(links):
    updates = filter(lambda link: getattr(link, "dfp_creative_id", False),
                     links)
    inserts = filter(lambda link: not getattr(link, "dfp_creative_id", False),
                     links)

    dfp_creative_service = DfpService("CreativeService")
    creatives = []

    if updates:
        existing_creatives = {}
        statement = dfp.FilterStatement(
            "WHERE id IN (%s)" %
            ", ".join([str(link.dfp_creative_id) for link in updates]))

        while True:
            response = dfp_creative_service.execute(
                "getCreativesByStatement",
                statement.ToStatement(),
            )

            if "results" in response:
                for creative in response["results"]:
                    existing_creatives[creative.id] = creative
                statement.offset += dfp.SUGGESTED_PAGE_LIMIT
            else:
                break

        updated = dfp_creative_service.execute("updateCreatives", [
            _link_to_creative(
                link=link,
                existing=existing_creatives[link.dfp_creative_id],
            ) for link in updates
        ])

        creatives += updated

    if inserts:
        authors = Account._byID([link.author_id for link in inserts],
                                return_dict=False)
        advertisers = advertisers_service.bulk_upsert(authors)
        advertisers_by_author = {
            advertiser.externalId: advertiser
            for advertiser in advertisers
        }

        inserted = dfp_creative_service.execute("createCreatives", [
            _link_to_creative(
                link=link,
                advertiser=advertisers_by_author[Account._fullname_from_id36(
                    to36(link.author_id))],
            ) for link in inserts
        ])

        creatives += inserted

    creatives_by_fullname = {
        utils.get_template_variable(creative, "link_id"): creative
        for creative in creatives
    }

    for link in links:
        creative = creatives_by_fullname[link._fullname]
        link.dfp_creative_id = creative.id
        link._commit()

    return creatives
Example 39
    def _id36(self):
        return to36(self._id)
Example 40
    def _fullname_prefix(cls):
        return cls._type_prefix + to36(cls._type_id)
Example 41
    def _fullname_from_id36(cls, id36):
        return cls._type_prefix + to36(cls._type_id) + '_' + id36
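Several earlier examples pair this classmethod with to36, as in Link._fullname_from_id36(to36(camp.link_id)). Assuming Link._type_prefix is 't' and Link._type_id is 3 (the familiar t3_ prefix), a hypothetical call looks like:

Link._fullname_from_id36(to36(125))  # -> 't3_3h'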
Example 42
    def _key(link):
        revision = getattr(link, 'comment_tree_id', 0)
        if revision:
            return '%s:%s' % (utils.to36(link._id), utils.to36(revision))
        else:
            return utils.to36(link._id)
Example 43
    def add_props(cls, user, wrapped):
        #fetch parent links
        links = Link._byID(set(l.link_id for l in wrapped), True)

        #get srs for comments that don't have them (old comments)
        for cm in wrapped:
            if not hasattr(cm, 'sr_id'):
                cm.sr_id = links[cm.link_id].sr_id

        subreddits = Subreddit._byID(set(cm.sr_id for cm in wrapped),
                                     data=True,
                                     return_dict=False)
        can_reply_srs = set(s._id for s in subreddits if s.can_comment(user))

        min_score = c.user.pref_min_comment_score

        cids = dict((w._id, w) for w in wrapped)

        for item in wrapped:
            item.link = links.get(item.link_id)
            if not hasattr(item, 'subreddit'):
                item.subreddit = item.subreddit_slow
            if hasattr(item, 'parent_id'):
                parent = Comment._byID(item.parent_id, data=True)
                parent_author = Account._byID(parent.author_id, data=True)
                item.parent_author = parent_author

                if not c.full_comment_listing and cids.has_key(item.parent_id):
                    item.parent_permalink = '#' + utils.to36(item.parent_id)
                else:
                    item.parent_permalink = parent.make_anchored_permalink(
                        item.link, item.subreddit)
            else:
                item.parent_permalink = None
                item.parent_author = None

            item.can_reply = (item.sr_id in can_reply_srs)

            # Don't allow users to vote on their own comments
            item.votable = bool(c.user != item.author and not item.retracted)
            if item.votable and c.profilepage:
                # Can only vote on profile page under certain conditions
                item.votable = bool(
                    (c.user.safe_karma > g.karma_to_vote_in_overview)
                    and (g.karma_percentage_to_be_voted >
                         item.author.percent_up()))

            # not deleted on profile pages,
            # deleted if spam and not author or admin
            item.deleted = (not c.profilepage
                            and (item._deleted or
                                 (item._spam and item.author != c.user
                                  and not item.show_spam)))

            # don't collapse for admins, on profile pages, or if deleted
            item.collapsed = (
                (item.score < min_score)
                and not (c.profilepage or item.deleted or c.user_is_admin))

            if not hasattr(item, 'editted'):
                item.editted = False
            #will get updated in builder
            item.num_children = 0
            item.score_fmt = Score.points
            item.permalink = item.make_permalink(item.link, item.subreddit)
            item.can_be_deleted = item.can_delete()
Example 44
    def new(cls, id, title, **properties):
        if not id:
            id = utils.to36(simpleflake.simpleflake())
        event = cls(id, title=title, **properties)
        event._commit()
        return event
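Here to36 compacts a freshly generated ID rather than a database row ID: simpleflake() produces a roughly 64-bit, time-ordered identifier, and base-36 encoding keeps it to at most 13 characters (since 36**13 > 2**64). A hypothetical result:

event_id = utils.to36(simpleflake.simpleflake())
assert len(event_id) <= 13  # holds for any 64-bit value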
Example 45
    def _make_wrapped_tree(self):
        timer = self.timer
        comments = self.comments
        cid_tree = self.cid_tree
        top_level_candidates = self.top_level_candidates
        depth = self.depth
        more_recursions = self.more_recursions
        offset_depth = self.offset_depth
        dont_collapse = self.dont_collapse
        timer.intermediate("waiting")

        if not comments and not top_level_candidates:
            timer.stop()
            return []

        # retrieve num_children for the visible comments
        needs_num_children = [c._id for c in comments] + top_level_candidates
        num_children = get_num_children(needs_num_children, cid_tree)
        timer.intermediate("calc_num_children")

        wrapped = self.wrap_items(comments)
        timer.intermediate("wrap_comments")
        wrapped_by_id = {comment._id: comment for comment in wrapped}

        if self.children:
            # rewrite the parent links to use anchor tags
            for comment_id in self.children:
                if comment_id in wrapped_by_id:
                    item = wrapped_by_id[comment_id]
                    if item.parent_id:
                        item.parent_permalink = '#' + to36(item.parent_id)

        final = []

        # We have some special collapsing rules for the Q&A sort type.
        # However, we want to show everything when we're building a specific
        # set of children (like from "load more" links) or when viewing a
        # comment permalink.
        qa_sort_hiding = ((self.sort.col == '_qa') and not self.children
                          and self.comment is None)
        if qa_sort_hiding:
            special_responder_ids = self.link.responder_ids
        else:
            special_responder_ids = ()

        max_relation_walks = g.max_comment_parent_walk
        for comment in wrapped:
            # skip deleted comments with no children
            if (comment.deleted and not cid_tree.has_key(comment._id)
                    and not self.show_deleted):
                comment.hidden_completely = True
                continue

            comment.num_children = num_children[comment._id]
            comment.edits_visible = self.edits_visible

            parent = wrapped_by_id.get(comment.parent_id)
            if qa_sort_hiding:
                author_is_special = comment.author_id in special_responder_ids
            else:
                author_is_special = False

            # In the Q&A sort type, we want to collapse all comments other than
            # those that are:
            #
            # 1. Top-level comments,
            # 2. Responses from the OP(s),
            # 3. Responded to by the OP(s) (dealt with below),
            # 4. Within one level of an OP reply, or
            # 5. Otherwise normally prevented from collapse (eg distinguished
            #    comments).
            if (qa_sort_hiding and depth[comment._id] != 0 and  # (1)
                    not author_is_special and  # (2)
                    not (parent and parent.author_id in special_responder_ids
                         and feature.is_enabled('qa_show_replies')) and  # (4)
                    not comment.prevent_collapse):  # (5)
                comment.hidden = True

            if comment.collapsed:
                if comment._id in dont_collapse or author_is_special:
                    comment.collapsed = False
                    comment.hidden = False

            if parent:
                if author_is_special:
                    # Un-collapse parents as necessary.  It's a lot easier to
                    # do this here, upwards, than to check through all the
                    # children when we were iterating at the parent.
                    ancestor = parent
                    counter = 0
                    while (ancestor and not getattr(ancestor, 'walked', False)
                           and counter < max_relation_walks):
                        ancestor.hidden = False
                        # In case we haven't processed this comment yet.
                        ancestor.prevent_collapse = True
                        # This allows us to short-circuit when the rest of the
                        # tree has already been uncollapsed.
                        ancestor.walked = True

                        ancestor = wrapped_by_id.get(ancestor.parent_id)
                        counter += 1

        # One more time through to actually add things to the final list.  We
        # couldn't do that the first time because in the Q&A sort we don't know
        # if a comment should be visible until after we've processed all its
        # children.
        for comment in wrapped:
            if getattr(comment, 'hidden_completely', False):
                # Don't add it to the tree, don't put it in "load more", don't
                # acknowledge its existence at all.
                continue

            if getattr(comment, 'hidden', False):
                # Remove it from the list of visible comments so it'll
                # automatically be a candidate for the "load more" links.
                del wrapped_by_id[comment._id]
                # And don't add it to the tree.
                continue

            # add the comment as a child of its parent or to the top level of
            # the tree if it has no parent
            parent = wrapped_by_id.get(comment.parent_id)
            if parent:
                if not hasattr(parent, 'child'):
                    add_child_listing(parent, comment)
                else:
                    parent.child.things.append(comment)
            else:
                final.append(comment)

        for parent_id, more_recursion in more_recursions.iteritems():
            if parent_id not in wrapped_by_id:
                continue

            parent = wrapped_by_id[parent_id]
            add_child_listing(parent, more_recursion)

        timer.intermediate("build_comments")

        if not self.load_more:
            timer.stop()
            return final

        # build MoreChildren for visible comments
        visible_comments = wrapped_by_id.keys()
        for visible_id in visible_comments:
            if visible_id in more_recursions:
                # don't add a MoreChildren if we already have a MoreRecursion
                continue

            children = cid_tree.get(visible_id, ())
            missing_children = [
                child for child in children if child not in visible_comments
            ]
            if missing_children:
                visible_children = (child for child in children
                                    if child in visible_comments)
                visible_count = sum(1 + num_children[child]
                                    for child in visible_children)
                missing_count = num_children[visible_id] - visible_count
                missing_depth = depth.get(visible_id, 0) + 1 - offset_depth

                if missing_depth < self.max_depth:
                    mc = MoreChildren(self.link,
                                      self.sort,
                                      depth=missing_depth,
                                      parent_id=visible_id)
                    mc.children.extend(missing_children)
                    w = Wrapped(mc)
                    w.count = missing_count
                else:
                    mr = MoreRecursion(self.link,
                                       depth=missing_depth,
                                       parent_id=visible_id)
                    w = Wrapped(mr)

                # attach the MoreChildren
                parent = wrapped_by_id[visible_id]
                if hasattr(parent, 'child'):
                    parent.child.things.append(w)
                else:
                    add_child_listing(parent, w)

        # build MoreChildren for missing root level comments
        if top_level_candidates:
            mc = MoreChildren(self.link, self.sort, depth=0, parent_id=None)
            mc.children.extend(top_level_candidates)
            w = Wrapped(mc)
            w.count = sum(1 + num_children[comment]
                          for comment in top_level_candidates)
            final.append(w)

        if isinstance(self.sort, operators.shuffled):
            shuffle(final)

        timer.intermediate("build_morechildren")
        timer.stop()
        return final
Example 46
def sort_comments_key(link_id, sort):
    assert sort.startswith('_')
    return '%s%s' % (to36(link_id), sort)
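Note how these keys line up with the rows yielded in Example 5: each key is the link's base-36 ID plus the sort name, so (assuming plain base-36 encoding) sort_comments_key(12345, '_hot') returns '9ix_hot', matching the link_id36 + '_hot' keys emitted there.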
Example 47
    def add_props(cls, user, wrapped):
        from r2.lib.template_helpers import add_attr
        from r2.lib import promote
        #fetch parent links
        links = Link._byID(set(l.link_id for l in wrapped),
                           data=True,
                           return_dict=True)

        #get srs for comments that don't have them (old comments)
        for cm in wrapped:
            if not hasattr(cm, 'sr_id'):
                cm.sr_id = links[cm.link_id].sr_id

        subreddits = Subreddit._byID(set(cm.sr_id for cm in wrapped),
                                     data=True,
                                     return_dict=False)
        cids = dict((w._id, w) for w in wrapped)
        parent_ids = set(
            cm.parent_id for cm in wrapped
            if getattr(cm, 'parent_id', None) and cm.parent_id not in cids)
        parents = {}
        if parent_ids:
            parents = Comment._byID(parent_ids, data=True)

        can_reply_srs = set(s._id for s in subreddits if s.can_comment(user)) \
                        if c.user_is_loggedin else set()
        can_reply_srs.add(promote.get_promote_srid())

        min_score = user.pref_min_comment_score

        profilepage = c.profilepage
        user_is_admin = c.user_is_admin
        user_is_loggedin = c.user_is_loggedin
        focal_comment = c.focal_comment

        for item in wrapped:
            # for caching:
            item.profilepage = c.profilepage
            item.link = links.get(item.link_id)

            if (item.link._score <= 1 or item.score < 3 or item.link._spam
                    or item._spam or item.author._spam):
                item.nofollow = True
            else:
                item.nofollow = False

            if not hasattr(item, 'subreddit'):
                item.subreddit = item.subreddit_slow
            if item.author_id == item.link.author_id and not item.link._deleted:
                add_attr(item.attribs,
                         'S',
                         link=item.link.make_permalink(item.subreddit))
            if not hasattr(item, 'target'):
                item.target = None
            if item.parent_id:
                if item.parent_id in cids:
                    item.parent_permalink = '#' + utils.to36(item.parent_id)
                else:
                    parent = parents[item.parent_id]
                    item.parent_permalink = parent.make_permalink(
                        item.link, item.subreddit)
            else:
                item.parent_permalink = None

            item.can_reply = c.can_reply or (item.sr_id in can_reply_srs)

            # not deleted on profile pages,
            # deleted if spam and not author or admin
            item.deleted = (
                not profilepage and
                (item._deleted or
                 (item._spam and item.author != user and not item.show_spam)))

            extra_css = ''
            if item.deleted:
                extra_css += "grayed"
                if not user_is_admin:
                    item.author = DeletedUser()
                    item.body = '[deleted]'

            if focal_comment == item._id36:
                extra_css += " border"

            # don't collapse for admins, on profile pages, or if deleted
            item.collapsed = (
                (item.score < min_score)
                and not (profilepage or item.deleted or user_is_admin))

            item.editted = getattr(item, "editted", False)

            #will get updated in builder
            item.num_children = 0
            item.score_fmt = Score.points
            item.permalink = item.make_permalink(item.link, item.subreddit)

            item.is_author = (user == item.author)
            item.is_focal = (focal_comment == item._id36)

            #will seem less horrible when add_props is in pages.py
            from r2.lib.pages import UserText
            item.usertext = UserText(item,
                                     item.body,
                                     editable=item.is_author,
                                     nofollow=item.nofollow,
                                     target=item.target,
                                     extra_css=extra_css)
        # Run this last
        Printable.add_props(user, wrapped)