def _to_fn(cls, id_):
    '''Convert id_ to a fullname (equivalent to "link._fullname", but
    doesn't require an instance of the class)
    '''
    return (cls._type_prefix + r2utils.to36(cls._type_id) + '_' +
            r2utils.to36(id_))
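# --- Illustrative sketch, not part of the original codebase ---
# _to_fn above composes a fullname from a class's _type_prefix, its base-36
# _type_id, and the base-36 id. The helper below is a minimal stand-in for
# r2utils.to36 (the real one lives in r2.lib.utils), shown only to make the
# fullname format concrete; the Link _type_id of 3 in the closing comment is
# an assumption.
def _to36_sketch(n):
    """Return the base-36 string for a non-negative integer."""
    digits = "0123456789abcdefghijklmnopqrstuvwxyz"
    if n == 0:
        return "0"
    out = []
    while n:
        n, r = divmod(n, 36)
        out.append(digits[r])
    return "".join(reversed(out))

# e.g. a Link-like class with _type_prefix "t" and _type_id 3 would yield
# fullnames such as "t3_255s" for id 100000 (100000 in base 36 is "255s").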
def port_cassavotes():
    from r2.models import Vote, Account, Link, Comment
    from r2.models.vote import (CassandraVote, CassandraLinkVote,
                                CassandraCommentVote)
    from r2.lib.db.operators import desc
    from r2.lib.db.tdb_cassandra import CL
    from r2.lib.utils import fetch_things2, to36, progress

    ts = [(Vote.rel(Account, Link), CassandraLinkVote),
          (Vote.rel(Account, Comment), CassandraCommentVote)]

    dataattrs = set(['valid_user', 'valid_thing', 'ip', 'organic'])

    for prel, crel in ts:
        vq = prel._query(sort=desc('_date'),
                         data=True,
                         eager_load=False)
        vq = fetch_things2(vq)
        vq = progress(vq, persec=True)
        for v in vq:
            t1 = to36(v._thing1_id)
            t2 = to36(v._thing2_id)
            cv = crel(thing1_id=t1, thing2_id=t2,
                      date=v._date, name=v._name)
            for dkey, dval in v._t.iteritems():
                if dkey in dataattrs:
                    setattr(cv, dkey, dval)
            cv._commit(write_consistency_level=CL.ONE)
def __init__(self, link, depth, parent_id=None):
    if parent_id is not None:
        id36 = utils.to36(parent_id)
        self.parent_id = parent_id
        self.parent_name = "t%s_%s" % (utils.to36(Comment._type_id), id36)
        self.parent_permalink = link.make_permalink_slow() + id36
    self.link_name = link._fullname
    self.link_id = link._id
    self.depth = depth
    self.children = []
    self.count = 0
def get_recommended(userid, age=2, sort='relevance', num_users=10):
    u = get_users_for_user(userid)[:num_users]
    if not u:
        return []

    voter = Vote.rels[(Account, Link)]
    tables = tdb.get_rel_type_table(voter._type_id)
    votertable = tables[0]
    acct_col = votertable.c.thing1_id
    link_col = votertable.c.thing2_id
    date_col = votertable.c.date
    count = sa.func.count(acct_col)

    linktable = tables[2]
    # dlinktable, linktable = tdb.types_id[Link._type_id].data_table
    link_id_col = linktable.c.thing_id

    query = [sa.or_(*[acct_col == x for x in u]),
             date_col > datetime.now(g.tz) - timedelta(age)]
    cols = [link_col, count]

    if sort == 'new':
        sort = 'date'
    elif sort == 'top':
        sort = 'score'

    if sort and sort != 'relevance':
        query.append(link_id_col == link_col)
        s = tdb.translate_sort(linktable, sort)
        order = [sa.desc(s), sa.desc(link_id_col)]
        cols = [link_id_col, count]
        group_by = [link_id_col, s]
    else:
        order = [sa.desc(count), sa.desc(link_col)]
        group_by = link_col

    # #TODO: wish I could just use query_rules
    # if c.user and c.user.subreddits:
    #     query.append(dlinktable.c.thing_id == linktable.c.thing_id)
    #     q = sa.and_(dlinktable.c.key == 'sr_id',
    #                 sa.or_(*[dlinktable.c.value == x
    #                          for x in c.user.subreddits]))
    #     query.append(q)

    res = sa.select(cols, sa.and_(*query),
                    group_by=group_by,
                    order_by=order).execute()

    prefix = "t%s" % to36(Link._type_id)
    return ["%s_%s" % (prefix, to36(x)) for x, y in res.fetchall()]
def _process(t):
    thing_id = t.thing_id
    id36 = to36(thing_id)
    link_id = t.link_id
    link_id36 = to36(link_id)
    ups, downs, timestamp = t.ups, t.downs, t.timestamp

    yield link_id36 + '_controversy', id36, sorts.controversy(ups, downs)
    yield link_id36 + '_hot', id36, sorts._hot(ups, downs, timestamp)
    yield link_id36 + '_confidence', id36, sorts.confidence(ups, downs)
    yield link_id36 + '_score', id36, sorts.score(ups, downs)
    yield link_id36 + '_date', id36, timestamp
def __init__(self, link, depth, parent_id=None):
    from r2.lib.wrapped import CachedVariable

    if parent_id is not None:
        id36 = utils.to36(parent_id)
        self.parent_id = parent_id
        self.parent_name = "t%s_%s" % (utils.to36(Comment._type_id), id36)
        self.parent_permalink = link.make_permalink_slow() + id36
    self.link_name = link._fullname
    self.link_id = link._id
    self.depth = depth
    self.children = []
    self.count = 0
    self.previous_visits_hex = CachedVariable("previous_visits_hex")
def port_cassasaves(after_id=None, estimate=12489897):
    from r2.models import SaveHide, CassandraSave
    from r2.lib.db.operators import desc
    from r2.lib.db.tdb_cassandra import CL
    from r2.lib.utils import fetch_things2, to36, progress

    q = SaveHide._query(SaveHide.c._name == "save",
                        sort=desc("_date"),
                        data=False,
                        eager_load=False)

    if after_id is not None:
        q._after(SaveHide._byID(after_id))

    for sh in progress(fetch_things2(q), estimate=estimate):
        csh = CassandraSave(thing1_id=to36(sh._thing1_id),
                            thing2_id=to36(sh._thing2_id),
                            date=sh._date)
        csh._commit(write_consistency_level=CL.ONE)
def add_target_fields(self, target):
    if not target:
        return
    from r2.models import Comment, Link, Message

    self.add("target_id", target._id)
    self.add("target_fullname", target._fullname)
    self.add("target_type", target.__class__.__name__.lower())

    # If the target is an Account or Subreddit (or has a "name" attr),
    # add the target_name
    if hasattr(target, "name"):
        self.add("target_name", target.name)

    # Pass in the author of the target for comments, links, & messages
    elif isinstance(target, (Comment, Link, Message)):
        author = target.author_slow
        if target._deleted or author._deleted:
            self.add("target_author_id", 0)
            self.add("target_author_name", "[deleted]")
        else:
            self.add("target_author_id", author._id)
            self.add("target_author_name", author.name)

        if isinstance(target, Link) and not target.is_self:
            self.add("target_url", target.url)
            self.add("target_url_domain", target.link_domain())
        elif isinstance(target, Comment):
            link_fullname = Link._fullname_from_id36(to36(target.link_id))
            self.add("link_id", target.link_id)
            self.add("link_fullname", link_fullname)
def queue_vote(user, thing, dir, ip, organic=False,
               cheater=False, store=True):
    # set the vote in memcached so the UI gets updated immediately
    key = prequeued_vote_key(user, thing)
    g.cache.set(key, '1' if dir is True else '0' if dir is None else '-1')
    # queue the vote to be stored unless told not to
    if store:
        if g.amqp_host:
            if isinstance(thing, Link):
                if thing._id36 in g.live_config["fastlane_links"]:
                    qname = vote_fastlane_q
                else:
                    qname = vote_link_q
            elif isinstance(thing, Comment):
                if utils.to36(thing.link_id) in g.live_config["fastlane_links"]:
                    qname = vote_fastlane_q
                else:
                    qname = vote_comment_q
            else:
                log.warning("%s tried to vote on %r. that's not a link or comment!",
                            user, thing)
                return

            amqp.add_item(qname,
                          pickle.dumps((user._id, thing._fullname,
                                        dir, ip, organic, cheater)))
        else:
            handle_vote(user, thing, dir, ip, organic)
def query(self):
    if c.user_is_sponsor:
        if self.sort == "future_promos":
            return queries.get_all_unapproved_links()
        elif self.sort == "pending_promos":
            return queries.get_all_accepted_links()
        elif self.sort == "unpaid_promos":
            return queries.get_all_unpaid_links()
        elif self.sort == "rejected_promos":
            return queries.get_all_rejected_links()
        elif self.sort == "live_promos" and self.sr:
            return self.live_by_subreddit(self.sr)
        elif self.sort == 'live_promos':
            return queries.get_all_live_links()
        elif self.sort == 'underdelivered':
            q = queries.get_underdelivered_campaigns()
            campaigns = PromoCampaign._by_fullname(list(q), data=True,
                                                   return_dict=False)
            link_ids = [camp.link_id for camp in campaigns]
            return [Link._fullname_from_id36(to36(id)) for id in link_ids]
        elif self.sort == 'reported':
            return queries.get_reported_links(get_promote_srid())
        return queries.get_all_promoted_links()
    else:
        if self.sort == "future_promos":
            return queries.get_unapproved_links(c.user._id)
        elif self.sort == "pending_promos":
            return queries.get_accepted_links(c.user._id)
        elif self.sort == "unpaid_promos":
            return queries.get_unpaid_links(c.user._id)
        elif self.sort == "rejected_promos":
            return queries.get_rejected_links(c.user._id)
        elif self.sort == "live_promos":
            return queries.get_live_links(c.user._id)
        return queries.get_promoted_links(c.user._id)
def cached_query_wrapper(*args):
    # build the row key from the function name and arguments
    assert fn.__name__.startswith("get_")
    row_key_components = [fn.__name__[len('get_'):]]

    if len(args) > 0:
        # we want to accept either a Thing or a thing's ID at this
        # layer, but the query itself should always get just an ID
        if isinstance(args[0], Thing):
            args = list(args)
            args[0] = args[0]._id

        thing_id = to36(args[0])
        row_key_components.append(thing_id)

    row_key_components.extend(str(x) for x in args[1:])
    row_key = '.'.join(row_key_components)

    query = fn(*args)

    if query:
        # sql-backed query
        query_sort = query._sort
        is_precomputed = _is_query_precomputed(query)
    else:
        # pure-cassandra query
        assert sort
        query_sort = sort
        is_precomputed = False

    return CachedQuery(model, row_key, query_sort, filter_fn,
                       is_precomputed)
def sup_json_cached(period, last_time):
    #we need to re-add MIN_PERIOD because we moved back that far with
    #the call to make_last_time
    target_time = last_time + MIN_PERIOD - period

    updates = ''
    #loop backwards adding MIN_PERIOD chunks until last_time is as old
    #as target time
    while last_time >= target_time:
        updates += g.cache.get(cache_key(last_time)) or ''
        last_time -= MIN_PERIOD

    supdates = []
    if updates:
        for u in ifilter(None, updates.split(',')):
            sup_id, time = u.split(':')
            time = int(time)
            if time >= target_time:
                supdates.append([sup_id, to36(time)])

    update_time = datetime.utcnow()
    since_time = datetime.utcfromtimestamp(target_time)
    json = simplejson.dumps({'updated_time': rfc3339_date_str(update_time),
                             'since_time': rfc3339_date_str(since_time),
                             'period': period,
                             'available_periods': period_urls(),
                             'updates': supdates})

    #undo json escaping
    json = json.replace('\/', '/')
    return json
def _get_sr_restriction(sr):
    '''Return a solr-appropriate query string that restricts
    results to only contain results from self.sr
    '''
    bq = []
    if (not sr) or sr == All or isinstance(sr, DefaultSR):
        return None
    elif isinstance(sr, MultiReddit):
        for sr_id in sr.sr_ids:
            bq.append("sr_id:%s" % sr_id)
    elif isinstance(sr, DomainSR):
        bq = ["site:'%s'" % sr.domain]
    elif sr == Friends:
        if not c.user_is_loggedin or not c.user.friends:
            return None
        friend_ids = c.user.friends
        friends = ["author_fullname:'%s'" %
                   Account._fullname_from_id36(r2utils.to36(id_))
                   for id_ in friend_ids]
        bq.extend(friends)
    elif isinstance(sr, ModContribSR):
        for sr_id in sr.sr_ids:
            bq.append("sr_id:%s" % sr_id)
    elif not isinstance(sr, FakeSubreddit):
        bq = ["sr_id:%s" % sr._id]
    return ' OR '.join(bq)
def add_props(cls, user, wrapped):
    #fetch parent links
    links = Link._byID(set(l.link_id for l in wrapped), True)

    #get srs for comments that don't have them (old comments)
    for cm in wrapped:
        if not hasattr(cm, 'sr_id'):
            cm.sr_id = links[cm.link_id].sr_id

    subreddits = Subreddit._byID(set(cm.sr_id for cm in wrapped),
                                 data=True, return_dict=False)
    can_reply_srs = set(s._id for s in subreddits if s.can_comment(user))

    min_score = c.user.pref_min_comment_score

    cids = dict((w._id, w) for w in wrapped)

    for item in wrapped:
        item.link = links.get(item.link_id)
        if not hasattr(item, 'subreddit'):
            item.subreddit = item.subreddit_slow
        if hasattr(item, 'parent_id'):
            parent = Comment._byID(item.parent_id, data=True)
            parent_author = Account._byID(parent.author_id, data=True)
            item.parent_author = parent_author
            if not c.full_comment_listing and cids.has_key(item.parent_id):
                item.parent_permalink = '#' + utils.to36(item.parent_id)
            else:
                item.parent_permalink = parent.make_anchored_permalink(
                    item.link, item.subreddit)
        else:
            item.parent_permalink = None
            item.parent_author = None

        item.can_reply = (item.sr_id in can_reply_srs)

        # Don't allow users to vote on their own comments
        item.votable = bool(c.user != item.author)

        # not deleted on profile pages,
        # deleted if spam and not author or admin
        item.deleted = (not c.profilepage and
                        (item._deleted or
                         (item._spam and
                          item.author != c.user and
                          not item.show_spam)))

        # don't collapse for admins, on profile pages, or if deleted
        item.collapsed = ((item.score < min_score) and
                          not (c.profilepage or
                               item.deleted or
                               c.user_is_admin))

        if not hasattr(item, 'editted'):
            item.editted = False

        #will get updated in builder
        item.num_children = 0
        item.score_fmt = Score.points
        item.permalink = item.make_permalink(item.link, item.subreddit)
def GET_oldinfo(self, article, type, dest, rest=None, comment=''):
    """Legacy: supporting permalink pages from '06,
       and non-search-engine-friendly links"""
    if not (dest in ('comments', 'related', 'details')):
        dest = 'comments'
    if type == 'ancient':
        #this could go in config, but it should never change
        max_link_id = 10000000
        new_id = max_link_id - int(article._id)
        return self.redirect('/info/' + to36(new_id) + '/' + rest)
    if type == 'old':
        new_url = "/%s/%s/%s" % \
                  (dest, article._id36,
                   quote_plus(title_to_url(article.title).encode('utf-8')))
        if not c.default_sr:
            new_url = "/r/%s%s" % (c.site.name, new_url)
        if comment:
            new_url = new_url + "/%s" % comment._id36
        if c.extension:
            new_url = new_url + "/.%s" % c.extension

        new_url = new_url + query_string(request.get)

        # redirect should be smarter and handle extensions, etc.
        return self.redirect(new_url, code=301)
def by_sr(cls, sr_id, create=False):
    try:
        return cls._byID(to36(sr_id))
    except tdb_cassandra.NotFound:
        if create:
            return cls._new(sr_id)
        raise
def cached_query_wrapper(*args):
    # build the row key from the function name and arguments
    assert fn.__name__.startswith("get_")
    row_key_components = [fn.__name__[len('get_'):]]

    if len(args) > 0:
        # we want to accept either a Thing or a thing's ID at this
        # layer, but the query itself should always get just an ID
        if isinstance(args[0], Thing):
            args = list(args)
            args[0] = args[0]._id

        if isinstance(args[0], (int, long)):
            serialized = to36(args[0])
        else:
            serialized = str(args[0])
        row_key_components.append(serialized)

    row_key_components.extend(str(x) for x in args[1:])
    row_key = '.'.join(row_key_components)

    query = fn(*args)

    query_sort = query._sort
    try:
        is_precomputed = query.precomputed
    except AttributeError:
        is_precomputed = _is_query_precomputed(query)

    return CachedQuery(model, row_key, query_sort, filter_fn,
                       is_precomputed)
def _get_sr_restriction(sr):
    """Return a cloudsearch appropriate query string that restricts
    results to only contain results from self.sr
    """
    bq = []
    if (not sr) or sr == All or isinstance(sr, DefaultSR):
        return None
    elif isinstance(sr, MultiReddit):
        bq = ["(or"]
        for sr_id in sr.sr_ids:
            bq.append("sr_id:%s" % sr_id)
        bq.append(")")
    elif isinstance(sr, DomainSR):
        bq = ["site:'%s'" % sr.domain]
    elif sr == Friends:
        if not c.user_is_loggedin or not c.user.friends:
            return None
        bq = ["(or"]
        # The query limit is roughly 8k bytes. Limit to 200 friends to
        # avoid getting too close to that limit
        friend_ids = c.user.friends[:200]
        friends = ["author_fullname:'%s'" %
                   Account._fullname_from_id36(r2utils.to36(id_))
                   for id_ in friend_ids]
        bq.extend(friends)
        bq.append(")")
    elif isinstance(sr, ModContribSR):
        bq = ["(or"]
        for sr_id in sr.sr_ids:
            bq.append("sr_id:%s" % sr_id)
        bq.append(")")
    elif not isinstance(sr, FakeSubreddit):
        bq = ["sr_id:%s" % sr._id]
    return " ".join(bq)
def _restrict_sr(sr):
    '''Return a cloudsearch appropriate query string that restricts
    results to only contain results from self.sr
    '''
    if isinstance(sr, MultiReddit):
        if not sr.sr_ids:
            raise InvalidQuery
        srs = ["sr_id:%s" % sr_id for sr_id in sr.sr_ids]
        return "(or %s)" % ' '.join(srs)
    elif isinstance(sr, DomainSR):
        return "site:'%s'" % sr.domain
    elif isinstance(sr, FriendsSR):
        if not c.user_is_loggedin or not c.user.friends:
            raise InvalidQuery
        # The query limit is roughly 8k bytes. Limit to 200 friends to
        # avoid getting too close to that limit
        friend_ids = c.user.friends[:200]
        friends = ["author_fullname:'%s'" %
                   Account._fullname_from_id36(r2utils.to36(id_))
                   for id_ in friend_ids]
        return "(or %s)" % ' '.join(friends)
    elif not isinstance(sr, FakeSubreddit):
        return "sr_id:%s" % sr._id
    return None
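# Illustrative only (not part of the original module): given the clauses built
# by _restrict_sr above, a MultiReddit with sr_ids [42, 99] would produce the
# cloudsearch boolean query "(or sr_id:42 sr_id:99)", a DomainSR for
# "example.com" would produce "site:'example.com'", and an ordinary subreddit
# with _id 1234 would produce "sr_id:1234". The ids and domain here are made
# up for the example.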
def _comment_page_links(comment_page_data):
    for comment_info in comment_page_data:
        path = u"/r/{0}/comments/{1}/{2}/".format(
            comment_info.subreddit,
            to36(int(comment_info.thing_id)),
            urllib.quote(title_to_url(comment_info.title).encode("utf-8")),
        )
        yield _absolute_url(path)
def get_house_link_names(cls):
    now = promote.promo_datetime_now()
    pws = PromotionWeights.get_campaigns(now)
    campaign_ids = {pw.promo_idx for pw in pws}
    q = PromoCampaign._query(PromoCampaign.c._id.in_(campaign_ids),
                             PromoCampaign.c.priority_name == 'house',
                             data=True)
    return [Link._fullname_from_id36(to36(camp.link_id)) for camp in q]
def get_house_link_names(cls):
    now = promote.promo_datetime_now()
    campaign_ids = PromotionWeights.get_campaign_ids(now)
    q = PromoCampaign._query(PromoCampaign.c._id.in_(campaign_ids),
                             PromoCampaign.c.priority_name == 'house',
                             data=True)
    link_names = {Link._fullname_from_id36(to36(camp.link_id))
                  for camp in q}
    return sorted(link_names, reverse=True)
def _desired_things(items, types):
    '''Pull fullnames that represent instances of 'types' out of items'''
    # This will fail if the _type_id for some things is >36
    fullnames = set()
    type_ids = [r2utils.to36(type_._type_id) for type_ in types]
    for item in items:
        if item['fullname'][1] in type_ids:
            fullnames.add(item['fullname'])
    return fullnames
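# Illustrative only (not part of the original module): for a fullname such as
# "t3_1a2b" (a hypothetical example), item['fullname'][1] is the single
# base-36 type character "3", which _desired_things compares against the
# to36'd _type_ids. That single-character lookup is why the comment above
# warns it breaks once a _type_id needs more than one base-36 digit.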
def to_serializable(self, sr, author, current_user=None):
    return {
        'id': to36(self.id),
        'date': self.date.isoformat(),
        'author': to_serializable_author(author, sr, current_user,
                                         self.is_author_hidden),
        'body': safemarkdown(self.body),
        'isInternal': self.is_internal
    }
def fullname_regex(thing_cls=None, multiple=False):
    pattern = "[%s%s]" % (Relation._type_prefix, Thing._type_prefix)
    if thing_cls:
        pattern += utils.to36(thing_cls._type_id)
    else:
        pattern += r"[0-9a-z]+"
    pattern += r"_[0-9a-z]+"
    if multiple:
        pattern = r"(%s *,? *)+" % pattern
    return re.compile(r"\A" + pattern + r"\Z")
def new_comment(comment, inbox_rels):
    author = Account._byID(comment.author_id)
    job = [
        get_comments(author, "new", "all"),
        get_comments(author, "top", "all"),
        get_comments(author, "controversial", "all"),
    ]

    sr = Subreddit._byID(comment.sr_id)

    with CachedQueryMutator() as m:
        if comment._deleted:
            job_key = "delete_items"
            job.append(get_sr_comments(sr))
            m.delete(get_all_comments(), [comment])
        else:
            job_key = "insert_items"
            if comment._spam:
                m.insert(get_spam_comments(sr), [comment])
            if was_spam_filtered(comment):
                m.insert(get_spam_filtered_comments(sr), [comment])

            if utils.to36(comment.link_id) in g.live_config["fastlane_links"]:
                amqp.add_item("new_fastlane_comment", comment._fullname)
            else:
                amqp.add_item("new_comment", comment._fullname)

            if not g.amqp_host:
                add_comment_tree([comment])

        job_dict = {job_key: comment}
        add_queries(job, **job_dict)

        # note that get_all_comments() is updated by the amqp process
        # r2.lib.db.queries.run_new_comments (to minimise lock contention)

        if inbox_rels:
            for inbox_rel in tup(inbox_rels):
                inbox_owner = inbox_rel._thing1
                if inbox_rel._name == "inbox":
                    query = get_inbox_comments(inbox_owner)
                elif inbox_rel._name == "selfreply":
                    query = get_inbox_selfreply(inbox_owner)
                else:
                    raise ValueError("wtf is " + inbox_rel._name)

                if not comment._deleted:
                    m.insert(query, [inbox_rel])
                else:
                    m.delete(query, [inbox_rel])

                set_unread(comment, inbox_owner,
                           unread=not comment._deleted, mutator=m)
def add_target_fields(self, target):
    if not target:
        return
    from r2.models import Comment, Link, Message

    self.add("target_id", target._id)
    self.add("target_fullname", target._fullname)
    self.add("target_age_seconds", target._age.total_seconds())

    target_type = target.__class__.__name__.lower()
    if target_type == "link" and target.is_self:
        target_type = "self"
    self.add("target_type", target_type)

    # If the target is an Account or Subreddit (or has a "name" attr),
    # add the target_name
    if hasattr(target, "name"):
        self.add("target_name", target.name)

    # Add info about the target's author for comments, links, & messages
    if isinstance(target, (Comment, Link, Message)):
        author = target.author_slow
        if target._deleted or author._deleted:
            self.add("target_author_id", 0)
            self.add("target_author_name", "[deleted]")
        else:
            self.add("target_author_id", author._id)
            self.add("target_author_name", author.name)

    # Add info about the url being linked to for link posts
    if isinstance(target, Link):
        self.add("target_title", target.title)
        if not target.is_self:
            self.add("target_url", target.url)
            self.add("target_url_domain", target.link_domain())

    # Add info about the link being commented on for comments
    if isinstance(target, Comment):
        link_fullname = Link._fullname_from_id36(to36(target.link_id))
        self.add("link_id", target.link_id)
        self.add("link_fullname", link_fullname)

    # Add info about when target was originally posted for links/comments
    if isinstance(target, (Comment, Link)):
        self.add("target_created_ts", _datetime_to_millis(target._date))

    hooks.get_hook("eventcollector.add_target_fields").call(
        event=self, target=target,
    )
def migrate_srmember_subscribers(after_user_id=39566712):
    columns = {}
    rowkey = None
    proc_time = time.time()

    for i, rel in enumerate(get_srmembers(after_user_id)):
        sr_id = rel._thing1_id
        user_id = rel._thing2_id
        action_date = rel._date
        new_rowkey = to36(user_id)

        if new_rowkey != rowkey and columns:
            SubscribedSubredditsByAccount._cf.insert(
                rowkey, columns, timestamp=1434403336829573)
            columns = {}

        columns[to36(sr_id)] = action_date
        rowkey = new_rowkey

        if i % 1000 == 0:
            new_proc_time = time.time()
            duration = new_proc_time - proc_time
            print "%s (%.3f): %s - %s" % (i, duration, user_id, action_date)
            proc_time = new_proc_time
def ordered_msg_and_action_ids(self):
    order_elements = self.messages + self.mod_actions
    ordered_elements = sorted(order_elements, key=lambda x: x.date)

    ordered_id_array = []
    for element in ordered_elements:
        key = 'messages'
        if isinstance(element, ModmailConversationAction):
            key = 'modActions'
        ordered_id_array.append({
            'key': key,
            'id': to36(element.id)
        })

    return ordered_id_array
def get_scheduled(date, sr_name=""): all_promotions = PromotionWeights.get_campaigns(date) fp_promotions = [p for p in all_promotions if p.sr_name == sr_name] campaigns = PromoCampaign._byID([i.promo_idx for i in fp_promotions], return_dict=False, data=True) links = Link._by_fullname([i.thing_name for i in fp_promotions], return_dict=False, data=True) links = {l._id: l for l in links} kept = [] for camp in campaigns: if camp.trans_id == 0: continue link = links[camp.link_id] if link._spam or not promote.is_accepted(link): continue kept.append(camp._id) return [ ("%s_%s" % (PC_PREFIX, to36(p.promo_idx)), p.thing_name, p.bid) for p in fp_promotions if p.promo_idx in kept ]
def _fullname(self):
    return "t%s_%s" % (utils.to36(Comment._type_id), self._id36)
def _id36(self):
    return utils.to36(self.children[0]) if self.children else '_'
def make_message_fullname(mid):
    return "t%s_%s" % (utils.to36(Message._type_id), utils.to36(mid))
def compare_pageviews(daysago=0, verbose=False):
    """Evaluate past delivery for promoted links.

    Check frontpage promoted links for their actual delivery compared to
    what would be expected based on their bids.

    """
    date = (datetime.datetime.now(g.tz) -
            datetime.timedelta(days=daysago)).date()
    scheduled = get_scheduled(date)
    pageviews_by_camp = get_campaign_pageviews(date)
    campaigns = filter_campaigns(date, pageviews_by_camp.keys())

    actual = []
    for camp in campaigns:
        link_fullname = '%s_%s' % (LINK_PREFIX, to36(camp.link_id))
        i = (camp._fullname, link_fullname, pageviews_by_camp[camp._fullname])
        actual.append(i)

    scheduled_links = {link for camp, link, pageviews in scheduled}
    actual_links = {link for camp, link, pageviews in actual}

    bid_by_link = defaultdict(int)
    total_bid = 0
    pageviews_by_link = defaultdict(int)
    total_pageviews = 0

    for camp, link, bid in scheduled:
        if link not in actual_links:
            if verbose:
                print '%s not found in actual, skipping' % link
            continue
        bid_by_link[link] += bid
        total_bid += bid

    for camp, link, pageviews in actual:
        # not ideal: links shouldn't be here
        if link not in scheduled_links:
            if verbose:
                print '%s not found in schedule, skipping' % link
            continue
        pageviews_by_link[link] += pageviews
        total_pageviews += pageviews

    errors = []
    for link, bid in sorted(bid_by_link.items(), key=lambda t: t[1]):
        pageviews = pageviews_by_link.get(link, 0)
        expected = bid / total_bid
        realized = float(pageviews) / total_pageviews
        difference = (realized - expected) / expected
        errors.append(difference)

        if verbose:
            print '%s - %s - %s - %s' % (link, expected, realized, difference)

    mean_error, min_error, max_error, stdev_error = error_statistics(errors)

    print '%s' % date
    print('error %s max, %s min, %s +- %s' %
          (max_error, min_error, mean_error, stdev_error))
    print 'total bid %s' % total_bid
    print('pageviews for promoted links targeted only to frontpage %s' %
          total_pageviews)
    print('frontpage pageviews for all promoted links %s' %
          sum(pageviews_by_camp.values()))
    print 'promoted eligible pageviews %s' % get_frontpage_pageviews(date)
def _key(link):
    return utils.to36(link._id)
def _new(cls, sr_id, flair_type=USER_FLAIR):
    idx = cls(_id=to36(sr_id), sr_id=sr_id)
    idx._commit()
    return idx
def get_recommended_content(prefs, src, settings):
    """Get a mix of content from subreddits recommended for someone with
    the given preferences (likes and dislikes.)

    Returns a list of ExploreItems.

    """
    # numbers chosen empirically to give enough results for explore page
    num_liked = 10  # how many liked srs to use when generating the recs
    num_recs = 20  # how many recommended srs to ask for
    num_discovery = 2  # how many discovery-related subreddits to mix in
    num_rising = 4  # how many rising links to mix in
    num_items = 20  # total items to return
    rising_items = discovery_items = comment_items = hot_items = []

    # make a list of srs that shouldn't be recommended
    default_srid36s = [to36(srid) for srid in Subreddit.default_subreddits()]
    omit_srid36s = list(prefs.likes.union(prefs.dislikes,
                                          prefs.recent_views,
                                          default_srid36s))
    # pick random subset of the user's liked srs
    liked_srid36s = (random_sample(prefs.likes, num_liked)
                     if settings.personalized else [])
    # pick random subset of discovery srs
    candidates = set(get_discovery_srid36s()).difference(prefs.dislikes)
    discovery_srid36s = random_sample(candidates, num_discovery)
    # multiget subreddits
    to_fetch = liked_srid36s + discovery_srid36s
    srs = Subreddit._byID36(to_fetch)
    liked_srs = [srs[sr_id36] for sr_id36 in liked_srid36s]
    discovery_srs = [srs[sr_id36] for sr_id36 in discovery_srid36s]

    if settings.personalized:
        # generate recs from srs we know the user likes
        recommended_srs = get_recommendations(liked_srs,
                                              count=num_recs,
                                              to_omit=omit_srid36s,
                                              source=src,
                                              match_set=False,
                                              over18=settings.nsfw)
        random.shuffle(recommended_srs)
        # split list of recommended srs in half
        midpoint = len(recommended_srs) / 2
        srs_slice1 = recommended_srs[:midpoint]
        srs_slice2 = recommended_srs[midpoint:]
        # get hot links plus top comments from one half
        comment_items = get_comment_items(srs_slice1, src)
        # just get hot links from the other half
        hot_items = get_hot_items(srs_slice2, TYPE_HOT, src)

    if settings.discovery:
        # get links from subreddits dedicated to discovery
        discovery_items = get_hot_items(discovery_srs, TYPE_DISCOVERY, 'disc')

    if settings.rising:
        # grab some (non-personalized) rising items
        omit_sr_ids = set(int(id36, 36) for id36 in omit_srid36s)
        rising_items = get_rising_items(omit_sr_ids, count=num_rising)

    # combine all items and randomize order to get a mix of types
    all_recs = list(chain(rising_items,
                          comment_items,
                          discovery_items,
                          hot_items))
    random.shuffle(all_recs)

    # make sure subreddits aren't repeated
    seen_srs = set()
    recs = []
    for r in all_recs:
        if not settings.nsfw and r.is_over18():
            continue
        if not is_visible(r.sr):  # could happen in rising items
            continue
        if r.sr._id not in seen_srs:
            recs.append(r)
            seen_srs.add(r.sr._id)
        if len(recs) >= num_items:
            break
    return recs
def bulk_upsert(links):
    updates = filter(lambda user: getattr(user, "dfp_creative_id", False),
                     links)
    inserts = filter(lambda user: not getattr(user, "dfp_creative_id", False),
                     links)

    dfp_creative_service = DfpService("CreativeService")
    creatives = []

    if updates:
        existing_creatives = {}
        statement = dfp.FilterStatement(
            "WHERE id IN (%s)" %
            ", ".join([str(link.dfp_creative_id) for link in updates]))

        while True:
            response = dfp_creative_service.execute(
                "getCreativesByStatement",
                statement.ToStatement(),
            )

            if "results" in response:
                for creative in response["results"]:
                    existing_creatives[creative.id] = creative
                statement.offset += dfp.SUGGESTED_PAGE_LIMIT
            else:
                break

        updated = dfp_creative_service.execute("updateCreatives", [
            _link_to_creative(
                link=link,
                existing=existing_creatives[link.dfp_creative_id],
            ) for link in updates
        ])

        creatives += updated

    if inserts:
        authors = Account._byID([link.author_id for link in inserts],
                                return_dict=False)
        advertisers = advertisers_service.bulk_upsert(authors)
        advertisers_by_author = {
            advertiser.externalId: advertiser
            for advertiser in advertisers
        }

        inserted = dfp_creative_service.execute("createCreatives", [
            _link_to_creative(
                link=link,
                advertiser=advertisers_by_author[
                    Account._fullname_from_id36(to36(link.author_id))],
            ) for link in inserts
        ])

        creatives += inserted

    creatives_by_fullname = {
        utils.get_template_variable(creative, "link_id"): creative
        for creative in creatives
    }

    for link in links:
        creative = creatives_by_fullname[link._fullname]
        link.dfp_creative_id = creative.id
        link._commit()

    return creatives
def _id36(self):
    return to36(self._id)
def _fullname_prefix(cls):
    return cls._type_prefix + to36(cls._type_id)
def _fullname_from_id36(cls, id36):
    return cls._type_prefix + to36(cls._type_id) + '_' + id36
def _key(link):
    revision = getattr(link, 'comment_tree_id', 0)
    if revision:
        return '%s:%s' % (utils.to36(link._id), utils.to36(revision))
    else:
        return utils.to36(link._id)
def add_props(cls, user, wrapped):
    #fetch parent links
    links = Link._byID(set(l.link_id for l in wrapped), True)

    #get srs for comments that don't have them (old comments)
    for cm in wrapped:
        if not hasattr(cm, 'sr_id'):
            cm.sr_id = links[cm.link_id].sr_id

    subreddits = Subreddit._byID(set(cm.sr_id for cm in wrapped),
                                 data=True, return_dict=False)
    can_reply_srs = set(s._id for s in subreddits if s.can_comment(user))

    min_score = c.user.pref_min_comment_score

    cids = dict((w._id, w) for w in wrapped)

    for item in wrapped:
        item.link = links.get(item.link_id)
        if not hasattr(item, 'subreddit'):
            item.subreddit = item.subreddit_slow
        if hasattr(item, 'parent_id'):
            parent = Comment._byID(item.parent_id, data=True)
            parent_author = Account._byID(parent.author_id, data=True)
            item.parent_author = parent_author
            if not c.full_comment_listing and cids.has_key(item.parent_id):
                item.parent_permalink = '#' + utils.to36(item.parent_id)
            else:
                item.parent_permalink = parent.make_anchored_permalink(
                    item.link, item.subreddit)
        else:
            item.parent_permalink = None
            item.parent_author = None

        item.can_reply = (item.sr_id in can_reply_srs)

        # Don't allow users to vote on their own comments
        item.votable = bool(c.user != item.author and not item.retracted)

        if item.votable and c.profilepage:
            # Can only vote on profile page under certain conditions
            item.votable = bool(
                (c.user.safe_karma > g.karma_to_vote_in_overview) and
                (g.karma_percentage_to_be_voted > item.author.percent_up()))

        # not deleted on profile pages,
        # deleted if spam and not author or admin
        item.deleted = (not c.profilepage and
                        (item._deleted or
                         (item._spam and
                          item.author != c.user and
                          not item.show_spam)))

        # don't collapse for admins, on profile pages, or if deleted
        item.collapsed = ((item.score < min_score) and
                          not (c.profilepage or
                               item.deleted or
                               c.user_is_admin))

        if not hasattr(item, 'editted'):
            item.editted = False

        #will get updated in builder
        item.num_children = 0
        item.score_fmt = Score.points
        item.permalink = item.make_permalink(item.link, item.subreddit)
        item.can_be_deleted = item.can_delete()
def new(cls, id, title, **properties):
    if not id:
        id = utils.to36(simpleflake.simpleflake())
    event = cls(id, title=title, **properties)
    event._commit()
    return event
def _make_wrapped_tree(self):
    timer = self.timer
    comments = self.comments
    cid_tree = self.cid_tree
    top_level_candidates = self.top_level_candidates
    depth = self.depth
    more_recursions = self.more_recursions
    offset_depth = self.offset_depth
    dont_collapse = self.dont_collapse
    timer.intermediate("waiting")

    if not comments and not top_level_candidates:
        timer.stop()
        return []

    # retrieve num_children for the visible comments
    needs_num_children = [c._id for c in comments] + top_level_candidates
    num_children = get_num_children(needs_num_children, cid_tree)
    timer.intermediate("calc_num_children")

    wrapped = self.wrap_items(comments)
    timer.intermediate("wrap_comments")
    wrapped_by_id = {comment._id: comment for comment in wrapped}

    if self.children:
        # rewrite the parent links to use anchor tags
        for comment_id in self.children:
            if comment_id in wrapped_by_id:
                item = wrapped_by_id[comment_id]
                if item.parent_id:
                    item.parent_permalink = '#' + to36(item.parent_id)

    final = []

    # We have some special collapsing rules for the Q&A sort type.
    # However, we want to show everything when we're building a specific
    # set of children (like from "load more" links) or when viewing a
    # comment permalink.
    qa_sort_hiding = ((self.sort.col == '_qa') and not self.children and
                      self.comment is None)
    if qa_sort_hiding:
        special_responder_ids = self.link.responder_ids
    else:
        special_responder_ids = ()

    max_relation_walks = g.max_comment_parent_walk
    for comment in wrapped:
        # skip deleted comments with no children
        if (comment.deleted and not cid_tree.has_key(comment._id) and
                not self.show_deleted):
            comment.hidden_completely = True
            continue

        comment.num_children = num_children[comment._id]
        comment.edits_visible = self.edits_visible

        parent = wrapped_by_id.get(comment.parent_id)
        if qa_sort_hiding:
            author_is_special = comment.author_id in special_responder_ids
        else:
            author_is_special = False

        # In the Q&A sort type, we want to collapse all comments other than
        # those that are:
        #
        # 1. Top-level comments,
        # 2. Responses from the OP(s),
        # 3. Responded to by the OP(s) (dealt with below),
        # 4. Within one level of an OP reply, or
        # 5. Otherwise normally prevented from collapse (eg distinguished
        #    comments).
        if (qa_sort_hiding and
                depth[comment._id] != 0 and  # (1)
                not author_is_special and  # (2)
                not (parent and
                     parent.author_id in special_responder_ids and
                     feature.is_enabled('qa_show_replies')) and  # (4)
                not comment.prevent_collapse):  # (5)
            comment.hidden = True

        if comment.collapsed:
            if comment._id in dont_collapse or author_is_special:
                comment.collapsed = False
                comment.hidden = False

        if parent:
            if author_is_special:
                # Un-collapse parents as necessary. It's a lot easier to
                # do this here, upwards, than to check through all the
                # children when we were iterating at the parent.
                ancestor = parent
                counter = 0
                while (ancestor and
                        not getattr(ancestor, 'walked', False) and
                        counter < max_relation_walks):
                    ancestor.hidden = False
                    # In case we haven't processed this comment yet.
                    ancestor.prevent_collapse = True
                    # This allows us to short-circuit when the rest of the
                    # tree has already been uncollapsed.
                    ancestor.walked = True

                    ancestor = wrapped_by_id.get(ancestor.parent_id)
                    counter += 1

    # One more time through to actually add things to the final list. We
    # couldn't do that the first time because in the Q&A sort we don't know
    # if a comment should be visible until after we've processed all its
    # children.
    for comment in wrapped:
        if getattr(comment, 'hidden_completely', False):
            # Don't add it to the tree, don't put it in "load more", don't
            # acknowledge its existence at all.
            continue

        if getattr(comment, 'hidden', False):
            # Remove it from the list of visible comments so it'll
            # automatically be a candidate for the "load more" links.
            del wrapped_by_id[comment._id]
            # And don't add it to the tree.
            continue

        # add the comment as a child of its parent or to the top level of
        # the tree if it has no parent
        parent = wrapped_by_id.get(comment.parent_id)
        if parent:
            if not hasattr(parent, 'child'):
                add_child_listing(parent, comment)
            else:
                parent.child.things.append(comment)
        else:
            final.append(comment)

    for parent_id, more_recursion in more_recursions.iteritems():
        if parent_id not in wrapped_by_id:
            continue

        parent = wrapped_by_id[parent_id]
        add_child_listing(parent, more_recursion)

    timer.intermediate("build_comments")

    if not self.load_more:
        timer.stop()
        return final

    # build MoreChildren for visible comments
    visible_comments = wrapped_by_id.keys()
    for visible_id in visible_comments:
        if visible_id in more_recursions:
            # don't add a MoreChildren if we already have a MoreRecursion
            continue

        children = cid_tree.get(visible_id, ())
        missing_children = [child for child in children
                            if child not in visible_comments]

        if missing_children:
            visible_children = (child for child in children
                                if child in visible_comments)
            visible_count = sum(1 + num_children[child]
                                for child in visible_children)
            missing_count = num_children[visible_id] - visible_count
            missing_depth = depth.get(visible_id, 0) + 1 - offset_depth

            if missing_depth < self.max_depth:
                mc = MoreChildren(self.link, self.sort,
                                  depth=missing_depth, parent_id=visible_id)
                mc.children.extend(missing_children)
                w = Wrapped(mc)
                w.count = missing_count
            else:
                mr = MoreRecursion(self.link, depth=missing_depth,
                                   parent_id=visible_id)
                w = Wrapped(mr)

            # attach the MoreChildren
            parent = wrapped_by_id[visible_id]
            if hasattr(parent, 'child'):
                parent.child.things.append(w)
            else:
                add_child_listing(parent, w)

    # build MoreChildren for missing root level comments
    if top_level_candidates:
        mc = MoreChildren(self.link, self.sort, depth=0, parent_id=None)
        mc.children.extend(top_level_candidates)
        w = Wrapped(mc)
        w.count = sum(1 + num_children[comment]
                      for comment in top_level_candidates)
        final.append(w)

    if isinstance(self.sort, operators.shuffled):
        shuffle(final)

    timer.intermediate("build_morechildren")
    timer.stop()
    return final
def sort_comments_key(link_id, sort):
    assert sort.startswith('_')
    return '%s%s' % (to36(link_id), sort)
def add_props(cls, user, wrapped):
    from r2.lib.template_helpers import add_attr
    from r2.lib import promote

    #fetch parent links
    links = Link._byID(set(l.link_id for l in wrapped), data=True,
                       return_dict=True)

    #get srs for comments that don't have them (old comments)
    for cm in wrapped:
        if not hasattr(cm, 'sr_id'):
            cm.sr_id = links[cm.link_id].sr_id

    subreddits = Subreddit._byID(set(cm.sr_id for cm in wrapped),
                                 data=True, return_dict=False)
    cids = dict((w._id, w) for w in wrapped)
    parent_ids = set(cm.parent_id for cm in wrapped
                     if getattr(cm, 'parent_id', None)
                     and cm.parent_id not in cids)
    parents = {}
    if parent_ids:
        parents = Comment._byID(parent_ids, data=True)

    can_reply_srs = set(s._id for s in subreddits if s.can_comment(user)) \
                    if c.user_is_loggedin else set()
    can_reply_srs.add(promote.get_promote_srid())

    min_score = user.pref_min_comment_score

    profilepage = c.profilepage
    user_is_admin = c.user_is_admin
    user_is_loggedin = c.user_is_loggedin
    focal_comment = c.focal_comment

    for item in wrapped:
        # for caching:
        item.profilepage = c.profilepage
        item.link = links.get(item.link_id)

        if (item.link._score <= 1 or item.score < 3 or
                item.link._spam or item._spam or item.author._spam):
            item.nofollow = True
        else:
            item.nofollow = False

        if not hasattr(item, 'subreddit'):
            item.subreddit = item.subreddit_slow
        if item.author_id == item.link.author_id and not item.link._deleted:
            add_attr(item.attribs, 'S',
                     link=item.link.make_permalink(item.subreddit))
        if not hasattr(item, 'target'):
            item.target = None
        if item.parent_id:
            if item.parent_id in cids:
                item.parent_permalink = '#' + utils.to36(item.parent_id)
            else:
                parent = parents[item.parent_id]
                item.parent_permalink = parent.make_permalink(
                    item.link, item.subreddit)
        else:
            item.parent_permalink = None

        item.can_reply = c.can_reply or (item.sr_id in can_reply_srs)

        # not deleted on profile pages,
        # deleted if spam and not author or admin
        item.deleted = (not profilepage and
                        (item._deleted or
                         (item._spam and
                          item.author != user and
                          not item.show_spam)))

        extra_css = ''
        if item.deleted:
            extra_css += "grayed"
            if not user_is_admin:
                item.author = DeletedUser()
                item.body = '[deleted]'

        if focal_comment == item._id36:
            extra_css += " border"

        # don't collapse for admins, on profile pages, or if deleted
        item.collapsed = ((item.score < min_score) and
                          not (profilepage or
                               item.deleted or
                               user_is_admin))

        item.editted = getattr(item, "editted", False)

        #will get updated in builder
        item.num_children = 0
        item.score_fmt = Score.points
        item.permalink = item.make_permalink(item.link, item.subreddit)

        item.is_author = (user == item.author)
        item.is_focal = (focal_comment == item._id36)

        #will seem less horrible when add_props is in pages.py
        from r2.lib.pages import UserText
        item.usertext = UserText(item, item.body,
                                 editable=item.is_author,
                                 nofollow=item.nofollow,
                                 target=item.target,
                                 extra_css=extra_css)

    # Run this last
    Printable.add_props(user, wrapped)