def add_comments(comments):
    """Add comments to the CommentTree and update scores."""
    from r2.models.builder import write_comment_orders

    link_ids = [comment.link_id for comment in tup(comments)]
    links = Link._byID(link_ids, data=True)

    comments = tup(comments)
    comments_by_link_id = defaultdict(list)
    for comment in comments:
        comments_by_link_id[comment.link_id].append(comment)

    for link_id, link_comments in comments_by_link_id.iteritems():
        link = links[link_id]

        timer = g.stats.get_timer(
            'comment_tree.add.%s' % link.comment_tree_version)
        timer.start()

        # write scores before CommentTree because the scores must exist for
        # all comments in the tree
        for sort in ("_controversy", "_confidence", "_score"):
            scores_by_comment = {
                comment._id36: getattr(comment, sort)
                for comment in link_comments
            }
            CommentScoresByLink.set_scores(link, sort, scores_by_comment)

        scores_by_comment = _get_qa_comment_scores(link, link_comments)
        CommentScoresByLink.set_scores(link, "_qa", scores_by_comment)
        timer.intermediate('scores')

        with CommentTree.mutation_context(link, timeout=180):
            try:
                timer.intermediate('lock')
                comment_tree = CommentTree.by_link(link, timer)
                timer.intermediate('get')
                comment_tree.add_comments(link_comments)
                timer.intermediate('update')
            except InconsistentCommentTreeError:
                # failed to add a comment to the CommentTree because its
                # parent is missing from the tree. this comment will be lost
                # forever unless a rebuild is performed.
                comment_ids = [comment._id for comment in link_comments]
                g.log.error(
                    "comment_tree_inconsistent: %s %s" % (link, comment_ids))
                g.stats.simple_event('comment_tree_inconsistent')
                return

            # do this under the same lock because we want to ensure we are
            # using the same version of the CommentTree as was just written
            write_comment_orders(link)
            timer.intermediate('write_order')

        timer.stop()
def add_comments(comments):
    """Add comments to the CommentTree and update scores."""
    from r2.models.builder import write_comment_orders

    link_ids = [comment.link_id for comment in tup(comments)]
    links = Link._byID(link_ids, data=True)

    comments = tup(comments)
    comments_by_link_id = defaultdict(list)
    for comment in comments:
        comments_by_link_id[comment.link_id].append(comment)

    for link_id, link_comments in comments_by_link_id.iteritems():
        link = links[link_id]

        timer = g.stats.get_timer("comment_tree.add.1")
        timer.start()

        # write scores before CommentTree because the scores must exist for
        # all comments in the tree
        for sort in ("_controversy", "_confidence", "_score"):
            scores_by_comment = {
                comment._id36: getattr(comment, sort)
                for comment in link_comments
            }
            CommentScoresByLink.set_scores(link, sort, scores_by_comment)

        scores_by_comment = _get_qa_comment_scores(link, link_comments)
        CommentScoresByLink.set_scores(link, "_qa", scores_by_comment)
        timer.intermediate("scores")

        CommentTree.add_comments(link, link_comments)
        timer.intermediate("update")

        write_comment_orders(link)
        timer.intermediate("write_order")

        timer.stop()
def add_comments(comments):
    """Add comments to the CommentTree and update scores."""
    from r2.models.builder import write_comment_orders

    link_ids = [comment.link_id for comment in tup(comments)]
    links_by_id = Link._byID(link_ids)

    comments = tup(comments)
    comments_by_link_id = defaultdict(list)
    for comment in comments:
        comments_by_link_id[comment.link_id].append(comment)

    for link_id, link_comments in comments_by_link_id.iteritems():
        link = links_by_id[link_id]

        timer = g.stats.get_timer('comment_tree.add.1')
        timer.start()

        write_comment_scores(link, link_comments)
        timer.intermediate('scores')

        CommentTree.add_comments(link, link_comments)
        timer.intermediate('update')

        write_comment_orders(link)
        timer.intermediate('write_order')

        timer.stop()
def add_comments(comments):
    links = Link._byID([com.link_id for com in tup(comments)], data=True)

    comments = tup(comments)
    link_map = {}
    for com in comments:
        link_map.setdefault(com.link_id, []).append(com)

    for link_id, coms in link_map.iteritems():
        link = links[link_id]

        timer = g.stats.get_timer(
            'comment_tree.add.%s' % link.comment_tree_version)
        timer.start()

        try:
            with CommentTree.mutation_context(link):
                timer.intermediate('lock')
                cache = get_comment_tree(link, timer=timer)
                timer.intermediate('get')
                cache.add_comments(coms)
                timer.intermediate('update')
        except:
            g.log.exception(
                'add_comments_nolock failed for link %s, recomputing tree',
                link_id)

            # calculate it from scratch
            get_comment_tree(link, _update=True, timer=timer)

        timer.stop()
        update_comment_votes(coms)
def _fast_query(cls, thing1_ids, thing2_ids, **kw):
    """Find all of the relations of this class between all of the members
    of thing1_ids and thing2_ids"""
    thing1_ids, thing1s_is_single = tup(thing1_ids, True)
    thing2_ids, thing2s_is_single = tup(thing2_ids, True)

    # permute all of the pairs
    ids = set(('%s_%s' % (x, y))
              for x in thing1_ids
              for y in thing2_ids)

    rels = cls._byID(ids).values()

    # does anybody actually use us this way?
    if thing1s_is_single and thing2s_is_single:
        if rels:
            assert len(rels) == 1
            return rels[0]
        else:
            raise NotFound("<%s '%s_%s'>" % (cls.__name__,
                                             thing1_ids[0],
                                             thing2_ids[0]))

    return dict(((rel.thing1_id, rel.thing2_id), rel)
                for rel in rels)
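# The snippets in this file lean heavily on the `tup` helper from r2.lib.utils.
# For reference, a minimal sketch of how it behaves at these call sites (the
# real implementation may differ in details): it coerces a bare value or a
# sequence into a list, and can optionally report whether the caller passed a
# bare value.
def tup(item, ret_is_single=False):
    """Sketch of r2.lib.utils.tup as used by the surrounding call sites."""
    # note: in Python 2 a str has no __iter__, so strings count as single values
    if hasattr(item, '__iter__'):
        items, is_single = list(item), False
    else:
        items, is_single = [item], True
    return (items, is_single) if ret_is_single else items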
def get_actions(cls, srs, mod=None, action=None, after=None, reverse=False,
                count=1000):
    """
    Get a ColumnQuery that yields ModAction objects according to
    specified criteria.
    """
    if after and isinstance(after, basestring):
        after = cls._byID(UUID(after))
    elif after and isinstance(after, UUID):
        after = cls._byID(after)

    if not isinstance(after, cls):
        after = None

    srs = tup(srs)

    if not mod and not action:
        rowkeys = [sr._id36 for sr in srs]
        q = ModActionBySR.query(rowkeys, after=after, reverse=reverse,
                                count=count)
    elif mod:
        mods = tup(mod)
        key = '%s_%s' if not action else '%%s_%%s_%s' % action
        rowkeys = itertools.product([sr._id36 for sr in srs],
                                    [mod._id36 for mod in mods])
        rowkeys = [key % (sr, mod) for sr, mod in rowkeys]
        view = ModActionBySRActionMod if action else ModActionBySRMod
        q = view.query(rowkeys, after=after, reverse=reverse, count=count)
    else:
        rowkeys = ['%s_%s' % (sr._id36, action) for sr in srs]
        q = ModActionBySRAction.query(rowkeys, after=after, reverse=reverse,
                                      count=count)

    return q
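# A hypothetical call site for get_actions(); the subreddit name is made up
# and ModAction is assumed to be the class exposing this classmethod.
sr = Subreddit._by_name('example_subreddit')
q = ModAction.get_actions(sr, action='removelink', count=25)
for mod_action in q:
    print mod_action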
def _somethinged(cls, rel, user, link, name):
    return rel._fast_query(tup(user), tup(link), name=name,
                           thing_data=True, timestamp_optimize=True)
def add_queries(queries, insert_items=None, delete_items=None,
                foreground=False):
    """Adds multiple queries to the query queue. If insert_items or
    delete_items is specified, the query may not need to be recomputed
    against the database."""
    if not g.write_query_queue:
        return

    for q in queries:
        if insert_items and q.can_insert():
            log.debug("Inserting %s into query %s" % (insert_items, q))
            if foreground:
                q.insert(insert_items)
            else:
                worker.do(q.insert, insert_items)
        elif delete_items and q.can_delete():
            log.debug("Deleting %s from query %s" % (delete_items, q))
            if foreground:
                q.delete(delete_items)
            else:
                worker.do(q.delete, delete_items)
        else:
            raise Exception("Cannot update query %r!" % (q,))

    # dual-write any queries that are being migrated to the new query cache
    with CachedQueryMutator() as m:
        new_queries = [getattr(q, 'new_query') for q in queries
                       if hasattr(q, 'new_query')]

        if insert_items:
            for query in new_queries:
                m.insert(query, tup(insert_items))

        if delete_items:
            for query in new_queries:
                m.delete(query, tup(delete_items))
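# A hypothetical call site for add_queries(): push a newly approved link into
# its cached listings instead of recomputing them. The get_links() query
# constructor and its arguments are assumptions about the surrounding module;
# link_id is illustrative.
link = Link._byID(link_id, data=True)
affected_queries = [get_links(link.subreddit_slow, 'new', 'all'),
                    get_links(link.subreddit_slow, 'hot', 'all')]
add_queries(affected_queries, insert_items=link)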
def get_recommendations(srs, count=10, source=SRC_MULTIREDDITS, to_omit=None,
                        match_set=True, over18=False):
    """Return subreddits recommended if you like the given subreddits.

    Args:
    - srs is one Subreddit object or a list of Subreddits
    - count is total number of results to return
    - source is a prefix telling which set of recommendations to use
    - to_omit is a single or list of subreddit id36s that should not be
      included. (Useful for omitting recs that were already rejected.)
    - match_set=True will return recs that are similar to each other, useful
      for matching the "theme" of the original set
    - over18 content is filtered unless over18=True or one of the original
      srs is over18

    """
    srs = tup(srs)
    to_omit = tup(to_omit) if to_omit else []

    # fetch more recs than requested because some might get filtered out
    rec_id36s = SRRecommendation.for_srs([sr._id36 for sr in srs],
                                         to_omit,
                                         count * 2,
                                         source,
                                         match_set=match_set)

    # always check for private subreddits at runtime since type might change
    rec_srs = Subreddit._byID36(rec_id36s, return_dict=False)
    filtered = [sr for sr in rec_srs if is_visible(sr)]

    # don't recommend adult srs unless one of the originals was over_18
    if not over18 and not any(sr.over_18 for sr in srs):
        filtered = [sr for sr in filtered if not sr.over_18]

    return filtered[:count]
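# A hedged usage sketch for get_recommendations(); the subreddit names are
# made up, and SRC_MULTIREDDITS comes from this module's defaults.
seeds = Subreddit._by_name(['aww', 'pics']).values()
dismissed = [sr._id36 for sr in Subreddit._by_name(['funny']).values()]
recs = get_recommendations(seeds, count=5, to_omit=dismissed)
print [sr.name for sr in recs]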
def add_comments(comments):
    links = Link._byID([com.link_id for com in tup(comments)], data=True)

    comments = tup(comments)
    link_map = {}
    for com in comments:
        link_map.setdefault(com.link_id, []).append(com)

    for link_id, coms in link_map.iteritems():
        link = links[link_id]

        add_comments = [comment for comment in coms if not comment._deleted]
        delete_comments = (comment for comment in coms if comment._deleted)

        timer = g.stats.get_timer(
            'comment_tree.add.%s' % link.comment_tree_version)
        timer.start()

        try:
            with CommentTree.mutation_context(link, timeout=30):
                timer.intermediate('lock')
                cache = get_comment_tree(link, timer=timer)
                timer.intermediate('get')

                if add_comments:
                    cache.add_comments(add_comments)

                for comment in delete_comments:
                    cache.delete_comment(comment, link)

                timer.intermediate('update')
        except InconsistentCommentTreeError:
            comment_ids = [comment._id for comment in coms]
            g.log.exception(
                'add_comments_nolock failed for link %s %s, recomputing',
                link_id, comment_ids)

            rebuild_comment_tree(link, timer=timer)

        timer.stop()
        update_comment_votes(coms)
def _fast_query(cls, thing1_ids, thing2_ids, properties=None, **kw):
    """Find all of the relations of this class between all of the members
    of thing1_ids and thing2_ids"""
    thing1_ids, thing1s_is_single = tup(thing1_ids, True)
    thing2_ids, thing2s_is_single = tup(thing2_ids, True)

    if not thing1_ids or not thing2_ids:
        # nothing to permute
        return {}

    if properties is not None:
        properties = set(properties)

        # all relations must load these properties, even if unrequested
        properties.add("thing1_id")
        properties.add("thing2_id")

    # permute all of the pairs
    ids = set(cls._rowkey(x, y)
              for x in thing1_ids
              for y in thing2_ids)

    rels = cls._byID(ids, properties=properties).values()

    if thing1s_is_single and thing2s_is_single:
        if rels:
            assert len(rels) == 1
            return rels[0]
        else:
            raise NotFound("<%s %r>" %
                           (cls.__name__,
                            cls._rowkey(thing1_ids[0], thing2_ids[0])))

    return dict(((rel.thing1_id, rel.thing2_id), rel)
                for rel in rels)
def get_recommendations(srs, count=10, source=SRC_MULTIREDDITS, to_omit=None):
    """Return subreddits recommended if you like the given subreddits.

    Args:
    - srs is one Subreddit object or a list of Subreddits
    - count is total number of results to return
    - source is a prefix telling which set of recommendations to use
    - to_omit is one Subreddit object or a list of Subreddits that should not
      be included. (Useful for omitting recs that were already rejected.)

    """
    srs = tup(srs)
    to_omit = tup(to_omit) if to_omit else []

    # fetch more recs than requested because some might get filtered out
    rec_id36s = SRRecommendation.for_srs([sr._id36 for sr in srs],
                                         [o._id36 for o in to_omit],
                                         count * 2,
                                         source)

    # always check for private subreddits at runtime since type might change
    rec_srs = Subreddit._byID36(rec_id36s, return_dict=False)
    filtered = [sr for sr in rec_srs if sr.type != "private"]

    # don't recommend adult srs unless one of the originals was over_18
    if not any(sr.over_18 for sr in srs):
        filtered = [sr for sr in filtered if not sr.over_18]

    return filtered[:count]
def add_comments(comments):
    links = Link._byID([com.link_id for com in tup(comments)], data=True)

    comments = tup(comments)
    link_map = {}
    for com in comments:
        link_map.setdefault(com.link_id, []).append(com)

    for link_id, coms in link_map.iteritems():
        link = links[link_id]

        add_comments = [comment for comment in coms if not comment._deleted]
        delete_comments = (comment for comment in coms if comment._deleted)

        timer = g.stats.get_timer(
            "comment_tree.add.%s" % link.comment_tree_version)
        timer.start()

        try:
            with CommentTree.mutation_context(link):
                timer.intermediate("lock")
                cache = get_comment_tree(link, timer=timer)
                timer.intermediate("get")

                if add_comments:
                    cache.add_comments(add_comments)

                for comment in delete_comments:
                    cache.delete_comment(comment, link)

                timer.intermediate("update")
        except:
            g.log.exception(
                "add_comments_nolock failed for link %s, recomputing tree",
                link_id)

            # calculate it from scratch
            get_comment_tree(link, _update=True, timer=timer)

        timer.stop()
        update_comment_votes(coms)
def get_actions(cls, srs, mod=None, action=None, after=None, reverse=False,
                count=1000):
    """
    Get a ColumnQuery that yields ModAction objects according to
    specified criteria.
    """
    if after and isinstance(after, basestring):
        after = cls._byID(UUID(after))
    elif after and isinstance(after, UUID):
        after = cls._byID(after)

    if not isinstance(after, cls):
        after = None

    srs = tup(srs)

    if not mod and not action:
        rowkeys = [sr._id36 for sr in srs]
        q = ModActionBySR.query(rowkeys, after=after, reverse=reverse,
                                count=count)
    elif mod and not action:
        mods = tup(mod)
        rowkeys = itertools.product([sr._id36 for sr in srs],
                                    [mod._id36 for mod in mods])
        rowkeys = ['%s_%s' % (sr, mod) for sr, mod in rowkeys]
        q = ModActionBySRMod.query(rowkeys, after=after, reverse=reverse,
                                   count=count)
    elif not mod and action:
        rowkeys = ['%s_%s' % (sr._id36, action) for sr in srs]
        q = ModActionBySRAction.query(rowkeys, after=after, reverse=reverse,
                                      count=count)
    else:
        raise NotImplementedError("Can't query by both mod and action")

    return q
def _fast_query(cls, sub, obj, name, data=True, eager_load=True,
                thing_data=False, timestamp_optimize=False):
    # divide into types
    def type_dict(items):
        types = {}
        for i in items:
            types.setdefault(i.__class__, []).append(i)
        return types

    sub_dict = type_dict(tup(sub))
    obj_dict = type_dict(tup(obj))

    # for each pair of types, see if we have a query to send
    res = {}
    for types, rel in cls.rels.iteritems():
        t1, t2 = types
        if sub_dict.has_key(t1) and obj_dict.has_key(t2):
            res.update(
                rel._fast_query(
                    sub_dict[t1],
                    obj_dict[t2],
                    name,
                    data=data,
                    eager_load=eager_load,
                    thing_data=thing_data,
                    timestamp_optimize=timestamp_optimize,
                )
            )

    return res
def _fast_query(cls, thing1s, thing2s, name, data=True, eager_load=True,
                thing_data=False):
    """looks up all the relationships between thing1_ids and thing2_ids
    and caches them"""
    prefix = thing_prefix(cls.__name__)

    thing1_dict = dict((t._id, t) for t in tup(thing1s))
    thing2_dict = dict((t._id, t) for t in tup(thing2s))

    thing1_ids = thing1_dict.keys()
    thing2_ids = thing2_dict.keys()

    name = tup(name)

    # permute all of the pairs
    pairs = set((x, y, n)
                for x in thing1_ids
                for y in thing2_ids
                for n in name)

    def lookup_rel_ids(pairs):
        rel_ids = {}

        t1_ids = set()
        t2_ids = set()
        names = set()
        for t1, t2, name in pairs:
            t1_ids.add(t1)
            t2_ids.add(t2)
            names.add(name)

        if t1_ids and t2_ids and names:
            q = cls._query(
                cls.c._thing1_id == t1_ids,
                cls.c._thing2_id == t2_ids,
                cls.c._name == names)
        else:
            q = []

        for rel in q:
            rel_ids[(rel._thing1_id, rel._thing2_id, rel._name)] = rel._id

        for p in pairs:
            if p not in rel_ids:
                rel_ids[p] = None

        return rel_ids

    # get the relation ids from the cache or query the db
    res = sgm(cls._cache, pairs, lookup_rel_ids, prefix)

    # get the relation objects
    rel_ids = {rel_id for rel_id in res.itervalues() if rel_id is not None}
    rels = cls._byID_rel(rel_ids, data=data, eager_load=eager_load,
                         thing_data=thing_data)

    res_obj = {}
    for (thing1_id, thing2_id, name), rel_id in res.iteritems():
        pair = (thing1_dict[thing1_id], thing2_dict[thing2_id], name)
        rel = rels[rel_id] if rel_id is not None else None
        res_obj[pair] = rel

    return res_obj
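# A hypothetical call: check which of two links a user has already saved.
# SaveHide is assumed to be a Relation subclass exposing _fast_query as above;
# the returned dict maps (thing1, thing2, name) tuples to the relation or None.
saves = SaveHide._fast_query(user, [link1, link2], name='save')
saved_links = [thing2 for (thing1, thing2, name), rel in saves.iteritems()
               if rel is not None]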
def remove_tag(self, tag_name, name='tag'):
    """Removes a tag from the link. The tag is not deleted, just the
    relationship between the link and the tag"""
    try:
        tag = Tag._by_name(tag_name)
    except NotFound:
        return False

    tags = LinkTag._fast_query(tup(self), tup(tag), name=name)
    link_tag = tags[(self, tag, name)]
    if link_tag:
        link_tag._delete()

    return link_tag
def run(self, url, sr=None):
    if sr is None and not isinstance(c.site, FakeSubreddit):
        sr = c.site
    elif sr:
        try:
            sr = Subreddit._by_name(sr)
        except NotFound:
            c.errors.add(errors.SUBREDDIT_NOEXIST)
            sr = None
    else:
        sr = None

    if not url:
        return self.error(errors.NO_URL)
    url = utils.sanitize_url(url)
    if url == 'self':
        return url
    elif url:
        try:
            l = Link._by_url(url, sr)
            self.error(errors.ALREADY_SUB)
            return utils.tup(l)
        except NotFound:
            return url
    return self.error(errors.BAD_URL)
def validate_list(self, nodes, validators_by_type, ignored_types=None):
    for node in nodes:
        if node.type == "error":
            yield ValidationError(node.source_line,
                                  "SYNTAX_ERROR",
                                  {"message": node.message})
            continue
        elif node.type == "literal":
            if node.value == ";":
                # if we're seeing a semicolon as a literal, it's in a place
                # that doesn't fit naturally in the syntax.
                # Safari 5 will treat this as two color properties:
                #   color: calc(;color:red;);
                message = "semicolons are not allowed in this context"
                yield ValidationError(node.source_line,
                                      "SYNTAX_ERROR",
                                      {"message": message})
                continue

        validator = validators_by_type.get(node.type)
        if validator:
            for error in tup(validator(node)):
                if error:
                    yield error
        else:
            if not ignored_types or node.type not in ignored_types:
                yield ValidationError(node.source_line,
                                      "UNEXPECTED_TOKEN",
                                      {"token": node.type})
def __iter__(self, yield_column_names=False):
    retrieved = 0
    column_start = self.column_start

    while retrieved < self._limit:
        try:
            column_count = min(self._chunk_size, self._limit - retrieved)
            if column_start:
                column_count += 1   # cassandra includes column_start

            r = self.cls._cf.multiget(
                self.rowkeys,
                column_start=column_start,
                column_finish=self.column_finish,
                column_count=column_count,
                column_reversed=self.column_reversed)

            # multiget returns OrderedDict {rowkey: {column_name: column_value}}
            # combine into single OrderedDict of {column_name: column_value}
            nrows = len(r.keys())
            if nrows == 0:
                return
            elif nrows == 1:
                columns = r.values()[0]
            else:
                r_combined = {}
                for d in r.values():
                    r_combined.update(d)
                columns = OrderedDict(
                    sorted(r_combined.items(),
                           key=lambda t: self.sort_key(t[0]),
                           reverse=self.column_reversed))
        except NotFoundException:
            return

        retrieved += self._chunk_size

        if column_start:
            try:
                del columns[column_start]
            except KeyError:
                columns.popitem(last=True)  # remove extra column

        if not columns:
            return

        # Convert to list of columns
        l_columns = [{col_name: columns[col_name]} for col_name in columns]

        column_start = l_columns[-1].keys()[0]
        objs = self.column_to_obj(l_columns)

        if yield_column_names:
            column_names = [column.keys()[0] for column in l_columns]
            if len(column_names) == 1:
                ret = ((column_names[0], objs),)
            else:
                ret = zip(column_names, objs)
        else:
            ret = objs

        ret, is_single = tup(ret, ret_is_single=True)

        for r in ret:
            yield r
def sa_op(op):
    #if BooleanOp
    if isinstance(op, operators.or_):
        return sa.or_(*[sa_op(o) for o in op.ops])
    elif isinstance(op, operators.and_):
        return sa.and_(*[sa_op(o) for o in op.ops])

    #else, assume op is an instance of op
    if isinstance(op, operators.eq):
        fn = lambda x, y: x == y
    elif isinstance(op, operators.ne):
        fn = lambda x, y: x != y
    elif isinstance(op, operators.gt):
        fn = lambda x, y: x > y
    elif isinstance(op, operators.lt):
        fn = lambda x, y: x < y
    elif isinstance(op, operators.gte):
        fn = lambda x, y: x >= y
    elif isinstance(op, operators.lte):
        fn = lambda x, y: x <= y

    rval = tup(op.rval)

    if not rval:
        return '2+2=5'
    else:
        return sa.or_(*[fn(op.lval, v) for v in rval])
def for_srs(cls, srid36, to_omit, count, source, match_set=True):
    # It's usually better to use get_recommendations() than to call this
    # function directly because it does privacy filtering.

    srid36s = tup(srid36)
    to_omit = set(to_omit)
    to_omit.update(srid36s)  # don't show the originals

    rowkeys = ['%s.%s' % (source, srid36) for srid36 in srid36s]

    # fetch multiple sets of recommendations, one for each input srid36
    d = sgm(g.cache, rowkeys, SRRecommendation._byID, prefix='srr.')
    rows = d.values()

    if match_set:
        sorted_recs = SRRecommendation._merge_and_sort_by_count(rows)
        # heuristic: if input set is large, rec should match more than one
        min_count = math.floor(.1 * len(srid36s))
        sorted_recs = (rec[0] for rec in sorted_recs if rec[1] > min_count)
    else:
        sorted_recs = SRRecommendation._merge_roundrobin(rows)

    # remove duplicates and ids listed in to_omit
    filtered = []
    for r in sorted_recs:
        if r not in to_omit:
            filtered.append(r)
            to_omit.add(r)

    return filtered[:count]
def unspam(self, things, unbanner=None, train_spam=True, insert=True):
    from r2.lib.db import queries

    things = tup(things)

    # We want to make unban-all moderately efficient, so when
    # mass-unbanning, we're going to skip the code below on links that
    # are already not banned. However, when someone manually clicks
    # "approve" on an unbanned link, and there's just one, we do want
    # to run the code below. That way, the little green checkmark will
    # have the right mouseover details, the reports will be cleared, etc.
    if len(things) > 1:
        things = [x for x in things if x._spam]

    Report.accept(things, False)

    for t in things:
        ban_info = copy(getattr(t, 'ban_info', {}))
        ban_info['unbanned_at'] = datetime.now(g.tz)
        if unbanner:
            ban_info['unbanner'] = unbanner
        if ban_info.get('reset_used', None) == None:
            ban_info['reset_used'] = False
        else:
            ban_info['reset_used'] = True
        t.ban_info = ban_info

        t._spam = False
        t._commit()

    self.author_spammer(things, False)
    self.set_last_sr_ban(things)

    queries.unban(things, insert)
def __init__(self, name, i18n_message, msg_params, field=None, code=None):
    self.name = name
    self.i18n_message = i18n_message
    self.msg_params = msg_params
    # list of fields in the original form that caused the error
    self.fields = tup(field) if field else []
    self.code = code
def _byID(cls, ids):
    ids, is_single = tup(ids, True)

    if not len(ids):
        if is_single:
            raise InvariantException("whastis?")
        else:
            return {}

    # all keys must be strings or directly convertable to strings
    assert all(isinstance(_id, basestring) and str(_id) for _id in ids)

    def lookup(l_ids):
        rows = cls.cf.multiget(l_ids, column_count=max_column_count)

        l_ret = {}
        for t_id, row in rows.iteritems():
            t = cls._from_serialized_columns(t_id, row)
            l_ret[t._id] = t

        return l_ret

    ret = cache.sgm(thing_cache, ids, lookup, prefix=cls._cache_prefix())

    if is_single and not ret:
        raise NotFound("<%s %r>" % (cls.__name__, ids[0]))
    elif is_single:
        assert len(ret) == 1
        return ret.values()[0]

    return ret
def compute_message_trees(messages):
    from r2.models import Message

    roots = set()
    threads = {}
    mdict = {}
    messages = sorted(messages, key=lambda m: m._date, reverse=True)

    for m in messages:
        if not m._loaded:
            m._load()
        mdict[m._id] = m
        if m.first_message:
            roots.add(m.first_message)
            threads.setdefault(m.first_message, set()).add(m._id)
        else:
            roots.add(m._id)

    # load any top-level messages which are not in the original list
    missing = [m for m in roots if m not in mdict]
    if missing:
        mdict.update(Message._byID(tup(missing),
                                   return_dict=True, data=True))

    # sort threads in chrono order
    for k in threads:
        threads[k] = list(sorted(threads[k]))

    tree = [(root, threads.get(root, [])) for root in roots]
    tree.sort(key=tree_sort_fn, reverse=True)

    return tree
def _byID(cls, ids, data=False, return_dict=True, extra_props=None,
          stale=False, ignore_missing=False):
    ids, single = tup(ids, True)
    prefix = thing_prefix(cls.__name__)

    if not all(x <= tdb.MAX_THING_ID for x in ids):
        raise NotFound('huge thing_id in %r' % ids)

    def count_found(ret, still_need):
        cls._cache.stats.cache_report(
            hits=len(ret), misses=len(still_need),
            cache_name='sgm.%s' % cls.__name__)

    if not cls._cache.stats:
        count_found = None

    def items_db(ids):
        items = cls._get_item(cls._type_id, ids)
        for i in items.keys():
            items[i] = cls._build(i, items[i])
        return items

    bases = sgm(cls._cache, ids, items_db, prefix, stale=stale,
                found_fn=count_found)

    # Check to see if we found everything we asked for
    missing = []
    for i in ids:
        if i not in bases:
            missing.append(i)
        elif bases[i] and bases[i]._id != i:
            g.log.error("thing.py: Doppleganger on byID: %s got %s for %s" %
                        (cls.__name__, bases[i]._id, i))
            bases[i] = items_db([i]).values()[0]
            bases[i]._cache_myself()

    if missing and not ignore_missing:
        raise NotFound, '%s %s' % (cls.__name__, missing)

    for i in missing:
        ids.remove(i)

    if data:
        need = []
        for v in bases.itervalues():
            if not v._loaded:
                need.append(v)
        if need:
            cls._load_multi(need)

    if extra_props:
        for _id, props in extra_props.iteritems():
            for k, v in props.iteritems():
                bases[_id].__setattr__(k, v, False)

    if single:
        return bases[ids[0]] if ids else None
    elif return_dict:
        return bases
    else:
        return filter(None, (bases.get(i) for i in ids))
def add_to_queue(self, user, emails, from_name, fr_addr, kind,
                 date=None, ip=None, body="", reply_to="", thing=None):
    s = self.queue_table
    hashes = []

    if not date:
        date = datetime.datetime.now(g.tz)
    if not ip:
        ip = getattr(request, "ip", "127.0.0.1")

    for email in tup(emails):
        uid = user._id if user else 0
        tid = thing._fullname if thing else ""
        key = hashlib.sha1(str((email, from_name, uid, tid, ip, kind, body,
                                datetime.datetime.now(g.tz)))).hexdigest()
        s.insert().values({s.c.to_addr: email,
                           s.c.account_id: uid,
                           s.c.from_name: from_name,
                           s.c.fr_addr: fr_addr,
                           s.c.reply_to: reply_to,
                           s.c.fullname: tid,
                           s.c.ip: ip,
                           s.c.kind: kind,
                           s.c.body: body,
                           s.c.date: date,
                           s.c.msg_hash: key}).execute()
        hashes.append(key)

    return hashes
def spam(self, things, auto, moderator_banned, banner, date=None, **kw):
    from r2.lib.db import queries

    things = [x for x in tup(things) if not x._spam]

    Report.accept(things, True)
    for t in things:
        t._spam = True
        ban_info = copy(getattr(t, 'ban_info', {}))
        ban_info.update(auto=auto,
                        moderator_banned=moderator_banned,
                        banned_at=date or datetime.now(g.tz),
                        **kw)

        if isinstance(banner, dict):
            ban_info['banner'] = banner[t._fullname]
        else:
            ban_info['banner'] = banner

        t.ban_info = ban_info
        t._commit()
        changed(t)

    if not auto:
        self.author_spammer(things, True)
        self.set_last_sr_ban(things)

    queries.ban(things)
def run(self, url, sr=None):
    if sr is None and not isinstance(c.site, FakeSubreddit):
        sr = c.site
    elif sr:
        try:
            sr = Subreddit._by_name(str(sr))
        except (NotFound, UnicodeEncodeError):
            self.set_error(errors.SUBREDDIT_NOEXIST)
            sr = None
    else:
        sr = None

    if not url:
        return self.error(errors.NO_URL)
    url = utils.sanitize_url(url)
    if not url:
        return self.error(errors.BAD_URL)

    if url == 'self':
        if self.allow_self:
            return url
    elif not self.lookup:
        return url
    elif url:
        try:
            l = Link._by_url(url, sr)
            self.error(errors.ALREADY_SUB)
            return utils.tup(l)
        except NotFound:
            return url

    return self.error(errors.BAD_URL)
def add(self, error_name, msg_params={}, field=None):
    msg = error_list[error_name]
    for field_name in tup(field):
        e = Error(error_name, msg, msg_params, field=field_name)
        self.errors[(error_name, field_name)] = e
def get_available_pageviews(targets, start, end, location=None, datestr=False,
                            ignore=None, platform='all'):
    """
    Return the available pageviews by date for the targets and location.

    Available pageviews depends on all equal and higher level locations:
    A location is: subreddit > country > metro

    e.g. if a campaign is targeting /r/funny in USA/Boston we need to check
    that there's enough inventory in:
    * /r/funny (all campaigns targeting /r/funny regardless of location)
    * /r/funny + USA (all campaigns targeting /r/funny and USA with or
      without metro level targeting)
    * /r/funny + USA + Boston (all campaigns targeting /r/funny and USA and
      Boston)
    The available inventory is the smallest of these values.

    """
    # assemble levels of location targeting, None means untargeted
    locations = [None]
    if location:
        locations.append(location)

        if location.metro:
            locations.append(Location(country=location.country))

    # get all the campaigns directly and indirectly involved in our target
    targets, is_single = tup(targets, ret_is_single=True)
    target_srs = list(chain.from_iterable(
        target.subreddits_slow for target in targets))
    all_campaigns = find_campaigns(target_srs, start, end, ignore)

    # get predicted pageviews for each subreddit and location
    all_sr_names = set(sr.name for sr in target_srs)
    all_sr_names |= set(chain.from_iterable(
        campaign.target.subreddit_names for campaign in all_campaigns
    ))
    all_srs = Subreddit._by_name(all_sr_names).values()
    pageviews_dict = {location: get_predicted_pageviews(all_srs, location)
                      for location in locations}

    # determine booked impressions by target and location for each day
    dates = set(get_date_range(start, end))
    booked_dict = {}
    for date in dates:
        booked_dict[date] = {}
        for location in locations:
            booked_dict[date][location] = defaultdict(int)

    for campaign in all_campaigns:
        camp_dates = set(get_date_range(campaign.start_date,
                                        campaign.end_date))
        sr_names = tuple(sorted(campaign.target.subreddit_names))
        daily_impressions = campaign.impressions / campaign.ndays

        for location in locations:
            if location and not location.contains(campaign.location):
                # campaign's location is less specific than location
                continue

            for date in camp_dates.intersection(dates):
                booked_dict[date][location][sr_names] += daily_impressions

    # calculate inventory for each target and location on each date
    datekey = lambda dt: dt.strftime('%m/%d/%Y') if datestr else dt

    ret = {}
    for target in targets:
        name = make_target_name(target)
        subreddit_names = target.subreddit_names
        ret[name] = {}
        for date in dates:
            pageviews_by_location = {}
            for location in locations:
                # calculate available impressions for each location
                booked_by_target = booked_dict[date][location]
                pageviews_by_sr_name = pageviews_dict[location]
                pageviews_by_location[location] = get_maximized_pageviews(
                    subreddit_names, booked_by_target, pageviews_by_sr_name)
            # available pageviews is the minimum from all locations
            min_pageviews = min(pageviews_by_location.values())

            if PERCENT_MOBILE != 0:
                mobile_pageviews = min_pageviews * (float(PERCENT_MOBILE) / 100)
                if platform == 'mobile':
                    min_pageviews = mobile_pageviews
                if platform == 'desktop':
                    min_pageviews = min_pageviews - mobile_pageviews

            ret[name][datekey(date)] = max(0, min_pageviews)

    if is_single:
        name = make_target_name(targets[0])
        return ret[name]
    else:
        return ret
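# A small worked example of the "smallest across location levels" rule from
# the docstring above, with made-up numbers for a campaign targeting
# /r/funny + USA + Boston.
available_by_level = {
    'subreddit': 120000,    # /r/funny, any location
    'country': 80000,       # /r/funny + USA
    'metro': 95000,         # /r/funny + USA + Boston
}
# Every level must have room, so the sellable inventory is the minimum.
print min(available_by_level.values())      # -> 80000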
def insert_table_rows(self, rows, index=-1):
    new = self.__getattr__("insert_table_rows")
    return new([row.render() for row in tup(rows)], index)
def _by_name(cls, names, stale=False, _update=False):
    '''
    Usages:
    1. Subreddit._by_name('funny') # single sr name
    Searches for a single subreddit. Returns a single Subreddit object or
    raises NotFound if the subreddit doesn't exist.
    2. Subreddit._by_name(['aww','iama']) # list of sr names
    Searches for a list of subreddits. Returns a dict mapping srnames to
    Subreddit objects. Items that were not found are omitted from the dict.
    If no items are found, an empty dict is returned.
    '''
    #lower name here so there is only one cache
    names, single = tup(names, True)

    to_fetch = {}
    ret = {}

    for name in names:
        lname = name.lower()

        if lname in cls._specials:
            ret[name] = cls._specials[lname]
        elif len(lname) > Subreddit.MAX_SRNAME_LENGTH:
            g.log.debug(
                "Subreddit._by_name() ignoring invalid srname (too long): %s",
                lname)
        else:
            to_fetch[lname] = name

    if to_fetch:
        def _fetch(lnames):
            q = cls._query(lower(cls.c.name) == lnames,
                           cls.c._spam == (True, False),
                           limit=len(lnames),
                           data=True)
            try:
                srs = list(q)
            except UnicodeEncodeError:
                print "Error looking up SRs %r" % (lnames,)
                raise

            return dict((sr.name.lower(), sr._id)
                        for sr in srs)

        srs = {}
        srids = sgm(g.cache, to_fetch.keys(), _fetch,
                    prefix='subreddit.byname', stale=stale)
        if srids:
            srs = cls._byID(srids.values(),
                            data=True,
                            return_dict=False,
                            stale=stale)

        for sr in srs:
            ret[to_fetch[sr.name.lower()]] = sr

    if ret and single:
        return ret.values()[0]
    elif not ret and single:
        raise NotFound, 'Subreddit %s' % name
    else:
        return ret
def __lshift__(self, routing_keys):
    """Register bindings from routing keys to this queue."""
    routing_keys = tup(routing_keys)
    for routing_key in routing_keys:
        self._bind(routing_key)
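# Because the operator form reads unusually, a hypothetical binding showing
# how it is used; the queue object and routing keys are illustrative.
scraper_q << ('new_link', 'new_subreddit')   # calls self._bind() once per key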
def init_query(self):
    names = list(tup(self.query))

    after = self.after._fullname if self.after else None

    self.names = self._get_after(names, after, self.reverse)
def find_data(type_id, get_cols, sort, limit, constraints):
    t_table, d_table = get_thing_table(type_id)
    constraints = deepcopy(constraints)

    used_first = False
    s = None
    need_join = False
    have_data_rule = False

    first_alias = d_table.alias()
    s = sa.select([first_alias.c.thing_id.label('thing_id')])  # , distinct=True

    for op in operators.op_iter(constraints):
        key = op.lval_name
        vals = tup(op.rval)

        if key == '_id':
            op.lval = first_alias.c.thing_id
        elif key.startswith('_'):
            need_join = True
            op.lval = translate_sort(t_table, key[1:], op.lval)
            op.rval = translate_thing_value(op.rval)
        else:
            have_data_rule = True
            id_col = None
            if not used_first:
                alias = first_alias
                used_first = True
            else:
                alias = d_table.alias()
                id_col = first_alias.c.thing_id

            if id_col:
                s.append_whereclause(id_col == alias.c.thing_id)

            s.append_column(alias.c.value.label(key))
            s.append_whereclause(alias.c.key == key)

            #add the substring constraint if no other functions are there
            translate_data_value(alias, op)

    for op in constraints:
        s.append_whereclause(sa_op(op))

    if not have_data_rule:
        raise Exception('Data queries must have at least one data rule.')

    #TODO in order to sort by data columns, this is going to need to be smarter
    if sort:
        need_join = True
        s, cols = add_sort(sort, {'_': t_table}, s)

    if need_join:
        s.append_whereclause(first_alias.c.thing_id == t_table.c.thing_id)

    if limit:
        s = s.limit(limit)

    r = s.execute()
    return Results(r, lambda (row): row if get_cols else row.thing_id)
def update_candidates(self, candidates, sorter, to_add=None):
    for comment in (comment for comment in tup(to_add)
                            if comment in sorter):
        sort_val = -sorter[comment] if self.rev_sort else sorter[comment]
        heapq.heappush(candidates, (sort_val, comment))
def flatten_response(content):
    """Convert a content iterable to a string, properly handling unicode."""
    # TODO: it would be nice to replace this with response.body someday
    # once unicode issues are ironed out.
    return "".join(_force_utf8(x) for x in tup(content) if x)
def _report_interval(interval):
    """Read aggregated traffic from S3 and write to postgres."""
    from sqlalchemy.orm import scoped_session, sessionmaker
    from r2.models.traffic import engine
    Session = scoped_session(sessionmaker(bind=engine))

    # determine interval_type from YYYY-MM[-DD][-HH]
    pieces = interval.split('-')
    pieces = [int(i) for i in pieces]
    if len(pieces) == 4:
        interval_type = 'hour'
    elif len(pieces) == 3:
        interval_type = 'day'
        pieces.append(0)
    elif len(pieces) == 2:
        interval_type = 'month'
        pieces.append(1)
        pieces.append(0)
    else:
        raise

    pg_interval = "%04d-%02d-%02d %02d:00:00" % tuple(pieces)
    print 'reporting interval %s (%s)' % (pg_interval, interval_type)

    # Read aggregates and write to traffic db
    for category_cls in traffic_categories:
        now = datetime.datetime.now()
        print '*** %s - %s - %s' % (category_cls.__name__, interval, now)

        data = get_aggregate(interval, category_cls)
        len_data = len(data)
        step = max(len_data / 5, 100)

        for i, (name, (uniques, pageviews)) in enumerate(data.iteritems()):
            try:
                for n in tup(name):
                    unicode(n)
            except UnicodeDecodeError:
                print '%s - %s - %s - %s' % (category_cls.__name__, name,
                                             uniques, pageviews)
                continue

            if i % step == 0:
                now = datetime.datetime.now()
                print '%s - %s - %s/%s - %s' % (interval,
                                                category_cls.__name__,
                                                i, len_data, now)

            kw = {'date': pg_interval,
                  'interval': interval_type,
                  'unique_count': uniques,
                  'pageview_count': pageviews}
            kw.update(_name_to_kw(category_cls, name))
            r = category_cls(**kw)

            try:
                Session.merge(r)
                Session.commit()
            except DataError:
                Session.rollback()
                continue

    Session.remove()
    now = datetime.datetime.now()
    print 'finished reporting %s (%s) - %s' % (pg_interval, interval_type, now)
def add_error(self, error):
    for field_name in tup(error.fields):
        self.errors[(error.name, field_name)] = error
def add(self, error_name, msg_params=None, field=None, code=None):
    for field_name in tup(field):
        e = RedditError(error_name, msg_params, fields=field_name, code=code)
        self.add_error(e)
def _fast_query(cls, thing1s, thing2s, name, data=True, eager_load=True,
                thing_data=False):
    """looks up all the relationships between thing1_ids and thing2_ids
    and caches them"""
    prefix = thing_prefix(cls.__name__)

    thing1_dict = dict((t._id, t) for t in tup(thing1s))
    thing2_dict = dict((t._id, t) for t in tup(thing2s))

    thing1_ids = thing1_dict.keys()
    thing2_ids = thing2_dict.keys()

    name = tup(name)

    # permute all of the pairs
    pairs = set((x, y, n)
                for x in thing1_ids
                for y in thing2_ids
                for n in name)

    def lookup_rel_ids(pairs):
        rel_ids = {}

        t1_ids = set()
        t2_ids = set()
        names = set()
        for t1, t2, name in pairs:
            t1_ids.add(t1)
            t2_ids.add(t2)
            names.add(name)

        if t1_ids and t2_ids and names:
            q = cls._query(
                cls.c._thing1_id == t1_ids,
                cls.c._thing2_id == t2_ids,
                cls.c._name == names)
        else:
            q = []

        for rel in q:
            rel_ids[(rel._thing1_id, rel._thing2_id, rel._name)] = rel._id

        for p in pairs:
            if p not in rel_ids:
                rel_ids[p] = None

        return rel_ids

    # get the relation ids from the cache or query the db
    res = sgm(cls._cache, pairs, lookup_rel_ids, prefix)

    # get the relation objects
    rel_ids = {rel_id for rel_id in res.itervalues() if rel_id is not None}
    rels = cls._byID_rel(rel_ids, data=data, eager_load=eager_load,
                         thing_data=thing_data)

    res_obj = {}
    for (thing1_id, thing2_id, name), rel_id in res.iteritems():
        pair = (thing1_dict[thing1_id], thing2_dict[thing2_id], name)
        rel = rels[rel_id] if rel_id is not None else None
        res_obj[pair] = rel

    return res_obj
def insert(self, items):
    """Inserts the item into the cached data. This only works under
    certain criteria, see can_insert."""
    self._insert_tuples([self.make_item_tuple(item) for item in tup(items)])
def spam(self, thing, amount=1, mark_as_spam=True, **kw):
    things = tup(thing)
    for t in things:
        if mark_as_spam:
            t._spam = (amount > 0)
            t._commit()
def _byID(cls, ids, data=False, return_dict=True, extra_props=None,
          stale=False, ignore_missing=False):
    ids, single = tup(ids, True)
    prefix = thing_prefix(cls.__name__)

    if not all(x <= tdb.MAX_THING_ID for x in ids):
        raise NotFound('huge thing_id in %r' % ids)

    def count_found(ret, still_need):
        cls._cache.stats.cache_report(hits=len(ret),
                                      misses=len(still_need),
                                      cache_name='sgm.%s' % cls.__name__)

    if not cls._cache.stats:
        count_found = None

    def items_db(ids):
        items = cls._get_item(cls._type_id, ids)
        for i in items.keys():
            items[i] = cls._build(i, items[i])
        return items

    bases = sgm(cls._cache, ids, items_db, prefix, stale=stale,
                found_fn=count_found)

    # Check to see if we found everything we asked for
    missing = []
    for i in ids:
        if i not in bases:
            missing.append(i)
        elif bases[i] and bases[i]._id != i:
            g.log.error(
                "thing.py: Doppleganger on byID: %s got %s for %s" %
                (cls.__name__, bases[i]._id, i))
            bases[i] = items_db([i]).values()[0]
            bases[i]._cache_myself()

    if missing and not ignore_missing:
        raise NotFound, '%s %s' % (cls.__name__, missing)

    for i in missing:
        ids.remove(i)

    if data:
        need = []
        for v in bases.itervalues():
            if not v._loaded:
                need.append(v)
        if need:
            cls._load_multi(need)

    if extra_props:
        for _id, props in extra_props.iteritems():
            for k, v in props.iteritems():
                bases[_id].__setattr__(k, v, False)

    if single:
        return bases[ids[0]] if ids else None
    elif return_dict:
        return bases
    else:
        return filter(None, (bases.get(i) for i in ids))
def __init__(self, name, i18n_message, msg_params, field=None):
    self.name = name
    self.i18n_message = i18n_message
    self.msg_params = msg_params
    # list of fields in the original form that caused the error
    self.fields = tup(field) if field else []
def _column_to_obj(cls, columns):
    # columns = [{colname: colvalue}]
    return [LiveUpdate.from_json(*column.popitem())
            for column in utils.tup(columns)]
def spam(self, things, auto=True, moderator_banned=False,
         banner=None, date=None, train_spam=True, **kw):
    from r2.lib.db import queries

    all_things = tup(things)
    new_things = [x for x in all_things if not x._spam]

    Report.accept(all_things, True)

    inbox_adjustment_counter = Counter()

    for t in all_things:
        if getattr(t, "promoted", None) is not None:
            g.log.debug("Refusing to mark promotion %r as spam" % t)
            continue

        if not t._spam and train_spam:
            note = 'spam'
        elif not t._spam and not train_spam:
            note = 'remove not spam'
        elif t._spam and not train_spam:
            note = 'confirm spam'
        elif t._spam and train_spam:
            note = 'reinforce spam'

        if isinstance(t, Message) and not t._spam and t.to_id:
            inbox_adjustment_counter[t.to_id] -= 1

        t._spam = True

        if moderator_banned:
            t.verdict = 'mod-removed'
        elif not auto:
            t.verdict = 'admin-removed'

        ban_info = copy(getattr(t, 'ban_info', {}))

        if isinstance(banner, dict):
            ban_info['banner'] = banner[t._fullname]
        else:
            ban_info['banner'] = banner

        ban_info.update(auto=auto,
                        moderator_banned=moderator_banned,
                        banned_at=date or datetime.now(g.tz),
                        **kw)

        ban_info['note'] = note

        t.ban_info = ban_info
        t._commit()

    self.adjust_inbox_counts(inbox_adjustment_counter)

    if not auto:
        self.author_spammer(new_things, True)
        self.set_last_sr_ban(new_things)

    queries.ban(all_things, filtered=auto)

    for t in all_things:
        if auto:
            amqp.add_item("auto_removed", t._fullname)

        if isinstance(t, Comment):
            amqp.add_item("removed_comment", t._fullname)
        elif isinstance(t, Link):
            amqp.add_item("removed_link", t._fullname)
def touch(cls, fullname, names):
    names = tup(names)
    now = datetime.datetime.now(g.tz)
    values = dict.fromkeys(names, now)
    cls._set_values(fullname, values)
    return now
def _somethinged(cls, rel, user, link, name):
    return rel._fast_query(tup(user), tup(link), name=name)
def get_renderable_campaigns(link, campaigns):
    campaigns, is_single = tup(campaigns, ret_is_single=True)
    r = RenderableCampaign.create(link, campaigns)
    if is_single:
        r = r[0]
    return r
def _obj_to_column(cls, entries):
    entries, is_single = utils.tup(entries, ret_is_single=True)
    columns = [{entry._id: entry.to_json()} for entry in entries]
    return columns[0] if is_single else columns
def _byID(cls, ids, data=True, return_dict=True, stale=False,
          ignore_missing=False):
    # data props are ALWAYS loaded, data keyword is meaningless
    ids, single = tup(ids, ret_is_single=True)

    for x in ids:
        if not isinstance(x, (int, long)):
            raise ValueError('non-integer thing_id in %r' % ids)
        if x > tdb.MAX_THING_ID:
            raise NotFound('huge thing_id in %r' % ids)
        elif x < tdb.MIN_THING_ID:
            raise NotFound('negative thing_id in %r' % ids)

    if not single and not ids:
        if return_dict:
            return {}
        else:
            return []

    cls.record_lookup(data=data, delta=len(ids))

    def count_found_and_reject_unloaded(ret, still_need):
        unloaded_ids = {
            _id for _id, thing in ret.iteritems() if not thing._loaded}
        for _id in unloaded_ids:
            del ret[_id]
            still_need.add(_id)

        if cls._cache.stats:
            cls._cache.stats.cache_report(
                hits=len(ret), misses=len(still_need),
                cache_name='sgm.%s' % cls.__name__)

    def get_things_from_db(ids):
        props_by_id = cls._get_item(cls._type_id, ids)
        data_props_by_id = cls._get_data(cls._type_id, ids)

        things_by_id = {}
        for _id, props in props_by_id.iteritems():
            thing = cls._build(_id, props)
            data_props = data_props_by_id.get(_id, {})
            thing._t.update(data_props)
            thing._loaded = True

            if not all(data_prop in thing._t
                       for data_prop in cls._essentials):
                # a Thing missing an essential prop is invalid
                # this can happen if a process looks up the Thing as it's
                # created but between when the props and the data props are
                # written
                g.log.error("%s missing essentials, got %s", thing, thing._t)
                g.stats.simple_event("thing.load.missing_essentials")
                continue

            things_by_id[_id] = thing

        # caching happens in sgm, but is less intrusive to count here
        cls.record_cache_write(event="cache", delta=len(things_by_id))

        return things_by_id

    things_by_id = sgm(cls._cache, ids, miss_fn=get_things_from_db,
                       prefix=cls._cache_prefix(), time=THING_CACHE_TTL,
                       stale=stale, found_fn=count_found_and_reject_unloaded,
                       stat_subname=cls.__name__)

    # Check to see if we found everything we asked for
    missing = [_id for _id in ids if _id not in things_by_id]
    if missing and not ignore_missing:
        raise NotFound, '%s %s' % (cls.__name__, missing)

    if missing:
        ids = [_id for _id in ids if _id not in missing]

    if single:
        return things_by_id[ids[0]] if ids else None
    elif return_dict:
        return things_by_id
    else:
        return filter(None, (things_by_id.get(_id) for _id in ids))
def mark_participated(cls, account, subreddit):
    cls.create(account, tup(subreddit))
def unschedule(cls, rowkey, column_keys):
    column_keys = tup(column_keys)
    return cls._cf.remove(rowkey, column_keys)
def _commit(self, keys=None):
    lock = None

    try:
        if not self._created:
            begin()
            self._create()
            just_created = True
        else:
            just_created = False

        lock = g.make_lock("thing_commit", 'commit_' + self._fullname)
        lock.acquire()

        if not just_created and not self._sync_latest():
            #sync'd and we have nothing to do now, but we still cache anyway
            self._cache_myself()
            return

        # begin is a no-op if already done, but in the not-just-created
        # case we need to do this here because the else block is not
        # executed when the try block is exited prematurely in any way
        # (including the return in the above branch)
        begin()

        to_set = self._dirties.copy()
        if keys:
            keys = tup(keys)
            for key in to_set.keys():
                if key not in keys:
                    del to_set[key]

        data_props = {}
        thing_props = {}
        for k, (old_value, new_value) in to_set.iteritems():
            if k.startswith('_'):
                thing_props[k[1:]] = new_value
            else:
                data_props[k] = new_value

        if data_props:
            self._set_data(self._type_id, self._id,
                           just_created, **data_props)

        if thing_props:
            self._set_props(self._type_id, self._id, **thing_props)

        if keys:
            for k in keys:
                if self._dirties.has_key(k):
                    del self._dirties[k]
        else:
            self._dirties.clear()
    except:
        rollback()
        raise
    else:
        commit()
        self._cache_myself()
    finally:
        if lock:
            lock.release()

    hooks.get_hook("thing.commit").call(thing=self, changes=to_set)
def _fast_query(cls, thing1s, thing2s, name, data=True, eager_load=True,
                thing_data=False, thing_stale=False):
    """looks up all the relationships between thing1_ids and thing2_ids
    and caches them"""
    cache_key_lookup = dict()

    # We didn't find these keys in the cache, look them up in the
    # database
    def lookup_rel_ids(uncached_keys):
        rel_ids = {}

        # Lookup thing ids and name from cache key
        t1_ids = set()
        t2_ids = set()
        names = set()
        for cache_key in uncached_keys:
            (thing1, thing2, name) = cache_key_lookup[cache_key]
            t1_ids.add(thing1._id)
            t2_ids.add(thing2._id)
            names.add(name)

        q = cls._query(
            cls.c._thing1_id == t1_ids,
            cls.c._thing2_id == t2_ids,
            cls.c._name == names)

        for rel in q:
            rel_ids[cls._fast_cache_key_from_parts(
                cls.__name__,
                rel._thing1_id,
                rel._thing2_id,
                str(rel._name))] = rel._id

        for cache_key in uncached_keys:
            if cache_key not in rel_ids:
                rel_ids[cache_key] = None

        return rel_ids

    # make lookups for thing ids and names
    thing1_dict = dict((t._id, t) for t in tup(thing1s))
    thing2_dict = dict((t._id, t) for t in tup(thing2s))

    names = map(str, tup(name))

    # permute all of the pairs via cartesian product
    rel_tuples = itertools.product(
        thing1_dict.values(),
        thing2_dict.values(),
        names)

    # create cache keys for all permutations and initialize lookup
    for t in rel_tuples:
        thing1, thing2, name = t
        cache_key = cls._fast_cache_key_from_parts(
            cls.__name__,
            thing1._id,
            thing2._id,
            name)
        cache_key_lookup[cache_key] = t

    # get the relation ids from the cache or query the db
    res = sgm(cls._fast_cache, cache_key_lookup.keys(), lookup_rel_ids)

    # get the relation objects
    rel_ids = {rel_id for rel_id in res.itervalues() if rel_id is not None}
    rels = cls._byID_rel(rel_ids,
                         data=data,
                         eager_load=eager_load,
                         thing_data=thing_data,
                         thing_stale=thing_stale)

    # Takes aggregated results from cache and db (res) and transforms
    # the values from ids to Relations.
    res_obj = {}
    for cache_key, rel_id in res.iteritems():
        t = cache_key_lookup[cache_key]
        rel = rels[rel_id] if rel_id is not None else None
        res_obj[t] = rel

    return res_obj