def add_comments(comments):
    """Add comments to the CommentTree and update scores."""
    from v1.models.builder import write_comment_orders

    link_ids = [comment.link_id for comment in tup(comments)]
    links_by_id = Link._byID(link_ids)

    comments = tup(comments)
    comments_by_link_id = defaultdict(list)
    for comment in comments:
        comments_by_link_id[comment.link_id].append(comment)

    for link_id, link_comments in comments_by_link_id.iteritems():
        link = links_by_id[link_id]

        timer = g.stats.get_timer('comment_tree.add.1')
        timer.start()

        write_comment_scores(link, link_comments)
        timer.intermediate('scores')

        CommentTree.add_comments(link, link_comments)
        timer.intermediate('update')

        write_comment_orders(link)
        timer.intermediate('write_order')

        timer.stop()

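# Nearly everything below funnels single-or-list arguments through tup()
# from v1.lib.utils. A minimal sketch of that helper, assuming the usual
# normalize-to-tuple behavior (under Python 2, str has no __iter__, so
# strings count as single values); tup_sketch is a hypothetical name:
def tup_sketch(item, ret_is_single=False):
    # pass real iterables through, wrap everything else in a 1-tuple
    if hasattr(item, '__iter__'):
        return (item, False) if ret_is_single else item
    return ((item,), True) if ret_is_single else (item,)

# tup_sketch('abc')                      -> ('abc',)
# tup_sketch(['a', 'b'])                 -> ['a', 'b']
# tup_sketch('abc', ret_is_single=True)  -> (('abc',), True)
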
def get_actions(cls, srs, mod=None, action=None, after=None, reverse=False,
                count=1000):
    """
    Get a ColumnQuery that yields ModAction objects according to
    specified criteria.
    """
    if after and isinstance(after, basestring):
        after = cls._byID(UUID(after))
    elif after and isinstance(after, UUID):
        after = cls._byID(after)

    if not isinstance(after, cls):
        after = None

    srs = tup(srs)

    if not mod and not action:
        rowkeys = [sr._id36 for sr in srs]
        q = ModActionBySR.query(rowkeys, after=after, reverse=reverse,
                                count=count)
    elif mod:
        mods = tup(mod)
        key = '%s_%s' if not action else '%%s_%%s_%s' % action
        rowkeys = itertools.product([sr._id36 for sr in srs],
                                    [mod._id36 for mod in mods])
        rowkeys = [key % (sr, mod) for sr, mod in rowkeys]
        view = ModActionBySRActionMod if action else ModActionBySRMod
        q = view.query(rowkeys, after=after, reverse=reverse, count=count)
    else:
        rowkeys = ['%s_%s' % (sr._id36, action) for sr in srs]
        q = ModActionBySRAction.query(rowkeys, after=after, reverse=reverse,
                                      count=count)

    return q

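# The rowkey construction in the mod-filtered branch, exercised standalone
# (the id36 and action values are made up):
import itertools
sr_id36s = ['2qh0u', '5abcd']
mod_id36s = ['1a2b3']
action = 'removelink'
# with an action, the template pre-bakes the action suffix
key = '%s_%s' if not action else '%%s_%%s_%s' % action
print([key % (sr, mod) for sr, mod in itertools.product(sr_id36s, mod_id36s)])
# ['2qh0u_1a2b3_removelink', '5abcd_1a2b3_removelink']
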
def unschedule(cls, rowkey, colkey, schedule_rowkey):
    colkey = tup(colkey)
    victims = cls.search(rowkey, colkey)
    for uu in victims.itervalues():
        keys = TryLater.search(schedule_rowkey, uu).keys()
        TryLater.unschedule(schedule_rowkey, keys)
    cls._cf.remove(rowkey, colkey)

def validate_list(self, nodes, validators_by_type, ignored_types=None):
    for node in nodes:
        if node.type == "error":
            yield ValidationError(node.source_line, "SYNTAX_ERROR",
                                  {"message": node.message})
            continue
        elif node.type == "literal":
            if node.value == ";":
                # if we're seeing a semicolon as a literal, it's in a place
                # that doesn't fit naturally in the syntax.
                # Safari 5 will treat this as two color properties:
                #   color: calc(;color:red;);
                message = "semicolons are not allowed in this context"
                yield ValidationError(node.source_line, "SYNTAX_ERROR",
                                      {"message": message})
                continue

        validator = validators_by_type.get(node.type)
        if validator:
            for error in tup(validator(node)):
                if error:
                    yield error
        else:
            if not ignored_types or node.type not in ignored_types:
                yield ValidationError(node.source_line, "UNEXPECTED_TOKEN",
                                      {"token": node.type})

def compute_message_trees(messages):
    from v1.models import Message
    roots = set()
    threads = {}
    mdict = {}
    messages = sorted(messages, key=lambda m: m._date, reverse=True)

    for m in messages:
        mdict[m._id] = m
        if m.first_message:
            roots.add(m.first_message)
            threads.setdefault(m.first_message, set()).add(m._id)
        else:
            roots.add(m._id)

    # load any top-level messages which are not in the original list
    missing = [m for m in roots if m not in mdict]
    if missing:
        mdict.update(Message._byID(tup(missing),
                                   return_dict=True, data=True))

    # sort threads in chrono order
    for k in threads:
        threads[k] = list(sorted(threads[k]))

    tree = [(root, threads.get(root, [])) for root in roots]
    tree.sort(key=tree_sort_fn, reverse=True)

    return tree

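# The roots/threads grouping above, exercised standalone; a message is
# reduced to an (id, first_message, date) tuple and all values are made up:
msgs = [(3, 1, 300), (2, None, 200), (1, None, 100)]
roots, threads = set(), {}
for mid, first, _ in sorted(msgs, key=lambda m: m[2], reverse=True):
    if first:
        # replies are grouped under the id of the thread's first message
        roots.add(first)
        threads.setdefault(first, set()).add(mid)
    else:
        roots.add(mid)
print(sorted(roots))  # [1, 2]: message 3 hangs off root 1
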
def add_to_queue(self, user, emails, from_name, fr_addr, kind,
                 date=None, ip=None, body="", reply_to="", thing=None):
    s = self.queue_table
    hashes = []
    if not date:
        date = datetime.datetime.now(g.tz)
    if not ip:
        ip = getattr(request, "ip", "127.0.0.1")
    for email in tup(emails):
        uid = user._id if user else 0
        tid = thing._fullname if thing else ""
        key = hashlib.sha1(str((email, from_name, uid, tid, ip, kind,
                                body,
                                datetime.datetime.now(g.tz)))).hexdigest()
        s.insert().values({s.c.to_addr: email,
                           s.c.account_id: uid,
                           s.c.from_name: from_name,
                           s.c.fr_addr: fr_addr,
                           s.c.reply_to: reply_to,
                           s.c.fullname: tid,
                           s.c.ip: ip,
                           s.c.kind: kind,
                           s.c.body: body,
                           s.c.date: date,
                           s.c.msg_hash: key}).execute()
        hashes.append(key)
    return hashes

def get(cls, metric_name, sr_names=None):
    sr_names = tup(sr_names)
    try:
        metric = cls._byID(metric_name, properties=sr_names)
        return metric._values()  # might have additional values
    except tdb_cassandra.NotFound:
        return {}

def add(self, error_name, msg_params=None, field=None, code=None):
    for field_name in tup(field):
        e = VerbifyError(error_name, msg_params, fields=field_name,
                         code=code)
        self.add_error(e)

def sa_op(op):
    # if BooleanOp
    if isinstance(op, operators.or_):
        return sa.or_(*[sa_op(o) for o in op.ops])
    elif isinstance(op, operators.and_):
        return sa.and_(*[sa_op(o) for o in op.ops])
    elif isinstance(op, operators.not_):
        return sa.not_(*[sa_op(o) for o in op.ops])

    # else, assume op is an instance of op
    if isinstance(op, operators.eq):
        fn = lambda x, y: x == y
    elif isinstance(op, operators.ne):
        fn = lambda x, y: x != y
    elif isinstance(op, operators.gt):
        fn = lambda x, y: x > y
    elif isinstance(op, operators.lt):
        fn = lambda x, y: x < y
    elif isinstance(op, operators.gte):
        fn = lambda x, y: x >= y
    elif isinstance(op, operators.lte):
        fn = lambda x, y: x <= y
    elif isinstance(op, operators.in_):
        return sa.or_(op.lval.in_(op.rval))

    rval = tup(op.rval)

    if not rval:
        # an empty rval can never match anything, so return a raw SQL
        # expression that always evaluates false
        return '2+2=5'
    else:
        return sa.or_(*[fn(op.lval, v) for v in rval])

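# What the fallthrough branch builds for an equality op whose rval is a
# list: an OR over per-value comparisons. Sketched with plain SQLAlchemy
# and a hypothetical table:
import sqlalchemy as sa
md = sa.MetaData()
things = sa.Table('thing', md, sa.Column('thing_id', sa.Integer))
clause = sa.or_(*[things.c.thing_id == v for v in (1, 2, 3)])
print(clause)
# thing.thing_id = :thing_id_1 OR thing.thing_id = :thing_id_2
#     OR thing.thing_id = :thing_id_3
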
def accept(cls, things, correct=True):
    from v1.lib.db import queries

    things = tup(things)

    things_by_cls = {}
    for thing in things:
        things_by_cls.setdefault(thing.__class__, []).append(thing)

    for thing_cls, cls_things in things_by_cls.iteritems():
        to_clear = []

        # look up all of the reports for each thing
        rel_cls = cls.rel(Account, thing_cls)
        thing_ids = [t._id for t in cls_things]
        rels = rel_cls._query(rel_cls.c._thing2_id == thing_ids)
        for r in rels:
            if r._name == '0':
                r._name = '1' if correct else '-1'
                r._commit()

        for thing in cls_things:
            if thing.reported > 0:
                thing.reported = 0
                thing._commit()
                to_clear.append(thing)

        queries.clear_reports(to_clear, rels)

def for_srs(cls, srid36, to_omit, count, source, match_set=True):
    # It's usually better to use get_recommendations() than to call this
    # function directly because it does privacy filtering.

    srid36s = tup(srid36)
    to_omit = set(to_omit)
    to_omit.update(srid36s)  # don't show the originals
    rowkeys = ['%s.%s' % (source, srid36) for srid36 in srid36s]

    # fetch multiple sets of recommendations, one for each input srid36
    rows = cls._byID(rowkeys, return_dict=False)

    if match_set:
        sorted_recs = cls._merge_and_sort_by_count(rows)
        # heuristic: if input set is large, rec should match more than one
        min_count = math.floor(.1 * len(srid36s))
        sorted_recs = (rec[0] for rec in sorted_recs if rec[1] > min_count)
    else:
        sorted_recs = cls._merge_roundrobin(rows)

    # remove duplicates and ids listed in to_omit
    filtered = []
    for r in sorted_recs:
        if r not in to_omit:
            filtered.append(r)
            to_omit.add(r)

    return filtered[:count]

def account_ids_by_ip(ip, after=None, before=None, limit=1000):
    """Get a list of account IDs that an IP has accessed.

    Parameters:
    after -- a `datetime.datetime` from which results should start
    before -- a `datetime.datetime` at which results should end.
        If `after` is specified, this will be ignored.
    limit -- number of results to return

    """
    ips = tup(ip)
    results = []
    flattened_accounts = {}

    for ip in ips:
        if before and not after:
            # One less result will be returned for `before` queries, so we
            # increase the limit by one.
            account_ip = AccountsByIP.get(
                ip, column_start=before, column_count=limit + 1,
                column_reversed=False)
            account_ip = sorted(account_ip, reverse=True)
        else:
            account_ip = AccountsByIP.get(
                ip, column_start=after, column_count=limit)
        flattened_account_ip = [j for i in account_ip
                                for j in i.iteritems()]
        flattened_accounts[ip] = flattened_account_ip

    for ip, flattened_account_ip in flattened_accounts.iteritems():
        for last_visit, account in flattened_account_ip:
            results.append((account, last_visit, [ip]))

    return results

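# The nested comprehension above flattens a list of per-IP column dicts
# ({last_visit: account_id}) into (last_visit, account_id) pairs; in
# isolation, with made-up values:
account_ip = [{'2016-05-02T10:00': 'abc123'},
              {'2016-05-01T09:00': 'def456'}]
flattened = [j for i in account_ip for j in i.items()]
print(flattened)
# [('2016-05-02T10:00', 'abc123'), ('2016-05-01T09:00', 'def456')]
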
def total_by_codename(cls, codenames):
    """Return total lifetime pageviews (or clicks) for given codename(s)."""
    codenames = tup(codenames)
    # uses hour totals to get the most up-to-date count
    q = (Session.query(cls.codename, sum(cls.pageview_count))
            .filter(cls.interval == "hour")
            .filter(cls.codename.in_(codenames))
            .group_by(cls.codename))
    return list(q)

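# A standalone version of the aggregate above against an in-memory SQLite
# table (table and column names are hypothetical). Note that `sum` in
# total_by_codename is assumed to be SQLAlchemy's aggregate function, not
# the builtin; sa.func.sum plays that role here (legacy-style select, to
# match the py2-era SQLAlchemy this codebase uses):
import sqlalchemy as sa
engine = sa.create_engine('sqlite://')
md2 = sa.MetaData()
traffic = sa.Table('traffic', md2,
                   sa.Column('codename', sa.String),
                   sa.Column('interval', sa.String),
                   sa.Column('pageview_count', sa.Integer))
md2.create_all(engine)
conn = engine.connect()
conn.execute(traffic.insert(), [
    {'codename': 'ad1', 'interval': 'hour', 'pageview_count': 10},
    {'codename': 'ad1', 'interval': 'hour', 'pageview_count': 5},
])
q = (sa.select([traffic.c.codename, sa.func.sum(traffic.c.pageview_count)])
        .where(traffic.c.interval == 'hour')
        .group_by(traffic.c.codename))
print(conn.execute(q).fetchall())  # [(u'ad1', 15)]
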
def has_errors(self, field_name, *errors, **kw):
    have_error = False
    field_name = tup(field_name)
    for error_name in errors:
        for fname in field_name:
            if (error_name, fname) in c.errors:
                self.set_error(error_name, fname)
                have_error = True
    return have_error

def search(cls, rowkey, subjects=None):
    try:
        if subjects:
            subjects = tup(subjects)
            return cls._cf.get(rowkey, subjects)
        else:
            return cls._cf.get(rowkey)
    except tdb_cassandra.NotFoundException:
        return {}

def get_localized_source(self, lang):
    catalog = get_catalog(lang)

    # relies on pyx files, so it can't be imported at global scope
    from v1.lib.utils import tup

    data = {}
    for key in self.keys:
        key = tup(key)[0]  # because the key for plurals is (sing, plur)
        self._check_formatting_specifiers(key)
        msg = catalog[key]

        if not msg or not msg.string:
            continue

        # jed expects to ignore the first value in the translations array
        # so we'll just make it null
        strings = tup(msg.string)
        data[key] = [None] + list(strings)

    return "r.i18n.addMessages(%s)" % json.dumps(data)

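# The shape of the payload this emits, sketched with a made-up catalog
# entry: jed ignores the first slot of each translations array, hence the
# None padding.
import json
data = {'comment': [None, 'comment', 'comments']}
print("r.i18n.addMessages(%s)" % json.dumps(data))
# r.i18n.addMessages({"comment": [null, "comment", "comments"]})
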
def has_errors(self):
    """Determines if the signature has any errors.

    :returns: whether or not there are non-ignored errors
    :rtype: bool
    """
    if self.ignored_errors:
        igcodes = {err.code for err in tup(self.ignored_errors)}
        error_codes = {code for code, _ in self.errors}
        return not error_codes.issubset(igcodes)
    else:
        return bool(self.errors)

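# The subset check above in isolation: errors count only if at least one
# error code falls outside the ignored set (the code values are made up):
igcodes = set(['EXPIRED'])
print(not set(['EXPIRED', 'BAD_HMAC']).issubset(igcodes))  # True: BAD_HMAC
print(not set(['EXPIRED']).issubset(igcodes))  # False: all errors ignored
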
def get_recommendations(srs,
                        count=10,
                        source=SRC_MULTIVERBIFYS,
                        to_omit=None,
                        match_set=True,
                        over18=False):
    """Return subverbifys recommended if you like the given subverbifys.

    Args:
    - srs is one Subverbify object or a list of Subverbifys
    - count is the total number of results to return
    - source is a prefix telling which set of recommendations to use
    - to_omit is a single subverbify id36 or list of id36s that should not
      be included. (Useful for omitting recs that were already rejected.)
    - match_set=True will return recs that are similar to each other,
      useful for matching the "theme" of the original set
    - over18 content is filtered unless over18=True or one of the original
      srs is over18

    """
    srs = tup(srs)
    to_omit = tup(to_omit) if to_omit else []

    # fetch more recs than requested because some might get filtered out
    rec_id36s = SRRecommendation.for_srs([sr._id36 for sr in srs],
                                         to_omit,
                                         count * 2,
                                         source,
                                         match_set=match_set)

    # always check for private subverbifys at runtime since type might
    # change
    rec_srs = Subverbify._byID36(rec_id36s, return_dict=False)
    filtered = [sr for sr in rec_srs if is_visible(sr)]

    # don't recommend adult srs unless one of the originals was over_18
    if not over18 and not any(sr.over_18 for sr in srs):
        filtered = [sr for sr in filtered if not sr.over_18]

    return filtered[:count]

def assert_vote_effects(
    self,
    vote,
    affects_score=True,
    affects_karma=True,
    affected_thing_attr="_ups",
    notes=None,
):
    notes = set(tup(notes) if notes else [])

    self.assertEqual(vote.effects.affects_score, affects_score)
    self.assertEqual(vote.effects.affects_karma, affects_karma)
    self.assertEqual(vote.affected_thing_attr, affected_thing_attr)
    self.assertEqual(set(vote.effects.notes), notes)

    return vote

def wrap_links(links, wrapper=default_thing_wrapper(),
               listing_cls=LinkListing,
               num=None, show_nums=False, nextprev=False, **kw):
    links = tup(links)
    if not all(isinstance(x, basestring) for x in links):
        links = [x._fullname for x in links]
    b = IDBuilder(links, num=num, wrap=wrapper, **kw)
    l = listing_cls(b, nextprev=nextprev, show_nums=show_nums)
    return l.listing()

def _backend_get(self, keys):
    keys, is_single = tup(keys, ret_is_single=True)
    rows = self.cf.multiget(keys, columns=[self.COLUMN_NAME])
    ret = {
        key: pickle.loads(columns[self.COLUMN_NAME])
        for key, columns in rows.iteritems()
    }

    if is_single:
        if ret:
            return ret.values()[0]
        else:
            return None
    else:
        return ret

def translate_data_value(alias, op):
    lval = op.lval
    need_substr = not isinstance(lval, operators.query_func)
    lval = translate_sort(alias, 'value', lval, False)

    # add the substring func
    if need_substr:
        lval = sa.func.substring(lval, 1, max_val_len)

    op.lval = lval

    # convert the rval to db types
    # convert everything to strings for pg8.3
    op.rval = tuple(str(py2db(v)) for v in tup(op.rval))

def get(cls, srs, locations):
    srs, srs_is_single = tup(srs, ret_is_single=True)
    locations, locations_is_single = tup(locations, ret_is_single=True)
    is_single = srs_is_single and locations_is_single

    rowkeys = {location: cls._rowkey(location) for location in locations}
    columns = {sr: cls._column_name(sr) for sr in srs}
    rcl = cls._read_consistency_level
    metrics = cls._cf.multiget(rowkeys.values(), columns.values(),
                               read_consistency_level=rcl)

    ret = {}
    for sr, location in product(srs, locations):
        rowkey = rowkeys[location]
        column = columns[sr]
        impressions = metrics.get(rowkey, {}).get(column, 0)
        ret[(sr, location)] = impressions

    if is_single:
        return ret.values()[0]
    else:
        return ret

def location_by_ips(ips):
    ips, is_single = tup(ips, ret_is_single=True)
    location_by_ip = sgm(
        cache=g.gencache,
        keys=ips,
        miss_fn=_location_by_ips,
        prefix='geoip:loc_',
        time=GEOIP_CACHE_TIME,
        ignore_set_errors=True,
    )
    if is_single and location_by_ip:
        return location_by_ip[ips[0]]
    else:
        return location_by_ip

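# A minimal sketch of the sgm ("set-get-multi") helper used above, assuming
# the usual behavior: serve hits from cache, compute the misses in a single
# batch via miss_fn, and backfill the cache. A plain dict stands in for
# g.gencache, and the time/ignore_set_errors options are omitted:
def sgm_sketch(cache, keys, miss_fn, prefix=''):
    found = {}
    for key in keys:
        if prefix + str(key) in cache:
            found[key] = cache[prefix + str(key)]
    missing = [k for k in keys if k not in found]
    if missing:
        computed = miss_fn(missing)
        for k, v in computed.items():
            cache[prefix + str(k)] = v  # backfill for the next caller
        found.update(computed)
    return found

# sgm_sketch({}, ['1.2.3.4'], lambda ks: {k: 'US' for k in ks},
#            prefix='geoip:loc_')  -> {'1.2.3.4': 'US'}
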
def __init__(self, name=None, msg_params=None, fields=None, code=None):
    Exception.__init__(self)

    if name is not None:
        self.name = name

    self.i18n_message = error_list.get(self.name)
    self.msg_params = msg_params or {}

    if fields is not None:
        # list of fields in the original form that caused the error
        self.fields = tup(fields)

    if code is not None:
        self.code = code

def wrap_processor(msgs, *args):
    # Work the same for amqp.consume_items and amqp.handle_items.
    msg_tup = utils.tup(msgs)

    metrics_name = "amqp." + queue_name
    start = time.time()
    try:
        with baseplate_integration.make_server_span(metrics_name):
            return processor(msgs, *args)
    finally:
        service_time = (time.time() - start) / len(msg_tup)
        for n, msg in enumerate(msg_tup):
            fake_start = start + n * service_time
            fake_end = fake_start + service_time
            self.transact(metrics_name, fake_start, fake_end)
        self.flush()

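# How the batch's wall-clock time is amortized into equal per-message spans,
# sketched with made-up numbers: a 3-message batch that took 0.3s is
# reported as three back-to-back 0.1s transactions.
start = 1000.0
end = start + 0.3
msgs = ['m1', 'm2', 'm3']
service_time = (end - start) / len(msgs)
for n, msg in enumerate(msgs):
    fake_start = start + n * service_time
    fake_end = fake_start + service_time
    print((msg, round(fake_start, 1), round(fake_end, 1)))
# ('m1', 1000.0, 1000.1), ('m2', 1000.1, 1000.2), ('m3', 1000.2, 1000.3)
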
def unspam(self, things, moderator_unbanned=True, unbanner=None,
           train_spam=True, insert=True):
    from v1.lib.db import queries

    things = tup(things)

    # We want to make unban-all moderately efficient, so when
    # mass-unbanning, we're going to skip the code below on links that
    # are already not banned. However, when someone manually clicks
    # "approve" on an unbanned link, and there's just one, we do want
    # to run the code below. That way, the little green checkmark
    # will have the right mouseover details, the reports will be
    # cleared, etc.
    if len(things) > 1:
        things = [x for x in things if x._spam]

    Report.accept(things, False)

    for t in things:
        ban_info = copy(getattr(t, 'ban_info', {}))
        ban_info['unbanned_at'] = datetime.now(g.tz)
        if unbanner:
            ban_info['unbanner'] = unbanner
        if ban_info.get('reset_used') is None:
            ban_info['reset_used'] = False
        else:
            ban_info['reset_used'] = True
        t.ban_info = ban_info
        t._spam = False
        if moderator_unbanned:
            t.verdict = 'mod-approved'
        else:
            t.verdict = 'admin-approved'
        t._commit()

        if isinstance(t, Comment):
            amqp.add_item("approved_comment", t._fullname)
        elif isinstance(t, Link):
            amqp.add_item("approved_link", t._fullname)

    self.author_spammer(things, False)
    self.set_last_sr_ban(things)

    queries.unban(things, insert)

def _things(self, things, action, *a, **kw):
    """Function for inserting/replacing things in listings."""
    things = tup(things)
    if not all(isinstance(t, Wrapped) for t in things):
        wrap = kw.pop('wrap', Wrapped)
        things = wrap_links(things, wrapper=wrap)
    data = [self.process_rendered(t.render()) for t in things]

    if kw:
        for d in data:
            if 'data' in d:
                d['data'].update(kw)

    self._data['things'] = data
    return data

def get_predicted_pageviews(srs, location=None):
    """
    Return predicted number of pageviews for sponsored headlines.

    Predicted geotargeted impressions are estimated as:

    geotargeted impressions =
        (predicted untargeted impressions) *
        (fp impressions for location / fp impressions)

    """
    srs, is_single = tup(srs, ret_is_single=True)
    sr_names = [sr.name for sr in srs]

    # default subverbifys require a different inventory factor
    default_srids = LocalizedDefaultSubverbifys.get_global_defaults()

    if location:
        no_location = Location(None)
        r = LocationPromoMetrics.get(DefaultSR, [no_location, location])
        location_pageviews = r[(DefaultSR, location)]
        all_pageviews = r[(DefaultSR, no_location)]
        if all_pageviews:
            location_factor = (float(location_pageviews) /
                               float(all_pageviews))
        else:
            location_factor = 0.
    else:
        location_factor = 1.0

    # prediction does not vary by date
    daily_inventory = PromoMetrics.get(MIN_DAILY_CASS_KEY,
                                       sr_names=sr_names)
    ret = {}
    for sr in srs:
        if not isinstance(sr, FakeSubverbify) and sr._id in default_srids:
            default_factor = DEFAULT_INVENTORY_FACTOR
        else:
            default_factor = INVENTORY_FACTOR
        base_pageviews = daily_inventory.get(sr.name, 0)
        ret[sr.name] = int(base_pageviews * default_factor *
                           location_factor)

    if is_single:
        return ret[srs[0].name]
    else:
        return ret

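# A worked example of the docstring's formula with made-up numbers: if the
# location accounts for 20% of front-page impressions and the subverbify's
# predicted untargeted inventory is 50,000 daily pageviews, the geotargeted
# prediction (with an inventory factor of 1.0) is 10,000.
location_pageviews = 200000.0   # fp impressions for the location
all_pageviews = 1000000.0       # total fp impressions
location_factor = location_pageviews / all_pageviews  # 0.2
base_pageviews = 50000          # predicted untargeted impressions
inventory_factor = 1.0          # stands in for INVENTORY_FACTOR
print(int(base_pageviews * inventory_factor * location_factor))  # 10000
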
def add_sort(sort, t_table, select):
    sort = tup(sort)

    prefixes = t_table.keys() if isinstance(t_table, dict) else None
    # sort the prefixes so the longest come first; the None key (the
    # default table) has no length and sorts last
    if prefixes:
        prefixes.sort(key=lambda x: len(x or ''), reverse=True)

    cols = []

    def make_sa_sort(s):
        orig_col = s.col

        col = orig_col
        if prefixes:
            table = None
            for k in prefixes:
                if k and orig_col.startswith(k):
                    table = t_table[k]
                    col = orig_col[len(k):]
            if table is None:
                table = t_table[None]
        else:
            table = t_table

        real_col = translate_sort(table, col)

        # TODO: a way to avoid overlap?
        # add column for the sort parameter using the sorted name
        select.append_column(real_col.label(orig_col))

        # avoids overlap temporarily
        select.use_labels = True

        # keep track of which columns we added so we can add joins later
        cols.append((real_col, table))

        # default to asc
        return (sa.desc(real_col) if isinstance(s, operators.desc)
                else sa.asc(real_col))

    sa_sort = [make_sa_sort(s) for s in sort]

    s = select.order_by(*sa_sort)
    return s, cols

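# The prefix ordering in isolation: the None key (standing for the default
# table) sorts last and longer prefixes come first (the alias names here
# are hypothetical):
prefixes = [None, '_t1_', '_account_']
prefixes.sort(key=lambda x: len(x or ''), reverse=True)
print(prefixes)  # ['_account_', '_t1_', None]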