Beispiel #1
0
def add_comments(comments):
    """Add comments to the CommentTree and update scores.

    Accepts a single comment or an iterable of comments; they are grouped
    by link so each link's tree is only rebuilt once.
    """
    from v1.models.builder import write_comment_orders

    # Normalize once up front instead of calling tup() twice.
    comments = tup(comments)
    link_ids = [comment.link_id for comment in comments]
    links_by_id = Link._byID(link_ids)

    # Group the comments by their parent link.
    comments_by_link_id = defaultdict(list)
    for comment in comments:
        comments_by_link_id[comment.link_id].append(comment)

    for link_id, link_comments in comments_by_link_id.iteritems():
        link = links_by_id[link_id]

        timer = g.stats.get_timer('comment_tree.add.1')
        timer.start()

        write_comment_scores(link, link_comments)
        timer.intermediate('scores')

        CommentTree.add_comments(link, link_comments)
        timer.intermediate('update')

        write_comment_orders(link)
        timer.intermediate('write_order')

        timer.stop()
Beispiel #2
0
    def get_actions(cls, srs, mod=None, action=None, after=None, reverse=False, count=1000):
        """
        Get a ColumnQuery that yields ModAction objects according to
        specified criteria.

        Parameters:
        srs -- a subverbify or list of subverbifys to query
        mod -- optional moderator (or list of moderators) to filter by
        action -- optional action name to filter by
        after -- pagination cursor: a ModAction, its id string, or a UUID
        reverse -- whether to reverse the sort order
        count -- maximum number of rows to fetch
        """
        # Resolve the `after` cursor to a ModAction instance if possible;
        # anything unresolvable falls back to None (start from the top).
        if after and isinstance(after, basestring):
            after = cls._byID(UUID(after))
        elif after and isinstance(after, UUID):
            after = cls._byID(after)

        if not isinstance(after, cls):
            after = None

        srs = tup(srs)

        if not mod and not action:
            rowkeys = [sr._id36 for sr in srs]
            q = ModActionBySR.query(rowkeys, after=after, reverse=reverse, count=count)
        elif mod:
            mods = tup(mod)
            # "%s_%s" joins sr/mod id36s; when filtering by action, its name
            # is baked into the template first.  (Parens make the conditional
            # vs. %-formatting precedence explicit.)
            key = '%s_%s' if not action else ('%%s_%%s_%s' % action)
            # Use a distinct loop variable so the `mod` parameter isn't
            # shadowed (python 2 comprehensions leak their loop variable).
            pairs = itertools.product([sr._id36 for sr in srs],
                [m._id36 for m in mods])
            rowkeys = [key % (sr_id, mod_id) for sr_id, mod_id in pairs]
            view = ModActionBySRActionMod if action else ModActionBySRMod
            q = view.query(rowkeys, after=after, reverse=reverse, count=count)
        else:
            rowkeys = ['%s_%s' % (sr._id36, action) for sr in srs]
            q = ModActionBySRAction.query(rowkeys, after=after, reverse=reverse, count=count)

        return q
Beispiel #3
0
 def unschedule(cls, rowkey, colkey, schedule_rowkey):
     """Remove the given column(s) and cancel their pending TryLater jobs."""
     colkeys = tup(colkey)
     matches = cls.search(rowkey, colkeys)
     for scheduled in matches.itervalues():
         # cancel every TryLater entry that references this subject
         pending = TryLater.search(schedule_rowkey, scheduled).keys()
         TryLater.unschedule(schedule_rowkey, pending)
     cls._cf.remove(rowkey, colkeys)
Beispiel #4
0
    def validate_list(self, nodes, validators_by_type, ignored_types=None):
        """Yield a ValidationError for each node that fails validation."""
        for node in nodes:
            node_type = node.type

            if node_type == "error":
                yield ValidationError(node.source_line, "SYNTAX_ERROR",
                                      {"message": node.message})
                continue

            if node_type == "literal" and node.value == ";":
                # if we're seeing a semicolon as a literal, it's in a place
                # that doesn't fit naturally in the syntax.
                # Safari 5 will treat this as two color properties:
                # color: calc(;color:red;);
                yield ValidationError(
                    node.source_line, "SYNTAX_ERROR",
                    {"message": "semicolons are not allowed in this context"})
                continue

            validator = validators_by_type.get(node_type)
            if validator is not None:
                for error in tup(validator(node)):
                    if error:
                        yield error
            elif not ignored_types or node_type not in ignored_types:
                yield ValidationError(node.source_line,
                                      "UNEXPECTED_TOKEN",
                                      {"token": node_type})
Beispiel #5
0
def compute_message_trees(messages):
    """Group messages into threads.

    Returns a list of (root_id, [child_ids]) tuples sorted by
    tree_sort_fn (descending); children are sorted ascending by id.
    """
    from v1.models import Message
    roots = set()
    threads = {}
    mdict = {}
    messages = sorted(messages, key=lambda m: m._date, reverse=True)

    for m in messages:
        mdict[m._id] = m
        if m.first_message:
            # replies hang off of the thread's first message
            roots.add(m.first_message)
            threads.setdefault(m.first_message, set()).add(m._id)
        else:
            roots.add(m._id)

    # load any top-level messages which are not in the original list
    missing = [m for m in roots if m not in mdict]
    if missing:
        mdict.update(Message._byID(tup(missing), return_dict=True, data=True))

    # sort threads in chrono order (sorted() already returns a list, so
    # the previous list(sorted(...)) wrapper was redundant)
    for k in threads:
        threads[k] = sorted(threads[k])

    tree = [(root, threads.get(root, [])) for root in roots]
    tree.sort(key=tree_sort_fn, reverse=True)

    return tree
Beispiel #6
0
 def add_to_queue(self, user, emails, from_name, fr_addr, kind,
                  date = None, ip = None,
                  body = "", reply_to = "", thing = None):
     """Insert one outgoing email per address into the email queue table.

     Returns the list of message hashes (one per recipient), which
     uniquely identify the queued rows.
     """
     s = self.queue_table
     hashes = []
     if not date:
         date = datetime.datetime.now(g.tz)
     if not ip:
         # fall back to localhost when no request context is available
         ip = getattr(request, "ip", "127.0.0.1")
     for email in tup(emails):
         uid = user._id if user else 0
         tid = thing._fullname if thing else ""
         # the hash includes a fresh timestamp (not `date`) so otherwise
         # identical messages still get distinct keys
         key = hashlib.sha1(str((email, from_name, uid, tid, ip, kind, body,
                            datetime.datetime.now(g.tz)))).hexdigest()
         s.insert().values({s.c.to_addr : email,
                            s.c.account_id : uid,
                            s.c.from_name : from_name,
                            s.c.fr_addr : fr_addr,
                            s.c.reply_to : reply_to,
                            s.c.fullname: tid,
                            s.c.ip : ip,
                            s.c.kind: kind,
                            s.c.body: body,
                            s.c.date : date,
                            s.c.msg_hash : key}).execute()
         hashes.append(key)
     return hashes
Beispiel #7
0
 def get(cls, metric_name, sr_names=None):
     """Look up a metric row, returning its values or {} when absent."""
     columns = tup(sr_names)
     try:
         row = cls._byID(metric_name, properties=columns)
         # the row may carry additional values beyond the requested columns
         return row._values()
     except tdb_cassandra.NotFound:
         return {}
Beispiel #8
0
 def add(self, error_name, msg_params=None, field=None, code=None):
     """Register a VerbifyError for each field in `field`."""
     for fname in tup(field):
         error = VerbifyError(error_name,
                              msg_params,
                              fields=fname,
                              code=code)
         self.add_error(error)
Beispiel #9
0
def sa_op(op):
    """Translate an operators.* op tree into a SQLAlchemy expression.

    Boolean ops recurse over their children; comparison ops expand into
    an OR over every right-hand value.
    """
    #if BooleanOp
    if isinstance(op, operators.or_):
        return sa.or_(*[sa_op(o) for o in op.ops])
    elif isinstance(op, operators.and_):
        return sa.and_(*[sa_op(o) for o in op.ops])
    elif isinstance(op, operators.not_):
        return sa.not_(*[sa_op(o) for o in op.ops])

    #else, assume op is an instance of op
    if isinstance(op, operators.eq):
        fn = lambda x, y: x == y
    elif isinstance(op, operators.ne):
        fn = lambda x, y: x != y
    elif isinstance(op, operators.gt):
        fn = lambda x, y: x > y
    elif isinstance(op, operators.lt):
        fn = lambda x, y: x < y
    elif isinstance(op, operators.gte):
        fn = lambda x, y: x >= y
    elif isinstance(op, operators.lte):
        fn = lambda x, y: x <= y
    elif isinstance(op, operators.in_):
        return sa.or_(op.lval.in_(op.rval))

    rval = tup(op.rval)

    if not rval:
        # no right-hand values: return a literal that is never true
        # (presumably '2+2=5' evaluates to false in the target SQL
        # dialect -- TODO confirm this is intentional)
        return '2+2=5'
    else:
        return sa.or_(*[fn(op.lval, v) for v in rval])
Beispiel #10
0
    def accept(cls, things, correct=True):
        """Resolve open reports on `things`.

        Marks each pending report relation ('0') as correct ('1') or
        incorrect ('-1'), zeroes each thing's report counter, and asks
        the queries module to clear the resolved reports.
        """
        from v1.lib.db import queries

        things = tup(things)

        # group by concrete class since each type has its own rel table
        things_by_cls = {}
        for thing in things:
            things_by_cls.setdefault(thing.__class__, []).append(thing)

        for thing_cls, cls_things in things_by_cls.iteritems():
            to_clear = []
            # look up all of the reports for each thing
            rel_cls = cls.rel(Account, thing_cls)
            thing_ids = [t._id for t in cls_things]
            rels = rel_cls._query(rel_cls.c._thing2_id == thing_ids)
            for r in rels:
                if r._name == '0':
                    # '0' means unresolved; flip to '1'/'−1' verdict
                    r._name = '1' if correct else '-1'
                    r._commit()

            for thing in cls_things:
                if thing.reported > 0:
                    thing.reported = 0
                    thing._commit()
                    to_clear.append(thing)

            queries.clear_reports(to_clear, rels)
Beispiel #11
0
    def for_srs(cls, srid36, to_omit, count, source, match_set=True):
        """Return up to `count` recommended sr id36s for the given sr(s).

        It's usually better to use get_recommendations() than to call this
        function directly because it does privacy filtering.
        """
        srid36s = tup(srid36)
        to_omit = set(to_omit)
        to_omit.update(srid36s)  # don't show the originals
        # use a distinct loop variable: python 2 list comprehensions leak
        # their loop variable, which would clobber the `srid36` parameter
        rowkeys = ['%s.%s' % (source, id36) for id36 in srid36s]

        # fetch multiple sets of recommendations, one for each input srid36
        rows = cls._byID(rowkeys, return_dict=False)

        if match_set:
            sorted_recs = cls._merge_and_sort_by_count(rows)
            # heuristic: if input set is large, rec should match more than one
            min_count = math.floor(.1 * len(srid36s))
            sorted_recs = (rec[0] for rec in sorted_recs if rec[1] > min_count)
        else:
            sorted_recs = cls._merge_roundrobin(rows)
        # remove duplicates and ids listed in to_omit
        filtered = []
        for r in sorted_recs:
            if r not in to_omit:
                filtered.append(r)
                to_omit.add(r)
        return filtered[:count]
Beispiel #12
0
def account_ids_by_ip(ip, after=None, before=None, limit=1000):
    """Get a list of account IDs that an IP has accessed.

    Parameters:
    ip -- a single IP string, or a list of them
    after -- a `datetime.datetime` from which results should start
    before -- a `datetime.datetime` from which results should end.  If `after`
        is specified, this will be ignored.
    limit -- number of results to return

    Returns a list of (account, last_visit, [ip]) tuples.
    """
    ips = tup(ip)
    results = []
    flattened_accounts = {}
    for ip in ips:
        if before and not after:
            # One less result will be returned for `before` queries, so we
            # increase the limit by one.
            account_ip = AccountsByIP.get(
                ip, column_start=before, column_count=limit + 1,
                column_reversed=False)
            account_ip = sorted(account_ip, reverse=True)
        else:
            account_ip = AccountsByIP.get(
                ip, column_start=after, column_count=limit)
        # each fetched row is a mapping; flatten to (last_visit, account)
        # pairs -- presumably keyed by visit time; verify against
        # AccountsByIP's schema
        flattened_account_ip = [j for i in account_ip
                                for j in i.iteritems()]
        flattened_accounts[ip] = flattened_account_ip

    for ip, flattened_account_ip in flattened_accounts.iteritems():
        for last_visit, account in flattened_account_ip:
            results.append((account, last_visit, [ip]))
    return results
Beispiel #13
0
def total_by_codename(cls, codenames):
    """Return total lifetime pageviews (or clicks) for given codename(s).

    Returns a list of (codename, total) rows, one per matching codename.
    """
    codenames = tup(codenames)
    # uses hour totals to get the most up-to-date count
    # NOTE(review): `sum` here must be the SQL aggregate (e.g. from
    # sqlalchemy.sql.functions), not the builtin -- confirm the module's
    # imports; the builtin would not produce a GROUP BY aggregate.
    q = (Session.query(cls.codename, sum(
        cls.pageview_count)).filter(cls.interval == "hour").filter(
            cls.codename.in_(codenames)).group_by(cls.codename))
    return list(q)
Beispiel #14
0
 def has_errors(self, field_name, *errors, **kw):
     """Record and report whether any (error, field) pair is in c.errors."""
     field_names = tup(field_name)
     found = False
     for error_name in errors:
         for fname in field_names:
             if (error_name, fname) in c.errors:
                 self.set_error(error_name, fname)
                 found = True
     return found
Beispiel #15
0
 def search(cls, rowkey, subjects=None):
     """Fetch columns for `rowkey`, optionally limited to `subjects`.

     Returns {} when the row does not exist.
     """
     try:
         if not subjects:
             return cls._cf.get(rowkey)
         return cls._cf.get(rowkey, tup(subjects))
     except tdb_cassandra.NotFoundException:
         return {}
Beispiel #16
0
    def get_localized_source(self, lang):
        """Build a javascript snippet registering translations for `lang`."""
        catalog = get_catalog(lang)

        # relies on pyx files, so it can't be imported at global scope
        from v1.lib.utils import tup

        data = {}
        for key in self.keys:
            # the key for plurals is a (singular, plural) tuple; use the
            # singular form as the lookup key
            key = tup(key)[0]
            self._check_formatting_specifiers(key)
            msg = catalog[key]

            if not msg or not msg.string:
                continue

            # jed expects to ignore the first value in the translations
            # array, so prepend a null placeholder
            data[key] = [None] + list(tup(msg.string))
        return "r.i18n.addMessages(%s)" % json.dumps(data)
Beispiel #17
0
    def has_errors(self):
        """Determines if the signature has any errors.

        :returns: whether or not there are non-ignored errors
        :rtype: bool
        """
        if not self.ignored_errors:
            return bool(self.errors)
        ignored = {err.code for err in tup(self.ignored_errors)}
        seen = {code for code, _ in self.errors}
        # errors remain unless every observed code is explicitly ignored
        return not seen.issubset(ignored)
Beispiel #18
0
def get_recommendations(srs,
                        count=10,
                        source=SRC_MULTIVERBIFYS,
                        to_omit=None,
                        match_set=True,
                        over18=False):
    """Return subverbifys recommended if you like the given subverbifys.

    Args:
    - srs is one Subverbify object or a list of Subverbifys
    - count is total number of results to return
    - source is a prefix telling which set of recommendations to use
    - to_omit is a single or list of subverbify id36s that should not be
        be included. (Useful for omitting recs that were already rejected.)
    - match_set=True will return recs that are similar to each other, useful
        for matching the "theme" of the original set
    - over18 content is filtered unless over18=True or one of the original srs
        is over18

    """
    srs = tup(srs)
    omit_id36s = tup(to_omit) if to_omit else []

    # fetch more recs than requested because some might get filtered out
    rec_id36s = SRRecommendation.for_srs(
        [sr._id36 for sr in srs], omit_id36s, count * 2, source,
        match_set=match_set)

    # always check for private subverbifys at runtime since type might change
    recs = Subverbify._byID36(rec_id36s, return_dict=False)
    visible = [sr for sr in recs if is_visible(sr)]

    # don't recommend adult srs unless one of the originals was over_18
    allow_over18 = over18 or any(sr.over_18 for sr in srs)
    if not allow_over18:
        visible = [sr for sr in visible if not sr.over_18]

    return visible[:count]
Beispiel #19
0
 def assert_vote_effects(
     self, vote,
     affects_score=True,
     affects_karma=True,
     affected_thing_attr="_ups",
     notes=None,
 ):
     """Assert the vote's computed effects match the expected values."""
     expected_notes = set(tup(notes)) if notes else set()
     self.assertEqual(vote.effects.affects_score, affects_score)
     self.assertEqual(vote.effects.affects_karma, affects_karma)
     self.assertEqual(vote.affected_thing_attr, affected_thing_attr)
     self.assertEqual(set(vote.effects.notes), expected_notes)
     return vote
Beispiel #20
0
def wrap_links(links,
               wrapper=default_thing_wrapper(),
               listing_cls=LinkListing,
               num=None,
               show_nums=False,
               nextprev=False,
               **kw):
    """Wrap links (things or fullnames) and render them as a listing."""
    links = tup(links)
    # the builder wants fullnames; convert if we were handed thing objects
    if not all(isinstance(link, basestring) for link in links):
        links = [link._fullname for link in links]
    builder = IDBuilder(links, num=num, wrap=wrapper, **kw)
    listing = listing_cls(builder, nextprev=nextprev, show_nums=show_nums)
    return listing.listing()
Beispiel #21
0
 def _backend_get(self, keys):
     """Fetch pickled values for key(s); a single key returns a bare value."""
     keys, is_single = tup(keys, ret_is_single=True)
     rows = self.cf.multiget(keys, columns=[self.COLUMN_NAME])
     found = {}
     for key, columns in rows.iteritems():
         found[key] = pickle.loads(columns[self.COLUMN_NAME])
     if not is_single:
         return found
     # single-key lookup: unwrap the value, or None when the row is absent
     return found.values()[0] if found else None
Beispiel #22
0
def translate_data_value(alias, op):
    """Rewrite `op` in place so its lval/rval fit the data-value columns.

    The lval is translated to the sorted 'value' column (truncated to
    max_val_len unless it is already a query_func expression), and every
    rval is converted to a db-typed string.
    """
    lval = op.lval
    # query_func lvals are already SQL expressions; don't truncate them.
    # (`not isinstance(...)` replaces the non-idiomatic
    # `False if isinstance(...) else True`.)
    need_substr = not isinstance(lval, operators.query_func)
    lval = translate_sort(alias, 'value', lval, False)

    #add the substring func
    if need_substr:
        lval = sa.func.substring(lval, 1, max_val_len)

    op.lval = lval

    #convert the rval to db types
    #convert everything to strings for pg8.3
    op.rval = tuple(str(py2db(v)) for v in tup(op.rval))
Beispiel #23
0
    def get(cls, srs, locations):
        """Return impression counts keyed by (sr, location).

        When both arguments are single values, the bare count is
        returned instead of a dict.
        """
        srs, srs_is_single = tup(srs, ret_is_single=True)
        locations, locations_is_single = tup(locations, ret_is_single=True)
        is_single = srs_is_single and locations_is_single

        rowkeys = {location: cls._rowkey(location) for location in locations}
        columns = {sr: cls._column_name(sr) for sr in srs}
        metrics = cls._cf.multiget(
            rowkeys.values(),
            columns.values(),
            read_consistency_level=cls._read_consistency_level)

        ret = {}
        for sr, location in product(srs, locations):
            # a missing row or column means no recorded impressions
            row = metrics.get(rowkeys[location], {})
            ret[(sr, location)] = row.get(columns[sr], 0)

        return ret.values()[0] if is_single else ret
Beispiel #24
0
def location_by_ips(ips):
    """Look up geoip locations for ip(s), memoized through gencache."""
    ips, is_single = tup(ips, ret_is_single=True)
    locations = sgm(
        cache=g.gencache,
        keys=ips,
        miss_fn=_location_by_ips,
        prefix='geoip:loc_',
        time=GEOIP_CACHE_TIME,
        ignore_set_errors=True,
    )
    # a single-ip lookup unwraps to the bare location (when anything found)
    if is_single and locations:
        return locations[ips[0]]
    return locations
Beispiel #25
0
    def __init__(self, name=None, msg_params=None, fields=None, code=None):
        """Build an error, resolving its i18n message from error_list."""
        Exception.__init__(self)

        # an explicit name overrides any class-level default
        if name is not None:
            self.name = name

        self.i18n_message = error_list.get(self.name)
        self.msg_params = msg_params if msg_params else {}

        if fields is not None:
            # list of fields in the original form that caused the error
            self.fields = tup(fields)

        if code is not None:
            self.code = code
Beispiel #26
0
            def wrap_processor(msgs, *args):
                # Works for both amqp.consume_items and amqp.handle_items.
                batch = utils.tup(msgs)

                metrics_name = "amqp." + queue_name
                start = time.time()
                try:
                    with baseplate_integration.make_server_span(metrics_name):
                        return processor(msgs, *args)
                finally:
                    # spread the elapsed time evenly across the batch so
                    # per-message timings roughly sum to the real duration
                    per_msg = (time.time() - start) / len(batch)
                    for i, _ in enumerate(batch):
                        fake_start = start + i * per_msg
                        fake_end = fake_start + per_msg
                        self.transact(metrics_name, fake_start, fake_end)
                    self.flush()
Beispiel #27
0
    def unspam(self,
               things,
               moderator_unbanned=True,
               unbanner=None,
               train_spam=True,
               insert=True):
        """Un-ban (approve) the given things.

        Clears spam state and open reports, records ban_info bookkeeping,
        publishes approval events to amqp, and updates listing queries.
        """
        from v1.lib.db import queries

        things = tup(things)

        # We want to make unban-all moderately efficient, so when
        # mass-unbanning, we're going to skip the code below on links that
        # are already not banned.  However, when someone manually clicks
        # "approve" on an unbanned link, and there's just one, we want do
        # want to run the code below. That way, the little green checkmark
        # will have the right mouseover details, the reports will be
        # cleared, etc.

        if len(things) > 1:
            things = [x for x in things if x._spam]

        Report.accept(things, False)
        for t in things:
            ban_info = copy(getattr(t, 'ban_info', {}))
            ban_info['unbanned_at'] = datetime.now(g.tz)
            if unbanner:
                ban_info['unbanner'] = unbanner
            # identity comparison with None (`is None`) instead of `==`
            if ban_info.get('reset_used', None) is None:
                ban_info['reset_used'] = False
            else:
                ban_info['reset_used'] = True
            t.ban_info = ban_info
            t._spam = False
            if moderator_unbanned:
                t.verdict = 'mod-approved'
            else:
                t.verdict = 'admin-approved'
            t._commit()

            if isinstance(t, Comment):
                amqp.add_item("approved_comment", t._fullname)
            elif isinstance(t, Link):
                amqp.add_item("approved_link", t._fullname)

        self.author_spammer(things, False)
        self.set_last_sr_ban(things)
        queries.unban(things, insert)
Beispiel #28
0
    def _things(self, things, action, *a, **kw):
        """
        function for inserting/replacing things in listings.

        Wraps any unwrapped things, renders them, merges extra keyword
        data into each rendered item, and stores the result under
        self._data['things'].
        """
        things = tup(things)
        if not all(isinstance(t, Wrapped) for t in things):
            wrap = kw.pop('wrap', Wrapped)
            things = wrap_links(things, wrapper=wrap)
        data = [self.process_rendered(t.render()) for t in things]

        if kw:
            for d in data:
                # dict.has_key() is deprecated; use the `in` operator
                if 'data' in d:
                    d['data'].update(kw)

        self._data['things'] = data
        return data
Beispiel #29
0
def get_predicted_pageviews(srs, location=None):
    """
    Return predicted number of pageviews for sponsored headlines.

    Predicted geotargeted impressions are estimated as:

    geotargeted impressions = (predicted untargeted impressions) *
                                 (fp impressions for location / fp impressions)

    Accepts a single subverbify or a list; a single input returns a bare
    count, a list returns a dict keyed by sr name.
    """

    srs, is_single = tup(srs, ret_is_single=True)
    sr_names = [sr.name for sr in srs]

    # default subverbifys require a different inventory factor
    default_srids = LocalizedDefaultSubverbifys.get_global_defaults()

    if location:
        no_location = Location(None)
        r = LocationPromoMetrics.get(DefaultSR, [no_location, location])
        location_pageviews = r[(DefaultSR, location)]
        all_pageviews = r[(DefaultSR, no_location)]
        if all_pageviews:
            location_factor = float(location_pageviews) / float(all_pageviews)
        else:
            # no frontpage data at all: predict zero geotargeted traffic
            location_factor = 0.
    else:
        location_factor = 1.0

    # prediction does not vary by date
    daily_inventory = PromoMetrics.get(MIN_DAILY_CASS_KEY, sr_names=sr_names)
    ret = {}
    for sr in srs:
        if not isinstance(sr, FakeSubverbify) and sr._id in default_srids:
            default_factor = DEFAULT_INVENTORY_FACTOR
        else:
            default_factor = INVENTORY_FACTOR
        base_pageviews = daily_inventory.get(sr.name, 0)
        ret[sr.name] = int(base_pageviews * default_factor * location_factor)

    if is_single:
        return ret[srs[0].name]
    else:
        return ret
Beispiel #30
0
def add_sort(sort, t_table, select):
    """Append ORDER BY clauses for `sort` to `select`.

    t_table is either a single table, or a dict mapping column-name
    prefixes to tables (with None as the fallback key).

    Returns (sorted_select, cols) where cols is the list of
    (column, table) pairs that were added, for building joins later.
    """
    sort = tup(sort)

    prefixes = t_table.keys() if isinstance(t_table, dict) else None
    if prefixes:
        # Guard the sort: prefixes is None when t_table is a plain table
        # (the unguarded .sort() crashed with AttributeError).
        # Sort shortest-first: the prefix loop below keeps the *last*
        # match, so the longest matching prefix effectively wins.
        prefixes.sort(key=len)
    cols = []

    def make_sa_sort(s):
        orig_col = s.col

        col = orig_col
        if prefixes:
            table = None
            for k in prefixes:
                if k and orig_col.startswith(k):
                    table = t_table[k]
                    col = orig_col[len(k):]
            if table is None:
                table = t_table[None]
        else:
            table = t_table

        real_col = translate_sort(table, col)

        #TODO a way to avoid overlap?
        #add column for the sort parameter using the sorted name
        select.append_column(real_col.label(orig_col))

        #avoids overlap temporarily
        select.use_labels = True

        #keep track of which columns we added so we can add joins later
        cols.append((real_col, table))

        #default to asc
        return (sa.desc(real_col)
                if isinstance(s, operators.desc) else sa.asc(real_col))

    sa_sort = [make_sa_sort(s) for s in sort]

    s = select.order_by(*sa_sort)

    return s, cols