Example #1
def add_comments(comments):
    """Add comments to the CommentTree and update scores."""
    from r2.models.builder import write_comment_orders

    link_ids = [comment.link_id for comment in tup(comments)]
    links = Link._byID(link_ids, data=True)

    comments = tup(comments)
    comments_by_link_id = defaultdict(list)
    for comment in comments:
        comments_by_link_id[comment.link_id].append(comment)

    for link_id, link_comments in comments_by_link_id.iteritems():
        link = links[link_id]

        timer = g.stats.get_timer(
            'comment_tree.add.%s' % link.comment_tree_version)
        timer.start()

        # write scores before CommentTree because the scores must exist for all
        # comments in the tree
        for sort in ("_controversy", "_confidence", "_score"):
            scores_by_comment = {
                comment._id36: getattr(comment, sort)
                for comment in link_comments
            }
            CommentScoresByLink.set_scores(link, sort, scores_by_comment)

        scores_by_comment = _get_qa_comment_scores(link, link_comments)
        CommentScoresByLink.set_scores(link, "_qa", scores_by_comment)
        timer.intermediate('scores')

        with CommentTree.mutation_context(link, timeout=180):
            try:
                timer.intermediate('lock')
                comment_tree = CommentTree.by_link(link, timer)
                timer.intermediate('get')
                comment_tree.add_comments(link_comments)
                timer.intermediate('update')
            except InconsistentCommentTreeError:
                # failed to add a comment to the CommentTree because its parent
                # is missing from the tree. this comment will be lost forever
                # unless a rebuild is performed.
                comment_ids = [comment._id for comment in link_comments]
                g.log.error(
                    "comment_tree_inconsistent: %s %s" % (link, comment_ids))
                g.stats.simple_event('comment_tree_inconsistent')
                return

            # do this under the same lock because we want to ensure we are using
            # the same version of the CommentTree as was just written
            write_comment_orders(link)
            timer.intermediate('write_order')

        timer.stop()
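
Every example on this page funnels its input through tup from r2.lib.utils, which normalizes a single value or an iterable into a tuple. As a reference point, here is a minimal sketch of what such a helper plausibly looks like, inferred from the call sites below rather than copied from reddit's source:

def tup(item, ret_is_single=False):
    """Return item as a tuple: iterables pass through unchanged,
       anything else is wrapped in a one-element tuple."""
    # In Python 2, str and unicode do not define __iter__, so strings
    # conveniently count as single values here, not sequences of chars.
    if hasattr(item, '__iter__'):
        return (item, False) if ret_is_single else item
    return ((item,), True) if ret_is_single else (item,)

When called with ret_is_single=True it also reports whether the caller passed a bare item, which is how methods like _byID below decide between returning a single object and returning a dict.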
Example #2
def add_comments(comments):
    """Add comments to the CommentTree and update scores."""
    from r2.models.builder import write_comment_orders

    link_ids = [comment.link_id for comment in tup(comments)]
    links = Link._byID(link_ids, data=True)

    comments = tup(comments)
    comments_by_link_id = defaultdict(list)
    for comment in comments:
        comments_by_link_id[comment.link_id].append(comment)

    for link_id, link_comments in comments_by_link_id.iteritems():
        link = links[link_id]

        timer = g.stats.get_timer("comment_tree.add.1")
        timer.start()

        # write scores before CommentTree because the scores must exist for all
        # comments in the tree
        for sort in ("_controversy", "_confidence", "_score"):
            scores_by_comment = {comment._id36: getattr(comment, sort) for comment in link_comments}
            CommentScoresByLink.set_scores(link, sort, scores_by_comment)

        scores_by_comment = _get_qa_comment_scores(link, link_comments)
        CommentScoresByLink.set_scores(link, "_qa", scores_by_comment)
        timer.intermediate("scores")

        CommentTree.add_comments(link, link_comments)
        timer.intermediate("update")

        write_comment_orders(link)
        timer.intermediate("write_order")

        timer.stop()
Example #3
def add_comments(comments):
    """Add comments to the CommentTree and update scores."""
    from r2.models.builder import write_comment_orders

    link_ids = [comment.link_id for comment in tup(comments)]
    links_by_id = Link._byID(link_ids)

    comments = tup(comments)
    comments_by_link_id = defaultdict(list)
    for comment in comments:
        comments_by_link_id[comment.link_id].append(comment)

    for link_id, link_comments in comments_by_link_id.iteritems():
        link = links_by_id[link_id]

        timer = g.stats.get_timer('comment_tree.add.1')
        timer.start()

        write_comment_scores(link, link_comments)
        timer.intermediate('scores')

        CommentTree.add_comments(link, link_comments)
        timer.intermediate('update')

        write_comment_orders(link)
        timer.intermediate('write_order')

        timer.stop()
Example #4
def add_comments(comments):
    links = Link._byID([com.link_id for com in tup(comments)], data=True)
    comments = tup(comments)

    link_map = {}
    for com in comments:
        link_map.setdefault(com.link_id, []).append(com)

    for link_id, coms in link_map.iteritems():
        link = links[link_id]
        timer = g.stats.get_timer('comment_tree.add.%s'
                                  % link.comment_tree_version)
        timer.start()
        try:
            with CommentTree.mutation_context(link):
                timer.intermediate('lock')
                cache = get_comment_tree(link, timer=timer)
                timer.intermediate('get')
                cache.add_comments(coms)
                timer.intermediate('update')
        except:
            g.log.exception(
                'add_comments_nolock failed for link %s, recomputing tree',
                link_id)

            # calculate it from scratch
            get_comment_tree(link, _update=True, timer=timer)
        timer.stop()
        update_comment_votes(coms)
Example #5
    def _fast_query(cls, thing1_ids, thing2_ids, **kw):
        """Find all of the relations of this class between all of the
           members of thing1_ids and thing2_ids"""
        thing1_ids, thing1s_is_single = tup(thing1_ids, True)
        thing2_ids, thing2s_is_single = tup(thing2_ids, True)

        # permute all of the pairs
        ids = set(('%s_%s' % (x, y))
                  for x in thing1_ids
                  for y in thing2_ids)

        rels = cls._byID(ids).values()

        # does anybody actually use us this way?
        if thing1s_is_single and thing2s_is_single:
            if rels:
                assert len(rels) == 1
                return rels[0]
            else:
                raise NotFound("<%s '%s_%s'>" % (cls.__name__,
                                                 thing1_ids[0],
                                                 thing2_ids[0]))

        return dict(((rel.thing1_id, rel.thing2_id), rel)
                    for rel in rels)
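
Note that the single/single shortcut makes the return type depend on the inputs. A hypothetical call pattern (Friend stands in for a concrete relation class and is not defined on this page):

# matrix lookup: returns {(thing1_id, thing2_id): rel}
rels = Friend._fast_query([id_a, id_b], [id_c, id_d])

# single/single lookup: returns the one rel directly, or raises NotFound
rel = Friend._fast_query(id_a, id_c)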
Example #6
    def get_actions(cls, srs, mod=None, action=None, after=None, reverse=False, count=1000):
        """
        Get a ColumnQuery that yields ModAction objects according to
        specified criteria.
        """
        if after and isinstance(after, basestring):
            after = cls._byID(UUID(after))
        elif after and isinstance(after, UUID):
            after = cls._byID(after)

        if not isinstance(after, cls):
            after = None

        srs = tup(srs)

        if not mod and not action:
            rowkeys = [sr._id36 for sr in srs]
            q = ModActionBySR.query(rowkeys, after=after, reverse=reverse, count=count)
        elif mod:
            mods = tup(mod)
            key = '%s_%s' if not action else '%%s_%%s_%s' % action
            rowkeys = itertools.product([sr._id36 for sr in srs],
                [mod._id36 for mod in mods])
            rowkeys = [key % (sr, mod) for sr, mod in rowkeys]
            view = ModActionBySRActionMod if action else ModActionBySRMod
            q = view.query(rowkeys, after=after, reverse=reverse, count=count)
        else:
            rowkeys = ['%s_%s' % (sr._id36, action) for sr in srs]
            q = ModActionBySRAction.query(rowkeys, after=after, reverse=reverse, count=count)

        return q
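
The rowkey template in the mod branch is easy to misread: when an action is given, the doubled percent signs survive the first substitution, leaving a two-slot template with the action already baked in. For example, with hypothetical id36 values:

key = '%%s_%%s_%s' % 'banuser'   # -> '%s_%s_banuser'
key % ('2qh0u', '1a2b3')         # -> '2qh0u_1a2b3_banuser'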
Example #7
    def _somethinged(cls, rel, user, link, name):
        return rel._fast_query(
            tup(user),
            tup(link),
            name=name,
            thing_data=True,
            timestamp_optimize=True)
Example #8
def add_queries(queries, insert_items=None, delete_items=None, foreground=False):
    """Adds multiple queries to the query queue. If insert_items or
       delete_items is specified, the query may not need to be
       recomputed against the database."""
    if not g.write_query_queue:
        return

    for q in queries:
        if insert_items and q.can_insert():
            log.debug("Inserting %s into query %s" % (insert_items, q))
            if foreground:
                q.insert(insert_items)
            else:
                worker.do(q.insert, insert_items)
        elif delete_items and q.can_delete():
            log.debug("Deleting %s from query %s" % (delete_items, q))
            if foreground:
                q.delete(delete_items)
            else:
                worker.do(q.delete, delete_items)
        else:
            raise Exception("Cannot update query %r!" % (q,))

    # dual-write any queries that are being migrated to the new query cache
    with CachedQueryMutator() as m:
        new_queries = [getattr(q, 'new_query') for q in queries if hasattr(q, 'new_query')]

        if insert_items:
            for query in new_queries:
                m.insert(query, tup(insert_items))

        if delete_items:
            for query in new_queries:
                m.delete(query, tup(delete_items))
Example #9
def get_recommendations(srs, count=10, source=SRC_MULTIREDDITS, to_omit=None, match_set=True, over18=False):
    """Return subreddits recommended if you like the given subreddits.

    Args:
    - srs is one Subreddit object or a list of Subreddits
    - count is total number of results to return
    - source is a prefix telling which set of recommendations to use
    - to_omit is a single subreddit id36 or a list of id36s that should not
        be included. (Useful for omitting recs that were already rejected.)
    - match_set=True will return recs that are similar to each other, useful
        for matching the "theme" of the original set
    - over18 content is filtered unless over18=True or one of the original srs
        is over18

    """
    srs = tup(srs)
    to_omit = tup(to_omit) if to_omit else []

    # fetch more recs than requested because some might get filtered out
    rec_id36s = SRRecommendation.for_srs([sr._id36 for sr in srs], to_omit, count * 2, source, match_set=match_set)

    # always check for private subreddits at runtime since type might change
    rec_srs = Subreddit._byID36(rec_id36s, return_dict=False)
    filtered = [sr for sr in rec_srs if is_visible(sr)]

    # don't recommend adult srs unless one of the originals was over_18
    if not over18 and not any(sr.over_18 for sr in srs):
        filtered = [sr for sr in filtered if not sr.over_18]

    return filtered[:count]
Example #10
def add_comments(comments):
    links = Link._byID([com.link_id for com in tup(comments)], data=True)
    comments = tup(comments)

    link_map = {}
    for com in comments:
        link_map.setdefault(com.link_id, []).append(com)

    for link_id, coms in link_map.iteritems():
        link = links[link_id]
        add_comments = [comment for comment in coms if not comment._deleted]
        delete_comments = (comment for comment in coms if comment._deleted)
        timer = g.stats.get_timer('comment_tree.add.%s'
                                  % link.comment_tree_version)
        timer.start()
        try:
            with CommentTree.mutation_context(link, timeout=30):
                timer.intermediate('lock')
                cache = get_comment_tree(link, timer=timer)
                timer.intermediate('get')
                if add_comments:
                    cache.add_comments(add_comments)
                for comment in delete_comments:
                    cache.delete_comment(comment, link)
                timer.intermediate('update')
        except InconsistentCommentTreeError:
            comment_ids = [comment._id for comment in coms]
            g.log.exception(
                'add_comments_nolock failed for link %s %s, recomputing',
                link_id, comment_ids)
            rebuild_comment_tree(link, timer=timer)

        timer.stop()
        update_comment_votes(coms)
Example #11
    def _fast_query(cls, thing1_ids, thing2_ids, properties=None, **kw):
        """Find all of the relations of this class between all of the
           members of thing1_ids and thing2_ids"""
        thing1_ids, thing1s_is_single = tup(thing1_ids, True)
        thing2_ids, thing2s_is_single = tup(thing2_ids, True)

        if not thing1_ids or not thing2_ids:
            # nothing to permute
            return {}

        if properties is not None:
            properties = set(properties)

            # all relations must load these properties, even if
            # unrequested
            properties.add("thing1_id")
            properties.add("thing2_id")

        # permute all of the pairs
        ids = set(cls._rowkey(x, y) for x in thing1_ids for y in thing2_ids)

        rels = cls._byID(ids, properties=properties).values()

        if thing1s_is_single and thing2s_is_single:
            if rels:
                assert len(rels) == 1
                return rels[0]
            else:
                raise NotFound("<%s %r>" % (cls.__name__, cls._rowkey(thing1_ids[0], thing2_ids[0])))

        return dict(((rel.thing1_id, rel.thing2_id), rel) for rel in rels)
Example #12
def get_recommendations(srs, count=10, source=SRC_MULTIREDDITS, to_omit=None):
    """Return subreddits recommended if you like the given subreddits.

    Args:
    - srs is one Subreddit object or a list of Subreddits
    - count is total number of results to return
    - source is a prefix telling which set of recommendations to use
    - to_omit is one Subreddit object or a list of Subreddits that should not
        be included. (Useful for omitting recs that were already rejected.)

    """
    srs = tup(srs)
    to_omit = tup(to_omit) if to_omit else []

    # fetch more recs than requested because some might get filtered out
    rec_id36s = SRRecommendation.for_srs([sr._id36 for sr in srs], [o._id36 for o in to_omit], count * 2, source)

    # always check for private subreddits at runtime since type might change
    rec_srs = Subreddit._byID36(rec_id36s, return_dict=False)
    filtered = [sr for sr in rec_srs if sr.type != "private"]

    # don't recommend adult srs unless one of the originals was over_18
    if not any(sr.over_18 for sr in srs):
        filtered = [sr for sr in filtered if not sr.over_18]

    return filtered[:count]
Example #13
def add_comments(comments):
    links = Link._byID([com.link_id for com in tup(comments)], data=True)
    comments = tup(comments)

    link_map = {}
    for com in comments:
        link_map.setdefault(com.link_id, []).append(com)

    for link_id, coms in link_map.iteritems():
        link = links[link_id]
        add_comments = [comment for comment in coms if not comment._deleted]
        delete_comments = (comment for comment in coms if comment._deleted)
        timer = g.stats.get_timer("comment_tree.add.%s" % link.comment_tree_version)
        timer.start()
        try:
            with CommentTree.mutation_context(link):
                timer.intermediate("lock")
                cache = get_comment_tree(link, timer=timer)
                timer.intermediate("get")
                if add_comments:
                    cache.add_comments(add_comments)
                for comment in delete_comments:
                    cache.delete_comment(comment, link)
                timer.intermediate("update")
        except:
            g.log.exception("add_comments_nolock failed for link %s, recomputing tree", link_id)

            # calculate it from scratch
            get_comment_tree(link, _update=True, timer=timer)
        timer.stop()
        update_comment_votes(coms)
Example #14
    def get_actions(cls, srs, mod=None, action=None, after=None, reverse=False, count=1000):
        """
        Get a ColumnQuery that yields ModAction objects according to
        specified criteria.
        """
        if after and isinstance(after, basestring):
            after = cls._byID(UUID(after))
        elif after and isinstance(after, UUID):
            after = cls._byID(after)

        if not isinstance(after, cls):
            after = None

        srs = tup(srs)

        if not mod and not action:
            rowkeys = [sr._id36 for sr in srs]
            q = ModActionBySR.query(rowkeys, after=after, reverse=reverse, count=count)
        elif mod and not action:
            mods = tup(mod)
            rowkeys = itertools.product([sr._id36 for sr in srs],
                [mod._id36 for mod in mods])
            rowkeys = ['%s_%s' % (sr, mod) for sr, mod in rowkeys]
            q = ModActionBySRMod.query(rowkeys, after=after, reverse=reverse, count=count)
        elif not mod and action:
            rowkeys = ['%s_%s' % (sr._id36, action) for sr in srs]
            q = ModActionBySRAction.query(rowkeys, after=after, reverse=reverse, count=count)
        else:
            raise NotImplementedError("Can't query by both mod and action")

        return q
Example #15
        def _fast_query(cls, sub, obj, name, data=True, eager_load=True, thing_data=False, timestamp_optimize=False):
            # divide into types
            def type_dict(items):
                types = {}
                for i in items:
                    types.setdefault(i.__class__, []).append(i)
                return types

            sub_dict = type_dict(tup(sub))
            obj_dict = type_dict(tup(obj))

            # for each pair of types, see if we have a query to send
            res = {}
            for types, rel in cls.rels.iteritems():
                t1, t2 = types
                if sub_dict.has_key(t1) and obj_dict.has_key(t2):
                    res.update(
                        rel._fast_query(
                            sub_dict[t1],
                            obj_dict[t2],
                            name,
                            data=data,
                            eager_load=eager_load,
                            thing_data=thing_data,
                            timestamp_optimize=timestamp_optimize,
                        )
                    )

            return res
Example #16
        def _fast_query(cls, thing1s, thing2s, name, data=True, eager_load=True, thing_data=False):
            """looks up all the relationships between thing1_ids and
               thing2_ids and caches them"""
            prefix = thing_prefix(cls.__name__)

            thing1_dict = dict((t._id, t) for t in tup(thing1s))
            thing2_dict = dict((t._id, t) for t in tup(thing2s))

            thing1_ids = thing1_dict.keys()
            thing2_ids = thing2_dict.keys()

            name = tup(name)

            # permute all of the pairs
            pairs = set((x, y, n) for x in thing1_ids for y in thing2_ids for n in name)

            def lookup_rel_ids(pairs):
                rel_ids = {}

                t1_ids = set()
                t2_ids = set()
                names = set()
                for t1, t2, name in pairs:
                    t1_ids.add(t1)
                    t2_ids.add(t2)
                    names.add(name)

                if t1_ids and t2_ids and names:
                    q = cls._query(cls.c._thing1_id == t1_ids, cls.c._thing2_id == t2_ids, cls.c._name == names)
                else:
                    q = []

                for rel in q:
                    rel_ids[(rel._thing1_id, rel._thing2_id, rel._name)] = rel._id

                for p in pairs:
                    if p not in rel_ids:
                        rel_ids[p] = None

                return rel_ids

            # get the relation ids from the cache or query the db
            res = sgm(cls._cache, pairs, lookup_rel_ids, prefix)

            # get the relation objects
            rel_ids = {rel_id for rel_id in res.itervalues() if rel_id is not None}
            rels = cls._byID_rel(rel_ids, data=data, eager_load=eager_load, thing_data=thing_data)

            res_obj = {}
            for (thing1_id, thing2_id, name), rel_id in res.iteritems():
                pair = (thing1_dict[thing1_id], thing2_dict[thing2_id], name)
                rel = rels[rel_id] if rel_id is not None else None
                res_obj[pair] = rel

            return res_obj
Example #17
    def remove_tag(self, tag_name, name='tag'):
        """Removes a tag from the link. The tag is not deleted,
           just the relationship between the link and the tag"""
        try:
            tag = Tag._by_name(tag_name)
        except NotFound:
            return False

        tags = LinkTag._fast_query(tup(self), tup(tag), name=name)
        link_tag = tags[(self, tag, name)]
        if link_tag:
            link_tag._delete()
            return link_tag
Example #18
    def run(self, url, sr = None):
        if sr is None and not isinstance(c.site, FakeSubreddit):
            sr = c.site
        elif sr:
            try:
                sr = Subreddit._by_name(sr)
            except NotFound:
                c.errors.add(errors.SUBREDDIT_NOEXIST)
                sr = None
        else:
            sr = None

        if not url:
            return self.error(errors.NO_URL)
        url = utils.sanitize_url(url)
        if url == 'self':
            return url
        elif url:
            try:
                l = Link._by_url(url, sr)
                self.error(errors.ALREADY_SUB)
                return utils.tup(l)
            except NotFound:
                return url
        return self.error(errors.BAD_URL)
Example #19
    def validate_list(self, nodes, validators_by_type, ignored_types=None):
        for node in nodes:
            if node.type == "error":
                yield ValidationError(node.source_line, "SYNTAX_ERROR",
                                      {"message": node.message})
                continue
            elif node.type == "literal":
                if node.value == ";":
                    # if we're seeing a semicolon as a literal, it's in a place
                    # that doesn't fit naturally in the syntax.
                    # Safari 5 will treat this as two color properties:
                    # color: calc(;color:red;);
                    message = "semicolons are not allowed in this context"
                    yield ValidationError(node.source_line, "SYNTAX_ERROR",
                                          {"message": message})
                    continue

            validator = validators_by_type.get(node.type)

            if validator:
                for error in tup(validator(node)):
                    if error:
                        yield error
            else:
                if not ignored_types or node.type not in ignored_types:
                    yield ValidationError(node.source_line,
                                          "UNEXPECTED_TOKEN",
                                          {"token": node.type})
Example #20
    def __iter__(self, yield_column_names=False):
        retrieved = 0
        column_start = self.column_start
        while retrieved < self._limit:
            try:
                column_count = min(self._chunk_size, self._limit - retrieved)
                if column_start:
                    column_count += 1   # cassandra includes column_start
                r = self.cls._cf.multiget(self.rowkeys,
                                          column_start=column_start,
                                          column_finish=self.column_finish,
                                          column_count=column_count,
                                          column_reversed=self.column_reversed)

                # multiget returns OrderedDict {rowkey: {column_name: column_value}}
                # combine into single OrderedDict of {column_name: column_value}
                nrows = len(r.keys())
                if nrows == 0:
                    return
                elif nrows == 1:
                    columns = r.values()[0]
                else:
                    r_combined = {}
                    for d in r.values():
                        r_combined.update(d)
                    columns = OrderedDict(sorted(r_combined.items(),
                                                 key=lambda t: self.sort_key(t[0]),
                                                 reverse=self.column_reversed))
            except NotFoundException:
                return

            retrieved += self._chunk_size

            if column_start:
                try:
                    del columns[column_start]
                except KeyError:
                    columns.popitem(last=True)  # remove extra column

            if not columns:
                return

            # Convert to list of columns
            l_columns = [{col_name: columns[col_name]} for col_name in columns]

            column_start = l_columns[-1].keys()[0]
            objs = self.column_to_obj(l_columns)

            if yield_column_names:
                column_names = [column.keys()[0] for column in l_columns]
                if len(column_names) == 1:
                    ret = (column_names[0], objs),
                else:
                    ret = zip(column_names, objs)
            else:
                ret = objs

            ret, is_single = tup(ret, ret_is_single=True)
            for r in ret:
                yield r
Example #21
def sa_op(op):
    #if BooleanOp
    if isinstance(op, operators.or_):
        return sa.or_(*[sa_op(o) for o in op.ops])
    elif isinstance(op, operators.and_):
        return sa.and_(*[sa_op(o) for o in op.ops])

    #else, assume op is an instance of op
    if isinstance(op, operators.eq):
        fn = lambda x,y: x == y
    elif isinstance(op, operators.ne):
        fn = lambda x,y: x != y
    elif isinstance(op, operators.gt):
        fn = lambda x,y: x > y
    elif isinstance(op, operators.lt):
        fn = lambda x,y: x < y
    elif isinstance(op, operators.gte):
        fn = lambda x,y: x >= y
    elif isinstance(op, operators.lte):
        fn = lambda x,y: x <= y

    rval = tup(op.rval)

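    # rval was empty: return a WHERE clause that can never match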
    if not rval:
        return '2+2=5'
    else:
        return sa.or_(*[fn(op.lval, v) for v in rval])
Example #22
    def for_srs(cls, srid36, to_omit, count, source, match_set=True):
        # It's usually better to use get_recommendations() than to call this
        # function directly because it does privacy filtering.

        srid36s = tup(srid36)
        to_omit = set(to_omit)
        to_omit.update(srid36s)  # don't show the originals
        rowkeys = ['%s.%s' % (source, srid36) for srid36 in srid36s]

        # fetch multiple sets of recommendations, one for each input srid36
        d = sgm(g.cache, rowkeys, SRRecommendation._byID, prefix='srr.')
        rows = d.values()

        if match_set:
            sorted_recs = SRRecommendation._merge_and_sort_by_count(rows)
            # heuristic: if input set is large, rec should match more than one
            min_count = math.floor(.1 * len(srid36s))
            sorted_recs = (rec[0] for rec in sorted_recs if rec[1] > min_count)
        else:
            sorted_recs = SRRecommendation._merge_roundrobin(rows)
        # remove duplicates and ids listed in to_omit
        filtered = []
        for r in sorted_recs:
            if r not in to_omit:
                filtered.append(r)
                to_omit.add(r)
        return filtered[:count]
Example #23
    def unspam(self, things, unbanner=None, train_spam=True, insert=True):
        from r2.lib.db import queries

        things = tup(things)

        # We want to make unban-all moderately efficient, so when
        # mass-unbanning, we're going to skip the code below on links that
        # are already not banned.  However, when someone manually clicks
        # "approve" on an unbanned link, and there's just one, we want do
        # want to run the code below. That way, the little green checkmark
        # will have the right mouseover details, the reports will be
        # cleared, etc.

        if len(things) > 1:
            things = [x for x in things if x._spam]

        Report.accept(things, False)
        for t in things:
            ban_info = copy(getattr(t, 'ban_info', {}))
            ban_info['unbanned_at'] = datetime.now(g.tz)
            if unbanner:
                ban_info['unbanner'] = unbanner
            if ban_info.get('reset_used', None) == None:
                ban_info['reset_used'] = False
            else:
                ban_info['reset_used'] = True
            t.ban_info = ban_info
            t._spam = False
            t._commit()

        self.author_spammer(things, False)
        self.set_last_sr_ban(things)

        queries.unban(things, insert)
Example #24
    def __init__(self, name, i18n_message, msg_params, field=None, code=None):
        self.name = name
        self.i18n_message = i18n_message
        self.msg_params = msg_params
        # list of fields in the original form that caused the error
        self.fields = tup(field) if field else []
        self.code = code
Example #25
    def _byID(cls, ids):
        ids, is_single = tup(ids, True)

        if not len(ids):
            if is_single:
                raise InvariantException("whastis?")
            else:
                return {}

        # all keys must be strings or directly convertable to strings
        assert all(isinstance(_id, basestring) and str(_id) for _id in ids)

        def lookup(l_ids):
            rows = cls.cf.multiget(l_ids, column_count=max_column_count)

            l_ret = {}
            for t_id, row in rows.iteritems():
                t = cls._from_serialized_columns(t_id, row)
                l_ret[t._id] = t

            return l_ret

        ret = cache.sgm(thing_cache, ids, lookup, prefix=cls._cache_prefix())

        if is_single and not ret:
            raise NotFound("<%s %r>" % (cls.__name__,
                                        ids[0]))
        elif is_single:
            assert len(ret) == 1
            return ret.values()[0]

        return ret
Example #26
def compute_message_trees(messages):
    from r2.models import Message
    roots = set()
    threads = {}
    mdict = {}
    messages = sorted(messages, key = lambda m: m._date, reverse = True)

    for m in messages:
        if not m._loaded:
            m._load()
        mdict[m._id] = m
        if m.first_message:
            roots.add(m.first_message)
            threads.setdefault(m.first_message, set()).add(m._id)
        else:
            roots.add(m._id)

    # load any top-level messages which are not in the original list
    missing = [m for m in roots if m not in mdict]
    if missing:
        mdict.update(Message._byID(tup(missing),
                                   return_dict = True, data = True))

    # sort threads in chrono order
    for k in threads:
        threads[k] = list(sorted(threads[k]))

    tree = [(root, threads.get(root, [])) for root in roots]
    tree.sort(key = tree_sort_fn, reverse = True)

    return tree
Example #27
    def _byID(cls, ids, data=False, return_dict=True, extra_props=None,
              stale=False, ignore_missing=False):
        ids, single = tup(ids, True)
        prefix = thing_prefix(cls.__name__)

        if not all(x <= tdb.MAX_THING_ID for x in ids):
            raise NotFound('huge thing_id in %r' % ids)

        def count_found(ret, still_need):
            cls._cache.stats.cache_report(
                hits=len(ret), misses=len(still_need),
                cache_name='sgm.%s' % cls.__name__)

        if not cls._cache.stats:
            count_found = None

        def items_db(ids):
            items = cls._get_item(cls._type_id, ids)
            for i in items.keys():
                items[i] = cls._build(i, items[i])

            return items

        bases = sgm(cls._cache, ids, items_db, prefix, stale=stale,
                    found_fn=count_found)

        # Check to see if we found everything we asked for
        missing = []
        for i in ids:
            if i not in bases:
                missing.append(i)
            elif bases[i] and bases[i]._id != i:
                g.log.error("thing.py: Doppleganger on byID: %s got %s for %s" %
                            (cls.__name__, bases[i]._id, i))
                bases[i] = items_db([i]).values()[0]
                bases[i]._cache_myself()
        if missing and not ignore_missing:
            raise NotFound, '%s %s' % (cls.__name__, missing)
        for i in missing:
            ids.remove(i)

        if data:
            need = []
            for v in bases.itervalues():
                if not v._loaded:
                    need.append(v)
            if need:
                cls._load_multi(need)

        if extra_props:
            for _id, props in extra_props.iteritems():
                for k, v in props.iteritems():
                    bases[_id].__setattr__(k, v, False)

        if single:
            return bases[ids[0]] if ids else None
        elif return_dict:
            return bases
        else:
            return filter(None, (bases.get(i) for i in ids))
Example #28
    def add_to_queue(self, user, emails, from_name, fr_addr, kind,
                     date = None, ip = None,
                     body = "", reply_to = "", thing = None):
        s = self.queue_table
        hashes = []
        if not date:
            date = datetime.datetime.now(g.tz)
        if not ip:
            ip = getattr(request, "ip", "127.0.0.1")
        for email in tup(emails):
            uid = user._id if user else 0
            tid = thing._fullname if thing else ""
            key = hashlib.sha1(str((email, from_name, uid, tid, ip, kind, body,
                                    datetime.datetime.now(g.tz)))).hexdigest()
            s.insert().values({s.c.to_addr : email,
                               s.c.account_id : uid,
                               s.c.from_name : from_name,
                               s.c.fr_addr : fr_addr,
                               s.c.reply_to : reply_to,
                               s.c.fullname: tid,
                               s.c.ip : ip,
                               s.c.kind: kind,
                               s.c.body: body,
                               s.c.date : date,
                               s.c.msg_hash : key}).execute()
            hashes.append(key)
        return hashes
Example #29
    def spam(self, things, auto, moderator_banned, banner, date = None, **kw):
        from r2.lib.db import queries

        things = [x for x in tup(things) if not x._spam]
        Report.accept(things, True)
        for t in things:
            t._spam = True
            ban_info = copy(getattr(t, 'ban_info', {}))
            ban_info.update(auto = auto,
                            moderator_banned = moderator_banned,
                            banned_at = date or datetime.now(g.tz),
                            **kw)

            if isinstance(banner, dict):
                ban_info['banner'] = banner[t._fullname]
            else:
                ban_info['banner'] = banner

            t.ban_info = ban_info
            t._commit()
            changed(t)


        if not auto:
            self.author_spammer(things, True)
            self.set_last_sr_ban(things)

        queries.ban(things)
Example #30
    def run(self, url, sr = None):
        if sr is None and not isinstance(c.site, FakeSubreddit):
            sr = c.site
        elif sr:
            try:
                sr = Subreddit._by_name(str(sr))
            except (NotFound, UnicodeEncodeError):
                self.set_error(errors.SUBREDDIT_NOEXIST)
                sr = None
        else:
            sr = None

        if not url:
            return self.error(errors.NO_URL)
        url = utils.sanitize_url(url)
        if not url:
            return self.error(errors.BAD_URL)

        if url == 'self':
            if self.allow_self:
                return url
        elif not self.lookup:
            return url
        elif url:
            try:
                l = Link._by_url(url, sr)
                self.error(errors.ALREADY_SUB)
                return utils.tup(l)
            except NotFound:
                return url
        return self.error(errors.BAD_URL)
Example #31
    def add(self, error_name, msg_params={}, field=None):
        msg = error_list[error_name]
        for field_name in tup(field):
            e = Error(error_name, msg, msg_params, field=field_name)
            self.errors[(error_name, field_name)] = e
Example #32
def get_available_pageviews(targets, start, end, location=None, datestr=False,
                            ignore=None, platform='all'):
    """
    Return the available pageviews by date for the targets and location.

    Available pageviews depends on all equal and higher level locations:
    A location is: subreddit > country > metro

    e.g. if a campaign is targeting /r/funny in USA/Boston we need to check that
    there's enough inventory in:
    * /r/funny (all campaigns targeting /r/funny regardless of location)
    * /r/funny + USA (all campaigns targeting /r/funny and USA with or without
      metro level targeting)
    * /r/funny + USA + Boston (all campaigns targeting /r/funny and USA and
      Boston)
    The available inventory is the smallest of these values.

    """

    # assemble levels of location targeting, None means untargeted
    locations = [None]
    if location:
        locations.append(location)

        if location.metro:
            locations.append(Location(country=location.country))

    # get all the campaigns directly and indirectly involved in our target
    targets, is_single = tup(targets, ret_is_single=True)
    target_srs = list(chain.from_iterable(
        target.subreddits_slow for target in targets))
    all_campaigns = find_campaigns(target_srs, start, end, ignore)

    # get predicted pageviews for each subreddit and location
    all_sr_names = set(sr.name for sr in target_srs)
    all_sr_names |= set(chain.from_iterable(
        campaign.target.subreddit_names for campaign in all_campaigns
    ))
    all_srs = Subreddit._by_name(all_sr_names).values()
    pageviews_dict = {location: get_predicted_pageviews(all_srs, location)
                          for location in locations}

    # determine booked impressions by target and location for each day
    dates = set(get_date_range(start, end))
    booked_dict = {}
    for date in dates:
        booked_dict[date] = {}
        for location in locations:
            booked_dict[date][location] = defaultdict(int)

    for campaign in all_campaigns:
        camp_dates = set(get_date_range(campaign.start_date, campaign.end_date))
        sr_names = tuple(sorted(campaign.target.subreddit_names))
        daily_impressions = campaign.impressions / campaign.ndays

        for location in locations:
            if location and not location.contains(campaign.location):
                # campaign's location is less specific than location
                continue

            for date in camp_dates.intersection(dates):
                booked_dict[date][location][sr_names] += daily_impressions

    # calculate inventory for each target and location on each date
    datekey = lambda dt: dt.strftime('%m/%d/%Y') if datestr else dt

    ret = {}
    for target in targets:
        name = make_target_name(target)
        subreddit_names = target.subreddit_names
        ret[name] = {}
        for date in dates:
            pageviews_by_location = {}
            for location in locations:
                # calculate available impressions for each location
                booked_by_target = booked_dict[date][location]
                pageviews_by_sr_name = pageviews_dict[location]
                pageviews_by_location[location] = get_maximized_pageviews(
                    subreddit_names, booked_by_target, pageviews_by_sr_name)
            # available pageviews is the minimum from all locations
            min_pageviews = min(pageviews_by_location.values())
            if PERCENT_MOBILE != 0:
                mobile_pageviews = min_pageviews * (float(PERCENT_MOBILE) / 100)
                if platform == 'mobile':
                    min_pageviews = mobile_pageviews
                if platform == 'desktop':
                    min_pageviews = min_pageviews - mobile_pageviews
            ret[name][datekey(date)] = max(0, min_pageviews)

    if is_single:
        name = make_target_name(targets[0])
        return ret[name]
    else:
        return ret
Example #33
    def insert_table_rows(self, rows, index=-1):
        new = self.__getattr__("insert_table_rows")
        return new([row.render() for row in tup(rows)], index)
Example #34
    def _by_name(cls, names, stale=False, _update=False):
        '''
        Usages: 
        1. Subreddit._by_name('funny') # single sr name
        Searches for a single subreddit. Returns a single Subreddit object or 
        raises NotFound if the subreddit doesn't exist.
        2. Subreddit._by_name(['aww','iama']) # list of sr names
        Searches for a list of subreddits. Returns a dict mapping srnames to 
        Subreddit objects. Items that were not found are omitted from the dict.
        If no items are found, an empty dict is returned.
        '''
        #lower name here so there is only one cache
        names, single = tup(names, True)

        to_fetch = {}
        ret = {}

        for name in names:
            lname = name.lower()

            if lname in cls._specials:
                ret[name] = cls._specials[lname]
            elif len(lname) > Subreddit.MAX_SRNAME_LENGTH:
                g.log.debug(
                    "Subreddit._by_name() ignoring invalid srname (too long): %s",
                    lname)
            else:
                to_fetch[lname] = name

        if to_fetch:

            def _fetch(lnames):
                q = cls._query(lower(cls.c.name) == lnames,
                               cls.c._spam == (True, False),
                               limit=len(lnames),
                               data=True)
                try:
                    srs = list(q)
                except UnicodeEncodeError:
                    print "Error looking up SRs %r" % (lnames, )
                    raise

                return dict((sr.name.lower(), sr._id) for sr in srs)

            srs = {}
            srids = sgm(g.cache,
                        to_fetch.keys(),
                        _fetch,
                        prefix='subreddit.byname',
                        stale=stale)
            if srids:
                srs = cls._byID(srids.values(),
                                data=True,
                                return_dict=False,
                                stale=stale)

            for sr in srs:
                ret[to_fetch[sr.name.lower()]] = sr

        if ret and single:
            return ret.values()[0]
        elif not ret and single:
            raise NotFound, 'Subreddit %s' % name
        else:
            return ret
Example #35
    def __lshift__(self, routing_keys):
        """Register bindings from routing keys to this queue."""
        routing_keys = tup(routing_keys)
        for routing_key in routing_keys:
            self._bind(routing_key)
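
Because tup wraps a bare string, the same operator binds one routing key or several. A hypothetical usage, assuming q is an instance of this queue class:

q << 'new_comment'                       # bind a single routing key
q << ('new_comment', 'edited_comment')   # bind several at once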
Example #36
    def init_query(self):
        names = list(tup(self.query))

        after = self.after._fullname if self.after else None

        self.names = self._get_after(names, after, self.reverse)
Example #37
def find_data(type_id, get_cols, sort, limit, constraints):
    t_table, d_table = get_thing_table(type_id)
    constraints = deepcopy(constraints)

    used_first = False
    s = None
    need_join = False
    have_data_rule = False
    first_alias = d_table.alias()
    s = sa.select([first_alias.c.thing_id.label('thing_id')
                   ])  #, distinct=True)

    for op in operators.op_iter(constraints):
        key = op.lval_name
        vals = tup(op.rval)

        if key == '_id':
            op.lval = first_alias.c.thing_id
        elif key.startswith('_'):
            need_join = True
            op.lval = translate_sort(t_table, key[1:], op.lval)
            op.rval = translate_thing_value(op.rval)
        else:
            have_data_rule = True
            id_col = None
            if not used_first:
                alias = first_alias
                used_first = True
            else:
                alias = d_table.alias()
                id_col = first_alias.c.thing_id

            if id_col:
                s.append_whereclause(id_col == alias.c.thing_id)

            s.append_column(alias.c.value.label(key))
            s.append_whereclause(alias.c.key == key)

            #add the substring constraint if no other functions are there
            translate_data_value(alias, op)

    for op in constraints:
        s.append_whereclause(sa_op(op))

    if not have_data_rule:
        raise Exception('Data queries must have at least one data rule.')

    #TODO in order to sort by data columns, this is going to need to be smarter
    if sort:
        need_join = True
        s, cols = add_sort(sort, {'_': t_table}, s)

    if need_join:
        s.append_whereclause(first_alias.c.thing_id == t_table.c.thing_id)

    if limit:
        s = s.limit(limit)

    r = s.execute()

    return Results(r, lambda (row): row if get_cols else row.thing_id)
Example #38
    def update_candidates(self, candidates, sorter, to_add=None):
        for comment in (comment for comment in tup(to_add)
                        if comment in sorter):
            sort_val = -sorter[comment] if self.rev_sort else sorter[comment]
            heapq.heappush(candidates, (sort_val, comment))
Example #39
def flatten_response(content):
    """Convert a content iterable to a string, properly handling unicode."""
    # TODO: it would be nice to replace this with response.body someday
    # once unicode issues are ironed out.
    return "".join(_force_utf8(x) for x in tup(content) if x)
Example #40
def _report_interval(interval):
    """Read aggregated traffic from S3 and write to postgres."""
    from sqlalchemy.orm import scoped_session, sessionmaker
    from r2.models.traffic import engine
    Session = scoped_session(sessionmaker(bind=engine))

    # determine interval_type from YYYY-MM[-DD][-HH]
    pieces = interval.split('-')
    pieces = [int(i) for i in pieces]
    if len(pieces) == 4:
        interval_type = 'hour'
    elif len(pieces) == 3:
        interval_type = 'day'
        pieces.append(0)
    elif len(pieces) == 2:
        interval_type = 'month'
        pieces.append(1)
        pieces.append(0)
    else:
        raise ValueError('unrecognized interval format: %s' % interval)

    pg_interval = "%04d-%02d-%02d %02d:00:00" % tuple(pieces)
    print 'reporting interval %s (%s)' % (pg_interval, interval_type)

    # Read aggregates and write to traffic db
    for category_cls in traffic_categories:
        now = datetime.datetime.now()
        print '*** %s - %s - %s' % (category_cls.__name__, interval, now)
        data = get_aggregate(interval, category_cls)
        len_data = len(data)
        step = max(len_data / 5, 100)
        for i, (name, (uniques, pageviews)) in enumerate(data.iteritems()):
            try:
                for n in tup(name):
                    unicode(n)
            except UnicodeDecodeError:
                print '%s - %s - %s - %s' % (category_cls.__name__, name,
                                             uniques, pageviews)
                continue

            if i % step == 0:
                now = datetime.datetime.now()
                print '%s - %s - %s/%s - %s' % (
                    interval, category_cls.__name__, i, len_data, now)

            kw = {
                'date': pg_interval,
                'interval': interval_type,
                'unique_count': uniques,
                'pageview_count': pageviews
            }
            kw.update(_name_to_kw(category_cls, name))
            r = category_cls(**kw)

            try:
                Session.merge(r)
                Session.commit()
            except DataError:
                Session.rollback()
                continue

    Session.remove()
    now = datetime.datetime.now()
    print 'finished reporting %s (%s) - %s' % (pg_interval, interval_type, now)
Example #41
    def add_error(self, error):
        for field_name in tup(error.fields):
            self.errors[(error.name, field_name)] = error
Example #42
    def add(self, error_name, msg_params=None, field=None, code=None):
        for field_name in tup(field):
            e = RedditError(error_name, msg_params, fields=field_name,
                            code=code)
            self.add_error(e)
Example #43
        def _fast_query(cls,
                        thing1s,
                        thing2s,
                        name,
                        data=True,
                        eager_load=True,
                        thing_data=False):
            """looks up all the relationships between thing1_ids and
               thing2_ids and caches them"""
            prefix = thing_prefix(cls.__name__)

            thing1_dict = dict((t._id, t) for t in tup(thing1s))
            thing2_dict = dict((t._id, t) for t in tup(thing2s))

            thing1_ids = thing1_dict.keys()
            thing2_ids = thing2_dict.keys()

            name = tup(name)

            # permute all of the pairs
            pairs = set((x, y, n) for x in thing1_ids for y in thing2_ids
                        for n in name)

            def lookup_rel_ids(pairs):
                rel_ids = {}

                t1_ids = set()
                t2_ids = set()
                names = set()
                for t1, t2, name in pairs:
                    t1_ids.add(t1)
                    t2_ids.add(t2)
                    names.add(name)

                if t1_ids and t2_ids and names:
                    q = cls._query(cls.c._thing1_id == t1_ids,
                                   cls.c._thing2_id == t2_ids,
                                   cls.c._name == names)
                else:
                    q = []

                for rel in q:
                    rel_ids[(rel._thing1_id, rel._thing2_id,
                             rel._name)] = rel._id

                for p in pairs:
                    if p not in rel_ids:
                        rel_ids[p] = None

                return rel_ids

            # get the relation ids from the cache or query the db
            res = sgm(cls._cache, pairs, lookup_rel_ids, prefix)

            # get the relation objects
            rel_ids = {
                rel_id
                for rel_id in res.itervalues() if rel_id is not None
            }
            rels = cls._byID_rel(rel_ids,
                                 data=data,
                                 eager_load=eager_load,
                                 thing_data=thing_data)

            res_obj = {}
            for (thing1_id, thing2_id, name), rel_id in res.iteritems():
                pair = (thing1_dict[thing1_id], thing2_dict[thing2_id], name)
                rel = rels[rel_id] if rel_id is not None else None
                res_obj[pair] = rel

            return res_obj
Example #44
    def insert(self, items):
        """Inserts the item into the cached data. This only works
           under certain criteria, see can_insert."""
        self._insert_tuples(
            [self.make_item_tuple(item) for item in tup(items)])
Example #45
    def spam(self, thing, amount=1, mark_as_spam=True, **kw):
        things = tup(thing)
        for t in things:
            if mark_as_spam:
                t._spam = (amount > 0)
                t._commit()
Example #46
    def _byID(cls,
              ids,
              data=False,
              return_dict=True,
              extra_props=None,
              stale=False,
              ignore_missing=False):
        ids, single = tup(ids, True)
        prefix = thing_prefix(cls.__name__)

        if not all(x <= tdb.MAX_THING_ID for x in ids):
            raise NotFound('huge thing_id in %r' % ids)

        def count_found(ret, still_need):
            cls._cache.stats.cache_report(hits=len(ret),
                                          misses=len(still_need),
                                          cache_name='sgm.%s' % cls.__name__)

        if not cls._cache.stats:
            count_found = None

        def items_db(ids):
            items = cls._get_item(cls._type_id, ids)
            for i in items.keys():
                items[i] = cls._build(i, items[i])

            return items

        bases = sgm(cls._cache,
                    ids,
                    items_db,
                    prefix,
                    stale=stale,
                    found_fn=count_found)

        # Check to see if we found everything we asked for
        missing = []
        for i in ids:
            if i not in bases:
                missing.append(i)
            elif bases[i] and bases[i]._id != i:
                g.log.error(
                    "thing.py: Doppleganger on byID: %s got %s for %s" %
                    (cls.__name__, bases[i]._id, i))
                bases[i] = items_db([i]).values()[0]
                bases[i]._cache_myself()
        if missing and not ignore_missing:
            raise NotFound, '%s %s' % (cls.__name__, missing)
        for i in missing:
            ids.remove(i)

        if data:
            need = []
            for v in bases.itervalues():
                if not v._loaded:
                    need.append(v)
            if need:
                cls._load_multi(need)

        if extra_props:
            for _id, props in extra_props.iteritems():
                for k, v in props.iteritems():
                    bases[_id].__setattr__(k, v, False)

        if single:
            return bases[ids[0]] if ids else None
        elif return_dict:
            return bases
        else:
            return filter(None, (bases.get(i) for i in ids))
Example #47
    def __init__(self, name, i18n_message, msg_params, field=None):
        self.name = name
        self.i18n_message = i18n_message
        self.msg_params = msg_params
        # list of fields in the original form that caused the error
        self.fields = tup(field) if field else []
Example #48
    def _column_to_obj(cls, columns):
        # columns = [{colname: colvalue}]
        return [
            LiveUpdate.from_json(*column.popitem())
            for column in utils.tup(columns)
        ]
Example #49
    def spam(self,
             things,
             auto=True,
             moderator_banned=False,
             banner=None,
             date=None,
             train_spam=True,
             **kw):
        from r2.lib.db import queries

        all_things = tup(things)
        new_things = [x for x in all_things if not x._spam]

        Report.accept(all_things, True)

        inbox_adjustment_counter = Counter()
        for t in all_things:
            if getattr(t, "promoted", None) is not None:
                g.log.debug("Refusing to mark promotion %r as spam" % t)
                continue

            if not t._spam and train_spam:
                note = 'spam'
            elif not t._spam and not train_spam:
                note = 'remove not spam'
            elif t._spam and not train_spam:
                note = 'confirm spam'
            elif t._spam and train_spam:
                note = 'reinforce spam'

            if isinstance(t, Message) and not t._spam and t.to_id:
                inbox_adjustment_counter[t.to_id] -= 1
            t._spam = True

            if moderator_banned:
                t.verdict = 'mod-removed'
            elif not auto:
                t.verdict = 'admin-removed'

            ban_info = copy(getattr(t, 'ban_info', {}))
            if isinstance(banner, dict):
                ban_info['banner'] = banner[t._fullname]
            else:
                ban_info['banner'] = banner
            ban_info.update(auto=auto,
                            moderator_banned=moderator_banned,
                            banned_at=date or datetime.now(g.tz),
                            **kw)
            ban_info['note'] = note

            t.ban_info = ban_info
            t._commit()

        self.adjust_inbox_counts(inbox_adjustment_counter)

        if not auto:
            self.author_spammer(new_things, True)
            self.set_last_sr_ban(new_things)

        queries.ban(all_things, filtered=auto)

        for t in all_things:
            if auto:
                amqp.add_item("auto_removed", t._fullname)

            if isinstance(t, Comment):
                amqp.add_item("removed_comment", t._fullname)
            elif isinstance(t, Link):
                amqp.add_item("removed_link", t._fullname)
Example #50
@classmethod
def touch(cls, fullname, names):
    names = tup(names)
    now = datetime.datetime.now(g.tz)
    values = dict.fromkeys(names, now)
    cls._set_values(fullname, values)
    return now
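A hypothetical call, assuming this method belongs to a LastModified-style Cassandra model as in r2; the column names are illustrative. Thanks to tup(), a single name and a list of names both work:

LastModified.touch(link._fullname, 'comments')
LastModified.touch(link._fullname, ['comments', 'commentstree'])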
Example #51
0
@classmethod
def _somethinged(cls, rel, user, link, name):
    return rel._fast_query(tup(user), tup(link), name=name)
Example #52
0
def get_renderable_campaigns(link, campaigns):
    campaigns, is_single = tup(campaigns, ret_is_single=True)
    r = RenderableCampaign.create(link, campaigns)
    if is_single:
        r = r[0]
    return r
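The ret_is_single flag lets the function mirror the caller's shape: a single input yields a single result, a list yields a list. A sketch of the two call forms (the link and campaign objects are placeholders):

# single campaign in, single RenderableCampaign out
rc = get_renderable_campaigns(link, campaign)
# list in, list out
rcs = get_renderable_campaigns(link, [campaign_a, campaign_b])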
Example #53
@classmethod
def _obj_to_column(cls, entries):
    entries, is_single = utils.tup(entries, ret_is_single=True)
    columns = [{entry._id: entry.to_json()} for entry in entries]
    return columns[0] if is_single else columns
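This is the inverse of _column_to_obj from Example #48. A round-trip sketch, assuming `update` is a LiveUpdate instance:

columns = cls._obj_to_column([update])   # -> [{update._id: json_blob}]
updates = cls._column_to_obj(columns)    # -> [LiveUpdate, ...]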
Example #54
0
    def _byID(cls,
              ids,
              data=True,
              return_dict=True,
              stale=False,
              ignore_missing=False):
        # data props are ALWAYS loaded, data keyword is meaningless
        ids, single = tup(ids, ret_is_single=True)

        for x in ids:
            if not isinstance(x, (int, long)):
                raise ValueError('non-integer thing_id in %r' % ids)
            if x > tdb.MAX_THING_ID:
                raise NotFound('huge thing_id in %r' % ids)
            elif x < tdb.MIN_THING_ID:
                raise NotFound('negative thing_id in %r' % ids)

        if not single and not ids:
            if return_dict:
                return {}
            else:
                return []

        cls.record_lookup(data=data, delta=len(ids))

        def count_found_and_reject_unloaded(ret, still_need):
            unloaded_ids = {
                _id
                for _id, thing in ret.iteritems() if not thing._loaded
            }
            for _id in unloaded_ids:
                del ret[_id]
                still_need.add(_id)

            if cls._cache.stats:
                cls._cache.stats.cache_report(hits=len(ret),
                                              misses=len(still_need),
                                              cache_name='sgm.%s' %
                                              cls.__name__)

        def get_things_from_db(ids):
            props_by_id = cls._get_item(cls._type_id, ids)
            data_props_by_id = cls._get_data(cls._type_id, ids)

            things_by_id = {}
            for _id, props in props_by_id.iteritems():
                thing = cls._build(_id, props)
                data_props = data_props_by_id.get(_id, {})
                thing._t.update(data_props)
                thing._loaded = True

                if not all(data_prop in thing._t
                           for data_prop in cls._essentials):
                    # a Thing missing an essential prop is invalid. this can
                    # happen if a process looks up the Thing while it's being
                    # created, after the props are written but before the
                    # data props are
                    g.log.error("%s missing essentials, got %s", thing,
                                thing._t)
                    g.stats.simple_event("thing.load.missing_essentials")
                    continue

                things_by_id[_id] = thing

            # caching happens in sgm, but it's less intrusive to count it here
            cls.record_cache_write(event="cache", delta=len(things_by_id))

            return things_by_id

        things_by_id = sgm(cls._cache,
                           ids,
                           miss_fn=get_things_from_db,
                           prefix=cls._cache_prefix(),
                           time=THING_CACHE_TTL,
                           stale=stale,
                           found_fn=count_found_and_reject_unloaded,
                           stat_subname=cls.__name__)

        # Check to see if we found everything we asked for
        missing = [_id for _id in ids if _id not in things_by_id]
        if missing and not ignore_missing:
            raise NotFound('%s %s' % (cls.__name__, missing))

        if missing:
            ids = [_id for _id in ids if _id not in missing]

        if single:
            return things_by_id[ids[0]] if ids else None
        elif return_dict:
            return things_by_id
        else:
            return filter(None, (things_by_id.get(_id) for _id in ids))
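The sgm helper used above is a read-through "get multi": serve what the cache already has, fetch the rest through miss_fn, and write those results back. A minimal sketch of that contract, assuming a memcached-style client whose get_multi/set_multi accept a key prefix and TTL, and ignoring the stale, found_fn, and stat_subname arguments the call above passes:

def sgm(cache, keys, miss_fn, prefix='', time=0, **kw):
    # return cached entries directly; look up the misses and cache them
    ret = cache.get_multi(keys, prefix=prefix)
    still_need = [key for key in keys if key not in ret]
    if still_need and miss_fn:
        fetched = miss_fn(still_need)
        cache.set_multi(fetched, prefix=prefix, time=time)
        ret.update(fetched)
    return ret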
Example #55
0
@classmethod
def mark_participated(cls, account, subreddit):
    cls.create(account, tup(subreddit))
Example #56
0
@classmethod
def unschedule(cls, rowkey, column_keys):
    column_keys = tup(column_keys)
    return cls._cf.remove(rowkey, column_keys)
Example #57
0
    def _commit(self, keys=None):
        lock = None

        try:
            if not self._created:
                begin()
                self._create()
                just_created = True
            else:
                just_created = False

            lock = g.make_lock("thing_commit", 'commit_' + self._fullname)
            lock.acquire()

            if not just_created and not self._sync_latest():
                # already sync'd and nothing to write, but we still cache ourselves anyway
                self._cache_myself()
                return

            # begin is a no-op if already done, but in the not-just-created
            # case we need to do this here because the else block is not
            # executed when the try block is exited prematurely in any way
            # (including the return in the above branch)
            begin()

            to_set = self._dirties.copy()
            if keys:
                keys = tup(keys)
                for key in to_set.keys():
                    if key not in keys:
                        del to_set[key]

            data_props = {}
            thing_props = {}
            for k, (old_value, new_value) in to_set.iteritems():
                if k.startswith('_'):
                    thing_props[k[1:]] = new_value
                else:
                    data_props[k] = new_value

            if data_props:
                self._set_data(self._type_id, self._id, just_created,
                               **data_props)

            if thing_props:
                self._set_props(self._type_id, self._id, **thing_props)

            if keys:
                for k in keys:
                    if k in self._dirties:
                        del self._dirties[k]
            else:
                self._dirties.clear()
        except:
            rollback()
            raise
        else:
            commit()
            self._cache_myself()
        finally:
            if lock:
                lock.release()

        hooks.get_hook("thing.commit").call(thing=self, changes=to_set)
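The _dirties mapping drained by _commit is, by assumption, filled in by the Thing's custom __setattr__, which records (old_value, new_value) pairs; the False third argument seen earlier in bases[_id].__setattr__(k, v, False) would suppress that bookkeeping. A rough sketch of that contract, not the actual r2 implementation:

def __setattr__(self, attr, value, make_dirty=True):
    # remember the transition so _commit can later split it into thing
    # props (leading underscore) and data props
    old_value = getattr(self, attr, None)
    object.__setattr__(self, attr, value)
    if make_dirty:
        self._dirties[attr] = (old_value, value)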
Example #58
0
        def _fast_query(cls,
                        thing1s,
                        thing2s,
                        name,
                        data=True,
                        eager_load=True,
                        thing_data=False,
                        thing_stale=False):
            """looks up all the relationships between thing1_ids and
               thing2_ids and caches them"""

            cache_key_lookup = dict()

            # called for keys that weren't found in the cache; looks them
            # up in the database
            def lookup_rel_ids(uncached_keys):
                rel_ids = {}

                # Lookup thing ids and name from cache key
                t1_ids = set()
                t2_ids = set()
                names = set()
                for cache_key in uncached_keys:
                    (thing1, thing2, name) = cache_key_lookup[cache_key]
                    t1_ids.add(thing1._id)
                    t2_ids.add(thing2._id)
                    names.add(name)

                q = cls._query(cls.c._thing1_id == t1_ids,
                               cls.c._thing2_id == t2_ids,
                               cls.c._name == names)

                for rel in q:
                    rel_ids[cls._fast_cache_key_from_parts(
                        cls.__name__, rel._thing1_id, rel._thing2_id,
                        str(rel._name))] = rel._id

                for cache_key in uncached_keys:
                    if cache_key not in rel_ids:
                        rel_ids[cache_key] = None

                return rel_ids

            # make lookups for thing ids and names
            thing1_dict = dict((t._id, t) for t in tup(thing1s))
            thing2_dict = dict((t._id, t) for t in tup(thing2s))

            names = map(str, tup(name))

            # permute all of the pairs via cartesian product
            rel_tuples = itertools.product(thing1_dict.values(),
                                           thing2_dict.values(), names)

            # create cache keys for all permutations and initialize lookup
            for t in rel_tuples:
                thing1, thing2, name = t
                cache_key = cls._fast_cache_key_from_parts(
                    cls.__name__, thing1._id, thing2._id, name)
                cache_key_lookup[cache_key] = t

            # get the relation ids from the cache or query the db
            res = sgm(cls._fast_cache, cache_key_lookup.keys(), lookup_rel_ids)

            # get the relation objects
            rel_ids = {
                rel_id
                for rel_id in res.itervalues() if rel_id is not None
            }
            rels = cls._byID_rel(rel_ids,
                                 data=data,
                                 eager_load=eager_load,
                                 thing_data=thing_data,
                                 thing_stale=thing_stale)

            # Takes aggregated results from cache and db (res) and transforms
            # the values from ids to Relations.
            res_obj = {}
            for cache_key, rel_id in res.iteritems():
                t = cache_key_lookup[cache_key]
                rel = rels[rel_id] if rel_id is not None else None
                res_obj[t] = rel

            return res_obj
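The returned dict is keyed by (thing1, thing2, name) tuples, with None marking pairs that have no relation. A hypothetical lookup, assuming a SaveHide-style relation class as in r2:

# which of these users saved which of these links?
rels = SaveHide._fast_query(users, links, 'save')
for (user, link, name), rel in rels.iteritems():
    if rel is not None:
        print "%s saved %s" % (user.name, link._fullname)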