class DonationOrganizationsByPrefix(tdb_cassandra.View):
    _use_db = True
    _value_type = "bytes"  # disable tdb_cassandra's deserialization

    _write_consistency_level = tdb_cassandra.CL.ALL
    _read_consistency_level = tdb_cassandra.CL.ONE

    _compare_with = types.CompositeType(
        types.FloatType(reversed=True),
        types.UTF8Type(),
    )
    _extra_schema_creation_args = {
        "key_validation_class": "UTF8Type",
        "default_validation_class": "UTF8Type",
    }

    @classmethod
    def byPrefix(cls, prefix):
        stripped = donate_utils.normalize_query(prefix)
        if not stripped:
            return []
        try:
            results = cls._cf.get(stripped, column_count=MAX_COLUMNS)
        except NotFoundException:
            return []
        return [
            Organization(json.loads(data))
            for key, data in results.iteritems()
        ]
Example #2
0
class CommentTreeStorageV2(CommentTreeStorageBase):
    """Cassandra column-based storage for comment trees.

    Under this implementation, each column in a link's row corresponds to a
    comment on that link. The column name is an encoding of the tuple of
    (comment.parent_id, comment._id), and the value is a counter giving the
    size of the subtree rooted at the comment.

    Key features:
        - does not use permacache!
        - does not require locking for updates
    """

    __metaclass__ = tdb_cassandra.ThingMeta
    _connection_pool = 'main'
    _use_db = True

    _type_prefix = None
    _cf_name = 'CommentTree'

    # column keys are tuples of (depth, parent_id, comment_id)
    _compare_with = types.CompositeType(types.LongType(), types.LongType(),
                                        types.LongType())

    # column values are counters
    _extra_schema_creation_args = {
        'default_validation_class': COUNTER_COLUMN_TYPE,
        'replicate_on_write': True,
    }

    COLUMN_READ_BATCH_SIZE = tdb_cassandra.max_column_count
    COLUMN_WRITE_BATCH_SIZE = 1000

    @staticmethod
    def _key(link):
        revision = getattr(link, 'comment_tree_id', 0)
        if revision:
            return '%s:%s' % (utils.to36(link._id), utils.to36(revision))
        else:
            return utils.to36(link._id)

    @staticmethod
    def _column_to_obj(cols):
        for col in cols:
            for (depth, pid, cid), val in col.iteritems():
                yield (depth, None if pid == -1 else pid, cid), val

    @classmethod
    def by_link(cls, link):
        try:
            row = cls.get_row(cls._key(link))
        except ttypes.NotFoundException:
            row = {}
        return cls._from_row(row)

    @classmethod
    def get_row(cls, key):
        return cls._cf.xget(key, buffer_size=cls.COLUMN_READ_BATCH_SIZE)

    @classmethod
    def _from_row(cls, row):
        # row is an iterable of ((depth, parent_id, comment_id), subtree_size)
        cids = []
        tree = {}
        depth = {}
        parents = {}
        num_children = {}
        for (d, pid, cid), val in row:
            if cid == -1:
                continue
            if pid == -1:
                pid = None
            cids.append(cid)
            tree.setdefault(pid, []).append(cid)
            depth[cid] = d
            parents[cid] = pid
            num_children[cid] = val - 1
        return dict(cids=cids,
                    tree=tree,
                    depth=depth,
                    num_children=num_children,
                    parents=parents)

    @classmethod
    @tdb_cassandra.will_write
    def rebuild(cls, tree, comments):
        with batch.Mutator(g.cassandra_pools[cls._connection_pool]) as m:
            g.log.debug('removing tree from %s', cls._key(tree.link))
            m.remove(cls._cf, cls._key(tree.link))
        tree.link._incr('comment_tree_id')
        g.log.debug('link %s comment tree revision bumped up to %s',
                    tree.link._fullname, tree.link.comment_tree_id)

        # make sure all comments have parents attribute filled in
        parents = {c._id: c.parent_id for c in comments}
        for c in comments:
            if c.parent_id and c.parents is None:
                path = []
                pid = c.parent_id
                while pid:
                    path.insert(0, pid)
                    pid = parents[pid]
                c.parents = ':'.join(utils.to36(i) for i in path)
                c._commit()

        return cls.add_comments(tree, comments)

    @classmethod
    @tdb_cassandra.will_write
    def add_comments(cls, tree, comments):
        CommentTreeStorageBase.add_comments(tree, comments)
        g.log.debug('building updates dict')
        updates = {}
        for c in comments:
            pids = c.parent_path()
            pids.append(c._id)
            for d, (pid, cid) in enumerate(zip(pids, pids[1:])):
                k = (d, pid, cid)
                updates[k] = updates.get(k, 0) + 1

        g.log.debug('writing %d updates to %s', len(updates),
                    cls._key(tree.link))
        # increment counters in slices of 100
        cols = updates.keys()
        for i in xrange(0, len(updates), cls.COLUMN_WRITE_BATCH_SIZE):
            g.log.debug('adding updates %d..%d', i,
                        i + cls.COLUMN_WRITE_BATCH_SIZE)
            update_batch = {
                c: updates[c]
                for c in cols[i:i + cls.COLUMN_WRITE_BATCH_SIZE]
            }
            with batch.Mutator(g.cassandra_pools[cls._connection_pool]) as m:
                m.insert(cls._cf, cls._key(tree.link), update_batch)
        g.log.debug('added %d comments with %d updates', len(comments),
                    len(updates))

    @classmethod
    @tdb_cassandra.will_write
    def delete_comment(cls, tree, comment):
        CommentTreeStorageBase.delete_comment(tree, comment)
        pids = comment.parent_path()
        pids.append(comment._id)
        updates = {}
        for d, (pid, cid) in enumerate(zip(pids, pids[1:])):
            updates[(d, pid, cid)] = -1
        with batch.Mutator(g.cassandra_pools[cls._connection_pool]) as m:
            m.insert(cls._cf, cls._key(tree.link), updates)

    @classmethod
    @tdb_cassandra.will_write
    def upgrade(cls, tree, link):
        cids = []
        for parent, children in tree.tree.iteritems():
            cids.extend(children)

        comments = {}
        for i in xrange(0, len(cids), 100):
            g.log.debug('  loading comments %d..%d', i, i + 100)
            comments.update(Comment._byID(cids[i:i + 100], data=True))

        # need to fill in parents attr for each comment
        modified = []
        stack = [None]
        while stack:
            pid = stack.pop()
            if pid is None:
                parents = ''
            else:
                parents = comments[pid].parents + ':' + comments[pid]._id36
            children = tree.tree.get(pid, [])
            stack.extend(children)
            for cid in children:
                if comments[cid].parents != parents:
                    comments[cid].parents = parents
                    modified.append(comments[cid])

        for i, comment in enumerate(modified):
            comment._commit()

        cls.add_comments(tree, comments.values())
Example #3
0
class CommentTreeStorageV3(CommentTreeStorageBase):
    """Cassandra column-based storage for comment trees.

    Under this implementation, each column in a link's row corresponds to a
    comment on that link. The column name is an encoding of the tuple of
    (depth, comment.parent_id, comment._id), and the value is not used.

    Key features:
        - does not use permacache!
        - does not require locking for updates
    """

    __metaclass__ = tdb_cassandra.ThingMeta
    _connection_pool = 'main'
    _use_db = True

    _type_prefix = None
    _cf_name = 'CommentTreeStorage'

    # column names are tuples of (depth, parent_id, comment_id)
    _compare_with = types.CompositeType(types.LongType(), types.LongType(),
                                        types.LongType())

    COLUMN_READ_BATCH_SIZE = tdb_cassandra.max_column_count
    COLUMN_WRITE_BATCH_SIZE = 1000

    # special value for parent_id when the comment has no parent
    NO_PARENT = -1

    @staticmethod
    def _key(link):
        return utils.to36(link._id)

    @classmethod
    def by_link(cls, link):
        try:
            row = cls.get_row(cls._key(link))
        except ttypes.NotFoundException:
            row = {}
        return cls._from_row(row)

    @classmethod
    def get_row(cls, key):
        return cls._cf.xget(key, buffer_size=cls.COLUMN_READ_BATCH_SIZE)

    @classmethod
    def _from_row(cls, row):
        # row is an iterable of (depth, parent_id, comment_id), '')
        cids = []
        tree = {}
        depth = {}
        parents = {}
        for (d, pid, cid), val in row:
            if pid == cls.NO_PARENT:
                pid = None

            cids.append(cid)
            tree.setdefault(pid, []).append(cid)
            depth[cid] = d
            parents[cid] = pid
        return dict(cids=cids, tree=tree, depth=depth, parents=parents)

    @classmethod
    @tdb_cassandra.will_write
    def rebuild(cls, tree, comments):
        with batch.Mutator(g.cassandra_pools[cls._connection_pool]) as m:
            g.log.debug('removing tree from %s', cls._key(tree.link))
            m.remove(cls._cf, cls._key(tree.link))

        return cls.add_comments(tree, comments)

    @classmethod
    @tdb_cassandra.will_write
    def add_comments(cls, tree, comments):
        CommentTreeStorageBase.add_comments(tree, comments)
        updates = {}
        for comment in comments:
            parent_id = comment.parent_id or cls.NO_PARENT
            depth = tree.depth.get(parent_id, -1) + 1
            updates[(depth, parent_id, comment._id)] = ''

        cols = updates.keys()
        for i in xrange(0, len(updates), cls.COLUMN_WRITE_BATCH_SIZE):
            update_batch = {
                c: updates[c]
                for c in cols[i:i + cls.COLUMN_WRITE_BATCH_SIZE]
            }
            with batch.Mutator(g.cassandra_pools[cls._connection_pool]) as m:
                m.insert(cls._cf, cls._key(tree.link), update_batch)

    @classmethod
    @tdb_cassandra.will_write
    def upgrade(cls, tree, link):
        cids = []
        for parent, children in tree.tree.iteritems():
            cids.extend(children)

        comments = {}
        for i in xrange(0, len(cids), 100):
            g.log.debug('  loading comments %d..%d', i, i + 100)
            comments.update(Comment._byID(cids[i:i + 100], data=True))

        cls.add_comments(tree, comments.values())