class DonationOrganizationsByPrefix(tdb_cassandra.View):
    """Cassandra view used to look up donation organizations by query prefix.

    Rows are keyed by a normalized query string. Column values are JSON
    documents which ``byPrefix`` decodes and wraps in ``Organization``.
    """

    _use_db = True
    # disable tdb_cassandra's deserialization; byPrefix decodes the JSON itself
    _value_type = "bytes"
    _write_consistency_level = tdb_cassandra.CL.ALL
    _read_consistency_level = tdb_cassandra.CL.ONE
    # column names are composites of (float compared in reverse, UTF-8 string)
    _compare_with = types.CompositeType(
        types.FloatType(reversed=True),
        types.UTF8Type(),
    )
    _extra_schema_creation_args = {
        "key_validation_class": "UTF8Type",
        "default_validation_class": "UTF8Type",
    }

    @classmethod
    def byPrefix(cls, prefix):
        """Return the organizations stored under the normalized `prefix`.

        `prefix` is passed through ``donate_utils.normalize_query``; if the
        result is empty, or no row exists for it, an empty list is returned.
        Otherwise at most ``MAX_COLUMNS`` columns are read and each column
        value is JSON-decoded and wrapped in an ``Organization``, in the order
        the columns were returned.
        """
        row_key = donate_utils.normalize_query(prefix)
        if not row_key:
            return []

        try:
            columns = cls._cf.get(row_key, column_count=MAX_COLUMNS)
        except NotFoundException:
            return []

        organizations = []
        for _column_name, raw_json in columns.iteritems():
            organizations.append(Organization(json.loads(raw_json)))
        return organizations
class CommentTreeStorageV2(CommentTreeStorageBase):
    """Cassandra column-based storage for comment trees.

    Under this implementation, each column in a link's row corresponds to a
    comment on that link. The column name is an encoding of the tuple of
    (depth, comment.parent_id, comment._id), and the value is a counter giving
    the size of the subtree rooted at the comment.

    Key features:
    - does not use permacache!
    - does not require locking for updates
    """

    __metaclass__ = tdb_cassandra.ThingMeta
    _connection_pool = 'main'
    _use_db = True

    _type_prefix = None
    _cf_name = 'CommentTree'

    # column keys are tuples of (depth, parent_id, comment_id)
    _compare_with = types.CompositeType(
        types.LongType(),
        types.LongType(),
        types.LongType())

    # column values are counters
    _extra_schema_creation_args = {
        'default_validation_class': COUNTER_COLUMN_TYPE,
        'replicate_on_write': True,
    }

    COLUMN_READ_BATCH_SIZE = tdb_cassandra.max_column_count
    COLUMN_WRITE_BATCH_SIZE = 1000

    @staticmethod
    def _key(link):
        """Return the row key for `link`.

        The key is the link's base-36 id. When the link has a truthy
        `comment_tree_id` attribute, that revision (also base-36) is appended
        after a colon.
        """
        revision = getattr(link, 'comment_tree_id', 0)
        if revision:
            return '%s:%s' % (utils.to36(link._id), utils.to36(revision))
        else:
            return utils.to36(link._id)

    @staticmethod
    def _column_to_obj(cols):
        """Yield ((depth, parent_id, comment_id), value) from column dicts.

        A parent id of -1 is translated to None.
        """
        for col in cols:
            for (depth, pid, cid), val in col.iteritems():
                yield (depth, None if pid == -1 else pid, cid), val

    @classmethod
    def by_link(cls, link):
        """Load the stored tree for `link`.

        Returns the dict produced by `_from_row`; if the row lookup raises
        NotFoundException the result is the same dict with empty members.
        """
        try:
            # get_row() wraps xget, which pages through the row lazily, so an
            # error from the read would only surface while _from_row iterates.
            # Keep the iteration inside the try so the guard can take effect.
            return cls._from_row(cls.get_row(cls._key(link)))
        except ttypes.NotFoundException:
            return cls._from_row({})

    @classmethod
    def get_row(cls, key):
        """Return an iterable over the columns of row `key`, read in batches
        of COLUMN_READ_BATCH_SIZE."""
        return cls._cf.xget(key, buffer_size=cls.COLUMN_READ_BATCH_SIZE)

    @classmethod
    def _from_row(cls, row):
        """Build the tree structures from a row.

        `row` is an iterable of ((depth, parent_id, comment_id), subtree_size).
        Returns a dict with:
        - cids: comment ids in row order
        - tree: parent id (None for top level) -> list of child ids
        - depth: comment id -> depth
        - num_children: comment id -> subtree_size - 1
        - parents: comment id -> parent id (None for top level)
        """
        cids = []
        tree = {}
        depth = {}
        parents = {}
        num_children = {}
        for (d, pid, cid), val in row:
            # columns whose comment id is -1 do not describe a comment
            if cid == -1:
                continue
            # -1 is the stored marker for "no parent"
            if pid == -1:
                pid = None
            cids.append(cid)
            tree.setdefault(pid, []).append(cid)
            depth[cid] = d
            parents[cid] = pid
            # the counter includes the comment itself
            num_children[cid] = val - 1
        return dict(cids=cids, tree=tree, depth=depth,
                    num_children=num_children, parents=parents)

    @classmethod
    @tdb_cassandra.will_write
    def rebuild(cls, tree, comments):
        """Discard the stored tree for `tree.link` and rewrite it.

        The current row is removed, the link's `comment_tree_id` is
        incremented (which changes the row key, see `_key`), any comment with
        a parent but no `parents` attribute gets one computed and committed,
        and finally all `comments` are written through `add_comments`.
        """
        # computed before the revision bump: this is the row being retired
        old_key = cls._key(tree.link)
        with batch.Mutator(g.cassandra_pools[cls._connection_pool]) as m:
            g.log.debug('removing tree from %s', old_key)
            m.remove(cls._cf, old_key)
        tree.link._incr('comment_tree_id')
        g.log.debug('link %s comment tree revision bumped up to %s',
                    tree.link._fullname, tree.link.comment_tree_id)

        # make sure all comments have parents attribute filled in
        parents = {c._id: c.parent_id for c in comments}
        for c in comments:
            if c.parent_id and c.parents is None:
                # walk up to the root, then flip to get root-first order
                path = []
                pid = c.parent_id
                while pid:
                    path.append(pid)
                    pid = parents[pid]
                path.reverse()
                c.parents = ':'.join(utils.to36(i) for i in path)
                c._commit()

        return cls.add_comments(tree, comments)

    @classmethod
    @tdb_cassandra.will_write
    def add_comments(cls, tree, comments):
        """Record `comments` in the row for `tree.link`.

        For every comment, each (depth, parent_id, comment_id) step along its
        path (its `parent_path()` followed by its own id) is incremented by
        one, so each ancestor's counter grows by the number of comments added
        beneath it. Writes are issued in batches of COLUMN_WRITE_BATCH_SIZE.
        """
        CommentTreeStorageBase.add_comments(tree, comments)
        g.log.debug('building updates dict')
        updates = {}
        for c in comments:
            # NOTE(review): presumably parent_path() starts with -1 for the
            # root, matching the -1 handling in _from_row -- confirm
            pids = c.parent_path()
            pids.append(c._id)
            for d, (pid, cid) in enumerate(zip(pids, pids[1:])):
                k = (d, pid, cid)
                updates[k] = updates.get(k, 0) + 1

        key = cls._key(tree.link)
        g.log.debug('writing %d updates to %s', len(updates), key)
        # increment counters in slices of COLUMN_WRITE_BATCH_SIZE
        cols = list(updates)
        for i in xrange(0, len(updates), cls.COLUMN_WRITE_BATCH_SIZE):
            g.log.debug(
                'adding updates %d..%d', i, i + cls.COLUMN_WRITE_BATCH_SIZE)
            update_batch = {c: updates[c]
                            for c in cols[i:i + cls.COLUMN_WRITE_BATCH_SIZE]}
            with batch.Mutator(g.cassandra_pools[cls._connection_pool]) as m:
                m.insert(cls._cf, key, update_batch)
        g.log.debug('added %d comments with %d updates',
                    len(comments), len(updates))

    @classmethod
    @tdb_cassandra.will_write
    def delete_comment(cls, tree, comment):
        """Subtract `comment` from the counters along its path.

        Every (depth, parent_id, comment_id) step from the root down to the
        comment itself is written with a value of -1.
        """
        CommentTreeStorageBase.delete_comment(tree, comment)
        pids = comment.parent_path()
        pids.append(comment._id)
        updates = {}
        for d, (pid, cid) in enumerate(zip(pids, pids[1:])):
            updates[(d, pid, cid)] = -1
        with batch.Mutator(g.cassandra_pools[cls._connection_pool]) as m:
            m.insert(cls._cf, cls._key(tree.link), updates)

    @classmethod
    @tdb_cassandra.will_write
    def upgrade(cls, tree, link):
        """Write an existing in-memory `tree` into this storage.

        All comments referenced by `tree.tree` are loaded in batches of 100,
        their `parents` attribute is recomputed from the tree structure (and
        committed where it changed), and the comments are then stored via
        `add_comments`.
        """
        cids = []
        for _parent_id, children in tree.tree.iteritems():
            cids.extend(children)

        comments = {}
        for i in xrange(0, len(cids), 100):
            g.log.debug(' loading comments %d..%d', i, i + 100)
            comments.update(Comment._byID(cids[i:i + 100], data=True))

        # need to fill in parents attr for each comment
        modified = []
        # walk the tree starting from the top level (parent None)
        stack = [None]
        while stack:
            pid = stack.pop()
            if pid is None:
                parents = ''
            else:
                parents = comments[pid].parents + ':' + comments[pid]._id36
            children = tree.tree.get(pid, [])
            stack.extend(children)
            for cid in children:
                if comments[cid].parents != parents:
                    comments[cid].parents = parents
                    modified.append(comments[cid])

        for comment in modified:
            comment._commit()

        cls.add_comments(tree, comments.values())
class CommentTreeStorageV3(CommentTreeStorageBase):
    """Cassandra column-based storage for comment trees.

    Under this implementation, each column in a link's row corresponds to a
    comment on that link. The column name is an encoding of the tuple of
    (depth, comment.parent_id, comment._id), and the value is not used.

    Key features:
    - does not use permacache!
    - does not require locking for updates
    """

    __metaclass__ = tdb_cassandra.ThingMeta
    _connection_pool = 'main'
    _use_db = True

    _type_prefix = None
    _cf_name = 'CommentTreeStorage'

    # column names are tuples of (depth, parent_id, comment_id)
    _compare_with = types.CompositeType(
        types.LongType(),
        types.LongType(),
        types.LongType())

    COLUMN_READ_BATCH_SIZE = tdb_cassandra.max_column_count
    COLUMN_WRITE_BATCH_SIZE = 1000

    # special value for parent_id when the comment has no parent
    NO_PARENT = -1

    @staticmethod
    def _key(link):
        """Return the row key for `link`: its id in base 36."""
        return utils.to36(link._id)

    @classmethod
    def by_link(cls, link):
        """Load the stored tree for `link`.

        Returns the dict produced by `_from_row`; if the row lookup raises
        NotFoundException the result is the same dict with empty members.
        """
        try:
            # get_row() wraps xget, which pages through the row lazily, so an
            # error from the read would only surface while _from_row iterates.
            # Keep the iteration inside the try so the guard can take effect.
            return cls._from_row(cls.get_row(cls._key(link)))
        except ttypes.NotFoundException:
            return cls._from_row({})

    @classmethod
    def get_row(cls, key):
        """Return an iterable over the columns of row `key`, read in batches
        of COLUMN_READ_BATCH_SIZE."""
        return cls._cf.xget(key, buffer_size=cls.COLUMN_READ_BATCH_SIZE)

    @classmethod
    def _from_row(cls, row):
        """Build the tree structures from a row.

        `row` is an iterable of ((depth, parent_id, comment_id), '').
        Returns a dict with:
        - cids: comment ids in row order
        - tree: parent id (None for top level) -> list of child ids
        - depth: comment id -> depth
        - parents: comment id -> parent id (None for top level)
        """
        cids = []
        tree = {}
        depth = {}
        parents = {}
        for (d, pid, cid), _value in row:
            if pid == cls.NO_PARENT:
                pid = None

            cids.append(cid)
            tree.setdefault(pid, []).append(cid)
            depth[cid] = d
            parents[cid] = pid

        return dict(cids=cids, tree=tree, depth=depth, parents=parents)

    @classmethod
    @tdb_cassandra.will_write
    def rebuild(cls, tree, comments):
        """Remove the stored row for `tree.link`, then rewrite it from
        `comments` via `add_comments`."""
        key = cls._key(tree.link)
        with batch.Mutator(g.cassandra_pools[cls._connection_pool]) as m:
            g.log.debug('removing tree from %s', key)
            m.remove(cls._cf, key)

        return cls.add_comments(tree, comments)

    @classmethod
    @tdb_cassandra.will_write
    def add_comments(cls, tree, comments):
        """Write one column per comment into the row for `tree.link`.

        Each column is named (depth, parent_id, comment_id) with an empty
        value. `parent_id` is NO_PARENT for comments without a parent, and
        depth is one more than the parent's entry in `tree.depth` (0 when the
        parent has no entry there). Writes are issued in batches of
        COLUMN_WRITE_BATCH_SIZE.
        """
        CommentTreeStorageBase.add_comments(tree, comments)

        updates = {}
        for comment in comments:
            parent_id = comment.parent_id or cls.NO_PARENT
            # a parent missing from tree.depth counts as depth -1, so its
            # child lands at depth 0
            depth = tree.depth.get(parent_id, -1) + 1
            updates[(depth, parent_id, comment._id)] = ''

        key = cls._key(tree.link)
        cols = list(updates)
        for i in xrange(0, len(updates), cls.COLUMN_WRITE_BATCH_SIZE):
            update_batch = {c: updates[c]
                            for c in cols[i:i + cls.COLUMN_WRITE_BATCH_SIZE]}
            with batch.Mutator(g.cassandra_pools[cls._connection_pool]) as m:
                m.insert(cls._cf, key, update_batch)

    @classmethod
    @tdb_cassandra.will_write
    def upgrade(cls, tree, link):
        """Write an existing in-memory `tree` into this storage.

        All comments referenced by `tree.tree` are loaded in batches of 100
        and then stored via `add_comments`.
        """
        cids = []
        for _parent_id, children in tree.tree.iteritems():
            cids.extend(children)

        comments = {}
        for i in xrange(0, len(cids), 100):
            g.log.debug(' loading comments %d..%d', i, i + 100)
            comments.update(Comment._byID(cids[i:i + 100], data=True))

        cls.add_comments(tree, comments.values())