def initializeTableLayout(keyspace, server_list, replicationStrategy,
                          strategyOptions, localDCName, credentials):
    """Create the keyspace and its column families when they are missing.

    A ``SystemManager`` is built for ``server_list[0]``. The keyspace is
    created if it is not listed, and then each of these column families is
    created unless it was already defined when the keyspace was inspected:

    - ``global_nodes`` and ``metadata`` (via ``createUTF8ColumnFamily``)
    - ``dc_<localDCName>_nodes`` (only when ``localDCName`` is truthy)
    - ``node_slices`` (LongType comparator and values, UTF8Type keys)

    :param keyspace: name of the keyspace to check and, if needed, create.
    :param server_list: sequence of servers; only the first entry is used.
    :param replicationStrategy: forwarded to ``create_keyspace``.
    :param strategyOptions: forwarded to ``create_keyspace``.
    :param localDCName: data center name used to build the per-DC table
        name; a falsy value skips that table.
    :param credentials: forwarded to ``SystemManager``.
    """
    sys_manager = SystemManager(server_list[0], credentials=credentials)
    try:
        # Make sure the keyspace exists
        if keyspace not in sys_manager.list_keyspaces():
            sys_manager.create_keyspace(keyspace, replicationStrategy,
                                        strategyOptions)

        # Names of the column families defined before this call creates any.
        cf_defs = sys_manager.get_keyspace_column_families(keyspace)
        existing_cfs = set(cf_defs.keys())

        # Create UTF8 CF's
        utf8_tables = ["global_nodes", "metadata"]
        if localDCName:
            utf8_tables.append("dc_%s_nodes" % (localDCName,))
        # TODO Log when we do not have the DC name
        for tablename in utf8_tables:
            if tablename not in existing_cfs:
                createUTF8ColumnFamily(sys_manager, keyspace, tablename)

        if "node_slices" not in existing_cfs:
            sys_manager.create_column_family(
                keyspace,
                "node_slices",
                super=False,
                comparator_type=pycassa_types.LongType(),
                key_validation_class=pycassa_types.UTF8Type(),
                default_validation_class=pycassa_types.LongType()
            )
    finally:
        # Release the manager's connection even if a schema call failed.
        sys_manager.close()
class TestIndex(object):
    """Plain object declaring a ``birthdate`` attribute of ``types.LongType``.

    Two instances are compared through their instance ``__dict__`` mappings.
    """

    birthdate = types.LongType(default=0)

    def __eq__(self, other):
        """Return True when both objects hold equal instance attributes."""
        mine, theirs = self.__dict__, other.__dict__
        return mine == theirs

    def __ne__(self, other):
        """Return True when the objects' instance attributes differ."""
        mine, theirs = self.__dict__, other.__dict__
        return mine != theirs
class TestUTF8(object):
    """Plain object declaring one attribute per ``types`` column type.

    Instances compare through their instance ``__dict__`` mappings and
    render as the string form of a list holding the stringified
    ``strcol``, ``intcol``, ``floatcol`` and ``datetimecol`` values.
    """

    key = types.LexicalUUIDType()
    strcol = types.AsciiType(default='default')
    intcol = types.LongType(default=0)
    floatcol = types.FloatType(default=0.0)
    datetimecol = types.DateType()

    def __str__(self):
        """Return ``str()`` of the list of stringified column values."""
        # A list comprehension instead of map(): on Python 3 map() returns an
        # iterator, whose str() is an object repr rather than the values.
        columns = (self.strcol, self.intcol, self.floatcol, self.datetimecol)
        return str([str(value) for value in columns])

    def __eq__(self, other):
        """Return True when both objects hold equal instance attributes."""
        return self.__dict__ == other.__dict__

    def __ne__(self, other):
        """Return True when the objects' instance attributes differ."""
        return self.__dict__ != other.__dict__
def createUTF8ColumnFamily(sys_manager, keyspace, tablename, ts_table=False):
    """Create column family with UTF8Type comparator, value and key.

    With ``ts_table`` set, the UTF8 defaults are overridden before the
    column family is created: LongType comparator, UTF8Type keys, FloatType
    values, DateTieredCompactionStrategy compaction and an explicit
    ``gc_grace_seconds``.

    :param sys_manager: object whose ``create_column_family`` is called.
    :param keyspace: keyspace in which the column family is created.
    :param tablename: name of the column family to create.
    :param ts_table: when true, apply the data-table overrides above.
    """
    # Settings shared by the tree-storage tables.
    cf_settings = dict(
        super=False,
        comparator_type=pycassa_types.UTF8Type(),
        key_validation_class=pycassa_types.UTF8Type(),
        default_validation_class=pycassa_types.UTF8Type(),
    )

    if ts_table:
        # A ts* table is where the actual data is stored, so it gets its own
        # settings. DTCS is used to delete old data (requires 2.0.11+):
        #   - timestamp_resolution: MICROSECONDS due to Pycassa and CQL's
        #     default timestamp resolution
        #   - tombstone_threshold: 0.1 as an attempt to be more aggressive
        #     when it comes to compaction
        #   - base_time_seconds: 60 groups sstables into 60 second blocks;
        #     CASSANDRA-8417 indicates 60 seconds is a more reasonable default
        #   - tombstone_compaction_interval: 1 is a Cassandra default,
        #     included in case it needs tuning
        dtcs_options = {
            'timestamp_resolution': 'MICROSECONDS',
            'max_sstable_age_days': '365',
            'tombstone_compaction_interval': '1',
            'tombstone_threshold': '0.1',
            'base_time_seconds': '60',
        }
        # NOTE(review): the earlier comment on this block said
        # gc_grace_seconds is set to 0 because data is never updated or
        # deleted, but the value passed is '1800' -- confirm which is meant.
        cf_settings.update(
            compaction_strategy='DateTieredCompactionStrategy',
            compaction_strategy_options=dtcs_options,
            comparator_type=pycassa_types.LongType(),
            key_validation_class=pycassa_types.UTF8Type(),
            default_validation_class=pycassa_types.FloatType(),
            gc_grace_seconds='1800',
        )

    sys_manager.create_column_family(keyspace, tablename, **cf_settings)
class CommentTreeStorageV2(CommentTreeStorageBase):
    """Cassandra column-based storage for comment trees.

    Each column in a link's row stands for one comment on that link. The
    column name is the composite (depth, parent_id, comment_id) and the
    column value is a counter giving the size of the subtree rooted at the
    comment.

    Key features:
    - does not use permacache!
    - does not require locking for updates
    """

    __metaclass__ = tdb_cassandra.ThingMeta
    _connection_pool = 'main'
    _use_db = True

    _type_prefix = None
    _cf_name = 'CommentTree'

    # column names are composites of (depth, parent_id, comment_id)
    _compare_with = types.CompositeType(
        types.LongType(),
        types.LongType(),
        types.LongType())

    # column values are counters
    _extra_schema_creation_args = {
        'default_validation_class': COUNTER_COLUMN_TYPE,
        'replicate_on_write': True,
    }

    COLUMN_READ_BATCH_SIZE = tdb_cassandra.max_column_count
    COLUMN_WRITE_BATCH_SIZE = 1000

    @staticmethod
    def _key(link):
        """Return the row key for ``link``.

        The key is the base-36 link id, followed by ``:`` and the base-36
        ``comment_tree_id`` when the link has a truthy one.
        """
        revision = getattr(link, 'comment_tree_id', 0)
        if not revision:
            return utils.to36(link._id)
        return '%s:%s' % (utils.to36(link._id), utils.to36(revision))

    @staticmethod
    def _column_to_obj(cols):
        """Yield ((depth, parent_id, comment_id), value) from column dicts.

        A parent_id of -1 is reported as None.
        """
        for column in cols:
            for name, value in column.iteritems():
                depth, parent_id, comment_id = name
                if parent_id == -1:
                    parent_id = None
                yield (depth, parent_id, comment_id), value

    @classmethod
    def by_link(cls, link):
        """Load the row for ``link`` and return it in ``_from_row`` form."""
        try:
            columns = cls.get_row(cls._key(link))
        except ttypes.NotFoundException:
            columns = {}
        return cls._from_row(columns)

    @classmethod
    def get_row(cls, key):
        """Return ``xget`` over row ``key`` using the read batch size."""
        return cls._cf.xget(key, buffer_size=cls.COLUMN_READ_BATCH_SIZE)

    @classmethod
    def _from_row(cls, row):
        """Build the tree dictionaries from a row.

        ``row`` is an iterable of
        ((depth, parent_id, comment_id), subtree_size). Entries whose
        comment_id is -1 are skipped and a parent_id of -1 becomes None.
        Returns a dict with ``cids``, ``tree``, ``depth``, ``num_children``
        and ``parents``.
        """
        comment_ids = []
        children_by_parent = {}
        depth_by_comment = {}
        parent_by_comment = {}
        child_counts = {}
        for (level, parent_id, comment_id), subtree_size in row:
            if comment_id == -1:
                continue
            parent_id = None if parent_id == -1 else parent_id
            comment_ids.append(comment_id)
            children_by_parent.setdefault(parent_id, []).append(comment_id)
            depth_by_comment[comment_id] = level
            parent_by_comment[comment_id] = parent_id
            # the counter includes the comment itself
            child_counts[comment_id] = subtree_size - 1
        return {
            'cids': comment_ids,
            'tree': children_by_parent,
            'depth': depth_by_comment,
            'num_children': child_counts,
            'parents': parent_by_comment,
        }

    @classmethod
    @tdb_cassandra.will_write
    def rebuild(cls, tree, comments):
        """Remove the stored row, bump the link's revision and re-add comments.

        Comments that have a parent but no ``parents`` attribute get it
        filled in (colon-joined base-36 ancestor ids, outermost first) and
        committed before everything is written via ``add_comments``.
        """
        pool = g.cassandra_pools[cls._connection_pool]
        with batch.Mutator(pool) as mutator:
            g.log.debug('removing tree from %s', cls._key(tree.link))
            mutator.remove(cls._cf, cls._key(tree.link))
        tree.link._incr('comment_tree_id')
        g.log.debug('link %s comment tree revision bumped up to %s',
                    tree.link._fullname, tree.link.comment_tree_id)

        # make sure all comments have parents attribute filled in
        parent_of = dict((c._id, c.parent_id) for c in comments)
        for comment in comments:
            if not comment.parent_id or comment.parents is not None:
                continue
            # walk up to the root, then flip so the root comes first
            ancestors = []
            ancestor_id = comment.parent_id
            while ancestor_id:
                ancestors.append(ancestor_id)
                ancestor_id = parent_of[ancestor_id]
            ancestors.reverse()
            comment.parents = ':'.join(utils.to36(a) for a in ancestors)
            comment._commit()

        return cls.add_comments(tree, comments)

    @classmethod
    @tdb_cassandra.will_write
    def add_comments(cls, tree, comments):
        """Increment the counters along each comment's parent path.

        For every comment, each (depth, parent_id, child_id) step from
        ``parent_path()`` down to the comment itself gets +1. The increments
        are written in batches of ``COLUMN_WRITE_BATCH_SIZE`` columns.
        """
        CommentTreeStorageBase.add_comments(tree, comments)
        g.log.debug('building updates dict')
        updates = {}
        for comment in comments:
            path = comment.parent_path()
            path.append(comment._id)
            for level, (parent_id, child_id) in enumerate(zip(path, path[1:])):
                column = (level, parent_id, child_id)
                updates.setdefault(column, 0)
                updates[column] += 1

        g.log.debug('writing %d updates to %s',
                    len(updates), cls._key(tree.link))
        # increment counters one write batch at a time
        batch_size = cls.COLUMN_WRITE_BATCH_SIZE
        columns = updates.keys()
        for start in xrange(0, len(updates), batch_size):
            g.log.debug('adding updates %d..%d', start, start + batch_size)
            chunk = columns[start:start + batch_size]
            update_batch = dict((col, updates[col]) for col in chunk)
            pool = g.cassandra_pools[cls._connection_pool]
            with batch.Mutator(pool) as mutator:
                mutator.insert(cls._cf, cls._key(tree.link), update_batch)
        g.log.debug('added %d comments with %d updates',
                    len(comments), len(updates))

    @classmethod
    @tdb_cassandra.will_write
    def delete_comment(cls, tree, comment):
        """Decrement by one every counter along the comment's parent path."""
        CommentTreeStorageBase.delete_comment(tree, comment)
        path = comment.parent_path()
        path.append(comment._id)
        decrements = dict(
            ((level, parent_id, child_id), -1)
            for level, (parent_id, child_id) in enumerate(zip(path, path[1:]))
        )
        pool = g.cassandra_pools[cls._connection_pool]
        with batch.Mutator(pool) as mutator:
            mutator.insert(cls._cf, cls._key(tree.link), decrements)

    @classmethod
    @tdb_cassandra.will_write
    def upgrade(cls, tree, link):
        """Load the tree's comments, fix their ``parents`` and store them.

        Comments are fetched 100 ids at a time. The tree is then walked from
        the root (None) and any comment whose ``parents`` attribute differs
        from the path computed from its parent is updated and committed.
        Finally all loaded comments go through ``add_comments``.
        """
        comment_ids = []
        for children in tree.tree.itervalues():
            comment_ids.extend(children)

        comments = {}
        for start in xrange(0, len(comment_ids), 100):
            g.log.debug(' loading comments %d..%d', start, start + 100)
            chunk = comment_ids[start:start + 100]
            comments.update(Comment._byID(chunk, data=True))

        # need to fill in parents attr for each comment
        modified = []
        pending = [None]
        while pending:
            parent_id = pending.pop()
            if parent_id is None:
                expected = ''
            else:
                parent = comments[parent_id]
                expected = parent.parents + ':' + parent._id36
            children = tree.tree.get(parent_id, [])
            pending.extend(children)
            for child_id in children:
                child = comments[child_id]
                if child.parents != expected:
                    child.parents = expected
                    modified.append(child)

        for comment in modified:
            comment._commit()

        cls.add_comments(tree, comments.values())
class CommentTreeStorageV3(CommentTreeStorageBase):
    """Cassandra column-based storage for comment trees.

    Each column in a link's row stands for one comment on that link. The
    column name is the composite (depth, parent_id, comment_id); the column
    value is not used (an empty string is written).

    Key features:
    - does not use permacache!
    - does not require locking for updates
    """

    __metaclass__ = tdb_cassandra.ThingMeta
    _connection_pool = 'main'
    _use_db = True

    _type_prefix = None
    _cf_name = 'CommentTreeStorage'

    # column names are composites of (depth, parent_id, comment_id)
    _compare_with = types.CompositeType(
        types.LongType(),
        types.LongType(),
        types.LongType())

    COLUMN_READ_BATCH_SIZE = tdb_cassandra.max_column_count
    COLUMN_WRITE_BATCH_SIZE = 1000

    # special value for parent_id when the comment has no parent
    NO_PARENT = -1

    @staticmethod
    def _key(link):
        """Return the row key for ``link``: its id in base 36."""
        return utils.to36(link._id)

    @classmethod
    def by_link(cls, link):
        """Load the row for ``link`` and return it in ``_from_row`` form."""
        try:
            columns = cls.get_row(cls._key(link))
        except ttypes.NotFoundException:
            columns = {}
        return cls._from_row(columns)

    @classmethod
    def get_row(cls, key):
        """Return ``xget`` over row ``key`` using the read batch size."""
        return cls._cf.xget(key, buffer_size=cls.COLUMN_READ_BATCH_SIZE)

    @classmethod
    def _from_row(cls, row):
        """Build the tree dictionaries from a row.

        ``row`` is an iterable of ((depth, parent_id, comment_id), '').
        A parent_id equal to ``NO_PARENT`` becomes None. Returns a dict with
        ``cids``, ``tree``, ``depth`` and ``parents``.
        """
        comment_ids = []
        children_by_parent = {}
        depth_by_comment = {}
        parent_by_comment = {}
        for (level, parent_id, comment_id), _unused in row:
            parent_id = None if parent_id == cls.NO_PARENT else parent_id
            comment_ids.append(comment_id)
            children_by_parent.setdefault(parent_id, []).append(comment_id)
            depth_by_comment[comment_id] = level
            parent_by_comment[comment_id] = parent_id
        return {
            'cids': comment_ids,
            'tree': children_by_parent,
            'depth': depth_by_comment,
            'parents': parent_by_comment,
        }

    @classmethod
    @tdb_cassandra.will_write
    def rebuild(cls, tree, comments):
        """Remove the link's stored row, then write ``comments`` again."""
        pool = g.cassandra_pools[cls._connection_pool]
        with batch.Mutator(pool) as mutator:
            g.log.debug('removing tree from %s', cls._key(tree.link))
            mutator.remove(cls._cf, cls._key(tree.link))
        return cls.add_comments(tree, comments)

    @classmethod
    @tdb_cassandra.will_write
    def add_comments(cls, tree, comments):
        """Write one (depth, parent_id, comment_id) column per comment.

        Depth is one more than the parent's entry in ``tree.depth``, or 0
        when the parent has no entry. Columns are inserted in batches of
        ``COLUMN_WRITE_BATCH_SIZE``.
        """
        CommentTreeStorageBase.add_comments(tree, comments)
        updates = {}
        for comment in comments:
            parent_id = comment.parent_id or cls.NO_PARENT
            level = tree.depth.get(parent_id, -1) + 1
            updates[(level, parent_id, comment._id)] = ''

        batch_size = cls.COLUMN_WRITE_BATCH_SIZE
        columns = updates.keys()
        for start in xrange(0, len(updates), batch_size):
            chunk = columns[start:start + batch_size]
            update_batch = dict((col, updates[col]) for col in chunk)
            pool = g.cassandra_pools[cls._connection_pool]
            with batch.Mutator(pool) as mutator:
                mutator.insert(cls._cf, cls._key(tree.link), update_batch)

    @classmethod
    @tdb_cassandra.will_write
    def upgrade(cls, tree, link):
        """Load every comment in ``tree`` (100 ids at a time) and store them."""
        comment_ids = []
        for children in tree.tree.itervalues():
            comment_ids.extend(children)

        comments = {}
        for start in xrange(0, len(comment_ids), 100):
            g.log.debug(' loading comments %d..%d', start, start + 100)
            chunk = comment_ids[start:start + 100]
            comments.update(Comment._byID(chunk, data=True))

        cls.add_comments(tree, comments.values())
""" Replication strategy that simply chooses consecutive nodes in the ring for replicas """ NETWORK_TOPOLOGY_STRATEGY = 'NetworkTopologyStrategy' """ Replication strategy that puts a number of replicas in each datacenter """ OLD_NETWORK_TOPOLOGY_STRATEGY = 'OldNetworkTopologyStrategy' """ Original replication strategy for putting a number of replicas in each datacenter. This was originally called 'RackAwareStrategy'. """ KEYS_INDEX = IndexType.KEYS """ A secondary index type where each indexed value receives its own row """ BYTES_TYPE = types.BytesType() LONG_TYPE = types.LongType() INT_TYPE = types.IntegerType() ASCII_TYPE = types.AsciiType() UTF8_TYPE = types.UTF8Type() TIME_UUID_TYPE = types.TimeUUIDType() LEXICAL_UUID_TYPE = types.LexicalUUIDType() COUNTER_COLUMN_TYPE = types.CounterColumnType() DOUBLE_TYPE = types.DoubleType() FLOAT_TYPE = types.FloatType() DECIMAL_TYPE = types.DecimalType() BOOLEAN_TYPE = types.BooleanType() DATE_TYPE = types.DateType() class SystemManager(object): """ Lets you examine and modify schema definitions as well as get basic