class CommentTreeStorageV2(CommentTreeStorageBase):
    """Cassandra column-based storage for comment trees.

    Under this implementation, each column in a link's row corresponds to
    a comment on that link. The column name is an encoding of the tuple of
    (comment.parent_id, comment._id), and the value is a counter giving the
    size of the subtree rooted at the comment.

    Key features:
        - does not use permacache!
        - does not require locking for updates
    """

    __metaclass__ = tdb_cassandra.ThingMeta
    _connection_pool = 'main'
    _use_db = True
    _type_prefix = None
    _cf_name = 'CommentTree'

    # column keys are tuples of (depth, parent_id, comment_id)
    _compare_with = types.CompositeType(
        types.IntegerType(),
        types.IntegerType(),
        types.IntegerType())

    # column values are counters (subtree sizes)
    _extra_schema_creation_args = {
        'default_validation_class': COUNTER_COLUMN_TYPE,
        'replicate_on_write': True,
    }

    # page size used when reading a row back (rows can exceed one fetch)
    COLUMN_READ_BATCH_SIZE = tdb_cassandra.max_column_count
    # max counter columns incremented per mutation batch
    COLUMN_WRITE_BATCH_SIZE = 1000

    @staticmethod
    def _key(link):
        """Return the row key for `link`.

        The key incorporates the link's comment_tree_id revision (if any)
        so that `rebuild` can switch to a fresh row simply by bumping the
        revision instead of deleting columns in place.
        """
        revision = getattr(link, 'comment_tree_id', 0)
        if revision:
            return '%s:%s' % (utils.to36(link._id), utils.to36(revision))
        else:
            return utils.to36(link._id)

    @staticmethod
    def _column_to_obj(cols):
        """Yield ((depth, parent_id, comment_id), value) tuples from columns,
        mapping the sentinel parent id -1 back to None (top-level comment)."""
        for col in cols:
            for (depth, pid, cid), val in col.iteritems():
                yield (depth, None if pid == -1 else pid, cid), val

    @classmethod
    def by_link(cls, link):
        """Load and decode the comment tree row for `link`.

        A missing row (link with no comments yet) is treated as empty
        rather than an error.
        """
        try:
            row = cls.get_row(cls._key(link))
        except ttypes.NotFoundException:
            row = {}
        return cls._from_row(row)

    @classmethod
    def get_row(cls, key):
        """Fetch all columns of row `key`, paging through in batches.

        Returns a list of ((depth, parent_id, comment_id), count) pairs.
        Paging restarts each fetch at the successor of the last column seen
        (cid + 1 within the same (depth, parent) prefix).
        """
        row = []
        size = 0
        column_start = ''
        while True:
            batch = cls._cf.get(key, column_count=cls.COLUMN_READ_BATCH_SIZE,
                                column_start=column_start)
            row.extend(batch.iteritems())
            num_fetched = len(row) - size
            size = len(row)
            # a short batch means we've reached the end of the row
            if num_fetched < cls.COLUMN_READ_BATCH_SIZE:
                break
            depth, pid, cid = row[-1][0]
            column_start = (depth, pid if pid is not None else -1, cid + 1)
        return row

    @classmethod
    def _from_row(cls, row):
        """Decode a raw row into the dict-of-dicts tree representation.

        Returns a dict with keys cids, tree, depth, num_children, parents
        (the in-memory format used by the rest of the comment-tree code —
        presumably matching CommentTreeStorageBase; confirm against base).
        """
        # row is a dict of {(depth, parent_id, comment_id): subtree_size}
        cids = []
        tree = {}
        depth = {}
        parents = {}
        num_children = {}
        for (d, pid, cid), val in row:
            # cid == -1 is a sentinel column, not a real comment
            if cid == -1:
                continue
            if pid == -1:
                pid = None
            cids.append(cid)
            tree.setdefault(pid, []).append(cid)
            depth[cid] = d
            parents[cid] = pid
            # counter value is subtree size including self, so children
            # count is one less
            num_children[cid] = val - 1
        return dict(cids=cids, tree=tree, depth=depth,
                    num_children=num_children, parents=parents)

    @classmethod
    @tdb_cassandra.will_write
    def rebuild(cls, tree, comments):
        """Rebuild the stored tree for tree.link from `comments`.

        Removes the old row, bumps the link's comment_tree_id so future
        reads/writes use a fresh row key, repairs missing `parents` paths
        on the comments, then re-adds everything via add_comments.
        """
        with batch.Mutator(g.cassandra_pools[cls._connection_pool]) as m:
            g.log.debug('removing tree from %s', cls._key(tree.link))
            m.remove(cls._cf, cls._key(tree.link))
        # bumping the revision changes _key(), so new writes land in a
        # clean row even if the remove above is slow to propagate
        tree.link._incr('comment_tree_id')
        g.log.debug('link %s comment tree revision bumped up to %s',
                    tree.link._fullname, tree.link.comment_tree_id)

        # make sure all comments have parents attribute filled in
        parents = {c._id: c.parent_id for c in comments}
        for c in comments:
            if c.parent_id and c.parents is None:
                # walk up the ancestor chain to reconstruct the full path
                path = []
                pid = c.parent_id
                while pid:
                    path.insert(0, pid)
                    pid = parents[pid]
                c.parents = ':'.join(utils.to36(i) for i in path)
                c._commit()

        return cls.add_comments(tree, comments)

    @classmethod
    @tdb_cassandra.will_write
    def add_comments(cls, tree, comments):
        """Add `comments` to the stored tree for tree.link.

        For each comment, increments the subtree-size counter of every
        ancestor edge along its path (plus its own edge), batching the
        counter increments in slices of COLUMN_WRITE_BATCH_SIZE.
        """
        CommentTreeStorageBase.add_comments(tree, comments)
        g.log.debug('building updates dict')
        updates = {}
        for c in comments:
            pids = c.parent_path()
            pids.append(c._id)
            # one (depth, parent, child) edge per ancestor link; each gets
            # +1 because this comment is in that ancestor's subtree
            for d, (pid, cid) in enumerate(zip(pids, pids[1:])):
                k = (d, pid, cid)
                updates[k] = updates.get(k, 0) + 1

        g.log.debug('writing %d updates to %s',
                    len(updates), cls._key(tree.link))
        # increment counters in slices of 100
        cols = updates.keys()
        for i in xrange(0, len(updates), cls.COLUMN_WRITE_BATCH_SIZE):
            g.log.debug('adding updates %d..%d',
                        i, i + cls.COLUMN_WRITE_BATCH_SIZE)
            update_batch = {c: updates[c]
                            for c in cols[i:i + cls.COLUMN_WRITE_BATCH_SIZE]}
            with batch.Mutator(g.cassandra_pools[cls._connection_pool]) as m:
                m.insert(cls._cf, cls._key(tree.link), update_batch)
        g.log.debug('added %d comments with %d updates',
                    len(comments), len(updates))

    @classmethod
    @tdb_cassandra.will_write
    def delete_comment(cls, tree, comment):
        """Remove one comment from the stored tree by decrementing the
        subtree-size counter of every edge along its ancestor path."""
        CommentTreeStorageBase.delete_comment(tree, comment)
        pids = comment.parent_path()
        pids.append(comment._id)
        updates = {}
        for d, (pid, cid) in enumerate(zip(pids, pids[1:])):
            updates[(d, pid, cid)] = -1
        with batch.Mutator(g.cassandra_pools[cls._connection_pool]) as m:
            m.insert(cls._cf, cls._key(tree.link), updates)

    @classmethod
    @tdb_cassandra.will_write
    def upgrade(cls, tree, link):
        """Migrate an existing in-memory tree into this storage format.

        Loads all comments referenced by tree.tree, fills in any stale
        `parents` paths by walking the tree top-down, commits the modified
        comments, and writes the whole tree via add_comments.
        """
        cids = []
        for parent, children in tree.tree.iteritems():
            cids.extend(children)

        # load comments in chunks of 100 to bound memory/request size
        comments = {}
        for i in xrange(0, len(cids), 100):
            g.log.debug(' loading comments %d..%d', i, i + 100)
            comments.update(Comment._byID(cids[i:i + 100], data=True))

        # need to fill in parents attr for each comment
        modified = []
        # depth-first walk starting at the virtual root (None)
        stack = [None]
        while stack:
            pid = stack.pop()
            if pid is None:
                parents = ''
            else:
                # NOTE(review): assumes comments[pid].parents is a string by
                # the time children are visited — holds because parents are
                # pushed before their children on the stack
                parents = comments[pid].parents + ':' + comments[pid]._id36
            children = tree.tree.get(pid, [])
            stack.extend(children)
            for cid in children:
                if comments[cid].parents != parents:
                    comments[cid].parents = parents
                    modified.append(comments[cid])

        # commit only the comments whose parents path actually changed
        # (enumerate index is unused; kept as-is)
        for i, comment in enumerate(modified):
            comment._commit()

        cls.add_comments(tree, comments.values())
# Replication-strategy class names accepted by Cassandra keyspace creation.
NETWORK_TOPOLOGY_STRATEGY = 'NetworkTopologyStrategy'
"""
Replication strategy that puts a number of replicas in each datacenter
"""

OLD_NETWORK_TOPOLOGY_STRATEGY = 'OldNetworkTopologyStrategy'
"""
Original replication strategy for putting a number of replicas in each
datacenter. This was originally called 'RackAwareStrategy'.
"""

KEYS_INDEX = IndexType.KEYS
"""
A secondary index type where each indexed value receives its own row
"""

# Singleton instances of the Cassandra comparator/validator type classes,
# re-exported here for convenient use in schema definitions.
BYTES_TYPE = types.BytesType()
LONG_TYPE = types.LongType()
INT_TYPE = types.IntegerType()
ASCII_TYPE = types.AsciiType()
UTF8_TYPE = types.UTF8Type()
TIME_UUID_TYPE = types.TimeUUIDType()
LEXICAL_UUID_TYPE = types.LexicalUUIDType()
COUNTER_COLUMN_TYPE = types.CounterColumnType()
DOUBLE_TYPE = types.DoubleType()
FLOAT_TYPE = types.FloatType()
DECIMAL_TYPE = types.DecimalType()
BOOLEAN_TYPE = types.BooleanType()
DATE_TYPE = types.DateType()


class SystemManager(object):
    """
    Lets you examine and modify schema definitions as well as get basic
    information about the cluster.
def _create_indexes(manager, keyspace, column_family, columns):
    """Create a secondary index on `column_family` for each entry in
    `columns`, a sequence of (column_name, value_type) pairs."""
    for column, value_type in columns:
        manager.create_index(keyspace=keyspace,
                             column_family=column_family,
                             column=column,
                             value_type=value_type)


def syncdb(keyspace=None):
    """ Create Cassandra keyspace, CF, SCF

    Idempotent schema setup:
      - creates the keyspace if missing, otherwise reconciles its
        strategy_options (replication factor);
      - creates each missing column family (Metric, StatArchive,
        MetricAlarm, AlarmHistory, AlarmCounter, NotificationGroup)
        together with its secondary indexes.

    :param keyspace: keyspace name; defaults to the `cassandra_keyspace`
        flag (falling back to "synaps_test").
    """
    if not keyspace:
        keyspace = FLAGS.get("cassandra_keyspace", "synaps_test")

    serverlist = FLAGS.get("cassandra_server_list")
    replication_factor = FLAGS.get("cassandra_replication_factor")
    # schema operations only need a single node; changes propagate cluster-wide
    manager = pycassa.SystemManager(server=serverlist[0])
    strategy_options = {'replication_factor': replication_factor}

    # create keyspace
    LOG.info(_("cassandra syncdb is started for keyspace(%s)" % keyspace))
    if keyspace not in manager.list_keyspaces():
        LOG.info(_("cassandra keyspace %s does not exist.") % keyspace)
        manager.create_keyspace(keyspace, strategy_options=strategy_options)
        LOG.info(_("cassandra keyspace %s is created.") % keyspace)
    else:
        # renamed from `property` to avoid shadowing the builtin
        ks_properties = manager.get_keyspace_properties(keyspace)

        # check strategy_option
        if not (strategy_options == ks_properties.get('strategy_options')):
            manager.alter_keyspace(keyspace,
                                   strategy_options=strategy_options)
            LOG.info(_("cassandra keyspace strategy options is updated - %s" %
                       str(strategy_options)))

    # create CF, SCF
    column_families = manager.get_keyspace_column_families(keyspace)

    if 'Metric' not in column_families.keys():
        manager.create_column_family(
            keyspace=keyspace,
            name='Metric',
            comparator_type=pycassa.ASCII_TYPE,
            key_validation_class=pycassa.LEXICAL_UUID_TYPE,
            column_validation_classes={
                'project_id': pycassa.UTF8_TYPE,
                'name': pycassa.UTF8_TYPE,
                'namespace': pycassa.UTF8_TYPE,
                'unit': pycassa.UTF8_TYPE,
                'dimensions': pycassa.UTF8_TYPE,
                'updated_timestamp': pycassa.DATE_TYPE,
                'created_timestamp': pycassa.DATE_TYPE
            })
        _create_indexes(manager, keyspace, 'Metric', [
            ('project_id', types.UTF8Type()),
            ('name', types.UTF8Type()),
            ('namespace', types.UTF8Type()),
            ('dimensions', types.UTF8Type()),
            ('updated_timestamp', types.DateType()),
            ('created_timestamp', types.DateType()),
        ])

    if 'StatArchive' not in column_families.keys():
        manager.create_column_family(
            keyspace=keyspace,
            name='StatArchive',
            super=True,
            key_validation_class=pycassa.LEXICAL_UUID_TYPE,
            comparator_type=pycassa.ASCII_TYPE,
            subcomparator_type=pycassa.DATE_TYPE,
            default_validation_class=pycassa.DOUBLE_TYPE)

    if 'MetricAlarm' not in column_families.keys():
        manager.create_column_family(
            keyspace=keyspace,
            name='MetricAlarm',
            key_validation_class=pycassa.LEXICAL_UUID_TYPE,
            comparator_type=pycassa.ASCII_TYPE,
            column_validation_classes={
                'metric_key': pycassa.LEXICAL_UUID_TYPE,
                'project_id': pycassa.UTF8_TYPE,
                'actions_enabled': pycassa.BOOLEAN_TYPE,
                'alarm_actions': pycassa.UTF8_TYPE,
                'alarm_arn': pycassa.UTF8_TYPE,
                'alarm_configuration_updated_timestamp': pycassa.DATE_TYPE,
                'alarm_description': pycassa.UTF8_TYPE,
                'alarm_name': pycassa.UTF8_TYPE,
                'comparison_operator': pycassa.UTF8_TYPE,
                'dimensions': pycassa.UTF8_TYPE,
                'evaluation_periods': pycassa.INT_TYPE,
                'insufficient_data_actions': pycassa.UTF8_TYPE,
                'metric_name': pycassa.UTF8_TYPE,
                'namespace': pycassa.UTF8_TYPE,
                'ok_actions': pycassa.UTF8_TYPE,
                'period': pycassa.INT_TYPE,
                'state_reason': pycassa.UTF8_TYPE,
                'state_reason_data': pycassa.UTF8_TYPE,
                'state_updated_timestamp': pycassa.DATE_TYPE,
                'state_value': pycassa.UTF8_TYPE,
                'statistic': pycassa.UTF8_TYPE,
                'threshold': pycassa.DOUBLE_TYPE,
                'unit': pycassa.UTF8_TYPE
            })
        _create_indexes(manager, keyspace, 'MetricAlarm', [
            ('project_id', types.UTF8Type()),
            ('metric_key', types.LexicalUUIDType()),
            ('alarm_name', types.UTF8Type()),
            ('state_updated_timestamp', types.DateType()),
            ('alarm_configuration_updated_timestamp', types.DateType()),
            ('state_value', types.UTF8Type()),
            ('period', types.IntegerType()),
            ('statistic', types.UTF8Type()),
        ])

    if 'AlarmHistory' not in column_families.keys():
        manager.create_column_family(
            keyspace=keyspace,
            name='AlarmHistory',
            key_validation_class=pycassa.LEXICAL_UUID_TYPE,
            comparator_type=pycassa.ASCII_TYPE,
            column_validation_classes={
                'project_id': pycassa.UTF8_TYPE,
                'alarm_key': pycassa.LEXICAL_UUID_TYPE,
                'alarm_name': pycassa.UTF8_TYPE,
                'history_data': pycassa.UTF8_TYPE,
                'history_item_type': pycassa.UTF8_TYPE,
                'history_summary': pycassa.UTF8_TYPE,
                'timestamp': pycassa.DATE_TYPE,
            })
        _create_indexes(manager, keyspace, 'AlarmHistory', [
            ('project_id', types.UTF8Type()),
            ('alarm_key', types.LexicalUUIDType()),
            ('alarm_name', types.UTF8Type()),
            ('history_item_type', types.UTF8Type()),
            ('timestamp', types.DateType()),
        ])

    if 'AlarmCounter' not in column_families.keys():
        manager.create_column_family(
            keyspace=keyspace,
            name='AlarmCounter',
            default_validation_class=pycassa.COUNTER_COLUMN_TYPE,
            key_validation_class=pycassa.UTF8_TYPE)

    if 'NotificationGroup' not in column_families.keys():
        manager.create_column_family(
            keyspace=keyspace,
            name='NotificationGroup',
            key_validation_class=pycassa.UTF8_TYPE,
            comparator_type=pycassa.UTF8_TYPE,
            default_validation_class=pycassa.UTF8_TYPE)

    LOG.info(_("cassandra syncdb has finished"))