def initializeTableLayout(keyspace, server_list, replicationStrategy,
  strategyOptions, localDCName, credentials):
    """Ensure the keyspace and its column families exist.

    A SystemManager is opened against the first entry of ``server_list``.
    The keyspace is created with ``replicationStrategy`` and
    ``strategyOptions`` when it is not listed yet. Afterwards every column
    family that is missing from the keyspace is created:

    - ``global_nodes`` and ``metadata`` (via createUTF8ColumnFamily)
    - ``dc_<localDCName>_nodes`` (via createUTF8ColumnFamily), only when
      ``localDCName`` is truthy
    - ``node_slices`` with a LongType comparator, UTF8Type keys and
      LongType default validation

    The set of existing column families is read once, before any of them
    is created.
    """
    manager = SystemManager(server_list[0], credentials=credentials)

    # Make sure the keyspace exists
    known_keyspaces = manager.list_keyspaces()
    if keyspace not in known_keyspaces:
        manager.create_keyspace(keyspace, replicationStrategy, strategyOptions)

    existing_tables = manager.get_keyspace_column_families(keyspace).keys()

    # Plain UTF8 column families shared by every data center
    for utf8_table in ("global_nodes", "metadata"):
        if utf8_table not in existing_tables:
            createUTF8ColumnFamily(manager, keyspace, utf8_table)

    # Per-data-center node table; skipped when no DC name was supplied
    # TODO Log we do not have the DC name
    if localDCName:
        dc_table = "dc_%s_nodes" % (localDCName,)
        if dc_table not in existing_tables:
            createUTF8ColumnFamily(manager, keyspace, dc_table)

    if "node_slices" in existing_tables:
        return

    slice_options = dict(
        super=False,
        comparator_type=pycassa_types.LongType(),
        key_validation_class=pycassa_types.UTF8Type(),
        default_validation_class=pycassa_types.LongType(),
    )
    manager.create_column_family(keyspace, "node_slices", **slice_options)
Ejemplo n.º 2
0
class TestIndex(object):
    """Test fixture with a single long-typed ``birthdate`` column.

    Two objects compare equal exactly when their instance ``__dict__``s
    are equal.
    """

    birthdate = types.LongType(default=0)

    def __eq__(self, other):
        """Return True when both objects hold the same instance attributes."""
        mine, theirs = self.__dict__, other.__dict__
        return mine == theirs

    def __ne__(self, other):
        """Return True when the instance attributes of the two objects differ."""
        return not (self.__dict__ == other.__dict__)
Ejemplo n.º 3
0
class TestUTF8(object):
    """Test fixture declaring one column of several different types.

    Equality is defined on the instance ``__dict__``: two objects are equal
    exactly when their instance attributes are equal.
    """

    key = types.LexicalUUIDType()
    strcol = types.AsciiType(default='default')
    intcol = types.LongType(default=0)
    floatcol = types.FloatType(default=0.0)
    datetimecol = types.DateType()

    def __str__(self):
        """Return the list of stringified column values, rendered as text.

        A list comprehension is used instead of ``map`` so the values are
        rendered on Python 3 as well, where ``str(map(...))`` would only
        show the map object itself. The output on Python 2 is unchanged.
        """
        columns = (self.strcol, self.intcol, self.floatcol, self.datetimecol)
        return str([str(value) for value in columns])

    def __eq__(self, other):
        """Return True when both objects hold the same instance attributes."""
        return self.__dict__ == other.__dict__

    def __ne__(self, other):
        """Return True when the instance attributes of the two objects differ."""
        return self.__dict__ != other.__dict__
def createUTF8ColumnFamily(sys_manager, keyspace, tablename, ts_table=False):
  """Create ``tablename`` in ``keyspace`` through ``sys_manager``.

  By default the column family is a standard (non-super) one whose
  comparator, key validator and default value validator are all UTF8Type.

  When ``ts_table`` is true the column family is configured for storing the
  actual time series data instead:

  - comparator LongType, keys UTF8Type, values FloatType
  - DateTieredCompactionStrategy (the original notes say this requires
    Cassandra 2.0.11+) with these options:
      - timestamp_resolution: MICROSECONDS, due to Pycassa and CQL's default
        timestamp resolution
      - max_sstable_age_days: 365
      - tombstone_compaction_interval: 1 (a Cassandra default, included in
        case it needs tuning)
      - tombstone_threshold: 0.1, an attempt to compact more aggressively
      - base_time_seconds: 60, grouping sstables into 60 second blocks
        (CASSANDRA-8417 indicates 60 seconds is a more reasonable default)
  - gc_grace_seconds: 1800
  """
  utf8 = pycassa_types.UTF8Type

  # Default options for the tree-storage tables
  options = dict(
      super=False,
      comparator_type=utf8(),
      key_validation_class=utf8(),
      default_validation_class=utf8(),
  )

  if ts_table:
    # Overrides for the ts* tables, where the data points themselves live
    dtcs_options = dict(
        timestamp_resolution='MICROSECONDS',
        max_sstable_age_days='365',
        tombstone_compaction_interval='1',
        tombstone_threshold='0.1',
        base_time_seconds='60',
    )
    options['compaction_strategy'] = 'DateTieredCompactionStrategy'
    options['compaction_strategy_options'] = dtcs_options
    options['comparator_type'] = pycassa_types.LongType()
    options['key_validation_class'] = utf8()
    options['default_validation_class'] = pycassa_types.FloatType()
    options['gc_grace_seconds'] = '1800'

  sys_manager.create_column_family(keyspace, tablename, **options)
Ejemplo n.º 5
0
class CommentTreeStorageV2(CommentTreeStorageBase):
    """Cassandra column-based storage for comment trees.

    Under this implementation, each column in a link's row corresponds to a
    comment on that link. The column name is an encoding of the tuple of
    (depth, comment.parent_id, comment._id), and the value is a counter giving
    the size of the subtree rooted at the comment.

    The row key is built from the link id and, when it is set, the link's
    comment_tree_id revision (see _key). rebuild() removes the current row and
    bumps that revision before writing the tree again.

    Key features:
        - does not use permacache!
        - does not require locking for updates
    """

    __metaclass__ = tdb_cassandra.ThingMeta
    _connection_pool = 'main'
    _use_db = True

    _type_prefix = None
    _cf_name = 'CommentTree'

    # column keys are tuples of (depth, parent_id, comment_id)
    _compare_with = types.CompositeType(types.LongType(), types.LongType(),
                                        types.LongType())

    # column values are counters
    _extra_schema_creation_args = {
        'default_validation_class': COUNTER_COLUMN_TYPE,
        'replicate_on_write': True,
    }

    # buffer size handed to xget when reading a row
    COLUMN_READ_BATCH_SIZE = tdb_cassandra.max_column_count
    # maximum number of counter columns sent per mutation in add_comments
    COLUMN_WRITE_BATCH_SIZE = 1000

    @staticmethod
    def _key(link):
        """Return the row key for `link`.

        The key is utils.to36(link._id); when the link has a truthy
        comment_tree_id attribute, ':' plus utils.to36 of that revision is
        appended.
        """
        revision = getattr(link, 'comment_tree_id', 0)
        if revision:
            return '%s:%s' % (utils.to36(link._id), utils.to36(revision))
        else:
            return utils.to36(link._id)

    @staticmethod
    def _column_to_obj(cols):
        """Yield ((depth, parent_id, comment_id), value) for each column.

        `cols` is an iterable of dicts keyed by (depth, parent_id,
        comment_id) tuples. A parent_id of -1 is yielded as None.
        """
        for col in cols:
            for (depth, pid, cid), val in col.iteritems():
                yield (depth, None if pid == -1 else pid, cid), val

    @classmethod
    def by_link(cls, link):
        """Load the stored tree for `link` and return it as a dict of
        cids / tree / depth / num_children / parents (see _from_row).

        A NotFoundException raised by get_row is treated as an empty row.
        """
        # NOTE(review): if xget is lazy, a NotFoundException would surface
        # while _from_row iterates, outside this try block -- confirm.
        try:
            row = cls.get_row(cls._key(link))
        except ttypes.NotFoundException:
            row = {}
        return cls._from_row(row)

    @classmethod
    def get_row(cls, key):
        """Return the columns of row `key`, read through the column family's
        xget with a buffer of COLUMN_READ_BATCH_SIZE."""
        return cls._cf.xget(key, buffer_size=cls.COLUMN_READ_BATCH_SIZE)

    @classmethod
    def _from_row(cls, row):
        """Convert raw columns into the tree dictionaries.

        Returns a dict with:
            cids: list of comment ids, in row order
            tree: parent id (None for top level) -> list of child ids
            depth: comment id -> depth
            num_children: comment id -> counter value minus one
            parents: comment id -> parent id (None for top level)
        """
        # row is an iterable of ((depth, parent_id, comment_id), subtree_size)
        cids = []
        tree = {}
        depth = {}
        parents = {}
        num_children = {}
        for (d, pid, cid), val in row:
            # columns whose comment id is -1 are skipped
            if cid == -1:
                continue
            # a parent id of -1 marks a comment without a parent
            if pid == -1:
                pid = None
            cids.append(cid)
            tree.setdefault(pid, []).append(cid)
            depth[cid] = d
            parents[cid] = pid
            # the counter holds the subtree size, which includes the comment
            # itself
            num_children[cid] = val - 1
        return dict(cids=cids,
                    tree=tree,
                    depth=depth,
                    num_children=num_children,
                    parents=parents)

    @classmethod
    @tdb_cassandra.will_write
    def rebuild(cls, tree, comments):
        """Drop the stored row for tree.link and write `comments` again.

        The current row is removed, the link's comment_tree_id is
        incremented (which changes the key returned by _key), any comment
        that has a parent but no `parents` attribute gets one computed and
        committed, and finally add_comments is called.
        """
        with batch.Mutator(g.cassandra_pools[cls._connection_pool]) as m:
            g.log.debug('removing tree from %s', cls._key(tree.link))
            m.remove(cls._cf, cls._key(tree.link))
        tree.link._incr('comment_tree_id')
        g.log.debug('link %s comment tree revision bumped up to %s',
                    tree.link._fullname, tree.link.comment_tree_id)

        # make sure all comments have parents attribute filled in
        parents = {c._id: c.parent_id for c in comments}
        for c in comments:
            if c.parent_id and c.parents is None:
                # walk up the ancestor chain, building the path root-first;
                # every ancestor must be present in `comments` (KeyError
                # otherwise)
                path = []
                pid = c.parent_id
                while pid:
                    path.insert(0, pid)
                    pid = parents[pid]
                c.parents = ':'.join(utils.to36(i) for i in path)
                c._commit()

        return cls.add_comments(tree, comments)

    @classmethod
    @tdb_cassandra.will_write
    def add_comments(cls, tree, comments):
        """Add `comments` to the stored tree of tree.link.

        For each comment, the counter column (depth, parent_id, comment_id)
        of every consecutive pair along parent_path() + [comment._id] is
        incremented by one. The increments are written in batches of
        COLUMN_WRITE_BATCH_SIZE columns.
        """
        CommentTreeStorageBase.add_comments(tree, comments)
        g.log.debug('building updates dict')
        updates = {}
        for c in comments:
            # NOTE(review): appends to the list returned by parent_path();
            # presumably that is a fresh list per call -- confirm.
            pids = c.parent_path()
            pids.append(c._id)
            # one increment per (ancestor, descendant) step on the path
            for d, (pid, cid) in enumerate(zip(pids, pids[1:])):
                k = (d, pid, cid)
                updates[k] = updates.get(k, 0) + 1

        g.log.debug('writing %d updates to %s', len(updates),
                    cls._key(tree.link))
        # increment counters in slices of COLUMN_WRITE_BATCH_SIZE columns
        # (keys() has to return a list for the slicing below)
        cols = updates.keys()
        for i in xrange(0, len(updates), cls.COLUMN_WRITE_BATCH_SIZE):
            g.log.debug('adding updates %d..%d', i,
                        i + cls.COLUMN_WRITE_BATCH_SIZE)
            update_batch = {
                c: updates[c]
                for c in cols[i:i + cls.COLUMN_WRITE_BATCH_SIZE]
            }
            with batch.Mutator(g.cassandra_pools[cls._connection_pool]) as m:
                m.insert(cls._cf, cls._key(tree.link), update_batch)
        g.log.debug('added %d comments with %d updates', len(comments),
                    len(updates))

    @classmethod
    @tdb_cassandra.will_write
    def delete_comment(cls, tree, comment):
        """Remove `comment` from the stored tree of tree.link.

        Every counter column along parent_path() + [comment._id] is
        decremented by one in a single mutation.
        """
        CommentTreeStorageBase.delete_comment(tree, comment)
        pids = comment.parent_path()
        pids.append(comment._id)
        updates = {}
        for d, (pid, cid) in enumerate(zip(pids, pids[1:])):
            updates[(d, pid, cid)] = -1
        with batch.Mutator(g.cassandra_pools[cls._connection_pool]) as m:
            m.insert(cls._cf, cls._key(tree.link), updates)

    @classmethod
    @tdb_cassandra.will_write
    def upgrade(cls, tree, link):
        """Write an existing in-memory `tree` into this storage.

        All comments referenced by tree.tree are loaded in batches of 100,
        their `parents` attribute is recomputed (and committed where it
        changed), and the comments are passed to add_comments. `link` is not
        used by this method.
        """
        cids = []
        for parent, children in tree.tree.iteritems():
            cids.extend(children)

        comments = {}
        for i in xrange(0, len(cids), 100):
            g.log.debug('  loading comments %d..%d', i, i + 100)
            comments.update(Comment._byID(cids[i:i + 100], data=True))

        # need to fill in parents attr for each comment
        modified = []
        # walk of the tree starting at the top level (parent id None)
        stack = [None]
        while stack:
            pid = stack.pop()
            if pid is None:
                parents = ''
            else:
                # children inherit the parent's path plus the parent's id36
                parents = comments[pid].parents + ':' + comments[pid]._id36
            children = tree.tree.get(pid, [])
            stack.extend(children)
            for cid in children:
                if comments[cid].parents != parents:
                    comments[cid].parents = parents
                    modified.append(comments[cid])

        # only comments whose parents attribute changed are committed
        for i, comment in enumerate(modified):
            comment._commit()

        cls.add_comments(tree, comments.values())
Ejemplo n.º 6
0
class CommentTreeStorageV3(CommentTreeStorageBase):
    """Column-per-comment Cassandra storage for a link's comment tree.

    Every comment on a link is stored as one column in that link's row.
    Column names are (depth, parent_id, comment_id) composites; column
    values are written as empty strings and ignored when reading.

    Key features:
        - does not use permacache!
        - does not require locking for updates
    """

    __metaclass__ = tdb_cassandra.ThingMeta
    _connection_pool = 'main'
    _use_db = True

    _type_prefix = None
    _cf_name = 'CommentTreeStorage'

    # composite column name: (depth, parent_id, comment_id)
    _compare_with = types.CompositeType(
        types.LongType(),
        types.LongType(),
        types.LongType(),
    )

    COLUMN_READ_BATCH_SIZE = tdb_cassandra.max_column_count
    COLUMN_WRITE_BATCH_SIZE = 1000

    # parent_id stored for comments that have no parent
    NO_PARENT = -1

    @staticmethod
    def _key(link):
        """Return the row key of `link`: utils.to36 of its id."""
        row_key = utils.to36(link._id)
        return row_key

    @classmethod
    def by_link(cls, link):
        """Return the tree dictionaries (see _from_row) stored for `link`.

        A NotFoundException from get_row yields the empty tree.
        """
        try:
            columns = cls.get_row(cls._key(link))
        except ttypes.NotFoundException:
            columns = {}
        return cls._from_row(columns)

    @classmethod
    def get_row(cls, key):
        """Read row `key` through xget, buffering COLUMN_READ_BATCH_SIZE."""
        return cls._cf.xget(key, buffer_size=cls.COLUMN_READ_BATCH_SIZE)

    @classmethod
    def _from_row(cls, row):
        """Build the tree dictionaries from raw columns.

        `row` is an iterable of ((depth, parent_id, comment_id), value)
        pairs; the value is ignored. Returns a dict with keys ``cids``
        (ids in row order), ``tree`` (parent id -> child ids), ``depth``
        (id -> depth) and ``parents`` (id -> parent id). NO_PARENT is
        translated to None.
        """
        comment_ids = []
        children_of = {}
        depth_of = {}
        parent_of = {}
        for (level, raw_parent, comment_id), _ignored in row:
            parent = None if raw_parent == cls.NO_PARENT else raw_parent

            comment_ids.append(comment_id)
            children_of.setdefault(parent, []).append(comment_id)
            depth_of[comment_id] = level
            parent_of[comment_id] = parent
        return {
            'cids': comment_ids,
            'tree': children_of,
            'depth': depth_of,
            'parents': parent_of,
        }

    @classmethod
    @tdb_cassandra.will_write
    def rebuild(cls, tree, comments):
        """Remove the stored row of tree.link, then store `comments` anew."""
        pool = g.cassandra_pools[cls._connection_pool]
        with batch.Mutator(pool) as mutator:
            row_key = cls._key(tree.link)
            g.log.debug('removing tree from %s', row_key)
            mutator.remove(cls._cf, row_key)

        return cls.add_comments(tree, comments)

    @classmethod
    @tdb_cassandra.will_write
    def add_comments(cls, tree, comments):
        """Store one column per comment in the row of tree.link.

        The base class add_comments runs first. Each comment's depth is
        one more than its parent's entry in tree.depth (0 when the parent
        has no entry). Columns are inserted in batches of
        COLUMN_WRITE_BATCH_SIZE.
        """
        CommentTreeStorageBase.add_comments(tree, comments)

        # keyed by column name so that repeated comments collapse to one
        pending = {}
        for item in comments:
            parent = item.parent_id or cls.NO_PARENT
            level = tree.depth.get(parent, -1) + 1
            pending[(level, parent, item._id)] = ''

        column_names = list(pending)
        step = cls.COLUMN_WRITE_BATCH_SIZE
        for start in xrange(0, len(pending), step):
            # every stored value is the empty string
            chunk = dict.fromkeys(column_names[start:start + step], '')
            with batch.Mutator(g.cassandra_pools[cls._connection_pool]) as m:
                m.insert(cls._cf, cls._key(tree.link), chunk)

    @classmethod
    @tdb_cassandra.will_write
    def upgrade(cls, tree, link):
        """Write an existing in-memory `tree` into this storage.

        Every comment id found in tree.tree is loaded (100 ids per lookup)
        and the resulting comments are passed to add_comments. `link` is
        not used.
        """
        all_ids = [
            cid
            for _parent, children in tree.tree.iteritems()
            for cid in children
        ]

        loaded = {}
        for start in xrange(0, len(all_ids), 100):
            stop = start + 100
            g.log.debug('  loading comments %d..%d', start, stop)
            loaded.update(Comment._byID(all_ids[start:stop], data=True))

        cls.add_comments(tree, loaded.values())
Ejemplo n.º 7
0
""" Replication strategy that simply chooses consecutive nodes in the ring for replicas """

NETWORK_TOPOLOGY_STRATEGY = 'NetworkTopologyStrategy'
""" Replication strategy that puts a number of replicas in each datacenter """

OLD_NETWORK_TOPOLOGY_STRATEGY = 'OldNetworkTopologyStrategy'
"""
Original replication strategy for putting a number of replicas in each datacenter.
This was originally called 'RackAwareStrategy'.
"""

KEYS_INDEX = IndexType.KEYS
""" A secondary index type where each indexed value receives its own row """

# Module-level instances of the classes in `types`, one constant per class.
BYTES_TYPE = types.BytesType()
LONG_TYPE = types.LongType()
INT_TYPE = types.IntegerType()
ASCII_TYPE = types.AsciiType()
UTF8_TYPE = types.UTF8Type()
TIME_UUID_TYPE = types.TimeUUIDType()
LEXICAL_UUID_TYPE = types.LexicalUUIDType()
COUNTER_COLUMN_TYPE = types.CounterColumnType()
DOUBLE_TYPE = types.DoubleType()
FLOAT_TYPE = types.FloatType()
DECIMAL_TYPE = types.DecimalType()
BOOLEAN_TYPE = types.BooleanType()
DATE_TYPE = types.DateType()

class SystemManager(object):
    """
    Lets you examine and modify schema definitions as well as get basic