class CassandraCache(CacheUtils):
    """A cache that uses a Cassandra ColumnFamily.

    Uses only the column-name 'value'. Values are pickled before they are
    written and unpickled when they are read back.

    :param column_family: name of the column family, handed to
        ``ColumnFamily``.
    :param client: connection object handed to ``ColumnFamily``.
    :param read_consistency_level: default level for reads (``CL_ONE``).
    :param write_consistency_level: default level for writes
        (``CL_QUORUM``).
    """

    permanent = True

    def __init__(self, column_family, client,
                 read_consistency_level=CL_ONE,
                 write_consistency_level=CL_QUORUM):
        self.column_family = column_family
        self.client = client
        self.read_consistency_level = read_consistency_level
        self.write_consistency_level = write_consistency_level
        self.cf = ColumnFamily(self.client,
                               self.column_family,
                               read_consistency_level=read_consistency_level,
                               write_consistency_level=write_consistency_level)

    def _rcl(self, alternative):
        """Return `alternative`, or the column family's read level if None."""
        return (alternative if alternative is not None
                else self.cf.read_consistency_level)

    def _wcl(self, alternative):
        """Return `alternative`, or the column family's write level if None."""
        return (alternative if alternative is not None
                else self.cf.write_consistency_level)

    def get(self, key, default=None, read_consistency_level=None):
        """Return the unpickled value stored under `key`.

        `default` is returned when the row is missing (CassandraNotFound)
        or a KeyError is raised while reading/unpickling it.
        """
        try:
            rcl = self._rcl(read_consistency_level)
            row = self.cf.get(key, columns=['value'],
                              read_consistency_level=rcl)
            # NOTE(review): pickle.loads trusts whatever is stored in the
            # column family; only safe while the store is written by us.
            return pickle.loads(row['value'])
        except (CassandraNotFound, KeyError):
            return default

    def simple_get_multi(self, keys, read_consistency_level=None):
        """Return a dict of key -> unpickled value for the rows returned
        by a multiget over `keys`."""
        rcl = self._rcl(read_consistency_level)
        rows = self.cf.multiget(list(keys),
                                columns=['value'],
                                read_consistency_level=rcl)
        return dict((key, pickle.loads(row['value']))
                    for (key, row) in rows.iteritems())

    def set(self, key, val, write_consistency_level=None, time=None):
        """Pickle `val` and store it under `key`.

        `time` is used as the TTL; a falsy value (None or 0) means no TTL,
        matching `set_multi`. Returns whatever ``cf.insert`` returns, or
        None when `val` is NoneResult (which is never stored).
        """
        if val == NoneResult:
            # NoneResult caching is for other parts of the chain
            return

        wcl = self._wcl(write_consistency_level)
        ret = self.cf.insert(key, {'value': pickle.dumps(val)},
                             write_consistency_level=wcl,
                             ttl=time or None)
        self._warm([key])
        return ret

    def set_multi(self, keys, prefix='', write_consistency_level=None,
                  time=None):
        """Store several values at once.

        `keys` is a dict (or an iterable of ``(key, value)`` tuples); every
        row key is written as ``prefix + key``. NoneResult values are
        skipped. Returns a dict mapping each written (unprefixed) key to
        the result of its insert.
        """
        if not isinstance(keys, dict):
            # allow iterables yielding tuples
            keys = dict(keys)

        wcl = self._wcl(write_consistency_level)
        ret = {}

        # The batch object is never bound, so the inserts below go straight
        # through cf.insert; pass the consistency level explicitly so the
        # caller's choice is actually honoured.
        # NOTE(review): confirm whether the inserts were meant to be queued
        # on the batch instead.
        with self.cf.batch(write_consistency_level=wcl):
            for key, val in keys.iteritems():
                if val != NoneResult:
                    ret[key] = self.cf.insert(
                        '%s%s' % (prefix, key),
                        {'value': pickle.dumps(val)},
                        write_consistency_level=wcl,
                        ttl=time or None)

        # NOTE(review): these are the unprefixed keys; harmless while
        # _warm is disabled.
        self._warm(keys.keys())

        return ret

    def _warm(self, keys):
        """Read `keys` back after a write (currently disabled)."""
        import random
        # The leading `False` switches the warming read off entirely.
        if False and random.random() > 0.98:
            # Single-argument form prints the same text on Python 2 and 3.
            print('Warming %s' % (keys,))
            self.cf.multiget(keys)

    def delete(self, key, write_consistency_level=None):
        """Remove the row stored under `key`."""
        wcl = self._wcl(write_consistency_level)
        self.cf.remove(key, write_consistency_level=wcl)
class CassandraCache(CacheUtils):
    """A cache that uses a Cassandra ColumnFamily.

    Uses only the column-name 'value'. Every value is stored pickled.

    :param column_family: name of the column family, passed on to
        ``ColumnFamily``.
    :param client: connection object passed on to ``ColumnFamily``.
    :param read_consistency_level: level used for reads unless a call
        overrides it (default ``CL_ONE``).
    :param write_consistency_level: level used for writes unless a call
        overrides it (default ``CL_QUORUM``).
    """

    permanent = True

    def __init__(self, column_family, client,
                 read_consistency_level=CL_ONE,
                 write_consistency_level=CL_QUORUM):
        self.column_family = column_family
        self.client = client
        self.read_consistency_level = read_consistency_level
        self.write_consistency_level = write_consistency_level
        self.cf = ColumnFamily(
            self.client,
            self.column_family,
            read_consistency_level=read_consistency_level,
            write_consistency_level=write_consistency_level,
        )

    def _rcl(self, alternative):
        """Pick the per-call read level, falling back to the cf default."""
        if alternative is not None:
            return alternative
        return self.cf.read_consistency_level

    def _wcl(self, alternative):
        """Pick the per-call write level, falling back to the cf default."""
        if alternative is not None:
            return alternative
        return self.cf.write_consistency_level

    def get(self, key, default=None, read_consistency_level=None):
        """Fetch and unpickle the value for `key`.

        Returns `default` on CassandraNotFound or KeyError.
        """
        try:
            rcl = self._rcl(read_consistency_level)
            row = self.cf.get(key, columns=['value'],
                              read_consistency_level=rcl)
            # NOTE(review): unpickling assumes the stored bytes are trusted.
            return pickle.loads(row['value'])
        except (CassandraNotFound, KeyError):
            return default

    def simple_get_multi(self, keys, read_consistency_level=None):
        """Multiget `keys` and return key -> unpickled value for the rows
        that came back."""
        rcl = self._rcl(read_consistency_level)
        rows = self.cf.multiget(list(keys), columns=['value'],
                                read_consistency_level=rcl)
        return dict((key, pickle.loads(row['value']))
                    for (key, row) in rows.iteritems())

    def set(self, key, val, write_consistency_level=None, time=None):
        """Pickle `val` and insert it under `key` with TTL `time`.

        NoneResult is never stored (returns None in that case); otherwise
        the result of ``cf.insert`` is returned.
        """
        if val == NoneResult:
            # NoneResult caching is for other parts of the chain
            return

        wcl = self._wcl(write_consistency_level)
        ret = self.cf.insert(key, {'value': pickle.dumps(val)},
                             write_consistency_level=wcl, ttl=time)
        self._warm([key])
        return ret

    def set_multi(self, keys, prefix='', write_consistency_level=None,
                  time=None):
        """Insert many values; row keys are written as ``prefix + key``.

        `keys` may be a dict or an iterable of ``(key, value)`` tuples.
        NoneResult values are skipped. Returns a dict of unprefixed key ->
        insert result for everything that was written.
        """
        if not isinstance(keys, dict):
            # allow iterables yielding tuples
            keys = dict(keys)

        wcl = self._wcl(write_consistency_level)
        ret = {}

        # The batch returned here is not bound to a name, so each insert is
        # issued directly on the column family. Hand the consistency level
        # to the insert itself; otherwise the caller's choice is ignored.
        # NOTE(review): confirm whether queuing on the batch was intended.
        with self.cf.batch(write_consistency_level=wcl):
            for key, val in keys.iteritems():
                if val != NoneResult:
                    ret[key] = self.cf.insert(
                        '%s%s' % (prefix, key),
                        {'value': pickle.dumps(val)},
                        write_consistency_level=wcl,
                        ttl=time)

        self._warm(keys.keys())
        return ret

    def _warm(self, keys):
        """Optionally read `keys` back after writing (switched off)."""
        import random
        # `False and ...` keeps this branch permanently disabled.
        if False and random.random() > 0.98:
            # Single-argument form prints the same text on Python 2 and 3.
            print('Warming %s' % (keys,))
            self.cf.multiget(keys)

    def delete(self, key, write_consistency_level=None):
        """Remove the row for `key`."""
        wcl = self._wcl(write_consistency_level)
        self.cf.remove(key, write_consistency_level=wcl)
class TestMutator(unittest.TestCase):
    """Tests for batch mutations on a standard and a super column family."""

    def setUp(self):
        credentials = {'username': '******', 'password': '******'}
        self.client = connect('Keyspace1', credentials=credentials)
        # Both column families take their timestamps from self.timestamp,
        # a simple counter, so write ordering is deterministic.
        self.cf = ColumnFamily(self.client, 'Standard2',
                               write_consistency_level=ConsistencyLevel.ONE,
                               timestamp=self.timestamp)
        self.scf = ColumnFamily(self.client, 'Super1',
                                write_consistency_level=ConsistencyLevel.ONE,
                                super=True, timestamp=self.timestamp)
        # Resume the counter from the stored 'meta' row when there is one.
        try:
            self.timestamp_n = int(self.cf.get('meta')['timestamp'])
        except NotFoundException:
            self.timestamp_n = 0
        self.clear()

    def clear(self):
        """Truncate both column families."""
        self.cf.truncate()
        self.scf.truncate()

    def timestamp(self):
        """Return the next value of the monotonically increasing counter."""
        self.timestamp_n += 1
        return self.timestamp_n

    def test_insert(self):
        batch = self.cf.batch()
        for key, cols in ROWS.items():
            batch.insert(key, cols)
        batch.send()
        for key, cols in ROWS.items():
            assert self.cf.get(key) == cols

    def test_insert_supercolumns(self):
        batch = self.scf.batch()
        batch.insert('one', ROWS)
        batch.insert('two', ROWS)
        batch.insert('three', ROWS)
        batch.send()
        assert self.scf.get('one') == ROWS
        assert self.scf.get('two') == ROWS
        assert self.scf.get('three') == ROWS

    def test_queue_size(self):
        batch = self.cf.batch(queue_size=2)
        batch.insert('1', ROWS['1'])
        batch.insert('2', ROWS['2'])
        batch.insert('3', ROWS['3'])
        # The first two inserts filled the queue and were sent; the third
        # stays queued until send() is called.
        assert self.cf.get('1') == ROWS['1']
        assert_raises(NotFoundException, self.cf.get, '3')
        batch.send()
        for key, cols in ROWS.items():
            assert self.cf.get(key) == cols

    def test_remove_key(self):
        batch = self.cf.batch()
        batch.insert('1', ROWS['1'])
        batch.remove('1')
        batch.send()
        assert_raises(NotFoundException, self.cf.get, '1')

    def test_remove_columns(self):
        batch = self.cf.batch()
        batch.insert('1', {'a': '123', 'b': '123'})
        batch.remove('1', ['a'])
        batch.send()
        assert self.cf.get('1') == {'b': '123'}

    def test_remove_supercolumns(self):
        batch = self.scf.batch()
        batch.insert('one', ROWS)
        batch.insert('two', ROWS)
        batch.insert('three', ROWS)
        batch.remove('two', ['b'], '2')
        batch.send()
        assert self.scf.get('one') == ROWS
        assert self.scf.get('two')['2'] == {'a': '234'}
        assert self.scf.get('three') == ROWS

    def test_chained(self):
        batch = self.cf.batch()
        batch.insert('1', ROWS['1']).insert('2', ROWS['2']).insert('3', ROWS['3']).send()
        assert self.cf.get('1') == ROWS['1']
        assert self.cf.get('2') == ROWS['2']
        assert self.cf.get('3') == ROWS['3']

    def test_contextmgr(self):
        # Written as a real `with` statement instead of an exec'd source
        # string. Leaving the block must flush what is still queued: with
        # queue_size=2 the third insert is only sent on exit.
        with self.cf.batch(queue_size=2) as b:
            b.insert('1', ROWS['1'])
            b.insert('2', ROWS['2'])
            b.insert('3', ROWS['3'])
        assert self.cf.get('3') == ROWS['3']

    def test_multi_column_family(self):
        batch = self.client.batch()
        cf2 = self.cf
        batch.insert(self.cf, '1', ROWS['1'])
        batch.insert(self.cf, '2', ROWS['2'])
        batch.remove(cf2, '1', ROWS['1'])
        batch.send()
        assert self.cf.get('2') == ROWS['2']
        assert_raises(NotFoundException, self.cf.get, '1')
class CassandraDistributedRowLock(object):
    """
    A lock that is implemented in a row of a Cassandra column family. It's
    good to use this type of lock when you want to lock a single row in
    cassandra for some purpose in a scenario where there will not be a lot
    of lock contention.

    Shamelessly lifted from: Netflix's `Astyanax library
    <https://github.com/Netflix/astyanax>`_. Take a `look
    <https://github.com/Netflix/astyanax/blob/master/src/main/java/com/netflix/astyanax/recipes/locks/ColumnPrefixDistributedRowLock.java>`_
    at the implementation (in Java).

    Importantly, note that this is in no way a transaction for a cassandra
    row!

    :param pool: A pycassa ConnectionPool. It will be used to facilitate
        communication with cassandra.
    :type pool: pycassa.pool.ConnectionPool
    :param column_family: Either a `string` (which will then be made into
        a `pycassa.columnfamily.ColumnFamily` instance) or an already
        configured instance of `ColumnFamily` (which will be used
        directly).
    :type column_family: string or ColumnFamily
    :param key: The row key for this lock. The lock can co-exist with
        other columns on an existing row if desired.
    :type key: string

    The following parameters are optional and all come with defaults:

    :param prefix: The column prefix. Defaults to `_lock_`
    :type prefix: str
    :param lock_id: A unique string, should probably be a UUIDv1 if
        provided at all. Defaults to a UUIDv1 provided by `time-uuid
        <http://github.com/samuraisam/time_uuid>`_
    :type lock_id: str
    :param consistency_level: Stored on the instance as
        `consistency_level`. Defaults to `ConsistencyLevel.LOCAL_QUORUM`
    :param fail_on_stale_lock: Whether or not to fail when stale locks
        are found. Otherwise they'll just be cleaned up. Defaults to
        `False`
    :type fail_on_stale_lock: bool
    :param timeout: How long to wait until the lock is considered stale.
        You should set this to as much time as you think the work will
        take using the lock. Defaults to 60 seconds.
    :type timeout: float
    :param ttl: How many seconds until cassandra will automatically clean
        up stale locks. It must be greater than `timeout`. Defaults to
        `None`.
    :type ttl: float
    :param backoff_policy: a
        :py:class:`padlock.distributed.retry_policy.IRetryPolicy`
        instance. Governs the retry policy of acquiring the lock.
        Defaults to the `run_once` utility.
    :type backoff_policy: IRetryPolicy
    :param allow_retry: Whether or not to allow retry. Defaults to `True`
    :type allow_retry: bool

    You can also provide the following keyword arguments which will be
    passed directly to the `ColumnFamily` constructor if you didn't
    provide the instance yourself:

        * **read_consistency_level**
        * **write_consistency_level**
        * **autopack_names**
        * **autopack_values**
        * **autopack_keys**
        * **column_class_name**
        * **super_column_name_class**
        * **default_validation_class**
        * **column_validators**
        * **key_validation_class**
        * **dict_class**
        * **buffer_size**
        * **column_buffer_size**
        * **timestamp**
    """
    implements(ILock)

    def __init__(self, pool, column_family, key, **kwargs):
        self.pool = pool
        if isinstance(column_family, ColumnFamily):
            self.column_family = column_family
        else:
            # Only forward the ColumnFamily options the caller actually set.
            cf_kwargs = {k: kwargs[k] for k in _cf_args if k in kwargs}
            self.column_family = ColumnFamily(self.pool, column_family,
                                              **cf_kwargs)
        self.key = key
        self.consistency_level = kwargs.get('consistency_level',
                                            ConsistencyLevel.LOCAL_QUORUM)
        self.prefix = kwargs.get('prefix', '_lock_')
        # Compute the costly defaults lazily: generating a UUID or looking
        # up the retry utility is pointless (and the lookup can fail) when
        # the caller supplied a value.
        if 'lock_id' in kwargs:
            self.lock_id = kwargs['lock_id']
        else:
            self.lock_id = str(TimeUUID.with_utcnow())
        self.fail_on_stale_lock = kwargs.get('fail_on_stale_lock', False)
        self.timeout = kwargs.get('timeout', 60.0)  # seconds
        self.ttl = kwargs.get('ttl', None)
        if 'backoff_policy' in kwargs:
            self.backoff_policy = kwargs['backoff_policy']
        else:
            self.backoff_policy = getUtility(IRetryPolicy, 'run_once')
        self.allow_retry = kwargs.get('allow_retry', True)
        self.locks_to_delete = set()
        self.lock_column = None

    def acquire(self):
        """
        Acquire the lock on this row. It will then read immediately from
        cassandra, potentially retrying, potentially sleeping the executing
        thread.

        :raises ValueError: if a `ttl` is set and `timeout` exceeds it.
        :raises BusyLockException: if the lock is busy and the retry
            policy does not allow another attempt.
        """
        if self.ttl is not None and self.timeout > self.ttl:
            raise ValueError("Timeout {} must be less than TTL {}".format(
                self.timeout, self.ttl))

        # Work on a copy so the configured policy object is left untouched.
        retry = self.backoff_policy.duplicate()

        while True:
            try:
                cur_time = self.utcnow()

                # Write our lock column, then verify the lock.
                mutation = self.column_family.batch()
                self.fill_lock_mutation(mutation, cur_time, self.ttl)
                mutation.send()

                self.verify_lock(cur_time)

                self.acquire_time = self.utcnow()
                return
            except BusyLockException as e:
                # Release before deciding whether to try again.
                self.release()
                if not retry.allow_retry():
                    raise e
class TestMutator(unittest.TestCase):
    """Tests for batch mutations issued through a ConnectionPool."""

    def setUp(self):
        credentials = {"username": "******", "password": "******"}
        self.pool = ConnectionPool(keyspace="Keyspace1",
                                   credentials=credentials)
        self.cf = ColumnFamily(self.pool, "Standard2")
        self.scf = ColumnFamily(self.pool, "Super1")

    def tearDown(self):
        # Remove every row so each test starts from empty column families.
        for key, _cols in self.cf.get_range():
            self.cf.remove(key)
        for key, _cols in self.scf.get_range():
            self.scf.remove(key)

    def test_insert(self):
        batch = self.cf.batch()
        for key, cols in ROWS.items():
            batch.insert(key, cols)
        batch.send()
        for key, cols in ROWS.items():
            assert self.cf.get(key) == cols

    def test_insert_supercolumns(self):
        batch = self.scf.batch()
        batch.insert("one", ROWS)
        batch.insert("two", ROWS)
        batch.insert("three", ROWS)
        batch.send()
        assert self.scf.get("one") == ROWS
        assert self.scf.get("two") == ROWS
        assert self.scf.get("three") == ROWS

    def test_queue_size(self):
        batch = self.cf.batch(queue_size=2)
        batch.insert("1", ROWS["1"])
        batch.insert("2", ROWS["2"])
        batch.insert("3", ROWS["3"])
        # Two inserts fill the queue and get sent; the third waits for
        # an explicit send().
        assert self.cf.get("1") == ROWS["1"]
        assert_raises(NotFoundException, self.cf.get, "3")
        batch.send()
        for key, cols in ROWS.items():
            assert self.cf.get(key) == cols

    def test_remove_key(self):
        batch = self.cf.batch()
        batch.insert("1", ROWS["1"])
        batch.remove("1")
        batch.send()
        assert_raises(NotFoundException, self.cf.get, "1")

    def test_remove_columns(self):
        batch = self.cf.batch()
        batch.insert("1", {"a": "123", "b": "123"})
        batch.remove("1", ["a"])
        batch.send()
        assert self.cf.get("1") == {"b": "123"}

    def test_remove_supercolumns(self):
        batch = self.scf.batch()
        batch.insert("one", ROWS)
        batch.insert("two", ROWS)
        batch.insert("three", ROWS)
        batch.remove("two", ["b"], "2")
        batch.send()
        assert self.scf.get("one") == ROWS
        assert self.scf.get("two")["2"] == {"a": "234"}
        assert self.scf.get("three") == ROWS

    def test_chained(self):
        batch = self.cf.batch()
        batch.insert("1", ROWS["1"]).insert("2", ROWS["2"]).insert("3", ROWS["3"]).send()
        assert self.cf.get("1") == ROWS["1"]
        assert self.cf.get("2") == ROWS["2"]
        assert self.cf.get("3") == ROWS["3"]

    def test_contextmgr(self):
        # A plain `with` statement replaces the exec'd source string.
        # Exiting the block has to flush the queue: with queue_size=2 the
        # third insert is still pending until then.
        with self.cf.batch(queue_size=2) as b:
            b.insert("1", ROWS["1"])
            b.insert("2", ROWS["2"])
            b.insert("3", ROWS["3"])
        assert self.cf.get("3") == ROWS["3"]

    def test_multi_column_family(self):
        batch = batch_mod.Mutator(self.pool)
        cf2 = self.cf
        batch.insert(self.cf, "1", ROWS["1"])
        batch.insert(self.cf, "2", ROWS["2"])
        batch.remove(cf2, "1", ROWS["1"])
        batch.send()
        assert self.cf.get("2") == ROWS["2"]
        assert_raises(NotFoundException, self.cf.get, "1")
class CassandraDistributedRowLock(object):
    """
    A lock that is implemented in a row of a Cassandra column family. It's
    good to use this type of lock when you want to lock a single row in
    cassandra for some purpose in a scenario where there will not be a lot
    of lock contention.

    Shamelessly lifted from: Netflix's `Astyanax library
    <https://github.com/Netflix/astyanax>`_. Take a `look
    <https://github.com/Netflix/astyanax/blob/master/src/main/java/com/netflix/astyanax/recipes/locks/ColumnPrefixDistributedRowLock.java>`_
    at the implementation (in Java).

    Importantly, note that this is in no way a transaction for a cassandra
    row!

    :param pool: A pycassa ConnectionPool. It will be used to facilitate
        communication with cassandra.
    :type pool: pycassa.pool.ConnectionPool
    :param column_family: Either a `string` (which will then be made into
        a `pycassa.columnfamily.ColumnFamily` instance) or an already
        configured instance of `ColumnFamily` (which will be used
        directly).
    :type column_family: string or ColumnFamily
    :param key: The row key for this lock. The lock can co-exist with
        other columns on an existing row if desired.
    :type key: string

    The following parameters are optional and all come with defaults:

    :param prefix: The column prefix. Defaults to `_lock_`
    :type prefix: str
    :param lock_id: A unique string, should probably be a UUIDv1 if
        provided at all. Defaults to a UUIDv1 provided by `time-uuid
        <http://github.com/samuraisam/time_uuid>`_
    :type lock_id: str
    :param consistency_level: Kept on the instance as
        `consistency_level`. Defaults to `ConsistencyLevel.LOCAL_QUORUM`
    :param fail_on_stale_lock: Whether or not to fail when stale locks
        are found. Otherwise they'll just be cleaned up. Defaults to
        `False`
    :type fail_on_stale_lock: bool
    :param timeout: How long to wait until the lock is considered stale.
        You should set this to as much time as you think the work will
        take using the lock. Defaults to 60 seconds.
    :type timeout: float
    :param ttl: How many seconds until cassandra will automatically clean
        up stale locks. It must be greater than `timeout`. Defaults to
        `None`.
    :type ttl: float
    :param backoff_policy: a
        :py:class:`padlock.distributed.retry_policy.IRetryPolicy`
        instance. Governs the retry policy of acquiring the lock.
        Defaults to the `run_once` utility.
    :type backoff_policy: IRetryPolicy
    :param allow_retry: Whether or not to allow retry. Defaults to `True`
    :type allow_retry: bool

    You can also provide the following keyword arguments which will be
    passed directly to the `ColumnFamily` constructor if you didn't
    provide the instance yourself:

        * **read_consistency_level**
        * **write_consistency_level**
        * **autopack_names**
        * **autopack_values**
        * **autopack_keys**
        * **column_class_name**
        * **super_column_name_class**
        * **default_validation_class**
        * **column_validators**
        * **key_validation_class**
        * **dict_class**
        * **buffer_size**
        * **column_buffer_size**
        * **timestamp**
    """
    implements(ILock)

    def __init__(self, pool, column_family, key, **kwargs):
        self.pool = pool
        if isinstance(column_family, ColumnFamily):
            self.column_family = column_family
        else:
            # Pass through only the ColumnFamily options that were given.
            cf_kwargs = {k: kwargs[k] for k in _cf_args if k in kwargs}
            self.column_family = ColumnFamily(self.pool, column_family,
                                              **cf_kwargs)
        self.key = key
        self.consistency_level = kwargs.get('consistency_level',
                                            ConsistencyLevel.LOCAL_QUORUM)
        self.prefix = kwargs.get('prefix', '_lock_')
        # The UUID and the retry-policy utility are only produced when the
        # caller did not pass a value; a dict.get() default would evaluate
        # them unconditionally (and the utility lookup may fail).
        if 'lock_id' in kwargs:
            self.lock_id = kwargs['lock_id']
        else:
            self.lock_id = str(TimeUUID.with_utcnow())
        self.fail_on_stale_lock = kwargs.get('fail_on_stale_lock', False)
        self.timeout = kwargs.get('timeout', 60.0)  # seconds
        self.ttl = kwargs.get('ttl', None)
        if 'backoff_policy' in kwargs:
            self.backoff_policy = kwargs['backoff_policy']
        else:
            self.backoff_policy = getUtility(IRetryPolicy, 'run_once')
        self.allow_retry = kwargs.get('allow_retry', True)
        self.locks_to_delete = set()
        self.lock_column = None

    def acquire(self):
        """
        Acquire the lock on this row. It will then read immediately from
        cassandra, potentially retrying, potentially sleeping the executing
        thread.

        :raises ValueError: if a `ttl` is set and `timeout` exceeds it.
        :raises BusyLockException: if the lock is busy and the retry
            policy does not allow another attempt.
        """
        if self.ttl is not None and self.timeout > self.ttl:
            raise ValueError("Timeout {} must be less than TTL {}".format(
                self.timeout, self.ttl))

        # Use a duplicate so the configured policy itself is not modified.
        retry = self.backoff_policy.duplicate()

        while True:
            try:
                cur_time = self.utcnow()

                # Send our lock column, then verify the lock.
                mutation = self.column_family.batch()
                self.fill_lock_mutation(mutation, cur_time, self.ttl)
                mutation.send()

                self.verify_lock(cur_time)

                self.acquire_time = self.utcnow()
                return
            except BusyLockException as e:
                # Release first, then ask the policy whether to try again.
                self.release()
                if not retry.allow_retry():
                    raise e
class TestMutator(unittest.TestCase):
    """Tests for the batch mutator of a column family and of the client."""

    def setUp(self):
        credentials = {'username': '******', 'password': '******'}
        self.client = connect('Keyspace1', credentials=credentials)
        # self.timestamp is a counter, so both column families get
        # strictly increasing write timestamps.
        self.cf = ColumnFamily(self.client, 'Standard2',
                               write_consistency_level=ConsistencyLevel.ONE,
                               timestamp=self.timestamp)
        self.scf = ColumnFamily(self.client, 'Super1',
                                write_consistency_level=ConsistencyLevel.ONE,
                                super=True, timestamp=self.timestamp)
        # Continue from the value stored in the 'meta' row, if present.
        try:
            self.timestamp_n = int(self.cf.get('meta')['timestamp'])
        except NotFoundException:
            self.timestamp_n = 0
        self.clear()

    def clear(self):
        """Truncate both column families."""
        self.cf.truncate()
        self.scf.truncate()

    def timestamp(self):
        """Increment the counter and return its new value."""
        self.timestamp_n += 1
        return self.timestamp_n

    def test_insert(self):
        batch = self.cf.batch()
        for key, cols in ROWS.items():
            batch.insert(key, cols)
        batch.send()
        for key, cols in ROWS.items():
            assert self.cf.get(key) == cols

    def test_insert_supercolumns(self):
        batch = self.scf.batch()
        batch.insert('one', ROWS)
        batch.insert('two', ROWS)
        batch.insert('three', ROWS)
        batch.send()
        assert self.scf.get('one') == ROWS
        assert self.scf.get('two') == ROWS
        assert self.scf.get('three') == ROWS

    def test_queue_size(self):
        batch = self.cf.batch(queue_size=2)
        batch.insert('1', ROWS['1'])
        batch.insert('2', ROWS['2'])
        batch.insert('3', ROWS['3'])
        # Once the queue holds two mutations they are sent; the third one
        # is not visible until send() is called.
        assert self.cf.get('1') == ROWS['1']
        assert_raises(NotFoundException, self.cf.get, '3')
        batch.send()
        for key, cols in ROWS.items():
            assert self.cf.get(key) == cols

    def test_remove_key(self):
        batch = self.cf.batch()
        batch.insert('1', ROWS['1'])
        batch.remove('1')
        batch.send()
        assert_raises(NotFoundException, self.cf.get, '1')

    def test_remove_columns(self):
        batch = self.cf.batch()
        batch.insert('1', {'a': '123', 'b': '123'})
        batch.remove('1', ['a'])
        batch.send()
        assert self.cf.get('1') == {'b': '123'}

    def test_remove_supercolumns(self):
        batch = self.scf.batch()
        batch.insert('one', ROWS)
        batch.insert('two', ROWS)
        batch.insert('three', ROWS)
        batch.remove('two', ['b'], '2')
        batch.send()
        assert self.scf.get('one') == ROWS
        assert self.scf.get('two')['2'] == {'a': '234'}
        assert self.scf.get('three') == ROWS

    def test_chained(self):
        batch = self.cf.batch()
        batch.insert('1', ROWS['1']).insert('2', ROWS['2']).insert('3', ROWS['3']).send()
        assert self.cf.get('1') == ROWS['1']
        assert self.cf.get('2') == ROWS['2']
        assert self.cf.get('3') == ROWS['3']

    def test_contextmgr(self):
        # Uses an ordinary `with` statement rather than exec'ing a source
        # string. The check runs after the block because, with
        # queue_size=2, the third insert is only flushed on exit.
        with self.cf.batch(queue_size=2) as b:
            b.insert('1', ROWS['1'])
            b.insert('2', ROWS['2'])
            b.insert('3', ROWS['3'])
        assert self.cf.get('3') == ROWS['3']

    def test_multi_column_family(self):
        batch = self.client.batch()
        cf2 = self.cf
        batch.insert(self.cf, '1', ROWS['1'])
        batch.insert(self.cf, '2', ROWS['2'])
        batch.remove(cf2, '1', ROWS['1'])
        batch.send()
        assert self.cf.get('2') == ROWS['2']
        assert_raises(NotFoundException, self.cf.get, '1')