def test_null_pool_failover(self):
    # NullPool failover: each forced batch_mutate timeout must be counted
    # by the listener, and the operation must still succeed on the other
    # server in server_list.
    listener = _TestListener()
    pool = NullPool(keyspace='Keyspace1', credentials=_credentials,
                    listeners=[listener], use_threadlocal=False,
                    server_list=['localhost:9160', 'localhost:9160'])
    conn = pool.get()
    cf = ColumnFamily(conn, 'Standard1')
    for i in range(1,5):
        # Force the next batch_mutate call on this connection to time out.
        setattr(cf.client._connection.client, 'batch_mutate', _timeout)

        # The first insert attempt should fail, but failover should occur
        # and the insert should succeed
        cf.insert('key', {'col': 'val'})
        assert_equal(listener.failure_count, i)
        cf.get('key')

    pool.dispose()
    listener.reset()

    # Multi-threaded variant: five threads each force five failures.
    pool = NullPool(keyspace='Keyspace1', credentials=_credentials,
                    listeners=[listener], use_threadlocal=False,
                    server_list=['localhost:9160', 'localhost:9160'])

    threads = []
    args = (pool, 'key', {'col':'val'})
    for i in range(0, 5):
        threads.append(threading.Thread(target=_five_fails, args=args))
        threads[-1].start()
    for thread in threads:
        thread.join()

    # 5 threads x 5 forced failures each.
    assert_equal(listener.failure_count, 25)
    pool.dispose()
class TestDefaultValidators(unittest.TestCase):
    # Verifies that the CF-wide default validation class and a per-column
    # validator override are both enforced on insert.

    def setUp(self):
        credentials = {"username": "******", "password": "******"}
        self.pool = ConnectionPool(pool_size=5, keyspace="Keyspace1", credentials=credentials)
        # 'DefaultValidator' is assumed to have a long default validator
        # with 'subcol' overridden to TimeUUID — TODO confirm schema.
        self.cf_def_valid = ColumnFamily(self.pool, "DefaultValidator")

    def tearDown(self):
        # Remove every row the test created, then release the pool.
        for key, cols in self.cf_def_valid.get_range():
            self.cf_def_valid.remove(key)
        self.pool.dispose()

    def test_default_validated_columns(self):
        key = "key1"
        col_cf = {"aaaaaa": 1L}      # accepted by the CF default validator
        col_cm = {"subcol": TIME1}   # accepted by the per-column validator
        col_ncf = {"aaaaaa": TIME1}  # rejected by the CF default validator
        col_ncm = {"subcol": 1L}     # rejected by the per-column validator

        # Both of these inserts work, as cf allows
        # longs and cm for 'subcol' allows TIMEUUIDs.
        self.cf_def_valid.insert(key, col_cf)
        self.cf_def_valid.insert(key, col_cm)
        assert self.cf_def_valid.get(key) == {"aaaaaa": 1L, "subcol": TIME1}

        # Mismatched types must be rejected before reaching the server.
        assert_raises(TypeError, self.cf_def_valid.insert, key, col_ncf)
        assert_raises(TypeError, self.cf_def_valid.insert, key, col_ncm)
class TestSuperColumnFamily:
    """Basic read/write tests against the 'Super2' super column family."""

    def setUp(self):
        creds = {'username': '******', 'password': '******'}
        self.pool = ConnectionPool(keyspace='Keyspace1', credentials=creds)
        self.cf = ColumnFamily(self.pool, 'Super2')

    def tearDown(self):
        # Wipe every row the tests inserted.
        for row_key, _ in self.cf.get_range():
            self.cf.remove(row_key)

    def test_super(self):
        key = 'TestSuperColumnFamily.test_super'
        expected = {'1': {'sub1': 'val1', 'sub2': 'val2'},
                    '2': {'sub3': 'val3', 'sub4': 'val4'}}

        # Row must not exist before the insert.
        assert_raises(NotFoundException, self.cf.get, key)

        self.cf.insert(key, expected)
        assert_equal(self.cf.get(key), expected)
        assert_equal(self.cf.multiget([key]), {key: expected})
        assert_equal(list(self.cf.get_range(start=key, finish=key)),
                     [(key, expected)])

    def test_super_column_argument(self):
        key = 'TestSuperColumnFamily.test_super_columns_argument'
        first = {'sub1': 'val1', 'sub2': 'val2'}
        second = {'sub3': 'val3', 'sub4': 'val4'}
        self.cf.insert(key, {'1': first, '2': second})

        # super_column narrows every read to one super column.
        assert_equal(self.cf.get(key, super_column='1'), first)
        assert_raises(NotFoundException, self.cf.get, key, super_column='3')
        assert_equal(self.cf.multiget([key], super_column='1'), {key: first})
        assert_equal(list(self.cf.get_range(start=key, finish=key,
                                            super_column='1')),
                     [(key, first)])
def test_queue_pool_recycle(self):
    # With recycle=1 every pooled connection should be replaced after a
    # single operation; 10 inserts over 5 prefilled connections are
    # expected to produce 5 recycle events.
    listener = _TestListener()
    pool = ConnectionPool(pool_size=5, max_overflow=5, recycle=1,
                          prefill=True, pool_timeout=0.5, timeout=1,
                          keyspace='PycassaTestKeyspace', credentials=_credentials,
                          listeners=[listener], use_threadlocal=False)
    cf = ColumnFamily(pool, 'Standard1')
    columns = {'col1': 'val', 'col2': 'val'}

    for i in range(10):
        cf.insert('key', columns)
    assert_equal(listener.recycle_count, 5)

    pool.dispose()
    listener.reset()

    # Try with threadlocal=True
    pool = ConnectionPool(pool_size=5, max_overflow=5, recycle=1,
                          prefill=False, pool_timeout=0.5, timeout=1,
                          keyspace='PycassaTestKeyspace', credentials=_credentials,
                          listeners=[listener], use_threadlocal=True)
    cf = ColumnFamily(pool, 'Standard1')

    for i in range(10):
        cf.insert('key', columns)
    pool.dispose()
    assert_equal(listener.recycle_count, 5)
def test_queue_failover(self):
    # Failover with a single-connection queue pool, both prefilled and not.
    for prefill in (True, False):
        listener = _TestListener()
        pool = ConnectionPool(
            pool_size=1, max_overflow=0, recycle=10000,
            prefill=prefill, timeout=1,
            keyspace='PycassaTestKeyspace', credentials=_credentials,
            listeners=[listener], use_threadlocal=False,
            server_list=['localhost:9160', 'localhost:9160'])
        cf = ColumnFamily(pool, 'Standard1')

        for i in range(1, 5):
            conn = pool.get()
            # Prime this connection to fail its next batch_mutate once.
            setattr(conn, 'send_batch_mutate', conn._fail_once)
            conn._should_fail = True
            conn.return_to_pool()

            # The first insert attempt should fail, but failover should occur
            # and the insert should succeed
            cf.insert('key', {'col': 'val%d' % i, 'col2': 'val'})
            assert_equal(listener.failure_count, i)
            assert_equal(cf.get('key'), {
                'col': 'val%d' % i,
                'col2': 'val'
            })

        pool.dispose()
def test_get_indexed_slices_batching(self): indexed_cf = ColumnFamily(self.client, 'Indexed1') columns = {'birthdate': 1L} for i in range(200): indexed_cf.insert('key%d' % i, columns) expr = index.create_index_expression(column_name='birthdate', value=1L) clause = index.create_index_clause([expr], count=10) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=2)) assert_equal(len(result), 10) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=10)) assert_equal(len(result), 10) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=77)) assert_equal(len(result), 10) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=200)) assert_equal(len(result), 10) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=1000)) assert_equal(len(result), 10) clause = index.create_index_clause([expr], count=250) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=2)) assert_equal(len(result), 200) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=10)) assert_equal(len(result), 200) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=77)) assert_equal(len(result), 200) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=200)) assert_equal(len(result), 200) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=1000)) assert_equal(len(result), 200)
def test_get_indexed_slices_batching(self): indexed_cf = ColumnFamily(pool, 'Indexed1') columns = {'birthdate': 1L} for i in range(200): indexed_cf.insert('key%d' % i, columns) expr = index.create_index_expression(column_name='birthdate', value=1L) clause = index.create_index_clause([expr], count=10) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=2)) assert_equal(len(result), 10) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=10)) assert_equal(len(result), 10) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=77)) assert_equal(len(result), 10) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=200)) assert_equal(len(result), 10) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=1000)) assert_equal(len(result), 10) clause = index.create_index_clause([expr], count=250) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=2)) assert_equal(len(result), 200) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=10)) assert_equal(len(result), 200) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=77)) assert_equal(len(result), 200) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=200)) assert_equal(len(result), 200) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=1000)) assert_equal(len(result), 200)
def test_queue_threadlocal_failover(self):
    # Threadlocal pool failover: every forced failure must be recorded in
    # the stats logger and the operation must still succeed via failover.
    stats_logger = StatsLoggerWithListStorage()
    pool = ConnectionPool(pool_size=1, max_overflow=0, recycle=10000,
                          prefill=True, timeout=0.05,
                          keyspace='PycassaTestKeyspace', credentials=_credentials,
                          listeners=[stats_logger], use_threadlocal=True,
                          server_list=['localhost:9160', 'localhost:9160'])
    cf = ColumnFamily(pool, 'Standard1')

    for i in range(1, 5):
        conn = pool.get()
        # Prime this connection to fail its next batch_mutate once.
        setattr(conn, 'send_batch_mutate', conn._fail_once)
        conn._should_fail = True
        conn.return_to_pool()

        # The first insert attempt should fail, but failover should occur
        # and the insert should succeed
        cf.insert('key', {'col': 'val%d' % i, 'col2': 'val'})
        assert_equal(stats_logger.stats['failed'], i)
        assert_equal(cf.get('key'), {'col': 'val%d' % i, 'col2': 'val'})

    pool.dispose()
    stats_logger.reset()

    # Multi-threaded variant: five primed connections, five inserting threads.
    pool = ConnectionPool(pool_size=5, max_overflow=5, recycle=10000,
                          prefill=True, timeout=0.05,
                          keyspace='PycassaTestKeyspace', credentials=_credentials,
                          listeners=[stats_logger], use_threadlocal=True,
                          server_list=['localhost:9160', 'localhost:9160'])
    cf = ColumnFamily(pool, 'Standard1')

    for i in range(5):
        conn = pool.get()
        setattr(conn, 'send_batch_mutate', conn._fail_once)
        conn._should_fail = True
        conn.return_to_pool()

    threads = []
    args = ('key', {'col': 'val', 'col2': 'val'})
    for i in range(5):
        threads.append(threading.Thread(target=cf.insert, args=args))
        threads[-1].start()
    for thread in threads:
        thread.join()

    # Each of the five primed connections should have failed exactly once.
    assert_equal(stats_logger.stats['failed'], 5)
    pool.dispose()
def test_queue_pool_recycle(self):
    # QueuePool recycling: the single checked-out connection is recycled
    # once its operation count passes the recycle threshold.
    listener = _TestListener()
    pool = QueuePool(pool_size=5, max_overflow=5, recycle=1,
                     prefill=True, pool_timeout=0.5, timeout=1,
                     keyspace='Keyspace1', credentials=_credentials,
                     listeners=[listener], use_threadlocal=False)
    conn = pool.get()
    cf = ColumnFamily(conn, 'Standard1')
    for i in range(10):
        cf.insert('key', {'col': 'val'})
    conn.return_to_pool()
    assert_equal(listener.recycle_count, 1)

    pool.dispose()
    listener.reset()

    # Try with threadlocal=True
    pool = QueuePool(pool_size=5, max_overflow=5, recycle=10,
                     prefill=True, pool_timeout=0.5, timeout=1,
                     keyspace='Keyspace1', credentials=_credentials,
                     listeners=[listener], use_threadlocal=True)
    conn = pool.get()
    cf = ColumnFamily(conn, 'Standard1')
    for i in range(10):
        cf.insert('key', {'col': 'val'})
    conn.return_to_pool()
    assert_equal(listener.recycle_count, 1)
class ActiveCode:
    """Generates and tracks account activation codes.

    Codes live as super columns under the single 'active_codes' row of
    the 'Users' super column family.
    """

    def __init__(self):
        # Connect to Cassandra servers
        client = connect(cassandra_hosts)
        self.u = ColumnFamily(client, cassandra_keyspace, 'Users', super=True)
        self.e = Error()

    def _gen_active_code(self, length=6, chars=(letters + digits)):
        # Random alphanumeric code of the requested length.
        return ''.join(choice(chars) for _ in xrange(length))

    def _is_exist(self, code):
        # True iff the code is already stored under 'active_codes'.
        try:
            self.u.get('active_codes', super_column=code)
        except NotFoundException:
            return False
        return True

    def get_new_code(self):
        # Keep drawing random codes until an unused one is found.
        code = self._gen_active_code()
        while self._is_exist(code):
            code = self._gen_active_code()
        # store in database
        self.u.insert('active_codes',
                      {code: {'create_time': str(datetime.now())}})
        return code

    def active(self, username, active_code):
        # Unknown codes cannot be activated.
        if not self._is_exist(active_code):
            return False
        self.u.insert('active_codes',
                      {active_code: {'active_time': str(datetime.now()),
                                     'owner': str(username)}})
        return True

    def stats(self):
        # Full dump of the 'active_codes' row.
        return self.u.get('active_codes')
def test_basic_pools(self):
    """A disposed pool can be recreated and used again."""
    conn_pool = ConnectionPool('PycassaTestKeyspace', credentials=_credentials)
    conn_pool.dispose()
    conn_pool = conn_pool.recreate()
    cf = ColumnFamily(conn_pool, 'Standard1')
    cf.insert('key1', {'col': 'val'})
    conn_pool.status()
    conn_pool.dispose()
def test_has_defaults(self):
    """Columns missing from the insert come back as the class defaults."""
    row_key = uuid.uuid4()
    # Insert through ColumnFamily directly so only 'strcol' is written.
    ColumnFamily.insert(self.map, row_key, {'strcol': '1'})
    loaded = self.map.get(row_key)
    assert_equal(loaded.intcol, TestUTF8.intcol.default)
    assert_equal(loaded.floatcol, TestUTF8.floatcol.default)
    assert_equal(loaded.datetimecol, TestUTF8.datetimecol.default)
def test_has_defaults(self):
    """Unwritten mapped columns must read back as their declared defaults."""
    row_key = "TestColumnFamilyMap.test_has_defaults"
    # Write only 'strcol' via the raw ColumnFamily path.
    ColumnFamily.insert(self.map, row_key, {"strcol": "1"})
    loaded = self.map.get(row_key)
    assert_equal(loaded.intcol, TestUTF8.intcol.default)
    assert_equal(loaded.floatcol, TestUTF8.floatcol.default)
    assert_equal(loaded.datetimecol, TestUTF8.datetimecol.default)
def test_basic_pools(self):
    """Each pool class must survive a dispose/recreate cycle."""
    for pool_cls in _pools:
        p = pool_cls(keyspace='Keyspace1', credentials=_credentials)
        p.dispose()
        p = p.recreate()
        cf = ColumnFamily(p, 'Standard1')
        cf.insert('key1', {'col': 'val'})
        p.status()
        p.dispose()
def _five_fails(pool, key, column):
    """Force five consecutive batch_mutate timeouts on one connection.

    Each insert should fail once, fail over, and still succeed.
    """
    conn = pool.get()
    cf = ColumnFamily(conn, 'Standard1')
    for _ in range(5):
        setattr(cf.client._connection.client, 'batch_mutate', _timeout)
        # The first insert attempt should fail, but failover should occur
        # and the insert should succeed
        cf.insert(key, column)
        cf.get(key)
def test_basic_pools(self): for pool_cls in _pools: print "Pool class: %s" % pool_cls.__name__ pool = pool_cls(keyspace='Keyspace1', credentials=_credentials) pool.dispose() pool = pool.recreate() conn = pool.get() cf = ColumnFamily(conn, 'Standard1') cf.insert('key1', {'col':'val'}) pool.status() pool.return_conn(conn)
class TestSuperColumnFamilyMap:
    # Maps TestUTF8 instances onto the 'Super2' super column family using
    # the older connect_thread_local()/login() connection API and an
    # explicit monotonically increasing timestamp function.

    def setUp(self):
        self.client = connect_thread_local()
        self.client.login('Keyspace1', {'username': '******', 'password': '******'})
        self.cf = ColumnFamily(self.client, 'Keyspace1', 'Super2',
                               write_consistency_level=ConsistencyLevel.ONE,
                               timestamp=self.timestamp, super=True)
        self.map = ColumnFamilyMap(TestUTF8, self.cf)
        # Resume the timestamp counter persisted by the previous run.
        try:
            self.timestamp_n = int(self.cf.get('meta')['meta']['timestamp'])
        except NotFoundException:
            self.timestamp_n = 0
        self.clear()

    def tearDown(self):
        # Persist the counter so the next run keeps timestamps monotonic.
        self.cf.insert('meta', {'meta': {'timestamp': str(self.timestamp_n)}})

    # Since the timestamp passed to Cassandra will be in the same second
    # with the default timestamp function, causing problems with removing
    # and inserting (Cassandra doesn't know which is later), we supply our own
    def timestamp(self):
        self.timestamp_n += 1
        return self.timestamp_n

    def clear(self):
        # Advance the counter past every stored timestamp, then wipe rows.
        for key, columns in self.cf.get_range(include_timestamp=True):
            for subcolumns in columns.itervalues():
                for value, timestamp in subcolumns.itervalues():
                    self.timestamp_n = max(self.timestamp_n, timestamp)
            self.cf.remove(key)

    def instance(self, key, super_column):
        # Build a fully populated TestUTF8 instance for round-trip tests.
        instance = TestUTF8()
        instance.key = key
        instance.super_column = super_column
        instance.strcol = '1'
        instance.intcol = 2
        instance.floatcol = 3.5
        instance.datetimecol = datetime.now().replace(microsecond=0)
        instance.intstrcol = 8
        instance.floatstrcol = 4.6
        instance.datetimestrcol = datetime.now().replace(microsecond=0)
        return instance

    def test_super(self):
        instance = self.instance('TestSuperColumnFamilyMap.test_super', 'super1')
        assert_raises(NotFoundException, self.map.get, instance.key)
        self.map.insert(instance)
        # get/multiget/get_range must all round-trip the mapped instance.
        assert self.map.get(instance.key)[instance.super_column] == instance
        assert self.map.multiget([instance.key])[instance.key][instance.super_column] == instance
        assert list(self.map.get_range(start=instance.key, finish=instance.key)) == [{instance.super_column: instance}]
class TestSuperColumnFamilyMap:
    # Maps TestUTF8 onto 'Super2' using the keyspace-scoped
    # connect_thread_local API and a custom timestamp function.

    def setUp(self):
        credentials = {'username': '******', 'password': '******'}
        self.client = connect_thread_local('Keyspace1', credentials=credentials)
        self.cf = ColumnFamily(self.client, 'Super2',
                               write_consistency_level=ConsistencyLevel.ONE,
                               timestamp=self.timestamp, super=True)
        self.map = ColumnFamilyMap(TestUTF8, self.cf)
        # Resume the timestamp counter persisted by the previous run.
        try:
            self.timestamp_n = int(self.cf.get('meta')['meta']['timestamp'])
        except NotFoundException:
            self.timestamp_n = 0
        self.clear()

    def tearDown(self):
        # Persist the counter so the next run keeps timestamps monotonic.
        self.cf.insert('meta', {'meta': {'timestamp': str(self.timestamp_n)}})

    # Since the timestamp passed to Cassandra will be in the same second
    # with the default timestamp function, causing problems with removing
    # and inserting (Cassandra doesn't know which is later), we supply our own
    def timestamp(self):
        self.timestamp_n += 1
        return self.timestamp_n

    def clear(self):
        # Advance the counter past every stored timestamp, then wipe rows.
        for key, columns in self.cf.get_range(include_timestamp=True):
            for subcolumns in columns.itervalues():
                for value, timestamp in subcolumns.itervalues():
                    self.timestamp_n = max(self.timestamp_n, timestamp)
            self.cf.remove(key)

    def instance(self, key, super_column):
        # Build a fully populated TestUTF8 instance for round-trip tests.
        instance = TestUTF8()
        instance.key = key
        instance.super_column = super_column
        instance.strcol = '1'
        instance.intcol = 2
        instance.floatcol = 3.5
        instance.datetimecol = datetime.now().replace(microsecond=0)
        instance.intstrcol = 8
        instance.floatstrcol = 4.6
        instance.datetimestrcol = datetime.now().replace(microsecond=0)
        return instance

    def test_super(self):
        instance = self.instance('TestSuperColumnFamilyMap.test_super', 'super1')
        assert_raises(NotFoundException, self.map.get, instance.key)
        self.map.insert(instance)
        # get/multiget/get_range must all round-trip the mapped instance.
        assert self.map.get(instance.key)[instance.super_column] == instance
        assert self.map.multiget([instance.key])[instance.key][instance.super_column] == instance
        assert list(self.map.get_range(start=instance.key, finish=instance.key)) == [{instance.super_column: instance}]
class TestBigInt(unittest.TestCase):
    """Tests IntegerType column names, especially negative values."""

    @classmethod
    def setup_class(cls):
        sys = SystemManager()
        sys.create_column_family(TEST_KS, 'StdInteger', comparator_type=INT_TYPE)

    @classmethod
    def teardown_class(cls):
        sys = SystemManager()
        sys.drop_column_family(TEST_KS, 'StdInteger')

    def setUp(self):
        self.key = 'TestBigInt'
        self.cf = ColumnFamily(pool, 'StdInteger')

    def tearDown(self):
        self.cf.remove(self.key)

    def test_negative_integers(self):
        # Values chosen to cross byte boundaries (-255/-256/-257) where
        # two's-complement packing is easy to get wrong.
        self.cf.insert(self.key, {-1: '-1'})
        self.cf.insert(self.key, {-12342390: '-12342390'})
        self.cf.insert(self.key, {-255: '-255'})
        self.cf.insert(self.key, {-256: '-256'})
        self.cf.insert(self.key, {-257: '-257'})
        for key, cols in self.cf.get_range():
            # FIX: previously only the first column of the row was
            # checked; verify every inserted column round-trips, and use
            # assertEqual instead of the deprecated assertEquals alias.
            for name, value in cols.items():
                self.assertEqual(str(name), value)
def test_queue_threadlocal_failover(self):
    # Threadlocal pool failover: each forced failure must be counted by
    # the listener and the operation must still succeed via failover.
    listener = _TestListener()
    pool = ConnectionPool(pool_size=1, max_overflow=0, recycle=10000,
                          prefill=True, timeout=0.05,
                          keyspace='PycassaTestKeyspace', credentials=_credentials,
                          listeners=[listener], use_threadlocal=True,
                          server_list=['localhost:9160', 'localhost:9160'])
    cf = ColumnFamily(pool, 'Standard1')

    for i in range(1,5):
        conn = pool.get()
        # Prime this connection to fail its next batch_mutate once.
        setattr(conn, 'send_batch_mutate', conn._fail_once)
        conn._should_fail = True
        conn.return_to_pool()

        # The first insert attempt should fail, but failover should occur
        # and the insert should succeed
        cf.insert('key', {'col': 'val%d' % i, 'col2': 'val'})
        assert_equal(listener.failure_count, i)
        assert_equal(cf.get('key'), {'col': 'val%d' % i, 'col2': 'val'})

    pool.dispose()
    listener.reset()

    # Multi-threaded variant: five primed connections, five inserting threads.
    pool = ConnectionPool(pool_size=5, max_overflow=5, recycle=10000,
                          prefill=True, timeout=0.05,
                          keyspace='PycassaTestKeyspace', credentials=_credentials,
                          listeners=[listener], use_threadlocal=True,
                          server_list=['localhost:9160', 'localhost:9160'])
    cf = ColumnFamily(pool, 'Standard1')

    for i in range(5):
        conn = pool.get()
        setattr(conn, 'send_batch_mutate', conn._fail_once)
        conn._should_fail = True
        conn.return_to_pool()

    threads = []
    args=('key', {'col': 'val', 'col2': 'val'})
    for i in range(5):
        threads.append(threading.Thread(target=cf.insert, args=args))
        threads[-1].start()
    for thread in threads:
        thread.join()

    # Each of the five primed connections should have failed exactly once.
    assert_equal(listener.failure_count, 5)
    pool.dispose()
class Database:
    # Thin wrapper around one pycassa ColumnFamily; all row keys are
    # normalized (capwords + lower) and hashed with MD5 before use.

    def __init__(self, column_family):
        """
        ColumnFamily:
        - User information
        - Search
        - Ticket information
        - System log
        - Supplier information
        """
        # Connect to Cassandra servers
        client = connect(CASSANDRA_HOSTS)
        self.db = ColumnFamily(client, CASSANDRA_KEYSPACE, column_family, super=False)

    def insert(self, key, columns):
        # Hash the normalized key so arbitrary strings map to
        # fixed-width, case-insensitive row keys.
        key = md5(capwords(key).lower()).hexdigest()
        return self.db.insert(key, columns)

    def get(self, key, columns=None):
        # Same key normalization as insert(); columns=None fetches all.
        key = md5(capwords(key).lower()).hexdigest()
        return self.db.get(key=key, columns=columns)

    def remove(self, key, column=None):
        # column=None removes the whole row.
        key = md5(capwords(key).lower()).hexdigest()
        return self.db.remove(key=key, column=column)
class TestTypeErrors(unittest.TestCase):
    """Inserting wrongly-typed column names/values must raise TypeError."""

    def test_packing_enabled(self):
        self.cf = ColumnFamily(pool, 'Standard1')
        self.cf.insert('key', {'col': 'val'})
        # BUG FIX: assert_raises takes the callable's positional arguments
        # directly. The old calls passed args=(...) which sent an
        # unexpected 'args' keyword to insert(), so the TypeError came
        # from the call signature rather than from type validation.
        assert_raises(TypeError, self.cf.insert, 'key', {123: 'val'})
        assert_raises(TypeError, self.cf.insert, 'key', {'col': 123})
        assert_raises(TypeError, self.cf.insert, 'key', {123: 123})
        self.cf.remove('key')

    def test_packing_disabled(self):
        self.cf = ColumnFamily(pool, 'Standard1', autopack_names=False,
                               autopack_values=False)
        self.cf.insert('key', {'col': 'val'})
        # Same fix as above: positional arguments, not args=(...).
        assert_raises(TypeError, self.cf.insert, 'key', {123: 'val'})
        assert_raises(TypeError, self.cf.insert, 'key', {'col': 123})
        assert_raises(TypeError, self.cf.insert, 'key', {123: 123})
        self.cf.remove('key')
def fakepopulate(self, numkeyspace, numcolfam, numentries):
    '''Populate Cassandra with fake data.

    Creates `numkeyspace` random keyspaces, then `numcolfam` random
    column families in every non-system keyspace, then inserts
    `numentries` random rows into every column family of every
    non-system keyspace.
    '''
    countkeys = numkeyspace
    logging.info("Inside fake populate method")
    logging.debug("Method started at : %s", str(datetime.now()))
    #create random keyspace
    while(countkeys):
        name = str(uuid1()).replace("-","")
        # Retry if the random name collides with an existing keyspace.
        if name not in self.sysmanager.list_keyspaces():
            countkeys = countkeys - 1
            self.sysmanager.create_keyspace(name,strategy_options={"replication_factor": "1"})
    #create random key families in each key space
    # NOTE(review): this adds CFs to every pre-existing non-system
    # keyspace too, not only the ones created above — confirm intended.
    for keyspace in self.sysmanager.list_keyspaces():
        if (keyspace != 'system'): #check to skip the system database
            countcolfam = numcolfam
            while(countcolfam):
                name = str(uuid1()).replace("-","")
                if name not in self.sysmanager.get_keyspace_column_families(keyspace).keys():
                    countcolfam = countcolfam - 1
                    self.sysmanager.create_column_family(keyspace, name)
    #create random keys in each key column family of each keyspace
    totalcount=0
    for keyspace in self.sysmanager.list_keyspaces():
        if (keyspace != 'system'): #check to skip the system database
            pool = ConnectionPool(keyspace,[self.address])
            columnfamilies = self.sysmanager.get_keyspace_column_families(keyspace)
            for columnfamilyname in columnfamilies.keys():
                colfamily = ColumnFamily(pool,columnfamilyname)
                countnumetries = numentries
                while(countnumetries):
                    rowkeyname = str(uuid1()).replace("-","")
                    colname = str(uuid1()).replace("-","")
                    colval = str(uuid1()).replace("-","")
                    #col_fam.insert('row_key', {'col_name': 'col_val'})
                    colfamily.insert(rowkeyname, {colname:colval})
                    countnumetries = countnumetries - 1
                    totalcount = totalcount + 1
    logging.debug("Method ended at : %s", str(datetime.now()))
    logging.debug("Total Entries added : %s", str(totalcount))
    logging.info("Exiting fakepopulate method")
def test_assertion_threadlocal_failover(self):
    # NOTE(review): despite the name, this runs with use_threadlocal=False
    # — confirm whether a threadlocal variant was intended.
    listener = _TestListener()
    pool = AssertionPool(keyspace='Keyspace1', credentials=_credentials,
                         listeners=[listener], use_threadlocal=False,
                         server_list=['localhost:9160', 'localhost:9160'])
    conn = pool.get()
    cf = ColumnFamily(conn, 'Standard1')
    for i in range(1,5):
        # Force the next batch_mutate call to time out.
        setattr(cf.client._connection.client, 'batch_mutate', _timeout)

        # The first insert attempt should fail, but failover should occur
        # and the insert should succeed
        cf.insert('key', {'col': 'val'})
        assert_equal(listener.failure_count, i)
        cf.get('key')
    pool.dispose()
def insert_insert_get_indexed_slices(self): indexed_cf = ColumnFamily(pool, "Indexed1") columns = {"birthdate": 1L} keys = [] for i in range(1, 4): indexed_cf.insert("key%d" % i, columns) keys.append("key%d") expr = index.create_index_expression(column_name="birthdate", value=1L) clause = index.create_index_clause([expr]) count = 0 for key, cols in indexed_cf.get_indexed_slices(clause): assert_equal(cols, columns) assert key in keys count += 1 assert_equal(count, 3)
def insert_insert_get_indexed_slices(self): indexed_cf = ColumnFamily(self.client, 'Indexed1') columns = {'birthdate': 1L} keys = [] for i in range(1,4): indexed_cf.insert('key%d' % i, columns) keys.append('key%d') expr = index.create_index_expression(column_name='birthdate', value=1L) clause = index.create_index_clause([expr]) count = 0 for key,cols in indexed_cf.get_indexed_slices(clause): assert cols == columns assert key in keys count += 1 assert_equal(count, 3)
def insert_insert_get_indexed_slices(self): indexed_cf = ColumnFamily(pool, 'Indexed1') columns = {'birthdate': 1L} keys = [] for i in range(1, 4): indexed_cf.insert('key%d' % i, columns) keys.append('key%d') expr = index.create_index_expression(column_name='birthdate', value=1L) clause = index.create_index_clause([expr]) count = 0 for key, cols in indexed_cf.get_indexed_slices(clause): assert_equal(cols, columns) assert key in keys count += 1 assert_equal(count, 3)
def test_default_validated_columns(self): sys = SystemManager() sys.create_column_family(TEST_KS, 'DefaultValidator', default_validation_class=LONG_TYPE) sys.alter_column(TEST_KS, 'DefaultValidator', 'subcol', TIME_UUID_TYPE) sys.close() cf = ColumnFamily(pool, 'DefaultValidator') key = 'key1' col_cf = {'aaaaaa': 1L} col_cm = {'subcol': TIME1} col_ncf = {'aaaaaa': TIME1} col_ncm = {'subcol': 1L} # Both of these inserts work, as cf allows # longs and cm for 'subcol' allows TIMEUUIDs. cf.insert(key, col_cf) cf.insert(key, col_cm) assert_equal(cf.get(key), {'aaaaaa': 1L, 'subcol': TIME1})
def test_insert_get_indexed_slices(self): indexed_cf = ColumnFamily(self.client, 'Indexed1') columns = {'birthdate': 1L} key = 'key1' indexed_cf.insert(key, columns, write_consistency_level=ConsistencyLevel.ONE) key = 'key2' indexed_cf.insert(key, columns, write_consistency_level=ConsistencyLevel.ONE) key = 'key3' indexed_cf.insert(key, columns, write_consistency_level=ConsistencyLevel.ONE) expr = index.create_index_expression(column_name='birthdate', value=1L) clause = index.create_index_clause([expr]) result = indexed_cf.get_indexed_slices(clause) assert len(result) == 3 assert result.get('key1') == columns assert result.get('key2') == columns assert result.get('key3') == columns
def test_queue_failover(self):
    # Failover with a one-connection queue pool, with and without prefill.
    for prefill in (True, False):
        listener = _TestListener()
        pool = ConnectionPool(pool_size=1, max_overflow=0, recycle=10000,
                              prefill=prefill, timeout=1,
                              keyspace='PycassaTestKeyspace', credentials=_credentials,
                              listeners=[listener], use_threadlocal=False,
                              server_list=['localhost:9160', 'localhost:9160'])
        cf = ColumnFamily(pool, 'Standard1')

        for i in range(1,5):
            conn = pool.get()
            # Prime the connection to fail its next batch_mutate once.
            setattr(conn, 'send_batch_mutate', conn._fail_once)
            conn._should_fail = True
            conn.return_to_pool()

            # The first insert attempt should fail, but failover should occur
            # and the insert should succeed
            cf.insert('key', {'col': 'val%d' % i, 'col2': 'val'})
            assert_equal(listener.failure_count, i)
            assert_equal(cf.get('key'), {'col': 'val%d' % i, 'col2': 'val'})

        pool.dispose()
class TestSuperColumnFamily:
    # Super CF tests using the keyspace-scoped connect_thread_local API,
    # with an explicit monotonically increasing timestamp function.

    def setUp(self):
        credentials = {'username': '******', 'password': '******'}
        self.client = connect_thread_local('Keyspace1', credentials=credentials)
        # buffer_size=2 forces range reads to fetch in multiple batches.
        self.cf = ColumnFamily(self.client, 'Super2',
                               write_consistency_level=ConsistencyLevel.ONE,
                               buffer_size=2, timestamp=self.timestamp,
                               super=True)
        # Resume the timestamp counter persisted by the previous run.
        try:
            self.timestamp_n = int(self.cf.get('meta')['meta']['timestamp'])
        except NotFoundException:
            self.timestamp_n = 0
        self.clear()

    def tearDown(self):
        # Persist the counter so the next run keeps timestamps monotonic.
        self.cf.insert('meta', {'meta': {'timestamp': str(self.timestamp_n)}})

    # Since the timestamp passed to Cassandra will be in the same second
    # with the default timestamp function, causing problems with removing
    # and inserting (Cassandra doesn't know which is later), we supply our own
    def timestamp(self):
        self.timestamp_n += 1
        return self.timestamp_n

    def clear(self):
        # Advance the counter past every stored timestamp, then wipe rows.
        for key, columns in self.cf.get_range(include_timestamp=True):
            for subcolumns in columns.itervalues():
                for value, timestamp in subcolumns.itervalues():
                    self.timestamp_n = max(self.timestamp_n, timestamp)
            self.cf.remove(key)

    def test_super(self):
        key = 'TestSuperColumnFamily.test_super'
        columns = {'1': {'sub1': 'val1', 'sub2': 'val2'},
                   '2': {'sub3': 'val3', 'sub4': 'val4'}}
        assert_raises(NotFoundException, self.cf.get, key)
        self.cf.insert(key, columns)
        assert self.cf.get(key) == columns
        assert self.cf.multiget([key]) == {key: columns}
        assert list(self.cf.get_range(start=key, finish=key)) == [(key, columns)]

    def test_super_column_argument(self):
        key = 'TestSuperColumnFamily.test_super_columns_argument'
        sub12 = {'sub1': 'val1', 'sub2': 'val2'}
        sub34 = {'sub3': 'val3', 'sub4': 'val4'}
        columns = {'1': sub12, '2': sub34}
        self.cf.insert(key, columns)
        # super_column narrows every read to a single super column.
        assert self.cf.get(key, super_column='1') == sub12
        assert_raises(NotFoundException, self.cf.get, key, super_column='3')
        assert self.cf.multiget([key], super_column='1') == {key: sub12}
        assert list(self.cf.get_range(start=key, finish=key, super_column='1')) == [(key, sub12)]
class TestSuperColumnFamily:
    # Super CF tests using the older connect_thread_local()/login() API,
    # with an explicit monotonically increasing timestamp function.

    def setUp(self):
        self.client = connect_thread_local()
        self.client.login('Keyspace1', {'username': '******', 'password': '******'})
        # buffer_size=2 forces range reads to fetch in multiple batches.
        self.cf = ColumnFamily(self.client, 'Keyspace1', 'Super2',
                               write_consistency_level=ConsistencyLevel.ONE,
                               buffer_size=2, timestamp=self.timestamp,
                               super=True)
        # Resume the timestamp counter persisted by the previous run.
        try:
            self.timestamp_n = int(self.cf.get('meta')['meta']['timestamp'])
        except NotFoundException:
            self.timestamp_n = 0
        self.clear()

    def tearDown(self):
        # Persist the counter so the next run keeps timestamps monotonic.
        self.cf.insert('meta', {'meta': {'timestamp': str(self.timestamp_n)}})

    # Since the timestamp passed to Cassandra will be in the same second
    # with the default timestamp function, causing problems with removing
    # and inserting (Cassandra doesn't know which is later), we supply our own
    def timestamp(self):
        self.timestamp_n += 1
        return self.timestamp_n

    def clear(self):
        # Advance the counter past every stored timestamp, then wipe rows.
        for key, columns in self.cf.get_range(include_timestamp=True):
            for subcolumns in columns.itervalues():
                for value, timestamp in subcolumns.itervalues():
                    self.timestamp_n = max(self.timestamp_n, timestamp)
            self.cf.remove(key)

    def test_super(self):
        key = 'TestSuperColumnFamily.test_super'
        columns = {'1': {'sub1': 'val1', 'sub2': 'val2'},
                   '2': {'sub3': 'val3', 'sub4': 'val4'}}
        assert_raises(NotFoundException, self.cf.get, key)
        self.cf.insert(key, columns)
        assert self.cf.get(key) == columns
        assert self.cf.multiget([key]) == {key: columns}
        assert list(self.cf.get_range(start=key, finish=key)) == [(key, columns)]

    def test_super_column_argument(self):
        key = 'TestSuperColumnFamily.test_super_columns_argument'
        sub12 = {'sub1': 'val1', 'sub2': 'val2'}
        sub34 = {'sub3': 'val3', 'sub4': 'val4'}
        columns = {'1': sub12, '2': sub34}
        self.cf.insert(key, columns)
        # super_column narrows every read to a single super column.
        assert self.cf.get(key, super_column='1') == sub12
        assert_raises(NotFoundException, self.cf.get, key, super_column='3')
        assert self.cf.multiget([key], super_column='1') == {key: sub12}
        assert list(self.cf.get_range(start=key, finish=key, super_column='1')) == [(key, sub12)]
class TestTimeUUIDs(unittest.TestCase):
    # TimeUUID comparator tests: datetime and unix-time slice bounds, and
    # auto-conversion of float column names to TimeUUIDs.

    def setUp(self):
        self.cf_time = ColumnFamily(pool, 'StdTimeUUID')

    def tearDown(self):
        self.cf_time.remove('key1')

    def test_datetime_to_uuid(self):
        key = 'key1'
        # timeline[0] < time1 < timeline[1] < time2 < timeline[2]
        timeline = []
        timeline.append(datetime.now())
        time1 = uuid1()
        col1 = {time1: '0'}
        self.cf_time.insert(key, col1)
        time.sleep(1)
        timeline.append(datetime.now())
        time2 = uuid1()
        col2 = {time2: '1'}
        self.cf_time.insert(key, col2)
        time.sleep(1)
        timeline.append(datetime.now())

        cols = {time1: '0', time2: '1'}
        # datetime slice bounds are converted to TimeUUIDs for slicing.
        assert_equal(self.cf_time.get(key, column_start=timeline[0]), cols)
        assert_equal(self.cf_time.get(key, column_finish=timeline[2]), cols)
        assert_equal(self.cf_time.get(key, column_start=timeline[0],
                                      column_finish=timeline[2]), cols)
        assert_equal(self.cf_time.get(key, column_start=timeline[0],
                                      column_finish=timeline[2]), cols)
        assert_equal(self.cf_time.get(key, column_start=timeline[0],
                                      column_finish=timeline[1]), col1)
        assert_equal(self.cf_time.get(key, column_start=timeline[1],
                                      column_finish=timeline[2]), col2)

    def test_time_to_uuid(self):
        key = 'key1'
        # Same structure as above but with unix timestamps (floats).
        timeline = []
        timeline.append(time.time())
        time1 = uuid1()
        col1 = {time1: '0'}
        self.cf_time.insert(key, col1)
        time.sleep(0.1)
        timeline.append(time.time())
        time2 = uuid1()
        col2 = {time2: '1'}
        self.cf_time.insert(key, col2)
        time.sleep(0.1)
        timeline.append(time.time())

        cols = {time1: '0', time2: '1'}
        assert_equal(self.cf_time.get(key, column_start=timeline[0]), cols)
        assert_equal(self.cf_time.get(key, column_finish=timeline[2]), cols)
        assert_equal(self.cf_time.get(key, column_start=timeline[0],
                                      column_finish=timeline[2]), cols)
        assert_equal(self.cf_time.get(key, column_start=timeline[0],
                                      column_finish=timeline[2]), cols)
        assert_equal(self.cf_time.get(key, column_start=timeline[0],
                                      column_finish=timeline[1]), col1)
        assert_equal(self.cf_time.get(key, column_start=timeline[1],
                                      column_finish=timeline[2]), col2)

    def test_auto_time_to_uuid1(self):
        key = 'key1'
        t = time.time()
        col = {t: 'foo'}
        # A float column name is auto-converted to a TimeUUID on insert.
        self.cf_time.insert(key, col)
        uuid_res = self.cf_time.get(key).keys()[0]
        timestamp = convert_uuid_to_time(uuid_res)
        # Round trip should preserve the timestamp to ~millisecond precision.
        assert_almost_equal(timestamp, t, places=3)
class TestColumnFamilyMap:
    """Integration tests for ColumnFamilyMap object mapping on 'Standard2'.

    Maps TestUTF8 instances (typed attributes) and, via ``empty_map``,
    TestEmpty instances with ``raw_columns=True`` so raw string values are
    exposed alongside the typed mapping.
    """

    def setUp(self):
        # NOTE(review): this uses the old connect()/login() client API and
        # passes the keyspace to ColumnFamily — unlike the pool-based
        # classes elsewhere in this file; presumably an older test revision.
        self.client = connect()
        self.client.login('Keyspace1', {
            'username': '******',
            'password': '******'
        })
        self.cf = ColumnFamily(self.client, 'Keyspace1', 'Standard2',
                               write_consistency_level=ConsistencyLevel.ONE,
                               timestamp=self.timestamp)
        self.map = ColumnFamilyMap(TestUTF8, self.cf)
        self.empty_map = ColumnFamilyMap(TestEmpty, self.cf, raw_columns=True)
        try:
            # Resume the timestamp counter persisted by a previous run.
            self.timestamp_n = int(self.cf.get('meta')['timestamp'])
        except NotFoundException:
            self.timestamp_n = 0
        self.clear()

    def tearDown(self):
        # Persist the highest timestamp seen so the next run keeps increasing.
        self.cf.insert('meta', {'timestamp': str(self.timestamp_n)})

    # Since the timestamp passed to Cassandra will be in the same second
    # with the default timestamp function, causing problems with removing
    # and inserting (Cassandra doesn't know which is later), we supply our own
    def timestamp(self):
        self.timestamp_n += 1
        return self.timestamp_n

    def clear(self):
        # Wipe every row, tracking the largest timestamp ever written so
        # subsequent writes always supersede old tombstones.
        for key, columns in self.cf.get_range(include_timestamp=True):
            for value, timestamp in columns.itervalues():
                self.timestamp_n = max(self.timestamp_n, timestamp)
            self.cf.remove(key)

    def instance(self, key):
        """Build a fully-populated TestUTF8 instance for row ``key``."""
        instance = TestUTF8()
        instance.key = key
        instance.strcol = '1'
        instance.intcol = 2
        instance.floatcol = 3.5
        # microsecond=0 because Cassandra round-trips lose sub-second precision.
        instance.datetimecol = datetime.now().replace(microsecond=0)
        instance.intstrcol = 8
        instance.floatstrcol = 4.6
        instance.datetimestrcol = datetime.now().replace(microsecond=0)
        return instance

    def test_will_not_insert_none(self):
        # Each typed column individually set to None must be rejected.
        for column in ('strcol', 'intcol', 'floatcol', 'datetimecol',
                       'intstrcol', 'floatstrcol', 'datetimestrcol'):
            instance = self.instance(
                'TestColumnFamilyMap.test_will_not_insert_none')
            setattr(instance, column, None)
            assert_raises(TypeError, self.map.insert, instance)

    def test_empty(self):
        key = 'TestColumnFamilyMap.test_empty'
        assert_raises(NotFoundException, self.map.get, key)
        assert len(self.map.multiget([key])) == 0

    def test_insert_get(self):
        instance = self.instance('TestColumnFamilyMap.test_insert_get')
        assert_raises(NotFoundException, self.map.get, instance.key)
        self.map.insert(instance)
        assert self.map.get(instance.key) == instance
        # raw_columns exposes the unconverted string representation.
        assert self.empty_map.get(
            instance.key).raw_columns['intstrcol'] == str(instance.intstrcol)

    def test_insert_multiget(self):
        instance1 = self.instance('TestColumnFamilyMap.test_insert_multiget1')
        instance2 = self.instance('TestColumnFamilyMap.test_insert_multiget2')
        missing_key = 'TestColumnFamilyMap.test_insert_multiget3'
        self.map.insert(instance1)
        self.map.insert(instance2)
        rows = self.map.multiget([instance1.key, instance2.key, missing_key])
        # Missing keys are dropped, not returned as None.
        assert len(rows) == 2
        assert rows[instance1.key] == instance1
        assert rows[instance2.key] == instance2
        assert missing_key not in rows
        assert self.empty_map.multiget([
            instance1.key
        ])[instance1.key].raw_columns['intstrcol'] == str(instance1.intstrcol)

    def test_insert_get_count(self):
        instance = self.instance('TestColumnFamilyMap.test_insert_get_count')
        self.map.insert(instance)
        # 7 == number of mapped columns set by instance() (key is the row key).
        assert self.map.get_count(instance.key) == 7

    def test_insert_get_range(self):
        instances = []
        for i in xrange(5):
            instance = self.instance(
                'TestColumnFamilyMap.test_insert_get_range%s' % i)
            instances.append(instance)
        for instance in instances:
            self.map.insert(instance)
        rows = list(
            self.map.get_range(start=instances[0].key,
                               finish=instances[-1].key))
        assert len(rows) == len(instances)
        # NOTE(review): assumes the partitioner returns these keys in
        # insertion order (order-preserving partitioner) — verify in config.
        assert rows == instances
        assert list(
            self.empty_map.get_range(
                start=instances[0].key,
                finish=instances[0].key))[0].raw_columns['intstrcol'] == str(
                    instances[0].intstrcol)

    def test_remove(self):
        instance = self.instance('TestColumnFamilyMap.test_remove')
        self.map.insert(instance)
        self.map.remove(instance)
        assert_raises(NotFoundException, self.map.get, instance.key)

    def test_does_not_insert_extra_column(self):
        # Attributes not declared on TestUTF8 must not be persisted.
        instance = self.instance(
            'TestColumnFamilyMap.test_does_not_insert_extra_column')
        instance.othercol = 'Test'
        self.map.insert(instance)
        get_instance = self.map.get(instance.key)
        assert get_instance.strcol == instance.strcol
        assert get_instance.intcol == instance.intcol
        assert get_instance.floatcol == instance.floatcol
        assert get_instance.datetimecol == instance.datetimecol
        assert_raises(AttributeError, getattr, get_instance, 'othercol')

    def test_has_defaults(self):
        # A row with only one stored column gets declared defaults for the rest.
        key = 'TestColumnFamilyMap.test_has_defaults'
        self.cf.insert(key, {'strcol': '1'})
        instance = self.map.get(key)
        assert instance.intcol == TestUTF8.intcol.default
        assert instance.floatcol == TestUTF8.floatcol.default
        assert instance.datetimecol == TestUTF8.datetimecol.default
        assert instance.intstrcol == TestUTF8.intstrcol.default
        assert instance.floatstrcol == TestUTF8.floatstrcol.default
        assert instance.datetimestrcol == TestUTF8.datetimestrcol.default
def test_validated_columns(self): sys = SystemManager() sys.create_column_family( TEST_KS, 'Validators', ) sys.alter_column(TEST_KS, 'Validators', 'long', LONG_TYPE) sys.alter_column(TEST_KS, 'Validators', 'int', INT_TYPE) sys.alter_column(TEST_KS, 'Validators', 'time', TIME_UUID_TYPE) sys.alter_column(TEST_KS, 'Validators', 'lex', LEXICAL_UUID_TYPE) sys.alter_column(TEST_KS, 'Validators', 'ascii', ASCII_TYPE) sys.alter_column(TEST_KS, 'Validators', 'utf8', UTF8_TYPE) sys.alter_column(TEST_KS, 'Validators', 'bytes', BYTES_TYPE) sys.close() cf = ColumnFamily(pool, 'Validators') key = 'key1' col = {'long': 1L} cf.insert(key, col) assert_equal(cf.get(key)['long'], 1L) col = {'int': 1} cf.insert(key, col) assert_equal(cf.get(key)['int'], 1) col = {'time': TIME1} cf.insert(key, col) assert_equal(cf.get(key)['time'], TIME1) col = {'lex': uuid.UUID(bytes='aaa aaa aaa aaaa')} cf.insert(key, col) assert_equal(cf.get(key)['lex'], uuid.UUID(bytes='aaa aaa aaa aaaa')) col = {'ascii': 'aaa'} cf.insert(key, col) assert_equal(cf.get(key)['ascii'], 'aaa') col = {'utf8': u'a\u0020'} cf.insert(key, col) assert_equal(cf.get(key)['utf8'], u'a\u0020') col = {'bytes': 'aaa'} cf.insert(key, col) assert_equal(cf.get(key)['bytes'], 'aaa') cf.remove(key)
class CassandraCache(CacheUtils):
    permanent = True
    # NOTE(review): because it follows the `permanent` assignment, this
    # string is NOT the class docstring — it is a no-op expression. Moving
    # it above `permanent` would make it the real docstring.
    """A cache that uses a Cassandra ColumnFamily. Uses only the column-name
       'value'"""

    def __init__(self, column_family, client,
                 read_consistency_level=CL_ONE,
                 write_consistency_level=CL_QUORUM):
        # Per-instance defaults; individual calls may override via the
        # *_consistency_level keyword arguments.
        self.column_family = column_family
        self.client = client
        self.read_consistency_level = read_consistency_level
        self.write_consistency_level = write_consistency_level
        self.cf = ColumnFamily(self.client,
                               self.column_family,
                               read_consistency_level=read_consistency_level,
                               write_consistency_level=write_consistency_level)

    def _rcl(self, alternative):
        # Resolve the effective read consistency level for a call.
        return (alternative if alternative is not None
                else self.cf.read_consistency_level)

    def _wcl(self, alternative):
        # Resolve the effective write consistency level for a call.
        return (alternative if alternative is not None
                else self.cf.write_consistency_level)

    def get(self, key, default=None, read_consistency_level=None):
        """Return the unpickled value for ``key``, or ``default`` if absent."""
        try:
            rcl = self._rcl(read_consistency_level)
            row = self.cf.get(key, columns=['value'],
                              read_consistency_level=rcl)
            return pickle.loads(row['value'])
        # KeyError covers a row that exists but lacks the 'value' column.
        except (CassandraNotFound, KeyError):
            return default

    def simple_get_multi(self, keys, read_consistency_level=None):
        """Fetch many keys at once; missing keys are simply absent from
        the returned dict."""
        rcl = self._rcl(read_consistency_level)
        rows = self.cf.multiget(list(keys),
                                columns=['value'],
                                read_consistency_level=rcl)
        return dict((key, pickle.loads(row['value']))
                    for (key, row) in rows.iteritems())

    def set(self, key, val, write_consistency_level=None, time=None):
        """Pickle and store ``val`` under ``key``; ``time`` is a TTL in
        seconds (None = no expiry)."""
        if val == NoneResult:
            # NoneResult caching is for other parts of the chain
            return
        wcl = self._wcl(write_consistency_level)
        ret = self.cf.insert(key, {'value': pickle.dumps(val)},
                             write_consistency_level=wcl,
                             ttl=time)
        self._warm([key])
        return ret

    def set_multi(self, keys, prefix='', write_consistency_level=None,
                  time=None):
        """Store many key/value pairs in a single batch mutation.

        ``prefix`` is prepended to every key; NoneResult values are skipped.
        Returns a dict of per-key insert results.
        """
        if not isinstance(keys, dict):
            # allow iterables yielding tuples
            keys = dict(keys)
        wcl = self._wcl(write_consistency_level)
        ret = {}
        # Batch context queues mutations and sends them on exit.
        with self.cf.batch(write_consistency_level=wcl):
            for key, val in keys.iteritems():
                if val != NoneResult:
                    ret[key] = self.cf.insert('%s%s' % (prefix, key),
                                              {'value': pickle.dumps(val)},
                                              ttl=time)
        self._warm(keys.keys())
        return ret

    def _warm(self, keys):
        # NOTE(review): the `if False and ...` guard permanently disables the
        # debug print; the multiget below still runs as a read-warming touch.
        import random
        if False and random.random() > 0.98:
            print 'Warming', keys
        self.cf.multiget(keys)

    def delete(self, key, write_consistency_level=None):
        """Remove the row for ``key``."""
        wcl = self._wcl(write_consistency_level)
        self.cf.remove(key, write_consistency_level=wcl)
import ccmlib.cluster from pycassa import SystemManager, ConnectionPool, ColumnFamily, SIMPLE_STRATEGY, cassandra values = list(map(str, range(10))) keyspace, table = 'test_keyspace', 'test_table' cluster = ccmlib.cluster.Cluster('.', 'thrift-test', partitioner='RandomPartitioner', cassandra_version='3.11.2') cluster.set_configuration_options({'start_rpc': 'true', 'hinted_handoff_enabled': 'false'}) cluster.populate(3, debug=True).start() sys = SystemManager('127.0.0.1') sys.create_keyspace(keyspace, SIMPLE_STRATEGY, {'replication_factor': '3'}) sys.create_column_family(keyspace, table) # Imitate temporary node unavailability that cause inconsistency in data across nodes failing_node = cluster.nodelist()[2] failing_node.stop() cf = ColumnFamily(ConnectionPool(keyspace, server_list=['127.0.0.1']), table) for value in values: cf.insert(value, {'value': value}, write_consistency_level=cassandra.ttypes.ConsistencyLevel.QUORUM) failing_node.start() failing_node.wait_for_thrift_interface() cf = ColumnFamily(ConnectionPool(keyspace, server_list=['127.0.0.3']), table) # Returns many Nones records print(cf.multiget(values, read_consistency_level=cassandra.ttypes.ConsistencyLevel.QUORUM).values()) cf = ColumnFamily(ConnectionPool(keyspace, server_list=['127.0.0.1']), table) # All returned records are fine print(cf.multiget(values, read_consistency_level=cassandra.ttypes.ConsistencyLevel.ONE).values()) cluster.stop()
class TestColumnFamily:
    """Integration tests for the raw ColumnFamily API on 'Standard2'.

    Uses ``buffer_size=2`` so even small range queries exercise paging,
    and ``dict_class=TestDict`` so get() results can be type-checked.
    """

    def setUp(self):
        credentials = {'username': '******', 'password': '******'}
        self.client = connect('Keyspace1', credentials=credentials)
        self.cf = ColumnFamily(self.client, 'Standard2',
                               write_consistency_level=ConsistencyLevel.ONE,
                               buffer_size=2, timestamp=self.timestamp,
                               dict_class=TestDict)
        try:
            # Resume the timestamp counter persisted by a previous run.
            self.timestamp_n = int(self.cf.get('meta')['timestamp'])
        except NotFoundException:
            self.timestamp_n = 0
        self.clear()

    def tearDown(self):
        # Persist the highest timestamp seen so the next run keeps increasing.
        self.cf.insert('meta', {'timestamp': str(self.timestamp_n)})

    # Since the timestamp passed to Cassandra will be in the same second
    # with the default timestamp function, causing problems with removing
    # and inserting (Cassandra doesn't know which is later), we supply our own
    def timestamp(self):
        self.timestamp_n += 1
        return self.timestamp_n

    def clear(self):
        # Wipe every row, tracking the largest timestamp ever written.
        for key, columns in self.cf.get_range(include_timestamp=True):
            for value, timestamp in columns.itervalues():
                self.timestamp_n = max(self.timestamp_n, timestamp)
            self.cf.remove(key)

    def test_empty(self):
        key = 'TestColumnFamily.test_empty'
        assert_raises(NotFoundException, self.cf.get, key)
        assert len(self.cf.multiget([key])) == 0
        for key, columns in self.cf.get_range():
            assert len(columns) == 0

    def test_insert_get(self):
        key = 'TestColumnFamily.test_insert_get'
        columns = {'1': 'val1', '2': 'val2'}
        assert_raises(NotFoundException, self.cf.get, key)
        self.cf.insert(key, columns)
        assert self.cf.get(key) == columns

    def test_insert_multiget(self):
        key1 = 'TestColumnFamily.test_insert_multiget1'
        columns1 = {'1': 'val1', '2': 'val2'}
        key2 = 'test_insert_multiget1'
        columns2 = {'3': 'val1', '4': 'val2'}
        missing_key = 'key3'
        self.cf.insert(key1, columns1)
        self.cf.insert(key2, columns2)
        rows = self.cf.multiget([key1, key2, missing_key])
        # Missing keys are dropped, not returned as empty rows.
        assert len(rows) == 2
        assert rows[key1] == columns1
        assert rows[key2] == columns2
        assert missing_key not in rows

    def test_insert_get_count(self):
        # get_count with every combination of slice bounds and column lists.
        key = 'TestColumnFamily.test_insert_get_count'
        columns = {'1': 'val1', '2': 'val2'}
        self.cf.insert(key, columns)
        assert self.cf.get_count(key) == 2
        assert_equal(self.cf.get_count(key, column_start='1'), 2)
        assert_equal(self.cf.get_count(key, column_finish='2'), 2)
        assert_equal(
            self.cf.get_count(key, column_start='1', column_finish='2'), 2)
        assert_equal(
            self.cf.get_count(key, column_start='1', column_finish='1'), 1)
        assert_equal(self.cf.get_count(key, columns=['1', '2']), 2)
        assert_equal(self.cf.get_count(key, columns=['1']), 1)

    def test_insert_multiget_count(self):
        # multiget_count mirrors get_count across several keys at once.
        keys = [
            'TestColumnFamily.test_insert_multiget_count1',
            'TestColumnFamily.test_insert_multiget_count2',
            'TestColumnFamily.test_insert_multiget_count3'
        ]
        columns = {'1': 'val1', '2': 'val2'}
        for key in keys:
            self.cf.insert(key, columns)
        result = self.cf.multiget_count(keys)
        assert_equal(result[keys[0]], 2)
        assert_equal(result[keys[1]], 2)
        assert_equal(result[keys[2]], 2)
        result = self.cf.multiget_count(keys, column_start='1')
        assert_equal(len(result), 3)
        assert_equal(result[keys[0]], 2)
        result = self.cf.multiget_count(keys, column_finish='2')
        assert_equal(len(result), 3)
        assert_equal(result[keys[0]], 2)
        result = self.cf.multiget_count(keys, column_start='1',
                                        column_finish='2')
        assert_equal(len(result), 3)
        assert_equal(result[keys[0]], 2)
        result = self.cf.multiget_count(keys, column_start='1',
                                        column_finish='1')
        assert_equal(len(result), 3)
        assert_equal(result[keys[0]], 1)
        result = self.cf.multiget_count(keys, columns=['1', '2'])
        assert_equal(len(result), 3)
        assert_equal(result[keys[0]], 2)
        result = self.cf.multiget_count(keys, columns=['1'])
        assert_equal(len(result), 3)
        assert_equal(result[keys[0]], 1)

    def test_insert_get_range(self):
        keys = [
            'TestColumnFamily.test_insert_get_range%s' % i for i in xrange(5)
        ]
        columns = {'1': 'val1', '2': 'val2'}
        for key in keys:
            self.cf.insert(key, columns)
        rows = list(self.cf.get_range(start=keys[0], finish=keys[-1]))
        assert len(rows) == len(keys)
        # NOTE(review): assumes keys come back in insertion order — requires
        # an order-preserving partitioner; verify in cluster config.
        for i, (k, c) in enumerate(rows):
            assert k == keys[i]
            assert c == columns

    def test_insert_get_indexed_slices(self):
        indexed_cf = ColumnFamily(self.client, 'Indexed1')
        columns = {'birthdate': 1L}
        key = 'key1'
        indexed_cf.insert(key, columns,
                          write_consistency_level=ConsistencyLevel.ONE)
        key = 'key2'
        indexed_cf.insert(key, columns,
                          write_consistency_level=ConsistencyLevel.ONE)
        key = 'key3'
        indexed_cf.insert(key, columns,
                          write_consistency_level=ConsistencyLevel.ONE)
        expr = index.create_index_expression(column_name='birthdate', value=1L)
        clause = index.create_index_clause([expr])
        # NOTE(review): treats the result as a dict (len/.get); other test
        # classes in this file iterate it as (key, cols) pairs — the two
        # copies target different pycassa API versions.
        result = indexed_cf.get_indexed_slices(clause)
        assert len(result) == 3
        assert result.get('key1') == columns
        assert result.get('key2') == columns
        assert result.get('key3') == columns

    def test_remove(self):
        key = 'TestColumnFamily.test_remove'
        columns = {'1': 'val1', '2': 'val2'}
        self.cf.insert(key, columns)
        # Removing a subset of columns leaves the rest of the row intact.
        self.cf.remove(key, columns=['2'])
        del columns['2']
        assert self.cf.get(key) == {'1': 'val1'}
        self.cf.remove(key)
        assert_raises(NotFoundException, self.cf.get, key)

    def test_dict_class(self):
        # Results should come back as the dict_class given to the CF.
        key = 'TestColumnFamily.test_dict_class'
        self.cf.insert(key, {'1': 'val1'})
        assert isinstance(self.cf.get(key), TestDict)
class CassandraCache(CacheUtils):
    permanent = True
    # NOTE(review): this string follows the `permanent` assignment, so it is
    # NOT the class docstring — it is a no-op expression statement.
    """A cache that uses a Cassandra ColumnFamily. Uses only the column-name
       'value'"""

    def __init__(self, column_family, client,
                 read_consistency_level = CL_ONE,
                 write_consistency_level = CL_QUORUM):
        # Per-instance defaults; individual calls may override via keyword.
        self.column_family = column_family
        self.client = client
        self.read_consistency_level = read_consistency_level
        self.write_consistency_level = write_consistency_level
        self.cf = ColumnFamily(self.client,
                               self.column_family,
                               read_consistency_level = read_consistency_level,
                               write_consistency_level = write_consistency_level)

    def _rcl(self, alternative):
        # Effective read consistency level for one call.
        return (alternative if alternative is not None
                else self.cf.read_consistency_level)

    def _wcl(self, alternative):
        # Effective write consistency level for one call.
        return (alternative if alternative is not None
                else self.cf.write_consistency_level)

    def get(self, key, default = None, read_consistency_level = None):
        """Return the unpickled value for ``key``, or ``default`` if absent."""
        try:
            rcl = self._rcl(read_consistency_level)
            row = self.cf.get(key, columns=['value'],
                              read_consistency_level = rcl)
            return pickle.loads(row['value'])
        # KeyError covers a row that exists but lacks the 'value' column.
        except (CassandraNotFound, KeyError):
            return default

    def simple_get_multi(self, keys, read_consistency_level = None):
        """Fetch many keys; missing keys are absent from the result dict."""
        rcl = self._rcl(read_consistency_level)
        rows = self.cf.multiget(list(keys),
                                columns=['value'],
                                read_consistency_level = rcl)
        return dict((key, pickle.loads(row['value']))
                    for (key, row) in rows.iteritems())

    def set(self, key, val, write_consistency_level = None, time = None):
        """Pickle and store ``val``; ``time`` is a TTL in seconds or None."""
        if val == NoneResult:
            # NoneResult caching is for other parts of the chain
            return
        wcl = self._wcl(write_consistency_level)
        ret = self.cf.insert(key, {'value': pickle.dumps(val)},
                             write_consistency_level = wcl,
                             ttl = time)
        self._warm([key])
        return ret

    def set_multi(self, keys, prefix='', write_consistency_level = None,
                  time = None):
        """Batch-store key/value pairs; ``prefix`` is prepended to each key
        and NoneResult values are skipped."""
        if not isinstance(keys, dict):
            # allow iterables yielding tuples
            keys = dict(keys)
        wcl = self._wcl(write_consistency_level)
        ret = {}
        # Batch context queues mutations and sends them on exit.
        with self.cf.batch(write_consistency_level = wcl):
            for key, val in keys.iteritems():
                if val != NoneResult:
                    # NOTE(review): `time or None` maps a TTL of 0 to None
                    # (no expiry) — the other CassandraCache variant in this
                    # file passes `ttl=time` directly; confirm which is intended.
                    ret[key] = self.cf.insert('%s%s' % (prefix, key),
                                              {'value': pickle.dumps(val)},
                                              ttl = time or None)
        self._warm(keys.keys())
        return ret

    def _warm(self, keys):
        # NOTE(review): `if False and ...` permanently disables the debug
        # print; the multiget below still runs as a read-warming touch.
        import random
        if False and random.random() > 0.98:
            print 'Warming', keys
        self.cf.multiget(keys)

    def delete(self, key, write_consistency_level = None):
        """Remove the row for ``key``."""
        wcl = self._wcl(write_consistency_level)
        self.cf.remove(key, write_consistency_level = wcl)
def insert(self, colfam, key, data, ttl=None): cf = ColumnFamily(self.db, colfam) return cf.insert(key, data, ttl=ttl)
class TestAutoPacking: def setUp(self): credentials = {'username': '******', 'password': '******'} self.client = connect_thread_local('Keyspace1', credentials=credentials) self.cf = ColumnFamily(self.client, 'Standard2') self.cf_long = ColumnFamily(self.client, 'StdLong') self.cf_int = ColumnFamily(self.client, 'StdInteger') self.cf_time = ColumnFamily(self.client, 'StdTimeUUID') self.cf_lex = ColumnFamily(self.client, 'StdLexicalUUID') self.cf_ascii = ColumnFamily(self.client, 'StdAscii') self.cf_utf8 = ColumnFamily(self.client, 'StdUTF8') self.cf_bytes = ColumnFamily(self.client, 'StdBytes') self.cf_suplong = ColumnFamily(self.client, 'SuperLong', super=True) self.cf_supint = ColumnFamily(self.client, 'SuperInt', super=True) self.cf_suptime = ColumnFamily(self.client, 'SuperTime', super=True) self.cf_suplex = ColumnFamily(self.client, 'SuperLex', super=True) self.cf_supascii = ColumnFamily(self.client, 'SuperAscii', super=True) self.cf_suputf8 = ColumnFamily(self.client, 'SuperUTF8', super=True) self.cf_supbytes = ColumnFamily(self.client, 'SuperBytes', super=True) self.cf_suplong_sublong = ColumnFamily(self.client, 'SuperLongSubLong', super=True) self.cf_suplong_subint = ColumnFamily(self.client, 'SuperLongSubInt', super=True) self.cf_suplong_subtime = ColumnFamily(self.client, 'SuperLongSubTime', super=True) self.cf_suplong_sublex = ColumnFamily(self.client, 'SuperLongSubLex', super=True) self.cf_suplong_subascii = ColumnFamily(self.client, 'SuperLongSubAscii', super=True) self.cf_suplong_subutf8 = ColumnFamily(self.client, 'SuperLongSubUTF8', super=True) self.cf_suplong_subbytes = ColumnFamily(self.client, 'SuperLongSubBytes', super=True) self.cf_valid_long = ColumnFamily(self.client, 'ValidatorLong') self.cf_valid_int = ColumnFamily(self.client, 'ValidatorInt') self.cf_valid_time = ColumnFamily(self.client, 'ValidatorTime') self.cf_valid_lex = ColumnFamily(self.client, 'ValidatorLex') self.cf_valid_ascii = ColumnFamily(self.client, 'ValidatorAscii') 
self.cf_valid_utf8 = ColumnFamily(self.client, 'ValidatorUTF8') self.cf_valid_bytes = ColumnFamily(self.client, 'ValidatorBytes') self.cf_def_valid = ColumnFamily(self.client, 'DefaultValidator') self.cfs = [ self.cf_long, self.cf_int, self.cf_time, self.cf_lex, self.cf_ascii, self.cf_utf8, self.cf_bytes, # self.cf_suplong, self.cf_supint, self.cf_suptime, self.cf_suplex, self.cf_supascii, self.cf_suputf8, self.cf_supbytes, # self.cf_suplong_subint, self.cf_suplong_subint, self.cf_suplong_subtime, self.cf_suplong_sublex, self.cf_suplong_subascii, self.cf_suplong_subutf8, self.cf_suplong_subbytes, # self.cf_valid_long, self.cf_valid_int, self.cf_valid_time, self.cf_valid_lex, self.cf_valid_ascii, self.cf_valid_utf8, self.cf_valid_bytes, # self.cf_def_valid, ] try: self.timestamp_n = int(self.cf.get('meta')['timestamp']) except NotFoundException: self.timestamp_n = 0 self.clear() def tearDown(self): self.cf.insert('meta', {'timestamp': str(self.timestamp_n)}) def timestamp(self): self.timestamp_n += 1 return self.timestamp_n def clear(self): for key, columns in self.cf.get_range(include_timestamp=True): for value, timestamp in columns.itervalues(): self.timestamp_n = max(self.timestamp_n, timestamp) self.cf.remove(key) for cf in self.cfs: for key, columns in cf.get_range(): cf.remove(key) def test_basic_inserts(self): long_col = {1111111111111111L: VALS[0]} int_col = {1: VALS[0]} time_col = {TIME1: VALS[0]} lex_col = {uuid.UUID(bytes='abc abc abc abcd'): VALS[0]} ascii_col = {'foo': VALS[0]} utf8_col = {u'\u0020': VALS[0]} bytes_col = {'bytes': VALS[0]} self.cf_long.insert(KEYS[0], long_col) self.cf_int.insert(KEYS[0], int_col) self.cf_time.insert(KEYS[0], time_col) self.cf_lex.insert(KEYS[0], lex_col) self.cf_ascii.insert(KEYS[0], ascii_col) self.cf_utf8.insert(KEYS[0], utf8_col) self.cf_bytes.insert(KEYS[0], bytes_col) assert self.cf_long.get(KEYS[0]) == long_col assert self.cf_int.get(KEYS[0]) == int_col assert self.cf_time.get(KEYS[0]) == time_col assert 
self.cf_lex.get(KEYS[0]) == lex_col assert self.cf_ascii.get(KEYS[0]) == ascii_col assert self.cf_utf8.get(KEYS[0]) == utf8_col assert self.cf_bytes.get(KEYS[0]) == bytes_col self.cf_suplong.insert(KEYS[0], {123L: bytes_col})
class TestColumnFamily: def setUp(self): credentials = {'username': '******', 'password': '******'} self.client = connect('Keyspace1', credentials=credentials) self.cf = ColumnFamily(self.client, 'Standard2', write_consistency_level=ConsistencyLevel.ONE, buffer_size=2, timestamp=self.timestamp, dict_class=TestDict) try: self.timestamp_n = int(self.cf.get('meta')['timestamp']) except NotFoundException: self.timestamp_n = 0 self.clear() def tearDown(self): self.cf.insert('meta', {'timestamp': str(self.timestamp_n)}) # Since the timestamp passed to Cassandra will be in the same second # with the default timestamp function, causing problems with removing # and inserting (Cassandra doesn't know which is later), we supply our own def timestamp(self): self.timestamp_n += 1 return self.timestamp_n def clear(self): for key, columns in self.cf.get_range(include_timestamp=True): for value, timestamp in columns.itervalues(): self.timestamp_n = max(self.timestamp_n, timestamp) self.cf.remove(key) def test_empty(self): key = 'TestColumnFamily.test_empty' assert_raises(NotFoundException, self.cf.get, key) assert len(self.cf.multiget([key])) == 0 for key, columns in self.cf.get_range(): assert len(columns) == 0 def test_insert_get(self): key = 'TestColumnFamily.test_insert_get' columns = {'1': 'val1', '2': 'val2'} assert_raises(NotFoundException, self.cf.get, key) self.cf.insert(key, columns) assert self.cf.get(key) == columns def test_insert_multiget(self): key1 = 'TestColumnFamily.test_insert_multiget1' columns1 = {'1': 'val1', '2': 'val2'} key2 = 'test_insert_multiget1' columns2 = {'3': 'val1', '4': 'val2'} missing_key = 'key3' self.cf.insert(key1, columns1) self.cf.insert(key2, columns2) rows = self.cf.multiget([key1, key2, missing_key]) assert len(rows) == 2 assert rows[key1] == columns1 assert rows[key2] == columns2 assert missing_key not in rows def test_insert_get_count(self): key = 'TestColumnFamily.test_insert_get_count' columns = {'1': 'val1', '2': 'val2'} 
self.cf.insert(key, columns) assert self.cf.get_count(key) == 2 assert_equal(self.cf.get_count(key, column_start='1'), 2) assert_equal(self.cf.get_count(key, column_finish='2'), 2) assert_equal(self.cf.get_count(key, column_start='1', column_finish='2'), 2) assert_equal(self.cf.get_count(key, column_start='1', column_finish='1'), 1) assert_equal(self.cf.get_count(key, columns=['1','2']), 2) assert_equal(self.cf.get_count(key, columns=['1']), 1) def test_insert_multiget_count(self): keys = ['TestColumnFamily.test_insert_multiget_count1', 'TestColumnFamily.test_insert_multiget_count2', 'TestColumnFamily.test_insert_multiget_count3'] columns = {'1': 'val1', '2': 'val2'} for key in keys: self.cf.insert(key, columns) result = self.cf.multiget_count(keys) assert_equal(result[keys[0]], 2) assert_equal(result[keys[1]], 2) assert_equal(result[keys[2]], 2) result = self.cf.multiget_count(keys, column_start='1') assert_equal(len(result), 3) assert_equal(result[keys[0]], 2) result = self.cf.multiget_count(keys, column_finish='2') assert_equal(len(result), 3) assert_equal(result[keys[0]], 2) result = self.cf.multiget_count(keys, column_start='1', column_finish='2') assert_equal(len(result), 3) assert_equal(result[keys[0]], 2) result = self.cf.multiget_count(keys, column_start='1', column_finish='1') assert_equal(len(result), 3) assert_equal(result[keys[0]], 1) result = self.cf.multiget_count(keys, columns=['1','2']) assert_equal(len(result), 3) assert_equal(result[keys[0]], 2) result = self.cf.multiget_count(keys, columns=['1']) assert_equal(len(result), 3) assert_equal(result[keys[0]], 1) def test_insert_get_range(self): keys = ['TestColumnFamily.test_insert_get_range%s' % i for i in xrange(5)] columns = {'1': 'val1', '2': 'val2'} for key in keys: self.cf.insert(key, columns) rows = list(self.cf.get_range(start=keys[0], finish=keys[-1])) assert len(rows) == len(keys) for i, (k, c) in enumerate(rows): assert k == keys[i] assert c == columns def test_get_range_batching(self): 
self.cf.truncate() keys = [] columns = {'c': 'v'} for i in range(100, 201): keys.append('key%d' % i) self.cf.insert('key%d' % i, columns) for i in range(201, 301): self.cf.insert('key%d' % i, columns) count = 0 for (k,v) in self.cf.get_range(row_count=100, buffer_size=10): assert_true(k in keys, 'key "%s" should be in keys' % k) count += 1 assert_equal(count, 100) count = 0 for (k,v) in self.cf.get_range(row_count=100, buffer_size=1000): assert_true(k in keys, 'key "%s" should be in keys' % k) count += 1 assert_equal(count, 100) count = 0 for (k,v) in self.cf.get_range(row_count=100, buffer_size=150): assert_true(k in keys, 'key "%s" should be in keys' % k) count += 1 assert_equal(count, 100) count = 0 for (k,v) in self.cf.get_range(row_count=100, buffer_size=7): assert_true(k in keys, 'key "%s" should be in keys' % k) count += 1 assert_equal(count, 100) count = 0 for (k,v) in self.cf.get_range(row_count=100, buffer_size=2): assert_true(k in keys, 'key "%s" should be in keys' % k) count += 1 assert_equal(count, 100) # Put the remaining keys in our list for i in range(201, 301): keys.append('key%d' % i) count = 0 for (k,v) in self.cf.get_range(row_count=10000, buffer_size=2): assert_true(k in keys, 'key "%s" should be in keys' % k) count += 1 assert_equal(count, 201) count = 0 for (k,v) in self.cf.get_range(row_count=10000, buffer_size=7): assert_true(k in keys, 'key "%s" should be in keys' % k) count += 1 assert_equal(count, 201) count = 0 for (k,v) in self.cf.get_range(row_count=10000, buffer_size=200): assert_true(k in keys, 'key "%s" should be in keys' % k) count += 1 assert_equal(count, 201) count = 0 for (k,v) in self.cf.get_range(row_count=10000, buffer_size=10000): assert_true(k in keys, 'key "%s" should be in keys' % k) count += 1 assert_equal(count, 201) # Don't give a row count count = 0 for (k,v) in self.cf.get_range(buffer_size=2): assert_true(k in keys, 'key "%s" should be in keys' % k) count += 1 assert_equal(count, 201) count = 0 for (k,v) in 
self.cf.get_range(buffer_size=77): assert_true(k in keys, 'key "%s" should be in keys' % k) count += 1 assert_equal(count, 201) count = 0 for (k,v) in self.cf.get_range(buffer_size=200): assert_true(k in keys, 'key "%s" should be in keys' % k) count += 1 assert_equal(count, 201) count = 0 for (k,v) in self.cf.get_range(buffer_size=10000): assert_true(k in keys, 'key "%s" should be in keys' % k) count += 1 assert_equal(count, 201) self.cf.truncate() def insert_insert_get_indexed_slices(self): indexed_cf = ColumnFamily(self.client, 'Indexed1') columns = {'birthdate': 1L} keys = [] for i in range(1,4): indexed_cf.insert('key%d' % i, columns) keys.append('key%d') expr = index.create_index_expression(column_name='birthdate', value=1L) clause = index.create_index_clause([expr]) count = 0 for key,cols in indexed_cf.get_indexed_slices(clause): assert cols == columns assert key in keys count += 1 assert_equal(count, 3) def test_get_indexed_slices_batching(self): indexed_cf = ColumnFamily(self.client, 'Indexed1') columns = {'birthdate': 1L} for i in range(200): indexed_cf.insert('key%d' % i, columns) expr = index.create_index_expression(column_name='birthdate', value=1L) clause = index.create_index_clause([expr], count=10) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=2)) assert_equal(len(result), 10) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=10)) assert_equal(len(result), 10) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=77)) assert_equal(len(result), 10) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=200)) assert_equal(len(result), 10) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=1000)) assert_equal(len(result), 10) clause = index.create_index_clause([expr], count=250) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=2)) assert_equal(len(result), 200) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=10)) assert_equal(len(result), 200) result = 
list(indexed_cf.get_indexed_slices(clause, buffer_size=77)) assert_equal(len(result), 200) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=200)) assert_equal(len(result), 200) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=1000)) assert_equal(len(result), 200) def test_remove(self): key = 'TestColumnFamily.test_remove' columns = {'1': 'val1', '2': 'val2'} self.cf.insert(key, columns) self.cf.remove(key, columns=['2']) del columns['2'] assert self.cf.get(key) == {'1': 'val1'} self.cf.remove(key) assert_raises(NotFoundException, self.cf.get, key) def test_dict_class(self): key = 'TestColumnFamily.test_dict_class' self.cf.insert(key, {'1': 'val1'}) assert isinstance(self.cf.get(key), TestDict)
class CassandraImporter: def __init__(self): parser = argparse.ArgumentParser(description='Process some integers.') parser.add_argument('-s', '--source', help='Generally the prod cassandra path, list of machines: \ localhost:9162 localhost:9163', nargs='*', required=True) parser.add_argument('-d', '--destination', help='Cassandra path where you need your data: \ localhost:9160 localhost:9161', nargs='*', required=True) parser.add_argument('-ks', '--keyspace', help='The keyspace: myks', required=True) parser.add_argument('-cf', '--column_family', help='The Column family: mycf', required=True) parser.add_argument('-k', '--key', help='A specific key to be imported', required=False) parser.add_argument('-c', '--count', help='Total count of keys to be imported', required=False) parser.add_argument('-a', '--all', action='store_true', help='Get all. Not recommended!', required=False) args = vars(parser.parse_args()) """Connection setting with cassandra The script is meant to sync data. So source and destination KS and CF shold be the same.""" try: source_pool = ConnectionPool(args["keyspace"], args["source"]) destination_pool = ConnectionPool(args["keyspace"], args["destination"]) self.source_cf = ColumnFamily(source_pool, args["column_family"]) self.source_cf.autopack_names = False self.source_cf.autopack_values = False self.source_cf.autopack_keys = False self.source_cf.default_validation_class = pycassa.types.UTF8Type() self.destination_cf = ColumnFamily(destination_pool, args["column_family"]) self.destination_cf.autopack_names = False self.destination_cf.autopack_values = False self.destination_cf.autopack_keys = False self.destination_cf.default_validation_class = pycassa.types.UTF8Type() except Exception as e: print "ERROR: The keyspace or the column family does not exist or request is timing out!" 
sys.exit() # Optional data self.count = args["count"] if self.count: self.count = int(self.count) self.key = args["key"] self.all = args["all"] def importData(self): data = dict() # Get columns for a key if self.key: column_data = self.source_cf.get(self.key) data[self.key] = column_data # Get last x keys and their columns elif self.count: counter = 0 error_count = 0 for value in self.source_cf.get_range(column_count=0, filter_empty=False): if(counter < self.count): try: column_data = self.source_cf.get(value[0], column_count=100) data[value[0]] = column_data counter += 1 except NotFoundException: #Ignore keys with empty columns pass except Exception: error_count += 1 if error_count > 10: # Write the read data self.insertData(data) print "ERROR: Remote cassandra is too slow to read, exiting after writing..." sys.exit() # Use this to throttle reads from cassandra time.sleep(0.2) else: break # Get All, Not recommended elif self.all: for value in self.source_cf.get_range(column_count=0, filter_empty=False): column_data = self.source_cf.get(value[0]) key = value[0] data[key] = column_data else: print "Please pass -c or -k or -a arguments!" return data def insertData(self, data): print "Writing " + str(len(data.keys())) + " keys" for key, value in data.iteritems(): self.destination_cf.insert(key, value) def run(self): self.update_progress(0) data = self.importData() self.update_progress(50) self.insertData(data) self.update_progress(100) print "Import complete!" def update_progress(self, progress): print '\r[{0}] {1}%'.format('#' * (progress / 10), progress)
class TestTimeUUIDs(unittest.TestCase):
    """Tests for automatic conversion of datetime objects and float
    timestamps to TimeUUIDs on a TIMEUUID-comparator column family."""

    def setUp(self):
        self.cf_time = ColumnFamily(pool, 'StdTimeUUID')

    def tearDown(self):
        self.cf_time.remove('key1')

    def test_datetime_to_uuid(self):
        """datetime slice bounds select the expected TimeUUID columns."""
        key = 'key1'
        # timeline[0] < time1 < timeline[1] < time2 < timeline[2]
        timeline = []
        timeline.append(datetime.now())
        time1 = uuid1()
        col1 = {time1: '0'}
        self.cf_time.insert(key, col1)
        time.sleep(1)
        timeline.append(datetime.now())
        time2 = uuid1()
        col2 = {time2: '1'}
        self.cf_time.insert(key, col2)
        time.sleep(1)
        timeline.append(datetime.now())

        cols = {time1: '0', time2: '1'}
        assert_equal(self.cf_time.get(key, column_start=timeline[0]), cols)
        assert_equal(self.cf_time.get(key, column_finish=timeline[2]), cols)
        # FIX: this assertion was duplicated verbatim in the original.
        assert_equal(self.cf_time.get(key,
                                      column_start=timeline[0],
                                      column_finish=timeline[2]), cols)
        assert_equal(self.cf_time.get(key,
                                      column_start=timeline[0],
                                      column_finish=timeline[1]), col1)
        assert_equal(self.cf_time.get(key,
                                      column_start=timeline[1],
                                      column_finish=timeline[2]), col2)

    def test_time_to_uuid(self):
        """Float (time.time()) slice bounds select the expected columns."""
        key = 'key1'
        # timeline[0] < time1 < timeline[1] < time2 < timeline[2]
        timeline = []
        timeline.append(time.time())
        time1 = uuid1()
        col1 = {time1: '0'}
        self.cf_time.insert(key, col1)
        time.sleep(0.1)
        timeline.append(time.time())
        time2 = uuid1()
        col2 = {time2: '1'}
        self.cf_time.insert(key, col2)
        time.sleep(0.1)
        timeline.append(time.time())

        cols = {time1: '0', time2: '1'}
        assert_equal(self.cf_time.get(key, column_start=timeline[0]), cols)
        assert_equal(self.cf_time.get(key, column_finish=timeline[2]), cols)
        # FIX: this assertion was duplicated verbatim in the original.
        assert_equal(self.cf_time.get(key,
                                      column_start=timeline[0],
                                      column_finish=timeline[2]), cols)
        assert_equal(self.cf_time.get(key,
                                      column_start=timeline[0],
                                      column_finish=timeline[1]), col1)
        assert_equal(self.cf_time.get(key,
                                      column_start=timeline[1],
                                      column_finish=timeline[2]), col2)

    def test_auto_time_to_uuid1(self):
        """A float timestamp used as a column name is stored as a TimeUUID
        carrying (approximately) that timestamp."""
        key = 'key1'
        t = time.time()
        col = {t: 'foo'}
        self.cf_time.insert(key, col)
        uuid_res = self.cf_time.get(key).keys()[0]
        timestamp = convert_uuid_to_time(uuid_res)
        # Sub-millisecond agreement is all the UUID encoding guarantees.
        assert_almost_equal(timestamp, t, places=3)
def test_validated_columns(self): sys = SystemManager() sys.create_column_family(TEST_KS, 'Validators',) sys.alter_column(TEST_KS, 'Validators', 'long', LONG_TYPE) sys.alter_column(TEST_KS, 'Validators', 'int', INT_TYPE) sys.alter_column(TEST_KS, 'Validators', 'time', TIME_UUID_TYPE) sys.alter_column(TEST_KS, 'Validators', 'lex', LEXICAL_UUID_TYPE) sys.alter_column(TEST_KS, 'Validators', 'ascii', ASCII_TYPE) sys.alter_column(TEST_KS, 'Validators', 'utf8', UTF8_TYPE) sys.alter_column(TEST_KS, 'Validators', 'bytes', BYTES_TYPE) sys.close() cf = ColumnFamily(pool, 'Validators') key = 'key1' col = {'long':1L} cf.insert(key, col) assert_equal(cf.get(key)['long'], 1L) col = {'int':1} cf.insert(key, col) assert_equal(cf.get(key)['int'], 1) col = {'time':TIME1} cf.insert(key, col) assert_equal(cf.get(key)['time'], TIME1) col = {'lex':uuid.UUID(bytes='aaa aaa aaa aaaa')} cf.insert(key, col) assert_equal(cf.get(key)['lex'], uuid.UUID(bytes='aaa aaa aaa aaaa')) col = {'ascii':'aaa'} cf.insert(key, col) assert_equal(cf.get(key)['ascii'], 'aaa') col = {'utf8':u'a\u0020'} cf.insert(key, col) assert_equal(cf.get(key)['utf8'], u'a\u0020') col = {'bytes':'aaa'} cf.insert(key, col) assert_equal(cf.get(key)['bytes'], 'aaa') cf.remove(key)