class TestSuperColumnFamily:
    """Insert/get round-trip tests for nested columns in 'Super2'."""

    def setUp(self):
        """Open a pool on 'Keyspace1' and bind the 'Super2' column family."""
        credentials = {'username': '******', 'password': '******'}
        self.pool = ConnectionPool(keyspace='Keyspace1',
                                   credentials=credentials)
        self.cf = ColumnFamily(self.pool, 'Super2')

    def tearDown(self):
        """Remove every row, then release the pool created in setUp."""
        try:
            for key, columns in self.cf.get_range():
                self.cf.remove(key)
        finally:
            # setUp builds a fresh pool for every test; dispose of it so
            # connections do not pile up over the test run.
            self.pool.dispose()

    def test_super(self):
        """A missing key raises; an inserted row reads back unchanged."""
        key = 'TestSuperColumnFamily.test_super'
        columns = {'1': {'sub1': 'val1', 'sub2': 'val2'},
                   '2': {'sub3': 'val3', 'sub4': 'val4'}}
        assert_raises(NotFoundException, self.cf.get, key)
        self.cf.insert(key, columns)
        assert_equal(self.cf.get(key), columns)
        assert_equal(self.cf.multiget([key]), {key: columns})
        assert_equal(list(self.cf.get_range(start=key, finish=key)),
                     [(key, columns)])

    def test_super_column_argument(self):
        """Reads restricted by ``super_column`` return only its subcolumns."""
        key = 'TestSuperColumnFamily.test_super_columns_argument'
        sub12 = {'sub1': 'val1', 'sub2': 'val2'}
        sub34 = {'sub3': 'val3', 'sub4': 'val4'}
        columns = {'1': sub12, '2': sub34}
        self.cf.insert(key, columns)
        assert_equal(self.cf.get(key, super_column='1'), sub12)
        # Super column '3' was never written.
        assert_raises(NotFoundException, self.cf.get, key, super_column='3')
        assert_equal(self.cf.multiget([key], super_column='1'), {key: sub12})
        assert_equal(
            list(self.cf.get_range(start=key, finish=key, super_column='1')),
            [(key, sub12)])
class TestSuperColumnFamily:
    """Insert/get round-trip tests for the 'Super2' super column family."""

    def setUp(self):
        """Connect, bind 'Super2', restore the timestamp counter and clear."""
        self.client = connect_thread_local(
            'Keyspace1',
            credentials={'username': '******', 'password': '******'})
        self.cf = ColumnFamily(
            self.client, 'Super2',
            write_consistency_level=ConsistencyLevel.ONE,
            buffer_size=2,
            timestamp=self.timestamp,
            super=True)
        try:
            stored = self.cf.get('meta')['meta']['timestamp']
            self.timestamp_n = int(stored)
        except NotFoundException:
            self.timestamp_n = 0
        self.clear()

    def tearDown(self):
        """Save the counter in the 'meta' row, where setUp reads it back."""
        counter = str(self.timestamp_n)
        self.cf.insert('meta', {'meta': {'timestamp': counter}})

    def timestamp(self):
        """Return the next value of a strictly increasing counter.

        The default timestamp function can hand Cassandra equal timestamps
        for a remove and an insert issued within the same second, leaving
        it unable to tell which came later, so the tests supply their own.
        """
        self.timestamp_n = self.timestamp_n + 1
        return self.timestamp_n

    def clear(self):
        """Remove every row, first raising the counter to any stored stamp."""
        for row_key, super_cols in self.cf.get_range(include_timestamp=True):
            for sub_cols in super_cols.itervalues():
                for _, written_at in sub_cols.itervalues():
                    if written_at > self.timestamp_n:
                        self.timestamp_n = written_at
            self.cf.remove(row_key)

    def test_super(self):
        """A missing key raises; an inserted row reads back unchanged."""
        row_key = 'TestSuperColumnFamily.test_super'
        expected = {
            '1': {'sub1': 'val1', 'sub2': 'val2'},
            '2': {'sub3': 'val3', 'sub4': 'val4'},
        }
        assert_raises(NotFoundException, self.cf.get, row_key)
        self.cf.insert(row_key, expected)
        fetched = self.cf.get(row_key)
        assert fetched == expected
        batch = self.cf.multiget([row_key])
        assert batch == {row_key: expected}
        ranged = list(self.cf.get_range(start=row_key, finish=row_key))
        assert ranged == [(row_key, expected)]

    def test_super_column_argument(self):
        """Reads restricted by ``super_column`` return only its subcolumns."""
        row_key = 'TestSuperColumnFamily.test_super_columns_argument'
        sub12 = {'sub1': 'val1', 'sub2': 'val2'}
        sub34 = {'sub3': 'val3', 'sub4': 'val4'}
        self.cf.insert(row_key, {'1': sub12, '2': sub34})
        fetched = self.cf.get(row_key, super_column='1')
        assert fetched == sub12
        # Super column '3' was never written.
        assert_raises(NotFoundException, self.cf.get, row_key,
                      super_column='3')
        batch = self.cf.multiget([row_key], super_column='1')
        assert batch == {row_key: sub12}
        ranged = list(self.cf.get_range(start=row_key, finish=row_key,
                                        super_column='1'))
        assert ranged == [(row_key, sub12)]
class TestSuperColumnFamily:
    """Insert/get round-trip tests for the 'Super2' super column family."""

    def setUp(self):
        """Log in, bind 'Super2', restore the timestamp counter and clear."""
        self.client = connect_thread_local()
        login_credentials = {'username': '******', 'password': '******'}
        self.client.login('Keyspace1', login_credentials)
        self.cf = ColumnFamily(
            self.client, 'Keyspace1', 'Super2',
            write_consistency_level=ConsistencyLevel.ONE,
            buffer_size=2,
            timestamp=self.timestamp,
            super=True)
        try:
            stored = self.cf.get('meta')['meta']['timestamp']
            self.timestamp_n = int(stored)
        except NotFoundException:
            self.timestamp_n = 0
        self.clear()

    def tearDown(self):
        """Save the counter in the 'meta' row, where setUp reads it back."""
        counter = str(self.timestamp_n)
        self.cf.insert('meta', {'meta': {'timestamp': counter}})

    def timestamp(self):
        """Return the next value of a strictly increasing counter.

        The default timestamp function can hand Cassandra equal timestamps
        for a remove and an insert issued within the same second, leaving
        it unable to tell which came later, so the tests supply their own.
        """
        self.timestamp_n = self.timestamp_n + 1
        return self.timestamp_n

    def clear(self):
        """Remove every row, first raising the counter to any stored stamp."""
        for row_key, super_cols in self.cf.get_range(include_timestamp=True):
            for sub_cols in super_cols.itervalues():
                for _, written_at in sub_cols.itervalues():
                    if written_at > self.timestamp_n:
                        self.timestamp_n = written_at
            self.cf.remove(row_key)

    def test_super(self):
        """A missing key raises; an inserted row reads back unchanged."""
        row_key = 'TestSuperColumnFamily.test_super'
        expected = {
            '1': {'sub1': 'val1', 'sub2': 'val2'},
            '2': {'sub3': 'val3', 'sub4': 'val4'},
        }
        assert_raises(NotFoundException, self.cf.get, row_key)
        self.cf.insert(row_key, expected)
        fetched = self.cf.get(row_key)
        assert fetched == expected
        batch = self.cf.multiget([row_key])
        assert batch == {row_key: expected}
        ranged = list(self.cf.get_range(start=row_key, finish=row_key))
        assert ranged == [(row_key, expected)]

    def test_super_column_argument(self):
        """Reads restricted by ``super_column`` return only its subcolumns."""
        row_key = 'TestSuperColumnFamily.test_super_columns_argument'
        sub12 = {'sub1': 'val1', 'sub2': 'val2'}
        sub34 = {'sub3': 'val3', 'sub4': 'val4'}
        self.cf.insert(row_key, {'1': sub12, '2': sub34})
        fetched = self.cf.get(row_key, super_column='1')
        assert fetched == sub12
        # Super column '3' was never written.
        assert_raises(NotFoundException, self.cf.get, row_key,
                      super_column='3')
        batch = self.cf.multiget([row_key], super_column='1')
        assert batch == {row_key: sub12}
        ranged = list(self.cf.get_range(start=row_key, finish=row_key,
                                        super_column='1'))
        assert ranged == [(row_key, sub12)]
class TestSuperColumnFamilyMap:
    """Round-trip tests for ColumnFamilyMap over the 'Super2' column family."""

    def setUp(self):
        """Open a pool, bind 'Super2' and map it onto TestUTF8 instances."""
        credentials = {'username': '******', 'password': '******'}
        self.pool = ConnectionPool(keyspace='Keyspace1',
                                   credentials=credentials)
        self.cf = ColumnFamily(self.pool, 'Super2')
        self.map = ColumnFamilyMap(TestUTF8, self.cf)

    def tearDown(self):
        """Remove every row, then release the pool created in setUp."""
        try:
            for key, columns in self.cf.get_range():
                self.cf.remove(key)
        finally:
            # setUp builds a fresh pool for every test; dispose of it so
            # connections do not pile up over the test run.
            self.pool.dispose()

    def instance(self, key, super_column):
        """Return a TestUTF8 populated with fixed sample values.

        The datetime attributes are truncated to whole seconds.
        """
        instance = TestUTF8()
        instance.key = key
        instance.super_column = super_column
        instance.strcol = '1'
        instance.intcol = 2
        instance.floatcol = 3.5
        instance.datetimecol = datetime.now().replace(microsecond=0)
        instance.intstrcol = 8
        instance.floatstrcol = 4.6
        instance.datetimestrcol = datetime.now().replace(microsecond=0)
        return instance

    def test_super(self):
        """An inserted instance reads back equal via get/multiget/get_range."""
        instance = self.instance('TestSuperColumnFamilyMap.test_super',
                                 'super1')
        assert_raises(NotFoundException, self.map.get, instance.key)
        self.map.insert(instance)
        res = self.map.get(instance.key)[instance.super_column]
        assert_equal(res, instance)
        assert_equal(
            self.map.multiget([instance.key])[instance.key][instance.super_column],
            instance)
        assert_equal(
            list(self.map.get_range(start=instance.key, finish=instance.key)),
            [{instance.super_column: instance}])
class TestBigInt(unittest.TestCase):
    """Check negative integer column names in the 'StdInteger' column family."""

    @classmethod
    def setup_class(cls):
        """Create 'StdInteger' with an integer comparator."""
        # Named `manager` rather than `sys` to avoid shadowing the module name.
        manager = SystemManager()
        try:
            manager.create_column_family(TEST_KS, 'StdInteger',
                                         comparator_type=INT_TYPE)
        finally:
            manager.close()

    @classmethod
    def teardown_class(cls):
        """Drop the 'StdInteger' column family."""
        manager = SystemManager()
        try:
            manager.drop_column_family(TEST_KS, 'StdInteger')
        finally:
            manager.close()

    def setUp(self):
        """Bind 'StdInteger' on the shared pool and pick the row key."""
        self.key = 'TestBigInt'
        self.cf = ColumnFamily(pool, 'StdInteger')

    def tearDown(self):
        """Remove the row written by the test."""
        self.cf.remove(self.key)

    def test_negative_integers(self):
        """Each negative column name reads back matching its string value."""
        for name in (-1, -12342390, -255, -256, -257):
            self.cf.insert(self.key, {name: str(name)})
        for key, cols in self.cf.get_range():
            # Check every column, not only the first one returned.
            for name, value in cols.items():
                self.assertEqual(str(name), value)
class TestDefaultValidators(unittest.TestCase): def setUp(self): credentials = {"username": "******", "password": "******"} self.pool = ConnectionPool(pool_size=5, keyspace="Keyspace1", credentials=credentials) self.cf_def_valid = ColumnFamily(self.pool, "DefaultValidator") def tearDown(self): for key, cols in self.cf_def_valid.get_range(): self.cf_def_valid.remove(key) self.pool.dispose() def test_default_validated_columns(self): key = "key1" col_cf = {"aaaaaa": 1L} col_cm = {"subcol": TIME1} col_ncf = {"aaaaaa": TIME1} col_ncm = {"subcol": 1L} # Both of these inserts work, as cf allows # longs and cm for 'subcol' allows TIMEUUIDs. self.cf_def_valid.insert(key, col_cf) self.cf_def_valid.insert(key, col_cm) assert self.cf_def_valid.get(key) == {"aaaaaa": 1L, "subcol": TIME1} assert_raises(TypeError, self.cf_def_valid.insert, key, col_ncf) assert_raises(TypeError, self.cf_def_valid.insert, key, col_ncm)
class TestBigInt(unittest.TestCase):
    """Check negative integer column names in the 'StdInteger' column family."""

    @classmethod
    def setup_class(cls):
        """Create 'StdInteger' with an integer comparator."""
        # Named `manager` rather than `sys` to avoid shadowing the module name.
        manager = SystemManager()
        try:
            manager.create_column_family(TEST_KS, 'StdInteger',
                                         comparator_type=INT_TYPE)
        finally:
            manager.close()

    @classmethod
    def teardown_class(cls):
        """Drop the 'StdInteger' column family."""
        manager = SystemManager()
        try:
            manager.drop_column_family(TEST_KS, 'StdInteger')
        finally:
            manager.close()

    def setUp(self):
        """Bind 'StdInteger' on the shared pool and pick the row key."""
        self.key = 'TestBigInt'
        self.cf = ColumnFamily(pool, 'StdInteger')

    def tearDown(self):
        """Remove the row written by the test."""
        self.cf.remove(self.key)

    def test_negative_integers(self):
        """Each negative column name reads back matching its string value."""
        for name in (-1, -12342390, -255, -256, -257):
            self.cf.insert(self.key, {name: str(name)})
        for key, cols in self.cf.get_range():
            # Check every column, not only the first one returned.
            for name, value in cols.items():
                self.assertEqual(str(name), value)
class TestSuperColumnFamilyMap:
    """Round-trip tests for ColumnFamilyMap over the 'Super2' column family."""

    def setUp(self):
        """Connect, bind and map 'Super2', restore the counter and clear."""
        self.client = connect_thread_local(
            'Keyspace1',
            credentials={'username': '******', 'password': '******'})
        self.cf = ColumnFamily(
            self.client, 'Super2',
            write_consistency_level=ConsistencyLevel.ONE,
            timestamp=self.timestamp,
            super=True)
        self.map = ColumnFamilyMap(TestUTF8, self.cf)
        try:
            stored = self.cf.get('meta')['meta']['timestamp']
            self.timestamp_n = int(stored)
        except NotFoundException:
            self.timestamp_n = 0
        self.clear()

    def tearDown(self):
        """Save the counter in the 'meta' row, where setUp reads it back."""
        counter = str(self.timestamp_n)
        self.cf.insert('meta', {'meta': {'timestamp': counter}})

    def timestamp(self):
        """Return the next value of a strictly increasing counter.

        The default timestamp function can hand Cassandra equal timestamps
        for a remove and an insert issued within the same second, leaving
        it unable to tell which came later, so the tests supply their own.
        """
        self.timestamp_n = self.timestamp_n + 1
        return self.timestamp_n

    def clear(self):
        """Remove every row, first raising the counter to any stored stamp."""
        for row_key, super_cols in self.cf.get_range(include_timestamp=True):
            for sub_cols in super_cols.itervalues():
                for _, written_at in sub_cols.itervalues():
                    if written_at > self.timestamp_n:
                        self.timestamp_n = written_at
            self.cf.remove(row_key)

    def instance(self, key, super_column):
        """Return a TestUTF8 populated with fixed sample values.

        The datetime attributes are truncated to whole seconds.
        """
        obj = TestUTF8()
        obj.key, obj.super_column = key, super_column
        obj.strcol = '1'
        obj.intcol = 2
        obj.floatcol = 3.5
        obj.datetimecol = datetime.now().replace(microsecond=0)
        obj.intstrcol = 8
        obj.floatstrcol = 4.6
        obj.datetimestrcol = datetime.now().replace(microsecond=0)
        return obj

    def test_super(self):
        """An inserted instance reads back equal via get/multiget/get_range."""
        sample = self.instance('TestSuperColumnFamilyMap.test_super',
                               'super1')
        assert_raises(NotFoundException, self.map.get, sample.key)
        self.map.insert(sample)
        fetched = self.map.get(sample.key)
        assert fetched[sample.super_column] == sample
        batch = self.map.multiget([sample.key])
        assert batch[sample.key][sample.super_column] == sample
        ranged = list(self.map.get_range(start=sample.key,
                                         finish=sample.key))
        assert ranged == [{sample.super_column: sample}]
class TestSuperColumnFamilyMap:
    """Round-trip tests for ColumnFamilyMap over the 'Super2' column family."""

    def setUp(self):
        """Log in, bind and map 'Super2', restore the counter and clear."""
        self.client = connect_thread_local()
        login_credentials = {'username': '******', 'password': '******'}
        self.client.login('Keyspace1', login_credentials)
        self.cf = ColumnFamily(
            self.client, 'Keyspace1', 'Super2',
            write_consistency_level=ConsistencyLevel.ONE,
            timestamp=self.timestamp,
            super=True)
        self.map = ColumnFamilyMap(TestUTF8, self.cf)
        try:
            stored = self.cf.get('meta')['meta']['timestamp']
            self.timestamp_n = int(stored)
        except NotFoundException:
            self.timestamp_n = 0
        self.clear()

    def tearDown(self):
        """Save the counter in the 'meta' row, where setUp reads it back."""
        counter = str(self.timestamp_n)
        self.cf.insert('meta', {'meta': {'timestamp': counter}})

    def timestamp(self):
        """Return the next value of a strictly increasing counter.

        The default timestamp function can hand Cassandra equal timestamps
        for a remove and an insert issued within the same second, leaving
        it unable to tell which came later, so the tests supply their own.
        """
        self.timestamp_n = self.timestamp_n + 1
        return self.timestamp_n

    def clear(self):
        """Remove every row, first raising the counter to any stored stamp."""
        for row_key, super_cols in self.cf.get_range(include_timestamp=True):
            for sub_cols in super_cols.itervalues():
                for _, written_at in sub_cols.itervalues():
                    if written_at > self.timestamp_n:
                        self.timestamp_n = written_at
            self.cf.remove(row_key)

    def instance(self, key, super_column):
        """Return a TestUTF8 populated with fixed sample values.

        The datetime attributes are truncated to whole seconds.
        """
        obj = TestUTF8()
        obj.key, obj.super_column = key, super_column
        obj.strcol = '1'
        obj.intcol = 2
        obj.floatcol = 3.5
        obj.datetimecol = datetime.now().replace(microsecond=0)
        obj.intstrcol = 8
        obj.floatstrcol = 4.6
        obj.datetimestrcol = datetime.now().replace(microsecond=0)
        return obj

    def test_super(self):
        """An inserted instance reads back equal via get/multiget/get_range."""
        sample = self.instance('TestSuperColumnFamilyMap.test_super',
                               'super1')
        assert_raises(NotFoundException, self.map.get, sample.key)
        self.map.insert(sample)
        fetched = self.map.get(sample.key)
        assert fetched[sample.super_column] == sample
        batch = self.map.multiget([sample.key])
        assert batch[sample.key][sample.super_column] == sample
        ranged = list(self.map.get_range(start=sample.key,
                                         finish=sample.key))
        assert ranged == [{sample.super_column: sample}]
def exportdata(self, destination=None):
    '''
    Export every non-system keyspace to pickle files under ``output/``.

    Each column family is read with ``get_range(column_reversed=True)`` and
    written in chunks of at most 100000 rows. A chunk is a dict with the
    keys ``keyspace``, ``columnfamily`` and ``cols`` (the list of rows),
    pickled with protocol 0 into ``output/<uuid1 hex>.out``.

    destination -- accepted for interface compatibility; not used.
    '''
    chunk_size = 100000

    def write_chunk(chunk):
        # One uniquely named file per chunk. Binary mode is what pickle
        # expects, and `with` closes the handle even if dump() raises.
        filename = "%s.out" % str(uuid1()).replace("-", "")
        filepath = "output/%s" % filename
        with open(filepath, "wb") as outfile:
            pickle.dump(chunk, outfile, protocol=0)

    logging.info("inside export data method")
    for keyspace in self.sysmanager.list_keyspaces():
        if keyspace == 'system':
            # skip the system database
            continue
        pool = ConnectionPool(keyspace, [self.address])
        try:
            columnfamilies = self.sysmanager.get_keyspace_column_families(keyspace)
            # iterate through all the column families
            for columnfamilyname in columnfamilies.keys():
                colfamily = ColumnFamily(pool, columnfamilyname)
                result = {'keyspace': keyspace,
                          'columnfamily': columnfamilyname,
                          'cols': []}
                for col in colfamily.get_range(column_reversed=True):
                    result['cols'].append(col)
                    # Count rows per column family (not across them) so
                    # chunk sizes stay fixed and empty column families do
                    # not produce empty files.
                    if len(result['cols']) == chunk_size:
                        write_chunk(result)
                        # Shallow size of the dict object, in bytes.
                        print(sys.getsizeof(result))
                        result['cols'] = []
                # Flush whatever is left of this column family.
                if result['cols']:
                    write_chunk(result)
        finally:
            # One pool is opened per keyspace; release its connections.
            pool.dispose()
class TestAutoPacking: def setUp(self): credentials = {'username': '******', 'password': '******'} self.client = connect_thread_local('Keyspace1', credentials=credentials) self.cf = ColumnFamily(self.client, 'Standard2') self.cf_long = ColumnFamily(self.client, 'StdLong') self.cf_int = ColumnFamily(self.client, 'StdInteger') self.cf_time = ColumnFamily(self.client, 'StdTimeUUID') self.cf_lex = ColumnFamily(self.client, 'StdLexicalUUID') self.cf_ascii = ColumnFamily(self.client, 'StdAscii') self.cf_utf8 = ColumnFamily(self.client, 'StdUTF8') self.cf_bytes = ColumnFamily(self.client, 'StdBytes') self.cf_suplong = ColumnFamily(self.client, 'SuperLong', super=True) self.cf_supint = ColumnFamily(self.client, 'SuperInt', super=True) self.cf_suptime = ColumnFamily(self.client, 'SuperTime', super=True) self.cf_suplex = ColumnFamily(self.client, 'SuperLex', super=True) self.cf_supascii = ColumnFamily(self.client, 'SuperAscii', super=True) self.cf_suputf8 = ColumnFamily(self.client, 'SuperUTF8', super=True) self.cf_supbytes = ColumnFamily(self.client, 'SuperBytes', super=True) self.cf_suplong_sublong = ColumnFamily(self.client, 'SuperLongSubLong', super=True) self.cf_suplong_subint = ColumnFamily(self.client, 'SuperLongSubInt', super=True) self.cf_suplong_subtime = ColumnFamily(self.client, 'SuperLongSubTime', super=True) self.cf_suplong_sublex = ColumnFamily(self.client, 'SuperLongSubLex', super=True) self.cf_suplong_subascii = ColumnFamily(self.client, 'SuperLongSubAscii', super=True) self.cf_suplong_subutf8 = ColumnFamily(self.client, 'SuperLongSubUTF8', super=True) self.cf_suplong_subbytes = ColumnFamily(self.client, 'SuperLongSubBytes', super=True) self.cf_valid_long = ColumnFamily(self.client, 'ValidatorLong') self.cf_valid_int = ColumnFamily(self.client, 'ValidatorInt') self.cf_valid_time = ColumnFamily(self.client, 'ValidatorTime') self.cf_valid_lex = ColumnFamily(self.client, 'ValidatorLex') self.cf_valid_ascii = ColumnFamily(self.client, 'ValidatorAscii') 
self.cf_valid_utf8 = ColumnFamily(self.client, 'ValidatorUTF8') self.cf_valid_bytes = ColumnFamily(self.client, 'ValidatorBytes') self.cf_def_valid = ColumnFamily(self.client, 'DefaultValidator') self.cfs = [ self.cf_long, self.cf_int, self.cf_time, self.cf_lex, self.cf_ascii, self.cf_utf8, self.cf_bytes, # self.cf_suplong, self.cf_supint, self.cf_suptime, self.cf_suplex, self.cf_supascii, self.cf_suputf8, self.cf_supbytes, # self.cf_suplong_subint, self.cf_suplong_subint, self.cf_suplong_subtime, self.cf_suplong_sublex, self.cf_suplong_subascii, self.cf_suplong_subutf8, self.cf_suplong_subbytes, # self.cf_valid_long, self.cf_valid_int, self.cf_valid_time, self.cf_valid_lex, self.cf_valid_ascii, self.cf_valid_utf8, self.cf_valid_bytes, # self.cf_def_valid, ] try: self.timestamp_n = int(self.cf.get('meta')['timestamp']) except NotFoundException: self.timestamp_n = 0 self.clear() def tearDown(self): self.cf.insert('meta', {'timestamp': str(self.timestamp_n)}) def timestamp(self): self.timestamp_n += 1 return self.timestamp_n def clear(self): for key, columns in self.cf.get_range(include_timestamp=True): for value, timestamp in columns.itervalues(): self.timestamp_n = max(self.timestamp_n, timestamp) self.cf.remove(key) for cf in self.cfs: for key, columns in cf.get_range(): cf.remove(key) def test_basic_inserts(self): long_col = {1111111111111111L: VALS[0]} int_col = {1: VALS[0]} time_col = {TIME1: VALS[0]} lex_col = {uuid.UUID(bytes='abc abc abc abcd'): VALS[0]} ascii_col = {'foo': VALS[0]} utf8_col = {u'\u0020': VALS[0]} bytes_col = {'bytes': VALS[0]} self.cf_long.insert(KEYS[0], long_col) self.cf_int.insert(KEYS[0], int_col) self.cf_time.insert(KEYS[0], time_col) self.cf_lex.insert(KEYS[0], lex_col) self.cf_ascii.insert(KEYS[0], ascii_col) self.cf_utf8.insert(KEYS[0], utf8_col) self.cf_bytes.insert(KEYS[0], bytes_col) assert self.cf_long.get(KEYS[0]) == long_col assert self.cf_int.get(KEYS[0]) == int_col assert self.cf_time.get(KEYS[0]) == time_col assert 
self.cf_lex.get(KEYS[0]) == lex_col assert self.cf_ascii.get(KEYS[0]) == ascii_col assert self.cf_utf8.get(KEYS[0]) == utf8_col assert self.cf_bytes.get(KEYS[0]) == bytes_col self.cf_suplong.insert(KEYS[0], {123L: bytes_col})
def tearDown(self):
    """Delete every row of 'Standard1' in 'PycassaTestKeyspace'."""
    pool = ConnectionPool('PycassaTestKeyspace')
    try:
        cf = ColumnFamily(pool, 'Standard1')
        for key, cols in cf.get_range():
            cf.remove(key)
    finally:
        # The pool exists only for this cleanup; release its connections
        # rather than leaving one open pool behind per test.
        pool.dispose()
def db_export(self):
    """Dump Cassandra and ZooKeeper contents to ``self._args.export_to``.

    The output file holds one JSON object with two keys:

    * ``cassandra`` maps keyspace name -> column family -> row key ->
      columns, read with ``include_timestamp=True``. It covers every name
      in KEYSPACES that is not in ``omit_keyspaces`` and exists on the
      server.
    * ``zookeeper`` is a JSON-encoded string of ``(path, zk.get(path))``
      pairs for the childless nodes found under ``<cluster_id>/``.
    """
    db_contents = {'cassandra': {}, 'zookeeper': {}}

    # ---- Cassandra ----
    cassandra_contents = db_contents['cassandra']
    creds = None
    if self._api_args.cassandra_user and self._api_args.cassandra_password:
        creds = {'username': self._api_args.cassandra_user,
                 'password': self._api_args.cassandra_password}
    socket_factory = default_socket_factory
    if ('cassandra_use_ssl' in self._api_args and
            self._api_args.cassandra_use_ssl):
        socket_factory = self._make_ssl_socket_factory(
            self._api_args.cassandra_ca_certs, validate=False)
    sys_mgr = SystemManager(
        self._api_args.cassandra_server_list[0],
        credentials=creds,
        socket_factory=socket_factory)
    try:
        existing_keyspaces = sys_mgr.list_keyspaces()
        for ks_name in set(KEYSPACES) - set(self._args.omit_keyspaces or []):
            # Keyspaces are prefixed with the cluster id on the server when
            # one is configured; the dump is keyed by the bare name.
            if self._api_args.cluster_id:
                full_ks_name = '%s_%s' % (self._api_args.cluster_id, ks_name)
            else:
                full_ks_name = ks_name
            if full_ks_name not in existing_keyspaces:
                continue
            cassandra_contents[ks_name] = {}

            pool = ConnectionPool(
                full_ks_name, self._api_args.cassandra_server_list,
                pool_timeout=120, max_retries=-1, timeout=5,
                socket_factory=socket_factory, credentials=creds)
            try:
                for cf_name in sys_mgr.get_keyspace_column_families(full_ks_name):
                    cassandra_contents[ks_name][cf_name] = {}
                    cf = ColumnFamily(pool, cf_name,
                                      buffer_size=self._args.buffer_size)
                    for r, c in cf.get_range(column_count=10000000,
                                             include_timestamp=True):
                        cassandra_contents[ks_name][cf_name][r] = c
            finally:
                # One pool per keyspace: release it before the next one.
                pool.dispose()
    finally:
        sys_mgr.close()
    logger.info("Cassandra DB dumped")

    # ---- ZooKeeper ----
    def get_nodes(path):
        """Return (path, zk.get(path)) for each childless node under path."""
        if path[:-1].rpartition('/')[-1] in self._zk_ignore_list:
            return []
        try:
            # Fetch the children once, so the emptiness check and the
            # recursion below use the same listing under the same guard.
            children = zk.get_children(path)
            if not children:
                return [(path, zk.get(path))]
        except kazoo.exceptions.NoNodeError:
            return []
        nodes = []
        for child in children:
            nodes.extend(get_nodes('%s%s/' % (path, child)))
        return nodes

    zk = kazoo.client.KazooClient(self._api_args.zk_server_ip)
    zk.start()
    try:
        nodes = get_nodes(self._api_args.cluster_id + '/')
    finally:
        # Stop the client even if the walk raises.
        zk.stop()
    db_contents['zookeeper'] = json.dumps(nodes)
    logger.info("Zookeeper DB dumped")

    # ---- Output ----
    with open(self._args.export_to, 'w') as f:
        f.write(json.dumps(db_contents))
    logger.info("DB dump wrote to file %s" % self._args.export_to)
class TestAutoPacking: def setUp(self): credentials = {'username': '******', 'password': '******'} self.client = connect_thread_local('Keyspace1', credentials=credentials) self.cf = ColumnFamily(self.client, 'Standard2') self.cf_long = ColumnFamily(self.client, 'StdLong') self.cf_int = ColumnFamily(self.client, 'StdInteger') self.cf_time = ColumnFamily(self.client, 'StdTimeUUID') self.cf_lex = ColumnFamily(self.client, 'StdLexicalUUID') self.cf_ascii = ColumnFamily(self.client, 'StdAscii') self.cf_utf8 = ColumnFamily(self.client, 'StdUTF8') self.cf_bytes = ColumnFamily(self.client, 'StdBytes') self.cf_suplong = ColumnFamily(self.client, 'SuperLong', super=True) self.cf_supint = ColumnFamily(self.client, 'SuperInt', super=True) self.cf_suptime = ColumnFamily(self.client, 'SuperTime', super=True) self.cf_suplex = ColumnFamily(self.client, 'SuperLex', super=True) self.cf_supascii = ColumnFamily(self.client, 'SuperAscii', super=True) self.cf_suputf8 = ColumnFamily(self.client, 'SuperUTF8', super=True) self.cf_supbytes = ColumnFamily(self.client, 'SuperBytes', super=True) self.cf_suplong_sublong = ColumnFamily(self.client, 'SuperLongSubLong', super=True) self.cf_suplong_subint = ColumnFamily(self.client, 'SuperLongSubInt', super=True) self.cf_suplong_subtime = ColumnFamily(self.client, 'SuperLongSubTime', super=True) self.cf_suplong_sublex = ColumnFamily(self.client, 'SuperLongSubLex', super=True) self.cf_suplong_subascii = ColumnFamily(self.client, 'SuperLongSubAscii', super=True) self.cf_suplong_subutf8 = ColumnFamily(self.client, 'SuperLongSubUTF8', super=True) self.cf_suplong_subbytes = ColumnFamily(self.client, 'SuperLongSubBytes', super=True) self.cf_valid_long = ColumnFamily(self.client, 'ValidatorLong') self.cf_valid_int = ColumnFamily(self.client, 'ValidatorInt') self.cf_valid_time = ColumnFamily(self.client, 'ValidatorTime') self.cf_valid_lex = ColumnFamily(self.client, 'ValidatorLex') self.cf_valid_ascii = ColumnFamily(self.client, 'ValidatorAscii') 
self.cf_valid_utf8 = ColumnFamily(self.client, 'ValidatorUTF8') self.cf_valid_bytes = ColumnFamily(self.client, 'ValidatorBytes') self.cfs = [self.cf_long, self.cf_int, self.cf_time, self.cf_lex, self.cf_ascii, self.cf_utf8, self.cf_bytes, self.cf_suplong, self.cf_supint, self.cf_suptime, self.cf_suplex, self.cf_supascii, self.cf_suputf8, self.cf_supbytes, self.cf_suplong_subint, self.cf_suplong_subint, self.cf_suplong_subtime, self.cf_suplong_sublex, self.cf_suplong_subascii, self.cf_suplong_subutf8, self.cf_suplong_subbytes, self.cf_valid_long, self.cf_valid_int, self.cf_valid_time, self.cf_valid_lex, self.cf_valid_ascii, self.cf_valid_utf8, self.cf_valid_bytes] try: self.timestamp_n = int(self.cf.get('meta')['timestamp']) except NotFoundException: self.timestamp_n = 0 self.clear() def tearDown(self): self.cf.insert('meta', {'timestamp': str(self.timestamp_n)}) def timestamp(self): self.timestamp_n += 1 return self.timestamp_n def clear(self): for key, columns in self.cf.get_range(include_timestamp=True): for value, timestamp in columns.itervalues(): self.timestamp_n = max(self.timestamp_n, timestamp) self.cf.remove(key) for cf in self.cfs: for key, columns in cf.get_range(): cf.remove(key) def test_basic_inserts(self): long_col = {1111111111111111L: VALS[0]} int_col = {1: VALS[0]} time_col = {TIME1: VALS[0]} lex_col = {uuid.UUID(bytes='abc abc abc abcd'): VALS[0]} ascii_col = {'foo': VALS[0]} utf8_col = {u'\u0020': VALS[0]} bytes_col = {'bytes': VALS[0]} self.cf_long.insert(KEYS[0], long_col) self.cf_int.insert(KEYS[0], int_col) self.cf_time.insert(KEYS[0], time_col) self.cf_lex.insert(KEYS[0], lex_col) self.cf_ascii.insert(KEYS[0], ascii_col) self.cf_utf8.insert(KEYS[0], utf8_col) self.cf_bytes.insert(KEYS[0], bytes_col) assert self.cf_long.get(KEYS[0]) == long_col assert self.cf_int.get(KEYS[0]) == int_col assert self.cf_time.get(KEYS[0]) == time_col assert self.cf_lex.get(KEYS[0]) == lex_col assert self.cf_ascii.get(KEYS[0]) == ascii_col assert 
self.cf_utf8.get(KEYS[0]) == utf8_col assert self.cf_bytes.get(KEYS[0]) == bytes_col self.cf_suplong.insert(KEYS[0], {123L: bytes_col})
class TestColumnFamily:
    """Basic insert/get/count/range/remove tests on 'Standard2'."""

    def setUp(self):
        """Log in, bind 'Standard2', restore the timestamp counter, clear."""
        self.client = connect()
        login_credentials = {'username': '******', 'password': '******'}
        self.client.login('Keyspace1', login_credentials)
        self.cf = ColumnFamily(
            self.client, 'Keyspace1', 'Standard2',
            write_consistency_level=ConsistencyLevel.ONE,
            buffer_size=2,
            timestamp=self.timestamp,
            dict_class=TestDict)
        try:
            stored = self.cf.get('meta')['timestamp']
            self.timestamp_n = int(stored)
        except NotFoundException:
            self.timestamp_n = 0
        self.clear()

    def tearDown(self):
        """Save the counter in the 'meta' row, where setUp reads it back."""
        counter = str(self.timestamp_n)
        self.cf.insert('meta', {'timestamp': counter})

    def timestamp(self):
        """Return the next value of a strictly increasing counter.

        The default timestamp function can hand Cassandra equal timestamps
        for a remove and an insert issued within the same second, leaving
        it unable to tell which came later, so the tests supply their own.
        """
        self.timestamp_n = self.timestamp_n + 1
        return self.timestamp_n

    def clear(self):
        """Remove every row, first raising the counter to any stored stamp."""
        for row_key, cols in self.cf.get_range(include_timestamp=True):
            for _, written_at in cols.itervalues():
                if written_at > self.timestamp_n:
                    self.timestamp_n = written_at
            self.cf.remove(row_key)

    def test_empty(self):
        """With nothing inserted, get raises and every read comes back empty."""
        missing = 'TestColumnFamily.test_empty'
        assert_raises(NotFoundException, self.cf.get, missing)
        found = self.cf.multiget([missing])
        assert len(found) == 0
        for _, cols in self.cf.get_range():
            assert len(cols) == 0

    def test_insert_get(self):
        """An inserted row reads back unchanged through get."""
        row_key = 'TestColumnFamily.test_insert_get'
        expected = {'1': 'val1', '2': 'val2'}
        assert_raises(NotFoundException, self.cf.get, row_key)
        self.cf.insert(row_key, expected)
        fetched = self.cf.get(row_key)
        assert fetched == expected

    def test_insert_multiget(self):
        """multiget returns the inserted rows and omits a missing key."""
        key1 = 'TestColumnFamily.test_insert_multiget1'
        key2 = 'test_insert_multiget1'
        missing_key = 'key3'
        columns1 = {'1': 'val1', '2': 'val2'}
        columns2 = {'3': 'val1', '4': 'val2'}
        self.cf.insert(key1, columns1)
        self.cf.insert(key2, columns2)
        rows = self.cf.multiget([key1, key2, missing_key])
        assert len(rows) == 2
        assert rows[key1] == columns1
        assert rows[key2] == columns2
        assert missing_key not in rows

    def test_insert_get_count(self):
        """get_count reports the number of inserted columns."""
        row_key = 'TestColumnFamily.test_insert_get_count'
        self.cf.insert(row_key, {'1': 'val1', '2': 'val2'})
        assert self.cf.get_count(row_key) == 2

    def test_insert_get_range(self):
        """get_range between first and last key yields all rows in order."""
        keys = ['TestColumnFamily.test_insert_get_range%s' % i
                for i in xrange(5)]
        columns = {'1': 'val1', '2': 'val2'}
        for row_key in keys:
            self.cf.insert(row_key, columns)
        rows = list(self.cf.get_range(start=keys[0], finish=keys[-1]))
        assert len(rows) == len(keys)
        for expected_key, (got_key, got_cols) in zip(keys, rows):
            assert got_key == expected_key
            assert got_cols == columns

    def test_remove(self):
        """Removing one column keeps the rest; removing the key drops the row."""
        row_key = 'TestColumnFamily.test_remove'
        columns = {'1': 'val1', '2': 'val2'}
        self.cf.insert(row_key, columns)
        self.cf.remove(row_key, columns=['2'])
        del columns['2']
        remaining = self.cf.get(row_key)
        assert remaining == {'1': 'val1'}
        self.cf.remove(row_key)
        assert_raises(NotFoundException, self.cf.get, row_key)

    def test_dict_class(self):
        """Rows come back as instances of the configured dict_class."""
        row_key = 'TestColumnFamily.test_dict_class'
        self.cf.insert(row_key, {'1': 'val1'})
        fetched = self.cf.get(row_key)
        assert isinstance(fetched, TestDict)
class CassandraImporter:
    """Copy rows of one column family from a source to a destination cluster.

    Command-line arguments select the clusters, the keyspace and column
    family (the same names on both sides), and which rows to copy: a single
    key (-k), the first N keys returned by a range scan (-c), or every
    key (-a).
    """

    def __init__(self):
        """Parse the command line and open both column families.

        Exits with status 1 if either connection cannot be set up.
        """
        parser = argparse.ArgumentParser(
            description='Copy rows of a column family from one Cassandra '
                        'cluster to another.')
        parser.add_argument('-s', '--source',
                            help='Generally the prod cassandra path, list of machines: '
                                 'localhost:9162 localhost:9163',
                            nargs='*', required=True)
        parser.add_argument('-d', '--destination',
                            help='Cassandra path where you need your data: '
                                 'localhost:9160 localhost:9161',
                            nargs='*', required=True)
        parser.add_argument('-ks', '--keyspace',
                            help='The keyspace: myks', required=True)
        parser.add_argument('-cf', '--column_family',
                            help='The Column family: mycf', required=True)
        parser.add_argument('-k', '--key',
                            help='A specific key to be imported',
                            required=False)
        parser.add_argument('-c', '--count',
                            help='Total count of keys to be imported',
                            required=False)
        parser.add_argument('-a', '--all', action='store_true',
                            help='Get all. Not recommended!', required=False)
        args = vars(parser.parse_args())

        # Connection settings. The script is meant to sync data, so the
        # source and destination keyspace and column family are the same.
        try:
            source_pool = ConnectionPool(args["keyspace"], args["source"])
            destination_pool = ConnectionPool(args["keyspace"],
                                              args["destination"])
            self.source_cf = self._raw_column_family(
                source_pool, args["column_family"])
            self.destination_cf = self._raw_column_family(
                destination_pool, args["column_family"])
        except Exception as e:
            print("ERROR: The keyspace or the column family does not exist or request is timing out!")
            # Show the underlying cause and report failure to the caller.
            print("ERROR detail: %s" % (e,))
            sys.exit(1)

        # Optional data
        self.count = args["count"]
        if self.count:
            self.count = int(self.count)
        self.key = args["key"]
        self.all = args["all"]

    @staticmethod
    def _raw_column_family(pool, name):
        """Open column family `name` with autopacking switched off."""
        cf = ColumnFamily(pool, name)
        cf.autopack_names = False
        cf.autopack_values = False
        cf.autopack_keys = False
        cf.default_validation_class = pycassa.types.UTF8Type()
        return cf

    def importData(self):
        """Read the selected rows from the source; return {key: columns}.

        Returns an empty dict, after printing a hint, when none of -k, -c
        or -a was given.
        """
        data = dict()
        if self.key:
            # Get columns for a key
            data[self.key] = self.source_cf.get(self.key)
        elif self.count:
            # Get the first `count` keys of the range scan and their columns
            counter = 0
            error_count = 0
            for value in self.source_cf.get_range(column_count=0,
                                                  filter_empty=False):
                if counter >= self.count:
                    break
                try:
                    column_data = self.source_cf.get(value[0],
                                                     column_count=100)
                    data[value[0]] = column_data
                    counter += 1
                except NotFoundException:
                    # Ignore keys with empty columns
                    pass
                except Exception:
                    error_count += 1
                    if error_count > 10:
                        # Write the read data
                        self.insertData(data)
                        print("ERROR: Remote cassandra is too slow to read, exiting after writing...")
                        sys.exit(1)
                # Use this to throttle reads from cassandra
                time.sleep(0.2)
        elif self.all:
            # Get All, Not recommended
            for value in self.source_cf.get_range(column_count=0,
                                                  filter_empty=False):
                key = value[0]
                data[key] = self.source_cf.get(key)
        else:
            print("Please pass -c or -k or -a arguments!")
        return data

    def insertData(self, data):
        """Insert every ``key -> columns`` pair of `data` at the destination."""
        print("Writing " + str(len(data)) + " keys")
        for key, value in data.items():
            self.destination_cf.insert(key, value)

    def run(self):
        """Read from the source, write to the destination, report progress."""
        self.update_progress(0)
        data = self.importData()
        self.update_progress(50)
        self.insertData(data)
        self.update_progress(100)
        print("Import complete!")

    def update_progress(self, progress):
        """Print a text progress bar with one '#' per ten percent."""
        # Floor division keeps the repeat count an integer under both
        # Python 2 and Python 3 division rules.
        print('\r[{0}] {1}%'.format('#' * (progress // 10), progress))
class TestColumnFamily:
    """Integration tests for ColumnFamily on 'Standard2' in 'Keyspace1'.

    Every test talks to the Cassandra node reached through connect(). Rows
    left in the column family are removed by clear() during setUp.
    """

    def setUp(self):
        # Connect, open the column family and restore the logical clock that
        # the previous run stored under the 'meta' row.
        credentials = {'username': '******', 'password': '******'}
        self.client = connect('Keyspace1', credentials=credentials)
        self.cf = ColumnFamily(self.client, 'Standard2',
                               write_consistency_level=ConsistencyLevel.ONE,
                               buffer_size=2, timestamp=self.timestamp,
                               dict_class=TestDict)
        try:
            self.timestamp_n = int(self.cf.get('meta')['timestamp'])
        except NotFoundException:
            self.timestamp_n = 0
        self.clear()

    def tearDown(self):
        # Persist the logical clock so the next setUp can continue from it.
        self.cf.insert('meta', {'timestamp': str(self.timestamp_n)})

    # Since the timestamp passed to Cassandra will be in the same second
    # with the default timestamp function, causing problems with removing
    # and inserting (Cassandra doesn't know which is later), we supply our own
    def timestamp(self):
        self.timestamp_n += 1
        return self.timestamp_n

    def clear(self):
        # Remove every row, first advancing the logical clock past the newest
        # stored column timestamp.
        for key, columns in self.cf.get_range(include_timestamp=True):
            for value, timestamp in columns.itervalues():
                self.timestamp_n = max(self.timestamp_n, timestamp)
            self.cf.remove(key)

    def _check_range(self, expected_keys, expected_count, **range_kwargs):
        # Iterate get_range(**range_kwargs); every key must be expected and
        # exactly expected_count rows must come back.
        count = 0
        for (k, v) in self.cf.get_range(**range_kwargs):
            assert_true(k in expected_keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, expected_count)

    def test_empty(self):
        key = 'TestColumnFamily.test_empty'
        assert_raises(NotFoundException, self.cf.get, key)
        assert len(self.cf.multiget([key])) == 0
        for key, columns in self.cf.get_range():
            assert len(columns) == 0

    def test_insert_get(self):
        key = 'TestColumnFamily.test_insert_get'
        columns = {'1': 'val1', '2': 'val2'}
        assert_raises(NotFoundException, self.cf.get, key)
        self.cf.insert(key, columns)
        assert self.cf.get(key) == columns

    def test_insert_multiget(self):
        key1 = 'TestColumnFamily.test_insert_multiget1'
        columns1 = {'1': 'val1', '2': 'val2'}
        key2 = 'test_insert_multiget1'
        columns2 = {'3': 'val1', '4': 'val2'}
        missing_key = 'key3'

        self.cf.insert(key1, columns1)
        self.cf.insert(key2, columns2)
        rows = self.cf.multiget([key1, key2, missing_key])
        assert len(rows) == 2
        assert rows[key1] == columns1
        assert rows[key2] == columns2
        assert missing_key not in rows

    def test_insert_get_count(self):
        key = 'TestColumnFamily.test_insert_get_count'
        columns = {'1': 'val1', '2': 'val2'}
        self.cf.insert(key, columns)
        assert self.cf.get_count(key) == 2

        cases = [
            ({'column_start': '1'}, 2),
            ({'column_finish': '2'}, 2),
            ({'column_start': '1', 'column_finish': '2'}, 2),
            ({'column_start': '1', 'column_finish': '1'}, 1),
            ({'columns': ['1', '2']}, 2),
            ({'columns': ['1']}, 1),
        ]
        for count_kwargs, expected in cases:
            assert_equal(self.cf.get_count(key, **count_kwargs), expected)

    def test_insert_multiget_count(self):
        keys = ['TestColumnFamily.test_insert_multiget_count1',
                'TestColumnFamily.test_insert_multiget_count2',
                'TestColumnFamily.test_insert_multiget_count3']
        columns = {'1': 'val1', '2': 'val2'}
        for key in keys:
            self.cf.insert(key, columns)

        result = self.cf.multiget_count(keys)
        for key in keys:
            assert_equal(result[key], 2)

        cases = [
            ({'column_start': '1'}, 2),
            ({'column_finish': '2'}, 2),
            ({'column_start': '1', 'column_finish': '2'}, 2),
            ({'column_start': '1', 'column_finish': '1'}, 1),
            ({'columns': ['1', '2']}, 2),
            ({'columns': ['1']}, 1),
        ]
        for count_kwargs, expected in cases:
            result = self.cf.multiget_count(keys, **count_kwargs)
            assert_equal(len(result), 3)
            assert_equal(result[keys[0]], expected)

    def test_insert_get_range(self):
        keys = ['TestColumnFamily.test_insert_get_range%s' % i for i in xrange(5)]
        columns = {'1': 'val1', '2': 'val2'}
        for key in keys:
            self.cf.insert(key, columns)

        rows = list(self.cf.get_range(start=keys[0], finish=keys[-1]))
        assert len(rows) == len(keys)
        for i, (k, c) in enumerate(rows):
            assert k == keys[i]
            assert c == columns

    def test_get_range_batching(self):
        self.cf.truncate()

        keys = set()
        columns = {'c': 'v'}
        for i in range(100, 201):
            keys.add('key%d' % i)
            self.cf.insert('key%d' % i, columns)
        for i in range(201, 301):
            self.cf.insert('key%d' % i, columns)

        # A row_count of 100 must be honoured whatever the buffer size.
        for buffer_size in (10, 1000, 150, 7, 2):
            self._check_range(keys, 100, row_count=100, buffer_size=buffer_size)

        # Put the remaining keys in our set
        for i in range(201, 301):
            keys.add('key%d' % i)

        for buffer_size in (2, 7, 200, 10000):
            self._check_range(keys, 201, row_count=10000, buffer_size=buffer_size)

        # Don't give a row count
        for buffer_size in (2, 77, 200, 10000):
            self._check_range(keys, 201, buffer_size=buffer_size)

        self.cf.truncate()

    # NOTE(review): this name has no `test_` prefix, so test runners do not
    # collect it. Renaming it would enable it alongside
    # test_get_indexed_slices_batching, which leaves 200 rows in 'Indexed1';
    # confirm the intended isolation before doing so.
    def insert_insert_get_indexed_slices(self):
        indexed_cf = ColumnFamily(self.client, 'Indexed1')

        birthdate = long(1)
        columns = {'birthdate': birthdate}

        keys = []
        for i in range(1, 4):
            indexed_cf.insert('key%d' % i, columns)
            # The format argument was missing here, which stored the literal
            # 'key%d' and made the membership assertion below unsatisfiable.
            keys.append('key%d' % i)

        expr = index.create_index_expression(column_name='birthdate', value=birthdate)
        clause = index.create_index_clause([expr])

        count = 0
        for key, cols in indexed_cf.get_indexed_slices(clause):
            assert cols == columns
            assert key in keys
            count += 1
        assert_equal(count, 3)

    def test_get_indexed_slices_batching(self):
        indexed_cf = ColumnFamily(self.client, 'Indexed1')

        birthdate = long(1)
        columns = {'birthdate': birthdate}

        for i in range(200):
            indexed_cf.insert('key%d' % i, columns)

        expr = index.create_index_expression(column_name='birthdate', value=birthdate)
        buffer_sizes = (2, 10, 77, 200, 1000)

        # The clause count caps the result at 10 rows.
        clause = index.create_index_clause([expr], count=10)
        for buffer_size in buffer_sizes:
            result = list(indexed_cf.get_indexed_slices(clause, buffer_size=buffer_size))
            assert_equal(len(result), 10)

        # A clause count above the number of rows returns all 200 of them.
        clause = index.create_index_clause([expr], count=250)
        for buffer_size in buffer_sizes:
            result = list(indexed_cf.get_indexed_slices(clause, buffer_size=buffer_size))
            assert_equal(len(result), 200)

    def test_remove(self):
        key = 'TestColumnFamily.test_remove'
        columns = {'1': 'val1', '2': 'val2'}
        self.cf.insert(key, columns)

        self.cf.remove(key, columns=['2'])
        del columns['2']
        assert self.cf.get(key) == {'1': 'val1'}

        self.cf.remove(key)
        assert_raises(NotFoundException, self.cf.get, key)

    def test_dict_class(self):
        key = 'TestColumnFamily.test_dict_class'
        self.cf.insert(key, {'1': 'val1'})
        assert isinstance(self.cf.get(key), TestDict)
class TestColumnFamilyMap:
    """Integration tests for ColumnFamilyMap on top of 'Standard2' in 'Keyspace1'.

    `self.map` maps rows onto TestUTF8 instances; `self.empty_map` maps them
    onto TestEmpty with raw_columns=True.
    """

    def setUp(self):
        self.client = connect()
        self.client.login('Keyspace1', {'username': '******', 'password': '******'})
        self.cf = ColumnFamily(
            self.client, 'Keyspace1', 'Standard2',
            write_consistency_level=ConsistencyLevel.ONE,
            timestamp=self.timestamp)
        self.map = ColumnFamilyMap(TestUTF8, self.cf)
        self.empty_map = ColumnFamilyMap(TestEmpty, self.cf, raw_columns=True)
        # Resume the logical clock stored by the previous run, if any.
        try:
            self.timestamp_n = int(self.cf.get('meta')['timestamp'])
        except NotFoundException:
            self.timestamp_n = 0
        self.clear()

    def tearDown(self):
        stored_clock = {'timestamp': str(self.timestamp_n)}
        self.cf.insert('meta', stored_clock)

    # Since the timestamp passed to Cassandra will be in the same second
    # with the default timestamp function, causing problems with removing
    # and inserting (Cassandra doesn't know which is later), we supply our own
    def timestamp(self):
        self.timestamp_n += 1
        return self.timestamp_n

    def clear(self):
        # Delete every row after moving the clock past its column timestamps.
        for row_key, row_columns in self.cf.get_range(include_timestamp=True):
            for _value, stamp in row_columns.itervalues():
                self.timestamp_n = max(self.timestamp_n, stamp)
            self.cf.remove(row_key)

    def instance(self, key):
        # Build a fully populated TestUTF8 object for the given row key.
        obj = TestUTF8()
        obj.key = key
        obj.strcol = '1'
        obj.intcol = 2
        obj.floatcol = 3.5
        obj.datetimecol = datetime.now().replace(microsecond=0)
        obj.intstrcol = 8
        obj.floatstrcol = 4.6
        obj.datetimestrcol = datetime.now().replace(microsecond=0)
        return obj

    def test_will_not_insert_none(self):
        column_names = ('strcol', 'intcol', 'floatcol', 'datetimecol',
                        'intstrcol', 'floatstrcol', 'datetimestrcol')
        for name in column_names:
            candidate = self.instance('TestColumnFamilyMap.test_will_not_insert_none')
            setattr(candidate, name, None)
            assert_raises(TypeError, self.map.insert, candidate)

    def test_empty(self):
        row_key = 'TestColumnFamilyMap.test_empty'
        assert_raises(NotFoundException, self.map.get, row_key)
        assert len(self.map.multiget([row_key])) == 0

    def test_insert_get(self):
        obj = self.instance('TestColumnFamilyMap.test_insert_get')
        assert_raises(NotFoundException, self.map.get, obj.key)
        self.map.insert(obj)
        assert self.map.get(obj.key) == obj
        raw = self.empty_map.get(obj.key).raw_columns
        assert raw['intstrcol'] == str(obj.intstrcol)

    def test_insert_multiget(self):
        first = self.instance('TestColumnFamilyMap.test_insert_multiget1')
        second = self.instance('TestColumnFamilyMap.test_insert_multiget2')
        missing_key = 'TestColumnFamilyMap.test_insert_multiget3'

        self.map.insert(first)
        self.map.insert(second)
        rows = self.map.multiget([first.key, second.key, missing_key])
        assert len(rows) == 2
        assert rows[first.key] == first
        assert rows[second.key] == second
        assert missing_key not in rows
        raw = self.empty_map.multiget([first.key])[first.key].raw_columns
        assert raw['intstrcol'] == str(first.intstrcol)

    def test_insert_get_count(self):
        obj = self.instance('TestColumnFamilyMap.test_insert_get_count')
        self.map.insert(obj)
        assert self.map.get_count(obj.key) == 7

    def test_insert_get_range(self):
        instances = [
            self.instance('TestColumnFamilyMap.test_insert_get_range%s' % i)
            for i in xrange(5)
        ]
        for obj in instances:
            self.map.insert(obj)

        first_key = instances[0].key
        last_key = instances[-1].key
        rows = list(self.map.get_range(start=first_key, finish=last_key))
        assert len(rows) == len(instances)
        assert rows == instances
        raw_rows = list(self.empty_map.get_range(start=first_key, finish=first_key))
        assert raw_rows[0].raw_columns['intstrcol'] == str(instances[0].intstrcol)

    def test_remove(self):
        obj = self.instance('TestColumnFamilyMap.test_remove')
        self.map.insert(obj)
        self.map.remove(obj)
        assert_raises(NotFoundException, self.map.get, obj.key)

    def test_does_not_insert_extra_column(self):
        obj = self.instance('TestColumnFamilyMap.test_does_not_insert_extra_column')
        obj.othercol = 'Test'
        self.map.insert(obj)

        fetched = self.map.get(obj.key)
        assert fetched.strcol == obj.strcol
        assert fetched.intcol == obj.intcol
        assert fetched.floatcol == obj.floatcol
        assert fetched.datetimecol == obj.datetimecol
        assert_raises(AttributeError, getattr, fetched, 'othercol')

    def test_has_defaults(self):
        row_key = 'TestColumnFamilyMap.test_has_defaults'
        self.cf.insert(row_key, {'strcol': '1'})
        fetched = self.map.get(row_key)

        assert fetched.intcol == TestUTF8.intcol.default
        assert fetched.floatcol == TestUTF8.floatcol.default
        assert fetched.datetimecol == TestUTF8.datetimecol.default
        assert fetched.intstrcol == TestUTF8.intstrcol.default
        assert fetched.floatstrcol == TestUTF8.floatstrcol.default
        assert fetched.datetimestrcol == TestUTF8.datetimestrcol.default
class TestColumnFamily(unittest.TestCase):
    """Integration tests for ColumnFamily on 'Standard2' in 'Keyspace1'.

    Each test gets its own ConnectionPool; tearDown removes every row of the
    column family and releases the pool.
    """

    def setUp(self):
        credentials = {'username': '******', 'password': '******'}
        self.pool = ConnectionPool(keyspace='Keyspace1', credentials=credentials)
        self.cf = ColumnFamily(self.pool, 'Standard2', dict_class=TestDict)

    def tearDown(self):
        for key, columns in self.cf.get_range():
            self.cf.remove(key)
        # A new pool is opened in every setUp; release it so connections do
        # not pile up over the run (TestTimeUUIDs does the same).
        self.pool.dispose()

    def _check_range(self, expected_keys, expected_count, **range_kwargs):
        # Iterate get_range(**range_kwargs); every key must be expected and
        # exactly expected_count rows must come back.
        count = 0
        for (k, v) in self.cf.get_range(**range_kwargs):
            assert_true(k in expected_keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, expected_count)

    def test_empty(self):
        key = 'TestColumnFamily.test_empty'
        assert_raises(NotFoundException, self.cf.get, key)
        assert_equal(len(self.cf.multiget([key])), 0)
        for key, columns in self.cf.get_range():
            assert_equal(len(columns), 0)

    def test_insert_get(self):
        key = 'TestColumnFamily.test_insert_get'
        columns = {'1': 'val1', '2': 'val2'}
        assert_raises(NotFoundException, self.cf.get, key)
        self.cf.insert(key, columns)
        assert_equal(self.cf.get(key), columns)

    def test_insert_multiget(self):
        key1 = 'TestColumnFamily.test_insert_multiget1'
        columns1 = {'1': 'val1', '2': 'val2'}
        key2 = 'test_insert_multiget1'
        columns2 = {'3': 'val1', '4': 'val2'}
        missing_key = 'key3'

        self.cf.insert(key1, columns1)
        self.cf.insert(key2, columns2)
        rows = self.cf.multiget([key1, key2, missing_key])
        assert_equal(len(rows), 2)
        assert_equal(rows[key1], columns1)
        assert_equal(rows[key2], columns2)
        assert_true(missing_key not in rows)

    def test_insert_get_count(self):
        key = 'TestColumnFamily.test_insert_get_count'
        columns = {'1': 'val1', '2': 'val2'}
        self.cf.insert(key, columns)
        assert_equal(self.cf.get_count(key), 2)

        cases = [
            ({'column_start': '1'}, 2),
            ({'column_finish': '2'}, 2),
            ({'column_start': '1', 'column_finish': '2'}, 2),
            ({'column_start': '1', 'column_finish': '1'}, 1),
            ({'columns': ['1', '2']}, 2),
            ({'columns': ['1']}, 1),
        ]
        for count_kwargs, expected in cases:
            assert_equal(self.cf.get_count(key, **count_kwargs), expected)

    def test_insert_multiget_count(self):
        keys = ['TestColumnFamily.test_insert_multiget_count1',
                'TestColumnFamily.test_insert_multiget_count2',
                'TestColumnFamily.test_insert_multiget_count3']
        columns = {'1': 'val1', '2': 'val2'}
        for key in keys:
            self.cf.insert(key, columns)

        result = self.cf.multiget_count(keys)
        for key in keys:
            assert_equal(result[key], 2)

        cases = [
            ({'column_start': '1'}, 2),
            ({'column_finish': '2'}, 2),
            ({'column_start': '1', 'column_finish': '2'}, 2),
            ({'column_start': '1', 'column_finish': '1'}, 1),
            ({'columns': ['1', '2']}, 2),
            ({'columns': ['1']}, 1),
        ]
        for count_kwargs, expected in cases:
            result = self.cf.multiget_count(keys, **count_kwargs)
            assert_equal(len(result), 3)
            assert_equal(result[keys[0]], expected)

    def test_insert_get_range(self):
        keys = ['TestColumnFamily.test_insert_get_range%s' % i for i in xrange(5)]
        columns = {'1': 'val1', '2': 'val2'}
        for key in keys:
            self.cf.insert(key, columns)

        rows = list(self.cf.get_range(start=keys[0], finish=keys[-1]))
        assert_equal(len(rows), len(keys))
        for i, (k, c) in enumerate(rows):
            assert_equal(k, keys[i])
            assert_equal(c, columns)

    def test_get_range_batching(self):
        self.cf.truncate()

        keys = set()
        columns = {'c': 'v'}
        for i in range(100, 201):
            keys.add('key%d' % i)
            self.cf.insert('key%d' % i, columns)
        for i in range(201, 301):
            self.cf.insert('key%d' % i, columns)

        # A row_count of 100 must be honoured whatever the buffer size.
        for buffer_size in (10, 1000, 150, 7, 2):
            self._check_range(keys, 100, row_count=100, buffer_size=buffer_size)

        # Put the remaining keys in our set
        for i in range(201, 301):
            keys.add('key%d' % i)

        for buffer_size in (2, 7, 200, 10000):
            self._check_range(keys, 201, row_count=10000, buffer_size=buffer_size)

        # Don't give a row count
        for buffer_size in (2, 77, 200, 10000):
            self._check_range(keys, 201, buffer_size=buffer_size)

        self.cf.truncate()

    # NOTE(review): this name has no `test_` prefix, so unittest does not
    # collect it. Renaming it would enable it alongside
    # test_get_indexed_slices_batching, which leaves 200 rows in 'Indexed1';
    # confirm the intended isolation before doing so.
    def insert_insert_get_indexed_slices(self):
        indexed_cf = ColumnFamily(self.pool, 'Indexed1')

        birthdate = long(1)
        columns = {'birthdate': birthdate}

        keys = []
        for i in range(1, 4):
            indexed_cf.insert('key%d' % i, columns)
            # The format argument was missing here, which stored the literal
            # 'key%d' and made the membership assertion below unsatisfiable.
            keys.append('key%d' % i)

        expr = index.create_index_expression(column_name='birthdate', value=birthdate)
        clause = index.create_index_clause([expr])

        count = 0
        for key, cols in indexed_cf.get_indexed_slices(clause):
            assert_equal(cols, columns)
            assert key in keys
            count += 1
        assert_equal(count, 3)

    def test_get_indexed_slices_batching(self):
        indexed_cf = ColumnFamily(self.pool, 'Indexed1')

        birthdate = long(1)
        columns = {'birthdate': birthdate}

        for i in range(200):
            indexed_cf.insert('key%d' % i, columns)

        expr = index.create_index_expression(column_name='birthdate', value=birthdate)
        buffer_sizes = (2, 10, 77, 200, 1000)

        # The clause count caps the result at 10 rows.
        clause = index.create_index_clause([expr], count=10)
        for buffer_size in buffer_sizes:
            result = list(indexed_cf.get_indexed_slices(clause, buffer_size=buffer_size))
            assert_equal(len(result), 10)

        # A clause count above the number of rows returns all 200 of them.
        clause = index.create_index_clause([expr], count=250)
        for buffer_size in buffer_sizes:
            result = list(indexed_cf.get_indexed_slices(clause, buffer_size=buffer_size))
            assert_equal(len(result), 200)

    def test_remove(self):
        key = 'TestColumnFamily.test_remove'
        columns = {'1': 'val1', '2': 'val2'}
        self.cf.insert(key, columns)

        self.cf.remove(key, columns=['2'])
        del columns['2']
        assert_equal(self.cf.get(key), {'1': 'val1'})

        self.cf.remove(key)
        assert_raises(NotFoundException, self.cf.get, key)

    def test_dict_class(self):
        key = 'TestColumnFamily.test_dict_class'
        self.cf.insert(key, {'1': 'val1'})
        assert isinstance(self.cf.get(key), TestDict)
class TestColumnFamily:
    """Integration tests for ColumnFamily on 'Standard2' in 'Keyspace1'.

    The suite drives a live node through connect() and uses its own
    monotonically increasing timestamp instead of the default one.
    """

    def setUp(self):
        login = {'username': '******', 'password': '******'}
        self.client = connect('Keyspace1', credentials=login)
        self.cf = ColumnFamily(
            self.client, 'Standard2',
            write_consistency_level=ConsistencyLevel.ONE,
            buffer_size=2,
            timestamp=self.timestamp,
            dict_class=TestDict)
        # Resume the logical clock stored by the previous run, if any.
        try:
            self.timestamp_n = int(self.cf.get('meta')['timestamp'])
        except NotFoundException:
            self.timestamp_n = 0
        self.clear()

    def tearDown(self):
        stored_clock = {'timestamp': str(self.timestamp_n)}
        self.cf.insert('meta', stored_clock)

    # Since the timestamp passed to Cassandra will be in the same second
    # with the default timestamp function, causing problems with removing
    # and inserting (Cassandra doesn't know which is later), we supply our own
    def timestamp(self):
        self.timestamp_n += 1
        return self.timestamp_n

    def clear(self):
        # Delete every row after moving the clock past its column timestamps.
        for row_key, row_columns in self.cf.get_range(include_timestamp=True):
            for _value, stamp in row_columns.itervalues():
                self.timestamp_n = max(self.timestamp_n, stamp)
            self.cf.remove(row_key)

    def test_empty(self):
        key = 'TestColumnFamily.test_empty'
        assert_raises(NotFoundException, self.cf.get, key)
        assert len(self.cf.multiget([key])) == 0
        for key, row_columns in self.cf.get_range():
            assert len(row_columns) == 0

    def test_insert_get(self):
        row_key = 'TestColumnFamily.test_insert_get'
        payload = {'1': 'val1', '2': 'val2'}
        assert_raises(NotFoundException, self.cf.get, row_key)
        self.cf.insert(row_key, payload)
        assert self.cf.get(row_key) == payload

    def test_insert_multiget(self):
        stored = [
            ('TestColumnFamily.test_insert_multiget1', {'1': 'val1', '2': 'val2'}),
            ('test_insert_multiget1', {'3': 'val1', '4': 'val2'}),
        ]
        missing_key = 'key3'
        for row_key, payload in stored:
            self.cf.insert(row_key, payload)

        rows = self.cf.multiget([stored[0][0], stored[1][0], missing_key])
        assert len(rows) == 2
        for row_key, payload in stored:
            assert rows[row_key] == payload
        assert missing_key not in rows

    def test_insert_get_count(self):
        row_key = 'TestColumnFamily.test_insert_get_count'
        self.cf.insert(row_key, {'1': 'val1', '2': 'val2'})
        assert self.cf.get_count(row_key) == 2

        # (keyword arguments for get_count, expected number of columns)
        expectations = [
            ({'column_start': '1'}, 2),
            ({'column_finish': '2'}, 2),
            ({'column_start': '1', 'column_finish': '2'}, 2),
            ({'column_start': '1', 'column_finish': '1'}, 1),
            ({'columns': ['1', '2']}, 2),
            ({'columns': ['1']}, 1),
        ]
        for count_kwargs, expected in expectations:
            assert_equal(self.cf.get_count(row_key, **count_kwargs), expected)

    def test_insert_multiget_count(self):
        keys = ['TestColumnFamily.test_insert_multiget_count%d' % n for n in (1, 2, 3)]
        payload = {'1': 'val1', '2': 'val2'}
        for row_key in keys:
            self.cf.insert(row_key, payload)

        counts = self.cf.multiget_count(keys)
        for row_key in keys:
            assert_equal(counts[row_key], 2)

        # (keyword arguments for multiget_count, expected count for the first key)
        expectations = [
            ({'column_start': '1'}, 2),
            ({'column_finish': '2'}, 2),
            ({'column_start': '1', 'column_finish': '2'}, 2),
            ({'column_start': '1', 'column_finish': '1'}, 1),
            ({'columns': ['1', '2']}, 2),
            ({'columns': ['1']}, 1),
        ]
        for count_kwargs, expected in expectations:
            counts = self.cf.multiget_count(keys, **count_kwargs)
            assert_equal(len(counts), 3)
            assert_equal(counts[keys[0]], expected)

    def test_insert_get_range(self):
        keys = []
        for i in xrange(5):
            keys.append('TestColumnFamily.test_insert_get_range%s' % i)
        payload = {'1': 'val1', '2': 'val2'}
        for row_key in keys:
            self.cf.insert(row_key, payload)

        rows = list(self.cf.get_range(start=keys[0], finish=keys[-1]))
        assert len(rows) == len(keys)
        for expected_key, (found_key, found_columns) in zip(keys, rows):
            assert found_key == expected_key
            assert found_columns == payload

    def test_insert_get_indexed_slices(self):
        indexed_cf = ColumnFamily(self.client, 'Indexed1')

        birthdate = long(1)
        payload = {'birthdate': birthdate}
        row_keys = ('key1', 'key2', 'key3')
        for row_key in row_keys:
            indexed_cf.insert(row_key, payload,
                              write_consistency_level=ConsistencyLevel.ONE)

        expr = index.create_index_expression(column_name='birthdate', value=birthdate)
        clause = index.create_index_clause([expr])
        result = indexed_cf.get_indexed_slices(clause)
        assert len(result) == 3
        for row_key in row_keys:
            assert result.get(row_key) == payload

    def test_remove(self):
        row_key = 'TestColumnFamily.test_remove'
        payload = {'1': 'val1', '2': 'val2'}
        self.cf.insert(row_key, payload)

        # Removing a single column leaves the other one in place.
        self.cf.remove(row_key, columns=['2'])
        del payload['2']
        assert self.cf.get(row_key) == {'1': 'val1'}

        # Removing the row makes it unreadable.
        self.cf.remove(row_key)
        assert_raises(NotFoundException, self.cf.get, row_key)

    def test_dict_class(self):
        row_key = 'TestColumnFamily.test_dict_class'
        self.cf.insert(row_key, {'1': 'val1'})
        fetched = self.cf.get(row_key)
        assert isinstance(fetched, TestDict)
# Dump every non-system keyspace to pickle files under ./output.
# NOTE(review): this is the body of a method whose header is outside this
# view; the nesting below is reconstructed from the statement order.
count = 0
# Rows are written out every time this many of them have been collected.
flush_every = 100000

# Create the output directory; an already existing one is fine, any other
# failure is reported now instead of at the first flush.
try:
    os.mkdir('output')
except OSError:
    if not os.path.isdir('output'):
        raise

for keyspace in self.sysmanager.list_keyspaces():
    if keyspace == 'system':
        # check to skip the system database
        continue

    pool = ConnectionPool(keyspace, [self.address])
    columnfamilies = self.sysmanager.get_keyspace_column_families(keyspace)
    # Was `relt['cols']=[]`, a NameError: the pending rows live in `result`.
    result = {'cols': []}

    # iterate through all the column family
    for columnfamilyname in columnfamilies:
        colfamily = ColumnFamily(pool, columnfamilyname)
        cols = colfamily.get_range(column_reversed=True)
        result['keyspace'] = keyspace
        result['columnfamily'] = columnfamilyname
        for col in cols:
            result['cols'].append(col)
            count = count + 1
            # Flush periodically. The earlier `count == 100000` test could
            # only ever fire once because count is never reset.
            if count % flush_every == 0:
                filename = "%s.out" % str(uuid1()).replace("-", "")
                filepath = "output/%s" % filename
                # protocol=-1 selects a binary pickle format, so the file
                # must be opened in binary mode; `with` closes it on error too.
                with open(filepath, "wb") as out_file:
                    pickle.dump(result, out_file, protocol=-1)
                # shallow size of the dict in bytes (not megabytes)
                print(sys.getsizeof(result))
                # reset cols
                result['cols'] = []
class CassandraImporter:
    """Command-line helper that copies rows of one column family between clusters.

    Options are parsed from ``sys.argv`` in ``__init__``. ``run()`` reads the
    selected rows from the source column family and inserts them into the
    destination one. The same keyspace and column family names are used on
    both sides.
    """

    # A counted import gives up once more than this many row reads have failed.
    MAX_READ_ERRORS = 10
    # Pause between row reads of a counted import, in seconds.
    READ_THROTTLE_SECONDS = 0.2

    def __init__(self):
        """Parse the command line and open the source and destination column families.

        Exits the process with status 1 when either side cannot be set up.
        """
        parser = argparse.ArgumentParser(
            description="Copy rows of a column family from one Cassandra cluster to another."
        )
        parser.add_argument(
            "-s",
            "--source",
            help="Generally the prod cassandra path, list of machines: "
            "localhost:9162 localhost:9163",
            nargs="*",
            required=True,
        )
        parser.add_argument(
            "-d",
            "--destination",
            help="Cassandra path where you need your data: "
            "localhost:9160 localhost:9161",
            nargs="*",
            required=True,
        )
        parser.add_argument("-ks", "--keyspace", help="The keyspace: myks", required=True)
        parser.add_argument("-cf", "--column_family", help="The Column family: mycf", required=True)
        parser.add_argument("-k", "--key", help="A specific key to be imported", required=False)
        # type=int lets argparse reject a non-numeric count with a usage error
        # instead of a ValueError traceback later on.
        parser.add_argument(
            "-c", "--count", type=int, help="Total count of keys to be imported", required=False
        )
        parser.add_argument(
            "-a", "--all", action="store_true", help="Get all. Not recommended!", required=False
        )
        args = vars(parser.parse_args())

        # Connection setting with cassandra.
        # The script is meant to sync data, so source and destination KS and CF
        # should be the same.
        try:
            self.source_cf = self._open_column_family(
                args["keyspace"], args["source"], args["column_family"]
            )
            self.destination_cf = self._open_column_family(
                args["keyspace"], args["destination"], args["column_family"]
            )
        except Exception as e:
            print("ERROR: The keyspace or the column family does not exist or request is timing out!")
            print("Details: %s" % (e,))
            # Non-zero status so calling scripts can detect the failure.
            sys.exit(1)

        # Optional data
        self.count = args["count"]
        self.key = args["key"]
        self.all = args["all"]

    @staticmethod
    def _open_column_family(keyspace, servers, column_family):
        """Open *column_family* of *keyspace* on *servers* with autopacking switched off."""
        pool = ConnectionPool(keyspace, servers)
        cf = ColumnFamily(pool, column_family)
        # Presumably disabled so names, values and keys are copied without
        # type conversion -- confirm against the pycassa version in use.
        cf.autopack_names = False
        cf.autopack_values = False
        cf.autopack_keys = False
        cf.default_validation_class = pycassa.types.UTF8Type()
        return cf

    def importData(self):
        """Read the requested rows from the source column family.

        Exactly one mode is used, checked in this order: a single key (-k),
        a number of keys (-c), or every key (-a).

        Returns a dict mapping each row key to the columns read for it. The
        dict is empty when no mode was requested.
        """
        data = dict()

        if self.key:
            # Get columns for a key
            data[self.key] = self.source_cf.get(self.key)

        elif self.count:
            # Get up to `count` keys (in range order) and their columns
            imported = 0
            error_count = 0
            for value in self.source_cf.get_range(column_count=0, filter_empty=False):
                if imported >= self.count:
                    break
                row_key = value[0]
                try:
                    data[row_key] = self.source_cf.get(row_key, column_count=100)
                    imported += 1
                except NotFoundException:
                    # Ignore keys with empty columns
                    pass
                except Exception:
                    error_count += 1
                    if error_count > self.MAX_READ_ERRORS:
                        # Write the read data
                        self.insertData(data)
                        print("ERROR: Remote cassandra is too slow to read, exiting after writing...")
                        sys.exit(1)
                # Use this to throttle reads from cassandra
                time.sleep(self.READ_THROTTLE_SECONDS)

        elif self.all:
            # Get All, Not recommended
            for value in self.source_cf.get_range(column_count=0, filter_empty=False):
                row_key = value[0]
                data[row_key] = self.source_cf.get(row_key)

        else:
            print("Please pass -c or -k or -a arguments!")

        return data

    def insertData(self, data):
        """Insert every ``key -> columns`` pair of *data* into the destination column family."""
        print("Writing " + str(len(data)) + " keys")
        for key, value in data.items():
            self.destination_cf.insert(key, value)

    def run(self):
        """Import from the source, write to the destination and report progress."""
        self.update_progress(0)
        data = self.importData()
        self.update_progress(50)
        self.insertData(data)
        self.update_progress(100)
        print("Import complete!")

    def update_progress(self, progress):
        """Print a ten-step progress bar for *progress*, a percentage from 0 to 100."""
        # Floor division keeps the bar length an integer on every Python version.
        print("\r[{0}] {1}%".format("#" * (progress // 10), progress))
class TestTimeUUIDs(unittest.TestCase):
    """Slice queries on the 'StdTimeUUID' column family using time-like bounds."""

    def setUp(self):
        """Open a 5-connection pool on 'Keyspace1' and bind the column family."""
        credentials = {"username": "******", "password": "******"}
        self.pool = ConnectionPool(pool_size=5, keyspace="Keyspace1", credentials=credentials)
        self.cf_time = ColumnFamily(self.pool, "StdTimeUUID")

    def tearDown(self):
        """Remove every row, then dispose of the pool even if cleanup fails."""
        try:
            for key, cols in self.cf_time.get_range():
                self.cf_time.remove(key)
        finally:
            # Without the finally, a failed removal would leak the pool.
            self.pool.dispose()

    def _check_time_slices(self, now, pause):
        """Insert two uuid1 columns `pause` seconds apart and check slice bounds.

        `now` is called before each insert and once after the last one; the
        three values it returns are then used as ``column_start`` /
        ``column_finish`` arguments.
        """
        key = "key1"
        timeline = [now()]

        time1 = uuid1()
        col1 = {time1: "0"}
        self.cf_time.insert(key, col1)
        time.sleep(pause)

        timeline.append(now())

        time2 = uuid1()
        col2 = {time2: "1"}
        self.cf_time.insert(key, col2)
        time.sleep(pause)

        timeline.append(now())

        cols = {time1: "0", time2: "1"}
        get = self.cf_time.get

        assert_equal(get(key, column_start=timeline[0]), cols)
        assert_equal(get(key, column_finish=timeline[2]), cols)
        assert_equal(get(key, column_start=timeline[0], column_finish=timeline[2]), cols)
        assert_equal(get(key, column_start=timeline[0], column_finish=timeline[1]), col1)
        assert_equal(get(key, column_start=timeline[1], column_finish=timeline[2]), col2)

    def test_datetime_to_uuid(self):
        # Slice bounds given as datetime objects.
        self._check_time_slices(datetime.now, 1)

    def test_time_to_uuid(self):
        # Slice bounds given as float timestamps.
        self._check_time_slices(time.time, 0.1)

    def test_auto_time_to_uuid1(self):
        # A float timestamp used as a column name comes back as a UUID whose
        # embedded time matches to three decimal places.
        key = "key"
        t = time.time()
        col = {t: "foo"}
        self.cf_time.insert(key, col)
        # First (and only) column name of the row.
        uuid_res = next(iter(self.cf_time.get(key)))
        timestamp = convert_uuid_to_time(uuid_res)
        assert_almost_equal(timestamp, t, places=3)
def tearDown(self):
    """Remove every row from 'Standard1' in 'PycassaTestKeyspace'."""
    pool = ConnectionPool('PycassaTestKeyspace')
    try:
        cf = ColumnFamily(pool, 'Standard1')
        for key, cols in cf.get_range():
            cf.remove(key)
    finally:
        # A new pool is opened on every call, so it must be released here;
        # otherwise each test leaves its connections behind.
        pool.dispose()
class TestColumnFamilyMap:
    """Round-trip tests for ColumnFamilyMap on the 'Standard2' column family."""

    def setUp(self):
        """Log in, build the maps under test and restore the timestamp counter."""
        self.client = connect()
        login_credentials = {'username': '******', 'password': '******'}
        self.client.login('Keyspace1', login_credentials)
        self.cf = ColumnFamily(
            self.client,
            'Keyspace1',
            'Standard2',
            write_consistency_level=ConsistencyLevel.ONE,
            timestamp=self.timestamp,
        )
        self.map = ColumnFamilyMap(TestUTF8, self.cf)
        self.empty_map = ColumnFamilyMap(TestEmpty, self.cf, raw_columns=True)
        # Resume from the counter saved by the previous tearDown, if any.
        try:
            meta_row = self.cf.get('meta')
        except NotFoundException:
            self.timestamp_n = 0
        else:
            self.timestamp_n = int(meta_row['timestamp'])
        self.clear()

    def tearDown(self):
        """Store the current timestamp counter in the 'meta' row."""
        saved_counter = {'timestamp': str(self.timestamp_n)}
        self.cf.insert('meta', saved_counter)

    # Since the timestamp passed to Cassandra will be in the same second
    # with the default timestamp function, causing problems with removing
    # and inserting (Cassandra doesn't know which is later), we supply our own
    def timestamp(self):
        """Return the next value of a strictly increasing counter."""
        self.timestamp_n = self.timestamp_n + 1
        return self.timestamp_n

    def clear(self):
        """Remove every row, first raising the counter past each stored timestamp."""
        for row_key, row in self.cf.get_range(include_timestamp=True):
            newest = self.timestamp_n
            for _, stamp in row.itervalues():
                if stamp > newest:
                    newest = stamp
            self.timestamp_n = newest
            self.cf.remove(row_key)

    def instance(self, key):
        """Return a fully populated TestUTF8 object stored under `key`."""
        obj = TestUTF8()
        obj.key = key
        obj.strcol, obj.intcol, obj.floatcol = '1', 2, 3.5
        obj.datetimecol = datetime.now().replace(microsecond=0)
        obj.intstrcol, obj.floatstrcol = 8, 4.6
        obj.datetimestrcol = datetime.now().replace(microsecond=0)
        return obj

    def test_will_not_insert_none(self):
        # Setting any single mapped column to None must make insert() fail.
        mapped_columns = ('strcol', 'intcol', 'floatcol', 'datetimecol',
                          'intstrcol', 'floatstrcol', 'datetimestrcol')
        for name in mapped_columns:
            candidate = self.instance('TestColumnFamilyMap.test_will_not_insert_none')
            setattr(candidate, name, None)
            assert_raises(TypeError, self.map.insert, candidate)

    def test_empty(self):
        key = 'TestColumnFamilyMap.test_empty'
        assert_raises(NotFoundException, self.map.get, key)
        found = self.map.multiget([key])
        assert len(found) == 0

    def test_insert_get(self):
        expected = self.instance('TestColumnFamilyMap.test_insert_get')
        assert_raises(NotFoundException, self.map.get, expected.key)
        self.map.insert(expected)
        assert self.map.get(expected.key) == expected
        raw = self.empty_map.get(expected.key)
        assert raw.raw_columns['intstrcol'] == str(expected.intstrcol)

    def test_insert_multiget(self):
        first = self.instance('TestColumnFamilyMap.test_insert_multiget1')
        second = self.instance('TestColumnFamilyMap.test_insert_multiget2')
        missing_key = 'TestColumnFamilyMap.test_insert_multiget3'
        self.map.insert(first)
        self.map.insert(second)

        rows = self.map.multiget([first.key, second.key, missing_key])
        assert len(rows) == 2
        assert rows[first.key] == first
        assert rows[second.key] == second
        assert missing_key not in rows

        raw = self.empty_map.multiget([first.key])[first.key]
        assert raw.raw_columns['intstrcol'] == str(first.intstrcol)

    def test_insert_get_count(self):
        stored = self.instance('TestColumnFamilyMap.test_insert_get_count')
        self.map.insert(stored)
        assert self.map.get_count(stored.key) == 7

    def test_insert_get_range(self):
        instances = [
            self.instance('TestColumnFamilyMap.test_insert_get_range%s' % i)
            for i in xrange(5)
        ]
        for item in instances:
            self.map.insert(item)

        first, last = instances[0], instances[-1]
        rows = list(self.map.get_range(start=first.key, finish=last.key))
        assert len(rows) == len(instances)
        assert rows == instances

        raw_rows = list(self.empty_map.get_range(start=first.key, finish=first.key))
        assert raw_rows[0].raw_columns['intstrcol'] == str(first.intstrcol)

    def test_remove(self):
        doomed = self.instance('TestColumnFamilyMap.test_remove')
        self.map.insert(doomed)
        self.map.remove(doomed)
        assert_raises(NotFoundException, self.map.get, doomed.key)

    def test_does_not_insert_extra_column(self):
        original = self.instance('TestColumnFamilyMap.test_does_not_insert_extra_column')
        original.othercol = 'Test'
        self.map.insert(original)

        fetched = self.map.get(original.key)
        for name in ('strcol', 'intcol', 'floatcol', 'datetimecol'):
            assert getattr(fetched, name) == getattr(original, name)
        assert_raises(AttributeError, getattr, fetched, 'othercol')

    def test_has_defaults(self):
        key = 'TestColumnFamilyMap.test_has_defaults'
        self.cf.insert(key, {'strcol': '1'})
        fetched = self.map.get(key)
        # Every column that was not written must come back as its default.
        for name in ('intcol', 'floatcol', 'datetimecol',
                     'intstrcol', 'floatstrcol', 'datetimestrcol'):
            assert getattr(fetched, name) == getattr(TestUTF8, name).default
class TestColumnFamilyMap:
    """Round-trip tests for ColumnFamilyMap on 'Standard2' and 'Indexed1'."""

    def setUp(self):
        """Open the pool and build the plain, indexed and raw-column maps."""
        credentials = {'username': '******', 'password': '******'}
        self.pool = ConnectionPool(keyspace='Keyspace1', credentials=credentials)
        self.cf = ColumnFamily(self.pool, 'Standard2',
                               autopack_names=False, autopack_values=False)
        self.indexed_cf = ColumnFamily(self.pool, 'Indexed1',
                                       autopack_names=False, autopack_values=False)
        self.map = ColumnFamilyMap(TestUTF8, self.cf)
        self.indexed_map = ColumnFamilyMap(TestIndex, self.indexed_cf)
        self.empty_map = ColumnFamilyMap(TestEmpty, self.cf, raw_columns=True)

    def tearDown(self):
        """Remove every row from both column families used by the tests."""
        for key, columns in self.cf.get_range():
            self.cf.remove(key)
        for key, columns in self.indexed_cf.get_range():
            # Rows listed from the indexed column family must be removed
            # from that same column family, not from self.cf.
            self.indexed_cf.remove(key)

    def instance(self, key):
        """Return a fully populated TestUTF8 object stored under `key`."""
        instance = TestUTF8()
        instance.key = key
        instance.strcol = '1'
        instance.intcol = 2
        instance.floatcol = 3.5
        instance.datetimecol = datetime.now().replace(microsecond=0)
        instance.intstrcol = 8
        instance.floatstrcol = 4.6
        instance.datetimestrcol = datetime.now().replace(microsecond=0)
        return instance

    def test_empty(self):
        key = 'TestColumnFamilyMap.test_empty'
        assert_raises(NotFoundException, self.map.get, key)
        assert_equal(len(self.map.multiget([key])), 0)

    def test_insert_get(self):
        instance = self.instance('TestColumnFamilyMap.test_insert_get')
        assert_raises(NotFoundException, self.map.get, instance.key)
        self.map.insert(instance)
        assert_equal(self.map.get(instance.key), instance)
        assert_equal(self.empty_map.get(instance.key).raw_columns['intstrcol'],
                     str(instance.intstrcol))

    def test_insert_get_indexed_slices(self):
        # long() rather than an L-suffixed literal keeps this module
        # parseable by Python 3 tooling; the value and type are unchanged
        # on Python 2.
        birthdate = long(1)

        # The same object is inserted under three keys; it ends up holding
        # the last key, which is the one compared below.
        instance = TestIndex()
        instance.key = 'key'
        instance.birthdate = birthdate
        self.indexed_map.insert(instance)
        instance.key = 'key2'
        self.indexed_map.insert(instance)
        instance.key = 'key3'
        self.indexed_map.insert(instance)

        expr = index.create_index_expression(column_name='birthdate', value=birthdate)
        clause = index.create_index_clause([expr])
        result = self.indexed_map.get_indexed_slices(instance, index_clause=clause)
        assert_equal(len(result), 3)
        assert_equal(result.get('key3'), instance)

    def test_insert_multiget(self):
        instance1 = self.instance('TestColumnFamilyMap.test_insert_multiget1')
        instance2 = self.instance('TestColumnFamilyMap.test_insert_multiget2')
        missing_key = 'TestColumnFamilyMap.test_insert_multiget3'
        self.map.insert(instance1)
        self.map.insert(instance2)
        rows = self.map.multiget([instance1.key, instance2.key, missing_key])
        assert_equal(len(rows), 2)
        assert_equal(rows[instance1.key], instance1)
        assert_equal(rows[instance2.key], instance2)
        assert_true(missing_key not in rows)
        assert_equal(
            self.empty_map.multiget([instance1.key])[instance1.key].raw_columns['intstrcol'],
            str(instance1.intstrcol))

    def test_insert_get_count(self):
        instance = self.instance('TestColumnFamilyMap.test_insert_get_count')
        self.map.insert(instance)
        assert_equal(self.map.get_count(instance.key), 7)

    def test_insert_get_range(self):
        instances = []
        for i in xrange(5):
            instance = self.instance('TestColumnFamilyMap.test_insert_get_range%s' % i)
            instances.append(instance)

        for instance in instances:
            self.map.insert(instance)

        rows = list(self.map.get_range(start=instances[0].key, finish=instances[-1].key))
        assert_equal(len(rows), len(instances))
        assert_equal(rows, instances)
        raw_rows = list(self.empty_map.get_range(start=instances[0].key,
                                                 finish=instances[0].key))
        assert_equal(raw_rows[0].raw_columns['intstrcol'], str(instances[0].intstrcol))

    def test_remove(self):
        instance = self.instance('TestColumnFamilyMap.test_remove')
        self.map.insert(instance)
        self.map.remove(instance)
        assert_raises(NotFoundException, self.map.get, instance.key)

    def test_does_not_insert_extra_column(self):
        instance = self.instance('TestColumnFamilyMap.test_does_not_insert_extra_column')
        instance.othercol = 'Test'
        self.map.insert(instance)

        get_instance = self.map.get(instance.key)
        assert_equal(get_instance.strcol, instance.strcol)
        assert_equal(get_instance.intcol, instance.intcol)
        assert_equal(get_instance.floatcol, instance.floatcol)
        assert_equal(get_instance.datetimecol, instance.datetimecol)
        assert_raises(AttributeError, getattr, get_instance, 'othercol')

    def test_has_defaults(self):
        # Only 'strcol' is written; every other mapped column must come
        # back as its declared default.
        key = 'TestColumnFamilyMap.test_has_defaults'
        self.cf.insert(key, {'strcol': '1'})
        instance = self.map.get(key)
        assert_equal(instance.intcol, TestUTF8.intcol.default)
        assert_equal(instance.floatcol, TestUTF8.floatcol.default)
        assert_equal(instance.datetimecol, TestUTF8.datetimecol.default)
        assert_equal(instance.intstrcol, TestUTF8.intstrcol.default)
        assert_equal(instance.floatstrcol, TestUTF8.floatstrcol.default)
        assert_equal(instance.datetimestrcol, TestUTF8.datetimestrcol.default)
class TestMutator(unittest.TestCase):
    """Batch-mutation tests on 'Standard2' and the super column family 'Super1'."""

    def setUp(self):
        """Open the pool and bind the standard and super column families."""
        credentials = {"username": "******", "password": "******"}
        self.pool = ConnectionPool(keyspace="Keyspace1", credentials=credentials)
        self.cf = ColumnFamily(self.pool, "Standard2")
        self.scf = ColumnFamily(self.pool, "Super1")

    def tearDown(self):
        """Remove every row from both column families."""
        for key, cols in self.cf.get_range():
            self.cf.remove(key)
        for key, cols in self.scf.get_range():
            self.scf.remove(key)

    def test_insert(self):
        batch = self.cf.batch()
        for key, cols in ROWS.items():
            batch.insert(key, cols)
        batch.send()
        for key, cols in ROWS.items():
            assert self.cf.get(key) == cols

    def test_insert_supercolumns(self):
        batch = self.scf.batch()
        batch.insert("one", ROWS)
        batch.insert("two", ROWS)
        batch.insert("three", ROWS)
        batch.send()
        assert self.scf.get("one") == ROWS
        assert self.scf.get("two") == ROWS
        assert self.scf.get("three") == ROWS

    def test_queue_size(self):
        # With queue_size=2, row "1" is readable before send() is called
        # while row "3" is not.
        batch = self.cf.batch(queue_size=2)
        batch.insert("1", ROWS["1"])
        batch.insert("2", ROWS["2"])
        batch.insert("3", ROWS["3"])
        assert self.cf.get("1") == ROWS["1"]
        assert_raises(NotFoundException, self.cf.get, "3")
        batch.send()
        for key, cols in ROWS.items():
            assert self.cf.get(key) == cols

    def test_remove_key(self):
        batch = self.cf.batch()
        batch.insert("1", ROWS["1"])
        batch.remove("1")
        batch.send()
        assert_raises(NotFoundException, self.cf.get, "1")

    def test_remove_columns(self):
        batch = self.cf.batch()
        batch.insert("1", {"a": "123", "b": "123"})
        batch.remove("1", ["a"])
        batch.send()
        assert self.cf.get("1") == {"b": "123"}

    def test_remove_supercolumns(self):
        batch = self.scf.batch()
        batch.insert("one", ROWS)
        batch.insert("two", ROWS)
        batch.insert("three", ROWS)
        batch.remove("two", ["b"], "2")
        batch.send()
        assert self.scf.get("one") == ROWS
        assert self.scf.get("two")["2"] == {"a": "234"}
        assert self.scf.get("three") == ROWS

    def test_chained(self):
        batch = self.cf.batch()
        batch.insert("1", ROWS["1"]).insert("2", ROWS["2"]).insert("3", ROWS["3"]).send()
        assert self.cf.get("1") == ROWS["1"]
        assert self.cf.get("2") == ROWS["2"]
        assert self.cf.get("3") == ROWS["3"]

    def test_contextmgr(self):
        # A real `with` block replaces the former exec'd source string, so
        # the body is parsed and linted like any other code.  Row "3" must
        # be readable once the block has exited.
        with self.cf.batch(queue_size=2) as b:
            b.insert('1', ROWS['1'])
            b.insert('2', ROWS['2'])
            b.insert('3', ROWS['3'])
        assert self.cf.get('3') == ROWS['3']

    def test_multi_column_family(self):
        # Uses the pool-level Mutator, which takes the column family as the
        # first argument of every operation.
        batch = batch_mod.Mutator(self.pool)
        cf2 = self.cf
        batch.insert(self.cf, "1", ROWS["1"])
        batch.insert(self.cf, "2", ROWS["2"])
        batch.remove(cf2, "1", ROWS["1"])
        batch.send()
        assert self.cf.get("2") == ROWS["2"]
        assert_raises(NotFoundException, self.cf.get, "1")