def get_connection():
    """
    Creates a connection to Cassandra, creating the keyspace and the
    log column families first when they cannot be opened.

    The host is read from the CASSANDRA_HOST environment variable
    (default: 'localhost') and is used both for schema management and
    for the connection pool.

    Returns:
        pool: a ConnectionPool bound to KEYSPACE. The caller owns it.
    """
    cassandra_host = os.environ.get('CASSANDRA_HOST', 'localhost')
    # Use the same host for schema changes as for the pool; the original
    # always talked to the SystemManager default server.
    sys_mgr = SystemManager(cassandra_host)
    try:
        try:
            sys_mgr.describe_ring(KEYSPACE)
        except Exception:
            # Presumably the keyspace does not exist yet: create it.
            sys_mgr.create_keyspace(KEYSPACE, SIMPLE_STRATEGY,
                                    {'replication_factor': '1'})

        pool = ConnectionPool(KEYSPACE, server_list=[cassandra_host])
        try:
            for cf_name in (CF_LOGS, CF_LOGS_BY_APP, CF_LOGS_BY_HOST,
                            CF_LOGS_BY_SEVERITY):
                try:
                    ColumnFamily(pool, cf_name)
                except Exception:
                    # Could not open it: create it, then do a dummy read
                    # to make sure the new column family is usable.
                    sys_mgr.create_column_family(
                        KEYSPACE, cf_name, comparator_type=TimeUUIDType())
                    cf = ColumnFamily(pool, cf_name)
                    cf.get_count(str(uuid.uuid4()))
        except Exception:
            # Do not leak the pool when the schema setup fails.
            pool.dispose()
            raise
        return pool
    finally:
        # Always release the schema-management connection.
        sys_mgr.close()
def create_cfs(self):
    """
    Creates the Cassandra Column Families (if not exist).

    Each of the log column families is opened; when opening fails it is
    created with a TimeUUIDType comparator and read once as a sanity
    check. The temporary pool and system manager are always released.
    """
    sys_mgr = None
    pool = None
    try:
        sys_mgr = SystemManager()
        pool = ConnectionPool(settings.KEYSPACE,
                              server_list=settings.CASSANDRA_HOSTS)
        for cf_name in (CF_LOGS, CF_LOGS_BY_APP, CF_LOGS_BY_HOST,
                        CF_LOGS_BY_SEVERITY):
            try:
                ColumnFamily(pool, cf_name)
            except Exception:
                # 'Exception' rather than a bare except, so that
                # KeyboardInterrupt / SystemExit are not swallowed.
                logger.info("create_cfs(): Creating column family %s",
                            cf_name)
                sys_mgr.create_column_family(
                    settings.KEYSPACE, cf_name,
                    comparator_type=TimeUUIDType())
                cf = ColumnFamily(pool, cf_name)
                cf.get_count(str(uuid.uuid4()))
    finally:
        # Compare against None: cleanup must not depend on the
        # truthiness of the resource objects.
        if pool is not None:
            pool.dispose()
        if sys_mgr is not None:
            sys_mgr.close()
def create_cfs(self):
    """
    Creates the Cassandra Column Families (if not exist).

    Opens CF_LOGS, CF_LOGS_BY_APP, CF_LOGS_BY_HOST and
    CF_LOGS_BY_SEVERITY in turn. A column family that cannot be opened is
    created (TimeUUIDType comparator) and then read once to verify it.
    """
    cf_names = [CF_LOGS, CF_LOGS_BY_APP, CF_LOGS_BY_HOST,
                CF_LOGS_BY_SEVERITY]
    sys_mgr = None
    pool = None
    try:
        sys_mgr = SystemManager()
        pool = ConnectionPool(settings.KEYSPACE,
                              server_list=settings.CASSANDRA_HOSTS)
        for cf_name in cf_names:
            try:
                ColumnFamily(pool, cf_name)
                continue
            except Exception:
                # Narrowed from a bare except so that interpreter-exit
                # signals (KeyboardInterrupt, SystemExit) propagate.
                logger.info("create_cfs(): Creating column family %s",
                            cf_name)
            sys_mgr.create_column_family(settings.KEYSPACE, cf_name,
                                         comparator_type=TimeUUIDType())
            # Dummy read against a random key: checks the new column
            # family is reachable through the pool.
            ColumnFamily(pool, cf_name).get_count(str(uuid.uuid4()))
    finally:
        # Release whatever was acquired, even on failure.
        if pool is not None:
            pool.dispose()
        if sys_mgr is not None:
            sys_mgr.close()
class Dictionary:
    """
    Dictionary features:
        * English-Vietnamese lookup
        * Vietnamese-English lookup
    """

    def __init__(self):
        # Connect to the Cassandra servers and open both super column
        # families used by this class.
        client = connect(cassandra_hosts)
        self.d = ColumnFamily(client, cassandra_keyspace, 'Dictionary',
                              super=True)
        self.u = ColumnFamily(client, cassandra_keyspace, 'Users',
                              super=True)
        self.e = Error()

    def _lookup(self, keyword, dict_type='en_vi'):
        """Return the entry for `keyword` in `dict_type`, or None."""
        try:
            entry = self.d.get(dict_type, super_column=str(keyword))
        except (NotFoundException, InvalidRequestException):
            entry = None
        return entry

    def lookup(self, environ):
        """
        Look up environ['request']['keyword'] for an authenticated session.

        Returns an authentication error when the session id is missing or
        unknown, a not-found error when no dictionary has the keyword,
        otherwise the <result/> elements joined by blank lines.
        """
        try:
            request = environ['request']
            session_id = request['session_id']
        except KeyError:
            return self.e.authen_error("Thiếu session_id")

        try:
            self.u.get('session_id', super_column=session_id)
        except (NotFoundException, InvalidRequestException):
            return self.e.authen_error()

        keyword = request['keyword']
        en_vi = self._lookup(keyword)
        vi_en = self._lookup(keyword, 'vi_en')
        en_en = self._lookup(keyword, 'en_en')

        if en_vi is None and vi_en is None and en_en is None:
            return self.e.not_found("Từ khóa bạn tìm không có trong từ điển")

        # NOTE(review): an 'en_en' hit prevents the not-found error but is
        # never rendered below - confirm whether that is intended.
        xml = []
        if en_vi is not None:
            values = (xml_format(keyword),
                      xml_format(en_vi['nghia']),
                      xml_format(en_vi['phien_am_quoc_te']))
            xml.append('<result type="en_vi" keyword="%s" mean="%s" spell="%s" status_code="200"/>' % values)
        if vi_en is not None:
            values = (xml_format(keyword), xml_format(vi_en['nghia']))
            xml.append('<result type="vi_en" keyword="%s" mean="%s" spell="" status_code="200"/>' % values)
        return '\n\n'.join(xml)

    def total_words(self, dict_type='en_vi'):
        """Return the number of entries stored under `dict_type`."""
        word_count = self.d.get_count(dict_type)
        return word_count
class TestColumnFamily: def setUp(self): credentials = {'username': '******', 'password': '******'} self.client = connect('Keyspace1', credentials=credentials) self.cf = ColumnFamily(self.client, 'Standard2', write_consistency_level=ConsistencyLevel.ONE, buffer_size=2, timestamp=self.timestamp, dict_class=TestDict) try: self.timestamp_n = int(self.cf.get('meta')['timestamp']) except NotFoundException: self.timestamp_n = 0 self.clear() def tearDown(self): self.cf.insert('meta', {'timestamp': str(self.timestamp_n)}) # Since the timestamp passed to Cassandra will be in the same second # with the default timestamp function, causing problems with removing # and inserting (Cassandra doesn't know which is later), we supply our own def timestamp(self): self.timestamp_n += 1 return self.timestamp_n def clear(self): for key, columns in self.cf.get_range(include_timestamp=True): for value, timestamp in columns.itervalues(): self.timestamp_n = max(self.timestamp_n, timestamp) self.cf.remove(key) def test_empty(self): key = 'TestColumnFamily.test_empty' assert_raises(NotFoundException, self.cf.get, key) assert len(self.cf.multiget([key])) == 0 for key, columns in self.cf.get_range(): assert len(columns) == 0 def test_insert_get(self): key = 'TestColumnFamily.test_insert_get' columns = {'1': 'val1', '2': 'val2'} assert_raises(NotFoundException, self.cf.get, key) self.cf.insert(key, columns) assert self.cf.get(key) == columns def test_insert_multiget(self): key1 = 'TestColumnFamily.test_insert_multiget1' columns1 = {'1': 'val1', '2': 'val2'} key2 = 'test_insert_multiget1' columns2 = {'3': 'val1', '4': 'val2'} missing_key = 'key3' self.cf.insert(key1, columns1) self.cf.insert(key2, columns2) rows = self.cf.multiget([key1, key2, missing_key]) assert len(rows) == 2 assert rows[key1] == columns1 assert rows[key2] == columns2 assert missing_key not in rows def test_insert_get_count(self): key = 'TestColumnFamily.test_insert_get_count' columns = {'1': 'val1', '2': 'val2'} 
self.cf.insert(key, columns) assert self.cf.get_count(key) == 2 assert_equal(self.cf.get_count(key, column_start='1'), 2) assert_equal(self.cf.get_count(key, column_finish='2'), 2) assert_equal( self.cf.get_count(key, column_start='1', column_finish='2'), 2) assert_equal( self.cf.get_count(key, column_start='1', column_finish='1'), 1) assert_equal(self.cf.get_count(key, columns=['1', '2']), 2) assert_equal(self.cf.get_count(key, columns=['1']), 1) def test_insert_multiget_count(self): keys = [ 'TestColumnFamily.test_insert_multiget_count1', 'TestColumnFamily.test_insert_multiget_count2', 'TestColumnFamily.test_insert_multiget_count3' ] columns = {'1': 'val1', '2': 'val2'} for key in keys: self.cf.insert(key, columns) result = self.cf.multiget_count(keys) assert_equal(result[keys[0]], 2) assert_equal(result[keys[1]], 2) assert_equal(result[keys[2]], 2) result = self.cf.multiget_count(keys, column_start='1') assert_equal(len(result), 3) assert_equal(result[keys[0]], 2) result = self.cf.multiget_count(keys, column_finish='2') assert_equal(len(result), 3) assert_equal(result[keys[0]], 2) result = self.cf.multiget_count(keys, column_start='1', column_finish='2') assert_equal(len(result), 3) assert_equal(result[keys[0]], 2) result = self.cf.multiget_count(keys, column_start='1', column_finish='1') assert_equal(len(result), 3) assert_equal(result[keys[0]], 1) result = self.cf.multiget_count(keys, columns=['1', '2']) assert_equal(len(result), 3) assert_equal(result[keys[0]], 2) result = self.cf.multiget_count(keys, columns=['1']) assert_equal(len(result), 3) assert_equal(result[keys[0]], 1) def test_insert_get_range(self): keys = [ 'TestColumnFamily.test_insert_get_range%s' % i for i in xrange(5) ] columns = {'1': 'val1', '2': 'val2'} for key in keys: self.cf.insert(key, columns) rows = list(self.cf.get_range(start=keys[0], finish=keys[-1])) assert len(rows) == len(keys) for i, (k, c) in enumerate(rows): assert k == keys[i] assert c == columns def 
test_insert_get_indexed_slices(self): indexed_cf = ColumnFamily(self.client, 'Indexed1') columns = {'birthdate': 1L} key = 'key1' indexed_cf.insert(key, columns, write_consistency_level=ConsistencyLevel.ONE) key = 'key2' indexed_cf.insert(key, columns, write_consistency_level=ConsistencyLevel.ONE) key = 'key3' indexed_cf.insert(key, columns, write_consistency_level=ConsistencyLevel.ONE) expr = index.create_index_expression(column_name='birthdate', value=1L) clause = index.create_index_clause([expr]) result = indexed_cf.get_indexed_slices(clause) assert len(result) == 3 assert result.get('key1') == columns assert result.get('key2') == columns assert result.get('key3') == columns def test_remove(self): key = 'TestColumnFamily.test_remove' columns = {'1': 'val1', '2': 'val2'} self.cf.insert(key, columns) self.cf.remove(key, columns=['2']) del columns['2'] assert self.cf.get(key) == {'1': 'val1'} self.cf.remove(key) assert_raises(NotFoundException, self.cf.get, key) def test_dict_class(self): key = 'TestColumnFamily.test_dict_class' self.cf.insert(key, {'1': 'val1'}) assert isinstance(self.cf.get(key), TestDict)
class TestColumnFamily: def setUp(self): credentials = {'username': '******', 'password': '******'} self.client = connect('Keyspace1', credentials=credentials) self.cf = ColumnFamily(self.client, 'Standard2', write_consistency_level=ConsistencyLevel.ONE, buffer_size=2, timestamp=self.timestamp, dict_class=TestDict) try: self.timestamp_n = int(self.cf.get('meta')['timestamp']) except NotFoundException: self.timestamp_n = 0 self.clear() def tearDown(self): self.cf.insert('meta', {'timestamp': str(self.timestamp_n)}) # Since the timestamp passed to Cassandra will be in the same second # with the default timestamp function, causing problems with removing # and inserting (Cassandra doesn't know which is later), we supply our own def timestamp(self): self.timestamp_n += 1 return self.timestamp_n def clear(self): for key, columns in self.cf.get_range(include_timestamp=True): for value, timestamp in columns.itervalues(): self.timestamp_n = max(self.timestamp_n, timestamp) self.cf.remove(key) def test_empty(self): key = 'TestColumnFamily.test_empty' assert_raises(NotFoundException, self.cf.get, key) assert len(self.cf.multiget([key])) == 0 for key, columns in self.cf.get_range(): assert len(columns) == 0 def test_insert_get(self): key = 'TestColumnFamily.test_insert_get' columns = {'1': 'val1', '2': 'val2'} assert_raises(NotFoundException, self.cf.get, key) self.cf.insert(key, columns) assert self.cf.get(key) == columns def test_insert_multiget(self): key1 = 'TestColumnFamily.test_insert_multiget1' columns1 = {'1': 'val1', '2': 'val2'} key2 = 'test_insert_multiget1' columns2 = {'3': 'val1', '4': 'val2'} missing_key = 'key3' self.cf.insert(key1, columns1) self.cf.insert(key2, columns2) rows = self.cf.multiget([key1, key2, missing_key]) assert len(rows) == 2 assert rows[key1] == columns1 assert rows[key2] == columns2 assert missing_key not in rows def test_insert_get_count(self): key = 'TestColumnFamily.test_insert_get_count' columns = {'1': 'val1', '2': 'val2'} 
self.cf.insert(key, columns) assert self.cf.get_count(key) == 2 assert_equal(self.cf.get_count(key, column_start='1'), 2) assert_equal(self.cf.get_count(key, column_finish='2'), 2) assert_equal(self.cf.get_count(key, column_start='1', column_finish='2'), 2) assert_equal(self.cf.get_count(key, column_start='1', column_finish='1'), 1) assert_equal(self.cf.get_count(key, columns=['1','2']), 2) assert_equal(self.cf.get_count(key, columns=['1']), 1) def test_insert_multiget_count(self): keys = ['TestColumnFamily.test_insert_multiget_count1', 'TestColumnFamily.test_insert_multiget_count2', 'TestColumnFamily.test_insert_multiget_count3'] columns = {'1': 'val1', '2': 'val2'} for key in keys: self.cf.insert(key, columns) result = self.cf.multiget_count(keys) assert_equal(result[keys[0]], 2) assert_equal(result[keys[1]], 2) assert_equal(result[keys[2]], 2) result = self.cf.multiget_count(keys, column_start='1') assert_equal(len(result), 3) assert_equal(result[keys[0]], 2) result = self.cf.multiget_count(keys, column_finish='2') assert_equal(len(result), 3) assert_equal(result[keys[0]], 2) result = self.cf.multiget_count(keys, column_start='1', column_finish='2') assert_equal(len(result), 3) assert_equal(result[keys[0]], 2) result = self.cf.multiget_count(keys, column_start='1', column_finish='1') assert_equal(len(result), 3) assert_equal(result[keys[0]], 1) result = self.cf.multiget_count(keys, columns=['1','2']) assert_equal(len(result), 3) assert_equal(result[keys[0]], 2) result = self.cf.multiget_count(keys, columns=['1']) assert_equal(len(result), 3) assert_equal(result[keys[0]], 1) def test_insert_get_range(self): keys = ['TestColumnFamily.test_insert_get_range%s' % i for i in xrange(5)] columns = {'1': 'val1', '2': 'val2'} for key in keys: self.cf.insert(key, columns) rows = list(self.cf.get_range(start=keys[0], finish=keys[-1])) assert len(rows) == len(keys) for i, (k, c) in enumerate(rows): assert k == keys[i] assert c == columns def test_get_range_batching(self): 
self.cf.truncate() keys = [] columns = {'c': 'v'} for i in range(100, 201): keys.append('key%d' % i) self.cf.insert('key%d' % i, columns) for i in range(201, 301): self.cf.insert('key%d' % i, columns) count = 0 for (k,v) in self.cf.get_range(row_count=100, buffer_size=10): assert_true(k in keys, 'key "%s" should be in keys' % k) count += 1 assert_equal(count, 100) count = 0 for (k,v) in self.cf.get_range(row_count=100, buffer_size=1000): assert_true(k in keys, 'key "%s" should be in keys' % k) count += 1 assert_equal(count, 100) count = 0 for (k,v) in self.cf.get_range(row_count=100, buffer_size=150): assert_true(k in keys, 'key "%s" should be in keys' % k) count += 1 assert_equal(count, 100) count = 0 for (k,v) in self.cf.get_range(row_count=100, buffer_size=7): assert_true(k in keys, 'key "%s" should be in keys' % k) count += 1 assert_equal(count, 100) count = 0 for (k,v) in self.cf.get_range(row_count=100, buffer_size=2): assert_true(k in keys, 'key "%s" should be in keys' % k) count += 1 assert_equal(count, 100) # Put the remaining keys in our list for i in range(201, 301): keys.append('key%d' % i) count = 0 for (k,v) in self.cf.get_range(row_count=10000, buffer_size=2): assert_true(k in keys, 'key "%s" should be in keys' % k) count += 1 assert_equal(count, 201) count = 0 for (k,v) in self.cf.get_range(row_count=10000, buffer_size=7): assert_true(k in keys, 'key "%s" should be in keys' % k) count += 1 assert_equal(count, 201) count = 0 for (k,v) in self.cf.get_range(row_count=10000, buffer_size=200): assert_true(k in keys, 'key "%s" should be in keys' % k) count += 1 assert_equal(count, 201) count = 0 for (k,v) in self.cf.get_range(row_count=10000, buffer_size=10000): assert_true(k in keys, 'key "%s" should be in keys' % k) count += 1 assert_equal(count, 201) # Don't give a row count count = 0 for (k,v) in self.cf.get_range(buffer_size=2): assert_true(k in keys, 'key "%s" should be in keys' % k) count += 1 assert_equal(count, 201) count = 0 for (k,v) in 
self.cf.get_range(buffer_size=77): assert_true(k in keys, 'key "%s" should be in keys' % k) count += 1 assert_equal(count, 201) count = 0 for (k,v) in self.cf.get_range(buffer_size=200): assert_true(k in keys, 'key "%s" should be in keys' % k) count += 1 assert_equal(count, 201) count = 0 for (k,v) in self.cf.get_range(buffer_size=10000): assert_true(k in keys, 'key "%s" should be in keys' % k) count += 1 assert_equal(count, 201) self.cf.truncate() def insert_insert_get_indexed_slices(self): indexed_cf = ColumnFamily(self.client, 'Indexed1') columns = {'birthdate': 1L} keys = [] for i in range(1,4): indexed_cf.insert('key%d' % i, columns) keys.append('key%d') expr = index.create_index_expression(column_name='birthdate', value=1L) clause = index.create_index_clause([expr]) count = 0 for key,cols in indexed_cf.get_indexed_slices(clause): assert cols == columns assert key in keys count += 1 assert_equal(count, 3) def test_get_indexed_slices_batching(self): indexed_cf = ColumnFamily(self.client, 'Indexed1') columns = {'birthdate': 1L} for i in range(200): indexed_cf.insert('key%d' % i, columns) expr = index.create_index_expression(column_name='birthdate', value=1L) clause = index.create_index_clause([expr], count=10) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=2)) assert_equal(len(result), 10) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=10)) assert_equal(len(result), 10) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=77)) assert_equal(len(result), 10) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=200)) assert_equal(len(result), 10) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=1000)) assert_equal(len(result), 10) clause = index.create_index_clause([expr], count=250) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=2)) assert_equal(len(result), 200) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=10)) assert_equal(len(result), 200) result = 
list(indexed_cf.get_indexed_slices(clause, buffer_size=77)) assert_equal(len(result), 200) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=200)) assert_equal(len(result), 200) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=1000)) assert_equal(len(result), 200) def test_remove(self): key = 'TestColumnFamily.test_remove' columns = {'1': 'val1', '2': 'val2'} self.cf.insert(key, columns) self.cf.remove(key, columns=['2']) del columns['2'] assert self.cf.get(key) == {'1': 'val1'} self.cf.remove(key) assert_raises(NotFoundException, self.cf.get, key) def test_dict_class(self): key = 'TestColumnFamily.test_dict_class' self.cf.insert(key, {'1': 'val1'}) assert isinstance(self.cf.get(key), TestDict)
class TestColumnFamily:
    """Tests for ColumnFamily using the connect()/login() client API."""

    def setUp(self):
        """Log in to 'Keyspace1', open 'Standard2' and restore the clock."""
        self.client = connect()
        self.client.login('Keyspace1',
                          {'username': '******', 'password': '******'})
        self.cf = ColumnFamily(
            self.client,
            'Keyspace1',
            'Standard2',
            write_consistency_level=ConsistencyLevel.ONE,
            buffer_size=2,
            timestamp=self.timestamp,
            dict_class=TestDict)
        try:
            self.timestamp_n = int(self.cf.get('meta')['timestamp'])
        except NotFoundException:
            self.timestamp_n = 0
        self.clear()

    def tearDown(self):
        """Persist the logical clock under the 'meta' row."""
        self.cf.insert('meta', {'timestamp': str(self.timestamp_n)})

    # The default timestamp function would hand Cassandra timestamps that
    # fall within the same second, so it could not tell which of a remove
    # and an insert is later. We supply our own counter instead.
    def timestamp(self):
        self.timestamp_n = self.timestamp_n + 1
        return self.timestamp_n

    def clear(self):
        """Remove every row, advancing the clock past any stored timestamp."""
        for row_key, row in self.cf.get_range(include_timestamp=True):
            for _value, stamp in row.itervalues():
                self.timestamp_n = max(self.timestamp_n, stamp)
            self.cf.remove(row_key)

    def test_empty(self):
        key = 'TestColumnFamily.test_empty'
        assert_raises(NotFoundException, self.cf.get, key)
        assert len(self.cf.multiget([key])) == 0
        for key, columns in self.cf.get_range():
            assert len(columns) == 0

    def test_insert_get(self):
        key = 'TestColumnFamily.test_insert_get'
        columns = {'1': 'val1', '2': 'val2'}
        assert_raises(NotFoundException, self.cf.get, key)
        self.cf.insert(key, columns)
        assert self.cf.get(key) == columns

    def test_insert_multiget(self):
        missing_key = 'key3'
        key1 = 'TestColumnFamily.test_insert_multiget1'
        key2 = 'test_insert_multiget1'
        columns1 = {'1': 'val1', '2': 'val2'}
        columns2 = {'3': 'val1', '4': 'val2'}
        self.cf.insert(key1, columns1)
        self.cf.insert(key2, columns2)
        rows = self.cf.multiget([key1, key2, missing_key])
        assert len(rows) == 2
        assert rows[key1] == columns1
        assert rows[key2] == columns2
        assert missing_key not in rows

    def test_insert_get_count(self):
        key = 'TestColumnFamily.test_insert_get_count'
        columns = {'1': 'val1', '2': 'val2'}
        self.cf.insert(key, columns)
        stored_count = self.cf.get_count(key)
        assert stored_count == 2

    def test_insert_get_range(self):
        columns = {'1': 'val1', '2': 'val2'}
        keys = []
        for i in xrange(5):
            keys.append('TestColumnFamily.test_insert_get_range%s' % i)
        for key in keys:
            self.cf.insert(key, columns)

        rows = list(self.cf.get_range(start=keys[0], finish=keys[-1]))
        assert len(rows) == len(keys)
        for position, (row_key, row_columns) in enumerate(rows):
            assert row_key == keys[position]
            assert row_columns == columns

    def test_remove(self):
        key = 'TestColumnFamily.test_remove'
        columns = {'1': 'val1', '2': 'val2'}
        self.cf.insert(key, columns)

        # Removing one column leaves the other in place.
        self.cf.remove(key, columns=['2'])
        del columns['2']
        assert self.cf.get(key) == {'1': 'val1'}

        # Removing the whole row makes it unreadable.
        self.cf.remove(key)
        assert_raises(NotFoundException, self.cf.get, key)

    def test_dict_class(self):
        key = 'TestColumnFamily.test_dict_class'
        self.cf.insert(key, {'1': 'val1'})
        fetched = self.cf.get(key)
        assert isinstance(fetched, TestDict)
def create_cfs(self):
    """
    Creates the Cassandra Column Families (if not exist).

    Handles CF_LOGS, CF_METADATA, CF_TIMESTAMP_BITMAP and
    CF_MULTI_MESSAGELOGS. Each one is opened first and only created when
    opening fails. The temporary pool and system manager are always
    released, even on failure.
    """
    sys_mgr = None
    pool = None
    try:
        sys_mgr = SystemManager()
        pool = ConnectionPool(settings.KEYSPACE,
                              server_list=settings.CASSANDRA_HOSTS)

        # 'except Exception' (not a bare except) is used throughout so
        # that KeyboardInterrupt / SystemExit are not swallowed.
        try:
            cf = ColumnFamily(pool, CF_LOGS)
        except Exception:
            logger.info("create_cfs(): Creating column family %s", CF_LOGS)
            #========================================
            # Column key -> CompositeType
            #========================================
            # 1. UUID + Timestamp
            # 2. Host / Origin
            # 3. Application
            # 4. Severity
            comparator = CompositeType(
                TimeUUIDType(),
                UTF8Type(),
                UTF8Type(),
                UTF8Type()
            )
            sys_mgr.create_column_family(settings.KEYSPACE, CF_LOGS,
                                         comparator_type=comparator)
            cf = ColumnFamily(pool, CF_LOGS)

        try:
            cf = ColumnFamily(pool, CF_METADATA)
        except Exception:
            logger.info("create_cfs(): Creating column family %s",
                        CF_METADATA)
            sys_mgr.create_column_family(settings.KEYSPACE, CF_METADATA,
                                         comparator_type=UTF8Type())
            cf = ColumnFamily(pool, CF_METADATA)
            # Dummy read to check the new column family is usable.
            cf.get_count(str(uuid.uuid4()))

        try:
            cf = ColumnFamily(pool, CF_TIMESTAMP_BITMAP)
        except Exception:
            logger.info("create_cfs(): Creating column family %s",
                        CF_TIMESTAMP_BITMAP)
            sys_mgr.create_column_family(settings.KEYSPACE,
                                         CF_TIMESTAMP_BITMAP,
                                         comparator_type=IntegerType())
            cf = ColumnFamily(pool, CF_TIMESTAMP_BITMAP)

        try:
            cf = ColumnFamily(pool, CF_MULTI_MESSAGELOGS)
        except Exception:
            logger.info("create_cfs(): Creating column family %s",
                        CF_MULTI_MESSAGELOGS)
            sys_mgr.create_column_family(settings.KEYSPACE,
                                         CF_MULTI_MESSAGELOGS,
                                         comparator_type=UTF8Type())
            cf = ColumnFamily(pool, CF_MULTI_MESSAGELOGS)

            # Secondary indexes on the metadata columns of the new
            # column family.
            sys_mgr.create_index(settings.KEYSPACE, CF_MULTI_MESSAGELOGS,
                                 'meta:host', UTF8_TYPE,
                                 index_name='multimsg_host_index')
            sys_mgr.create_index(settings.KEYSPACE, CF_MULTI_MESSAGELOGS,
                                 'meta:application', UTF8_TYPE,
                                 index_name='multimsg_application_index')
            sys_mgr.create_index(settings.KEYSPACE, CF_MULTI_MESSAGELOGS,
                                 'meta:status', UTF8_TYPE,
                                 index_name='multimsg_finish_status_index')
    finally:
        # Compare against None: cleanup must not depend on the
        # truthiness of the resource objects.
        if pool is not None:
            pool.dispose()
        if sys_mgr is not None:
            sys_mgr.close()
def create_cfs(self):
    """
    Creates the Cassandra Column Families (if not exist).

    Handles CF_LOGS, CF_METADATA, CF_TIMESTAMP_BITMAP and
    CF_MULTI_MESSAGELOGS. Each one is opened first and only created when
    opening fails. CF_METADATA is read once after creation, and
    CF_MULTI_MESSAGELOGS gets its three secondary indexes after creation.
    The temporary pool and system manager are always released.
    """
    sys_mgr = None
    pool = None

    def _open_or_create(cf_name, comparator):
        # Open cf_name; on failure create it with `comparator` and reopen.
        # Returns (column_family, created).
        try:
            return ColumnFamily(pool, cf_name), False
        except Exception:
            # Narrowed from a bare except so that KeyboardInterrupt /
            # SystemExit are not swallowed.
            logger.info("create_cfs(): Creating column family %s", cf_name)
            sys_mgr.create_column_family(settings.KEYSPACE, cf_name,
                                         comparator_type=comparator)
            return ColumnFamily(pool, cf_name), True

    try:
        sys_mgr = SystemManager()
        pool = ConnectionPool(settings.KEYSPACE,
                              server_list=settings.CASSANDRA_HOSTS)

        #========================================
        # Column key -> CompositeType
        #========================================
        # 1. UUID + Timestamp
        # 2. Host / Origin
        # 3. Application
        # 4. Severity
        logs_comparator = CompositeType(
            TimeUUIDType(),
            UTF8Type(),
            UTF8Type(),
            UTF8Type()
        )
        _open_or_create(CF_LOGS, logs_comparator)

        cf, created = _open_or_create(CF_METADATA, UTF8Type())
        if created:
            # Dummy read to check the new column family is usable.
            cf.get_count(str(uuid.uuid4()))

        _open_or_create(CF_TIMESTAMP_BITMAP, IntegerType())

        cf, created = _open_or_create(CF_MULTI_MESSAGELOGS, UTF8Type())
        if created:
            # Secondary indexes on the metadata columns of the new
            # column family.
            indexes = (
                ('meta:host', 'multimsg_host_index'),
                ('meta:application', 'multimsg_application_index'),
                ('meta:status', 'multimsg_finish_status_index'),
            )
            for column_name, index_name in indexes:
                sys_mgr.create_index(settings.KEYSPACE,
                                     CF_MULTI_MESSAGELOGS, column_name,
                                     UTF8_TYPE, index_name=index_name)
    finally:
        # Release whatever was acquired, even on failure.
        if pool is not None:
            pool.dispose()
        if sys_mgr is not None:
            sys_mgr.close()
def get_count(self, colfam, key):
    """Return the column count of row `key` in column family `colfam`.

    A ColumnFamily is opened on `self.db` for every call.
    """
    return ColumnFamily(self.db, colfam).get_count(key)
class TestColumnFamily(unittest.TestCase): def setUp(self): credentials = {'username': '******', 'password': '******'} self.pool = ConnectionPool(keyspace='Keyspace1', credentials=credentials) self.cf = ColumnFamily(self.pool, 'Standard2', dict_class=TestDict) def tearDown(self): for key, columns in self.cf.get_range(): self.cf.remove(key) def test_empty(self): key = 'TestColumnFamily.test_empty' assert_raises(NotFoundException, self.cf.get, key) assert_equal(len(self.cf.multiget([key])), 0) for key, columns in self.cf.get_range(): assert_equal(len(columns), 0) def test_insert_get(self): key = 'TestColumnFamily.test_insert_get' columns = {'1': 'val1', '2': 'val2'} assert_raises(NotFoundException, self.cf.get, key) self.cf.insert(key, columns) assert_equal(self.cf.get(key), columns) def test_insert_multiget(self): key1 = 'TestColumnFamily.test_insert_multiget1' columns1 = {'1': 'val1', '2': 'val2'} key2 = 'test_insert_multiget1' columns2 = {'3': 'val1', '4': 'val2'} missing_key = 'key3' self.cf.insert(key1, columns1) self.cf.insert(key2, columns2) rows = self.cf.multiget([key1, key2, missing_key]) assert_equal(len(rows), 2) assert_equal(rows[key1], columns1) assert_equal(rows[key2], columns2) assert_true(missing_key not in rows) def test_insert_get_count(self): key = 'TestColumnFamily.test_insert_get_count' columns = {'1': 'val1', '2': 'val2'} self.cf.insert(key, columns) assert_equal(self.cf.get_count(key), 2) assert_equal(self.cf.get_count(key, column_start='1'), 2) assert_equal(self.cf.get_count(key, column_finish='2'), 2) assert_equal(self.cf.get_count(key, column_start='1', column_finish='2'), 2) assert_equal(self.cf.get_count(key, column_start='1', column_finish='1'), 1) assert_equal(self.cf.get_count(key, columns=['1','2']), 2) assert_equal(self.cf.get_count(key, columns=['1']), 1) def test_insert_multiget_count(self): keys = ['TestColumnFamily.test_insert_multiget_count1', 'TestColumnFamily.test_insert_multiget_count2', 
'TestColumnFamily.test_insert_multiget_count3'] columns = {'1': 'val1', '2': 'val2'} for key in keys: self.cf.insert(key, columns) result = self.cf.multiget_count(keys) assert_equal(result[keys[0]], 2) assert_equal(result[keys[1]], 2) assert_equal(result[keys[2]], 2) result = self.cf.multiget_count(keys, column_start='1') assert_equal(len(result), 3) assert_equal(result[keys[0]], 2) result = self.cf.multiget_count(keys, column_finish='2') assert_equal(len(result), 3) assert_equal(result[keys[0]], 2) result = self.cf.multiget_count(keys, column_start='1', column_finish='2') assert_equal(len(result), 3) assert_equal(result[keys[0]], 2) result = self.cf.multiget_count(keys, column_start='1', column_finish='1') assert_equal(len(result), 3) assert_equal(result[keys[0]], 1) result = self.cf.multiget_count(keys, columns=['1','2']) assert_equal(len(result), 3) assert_equal(result[keys[0]], 2) result = self.cf.multiget_count(keys, columns=['1']) assert_equal(len(result), 3) assert_equal(result[keys[0]], 1) def test_insert_get_range(self): keys = ['TestColumnFamily.test_insert_get_range%s' % i for i in xrange(5)] columns = {'1': 'val1', '2': 'val2'} for key in keys: self.cf.insert(key, columns) rows = list(self.cf.get_range(start=keys[0], finish=keys[-1])) assert_equal(len(rows), len(keys)) for i, (k, c) in enumerate(rows): assert_equal(k, keys[i]) assert_equal(c, columns) def test_get_range_batching(self): self.cf.truncate() keys = [] columns = {'c': 'v'} for i in range(100, 201): keys.append('key%d' % i) self.cf.insert('key%d' % i, columns) for i in range(201, 301): self.cf.insert('key%d' % i, columns) count = 0 for (k,v) in self.cf.get_range(row_count=100, buffer_size=10): assert_true(k in keys, 'key "%s" should be in keys' % k) count += 1 assert_equal(count, 100) count = 0 for (k,v) in self.cf.get_range(row_count=100, buffer_size=1000): assert_true(k in keys, 'key "%s" should be in keys' % k) count += 1 assert_equal(count, 100) count = 0 for (k,v) in 
self.cf.get_range(row_count=100, buffer_size=150): assert_true(k in keys, 'key "%s" should be in keys' % k) count += 1 assert_equal(count, 100) count = 0 for (k,v) in self.cf.get_range(row_count=100, buffer_size=7): assert_true(k in keys, 'key "%s" should be in keys' % k) count += 1 assert_equal(count, 100) count = 0 for (k,v) in self.cf.get_range(row_count=100, buffer_size=2): assert_true(k in keys, 'key "%s" should be in keys' % k) count += 1 assert_equal(count, 100) # Put the remaining keys in our list for i in range(201, 301): keys.append('key%d' % i) count = 0 for (k,v) in self.cf.get_range(row_count=10000, buffer_size=2): assert_true(k in keys, 'key "%s" should be in keys' % k) count += 1 assert_equal(count, 201) count = 0 for (k,v) in self.cf.get_range(row_count=10000, buffer_size=7): assert_true(k in keys, 'key "%s" should be in keys' % k) count += 1 assert_equal(count, 201) count = 0 for (k,v) in self.cf.get_range(row_count=10000, buffer_size=200): assert_true(k in keys, 'key "%s" should be in keys' % k) count += 1 assert_equal(count, 201) count = 0 for (k,v) in self.cf.get_range(row_count=10000, buffer_size=10000): assert_true(k in keys, 'key "%s" should be in keys' % k) count += 1 assert_equal(count, 201) # Don't give a row count count = 0 for (k,v) in self.cf.get_range(buffer_size=2): assert_true(k in keys, 'key "%s" should be in keys' % k) count += 1 assert_equal(count, 201) count = 0 for (k,v) in self.cf.get_range(buffer_size=77): assert_true(k in keys, 'key "%s" should be in keys' % k) count += 1 assert_equal(count, 201) count = 0 for (k,v) in self.cf.get_range(buffer_size=200): assert_true(k in keys, 'key "%s" should be in keys' % k) count += 1 assert_equal(count, 201) count = 0 for (k,v) in self.cf.get_range(buffer_size=10000): assert_true(k in keys, 'key "%s" should be in keys' % k) count += 1 assert_equal(count, 201) self.cf.truncate() def insert_insert_get_indexed_slices(self): indexed_cf = ColumnFamily(self.pool, 'Indexed1') columns = 
{'birthdate': 1L} keys = [] for i in range(1,4): indexed_cf.insert('key%d' % i, columns) keys.append('key%d') expr = index.create_index_expression(column_name='birthdate', value=1L) clause = index.create_index_clause([expr]) count = 0 for key,cols in indexed_cf.get_indexed_slices(clause): assert_equal(cols, columns) assert key in keys count += 1 assert_equal(count, 3) def test_get_indexed_slices_batching(self): indexed_cf = ColumnFamily(self.pool, 'Indexed1') columns = {'birthdate': 1L} for i in range(200): indexed_cf.insert('key%d' % i, columns) expr = index.create_index_expression(column_name='birthdate', value=1L) clause = index.create_index_clause([expr], count=10) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=2)) assert_equal(len(result), 10) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=10)) assert_equal(len(result), 10) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=77)) assert_equal(len(result), 10) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=200)) assert_equal(len(result), 10) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=1000)) assert_equal(len(result), 10) clause = index.create_index_clause([expr], count=250) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=2)) assert_equal(len(result), 200) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=10)) assert_equal(len(result), 200) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=77)) assert_equal(len(result), 200) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=200)) assert_equal(len(result), 200) result = list(indexed_cf.get_indexed_slices(clause, buffer_size=1000)) assert_equal(len(result), 200) def test_remove(self): key = 'TestColumnFamily.test_remove' columns = {'1': 'val1', '2': 'val2'} self.cf.insert(key, columns) self.cf.remove(key, columns=['2']) del columns['2'] assert_equal(self.cf.get(key), {'1': 'val1'}) self.cf.remove(key) 
assert_raises(NotFoundException, self.cf.get, key) def test_dict_class(self): key = 'TestColumnFamily.test_dict_class' self.cf.insert(key, {'1': 'val1'}) assert isinstance(self.cf.get(key), TestDict)