Exemple #1
0
def get_connection():
    """
    Creates a connection to Cassandra.

    Returs:
        pool
    """
    cassandra_host = os.environ.get('CASSANDRA_HOST', 'localhost')
    sys_mgr = SystemManager()
    try:
        sys_mgr.describe_ring(KEYSPACE)
    except:
        sys_mgr.create_keyspace(KEYSPACE, SIMPLE_STRATEGY, {'replication_factor': '1'})

    pool = ConnectionPool(KEYSPACE, server_list=[cassandra_host])
    for cf_name in [CF_LOGS, CF_LOGS_BY_APP, CF_LOGS_BY_HOST, CF_LOGS_BY_SEVERITY]:
        try:
            cf = ColumnFamily(pool, cf_name)
        except:
            sys_mgr.create_column_family(KEYSPACE, cf_name, comparator_type=TimeUUIDType())
            cf = ColumnFamily(pool, cf_name)
            cf.get_count(str(uuid.uuid4()))

    sys_mgr.close()

    return pool
Exemple #2
0
 def create_cfs(self):
     """
     Creates the Cassandra Column Families (if not exist)
     """
     sys_mgr = None
     pool = None
     try:
         sys_mgr = SystemManager()
         pool = ConnectionPool(settings.KEYSPACE,
                               server_list=settings.CASSANDRA_HOSTS)
         for cf_name in [
                 CF_LOGS, CF_LOGS_BY_APP, CF_LOGS_BY_HOST,
                 CF_LOGS_BY_SEVERITY
         ]:
             try:
                 cf = ColumnFamily(pool, cf_name)
             except:
                 logger.info("create_cfs(): Creating column family %s",
                             cf_name)
                 sys_mgr.create_column_family(
                     settings.KEYSPACE,
                     cf_name,
                     comparator_type=TimeUUIDType())
                 cf = ColumnFamily(pool, cf_name)
                 cf.get_count(str(uuid.uuid4()))
     finally:
         if pool:
             pool.dispose()
         if sys_mgr:
             sys_mgr.close()
Exemple #3
0
 def create_cfs(self):
     """
     Creates the Cassandra Column Families (if not exist)
     """
     sys_mgr = None
     pool = None
     try:
         sys_mgr = SystemManager()
         pool = ConnectionPool(settings.KEYSPACE, server_list=settings.CASSANDRA_HOSTS)
         for cf_name in [CF_LOGS, CF_LOGS_BY_APP, CF_LOGS_BY_HOST, CF_LOGS_BY_SEVERITY]:
             try:
                 cf = ColumnFamily(pool, cf_name)
             except:
                 logger.info("create_cfs(): Creating column family %s", cf_name)
                 sys_mgr.create_column_family(settings.KEYSPACE, cf_name, comparator_type=TimeUUIDType())
                 cf = ColumnFamily(pool, cf_name)
                 cf.get_count(str(uuid.uuid4()))
     finally:
         if pool:
             pool.dispose()
         if sys_mgr:
             sys_mgr.close()
Exemple #4
0
class Dictionary:
    """
    Nhóm chức năng từ điển:
        * Tra từ Anh-Việt
        * Tra từ Việt-Anh
    """
    def __init__(self):
        # Connect to Cassandra servers
        client = connect(cassandra_hosts)
        self.d = ColumnFamily(client, cassandra_keyspace, 'Dictionary', super=True)
        self.u = ColumnFamily(client, cassandra_keyspace, 'Users', super=True)
        self.e = Error()
    
    def _lookup(self, keyword, dict_type='en_vi'):
        try:
            return self.d.get(dict_type, super_column=str(keyword))
        except (NotFoundException, InvalidRequestException):
            return None
    
    def lookup(self, environ):
        try:
            session_id = environ['request']['session_id']
        except KeyError:
            return self.e.authen_error("Thiếu session_id")
        try:
            self.u.get('session_id', super_column=session_id)
        except (NotFoundException, InvalidRequestException):
            return self.e.authen_error()
        result = self._lookup(environ['request']['keyword'])
        result2 = self._lookup(environ['request']['keyword'], 'vi_en')
        result3 = self._lookup(environ['request']['keyword'], 'en_en')
        if (result is None) and (result2 is None) and (result3 is None):
            return self.e.not_found("Từ khóa bạn tìm không có trong từ điển")
        xml = []
        if result is not None:
            xml.append('<result type="en_vi" keyword="%s" mean="%s" spell="%s" status_code="200"/>' \
                % (xml_format(environ['request']['keyword']), 
                   xml_format(result['nghia']), 
                   xml_format(result['phien_am_quoc_te'])))
        if result2 is not None:
            xml.append('<result type="vi_en" keyword="%s" mean="%s" spell="" status_code="200"/>' \
                % (xml_format(environ['request']['keyword']), 
                   xml_format(result2['nghia'])))
        return '\n\n'.join(xml)

    def total_words(self, dict_type='en_vi'):
        return self.d.get_count(dict_type)
Exemple #5
0
class TestColumnFamily:
    def setUp(self):
        credentials = {'username': '******', 'password': '******'}
        self.client = connect('Keyspace1', credentials=credentials)
        self.cf = ColumnFamily(self.client,
                               'Standard2',
                               write_consistency_level=ConsistencyLevel.ONE,
                               buffer_size=2,
                               timestamp=self.timestamp,
                               dict_class=TestDict)
        try:
            self.timestamp_n = int(self.cf.get('meta')['timestamp'])
        except NotFoundException:
            self.timestamp_n = 0
        self.clear()

    def tearDown(self):
        self.cf.insert('meta', {'timestamp': str(self.timestamp_n)})

    # Since the timestamp passed to Cassandra will be in the same second
    # with the default timestamp function, causing problems with removing
    # and inserting (Cassandra doesn't know which is later), we supply our own
    def timestamp(self):
        self.timestamp_n += 1
        return self.timestamp_n

    def clear(self):
        for key, columns in self.cf.get_range(include_timestamp=True):
            for value, timestamp in columns.itervalues():
                self.timestamp_n = max(self.timestamp_n, timestamp)
            self.cf.remove(key)

    def test_empty(self):
        key = 'TestColumnFamily.test_empty'
        assert_raises(NotFoundException, self.cf.get, key)
        assert len(self.cf.multiget([key])) == 0
        for key, columns in self.cf.get_range():
            assert len(columns) == 0

    def test_insert_get(self):
        key = 'TestColumnFamily.test_insert_get'
        columns = {'1': 'val1', '2': 'val2'}
        assert_raises(NotFoundException, self.cf.get, key)
        self.cf.insert(key, columns)
        assert self.cf.get(key) == columns

    def test_insert_multiget(self):
        key1 = 'TestColumnFamily.test_insert_multiget1'
        columns1 = {'1': 'val1', '2': 'val2'}
        key2 = 'test_insert_multiget1'
        columns2 = {'3': 'val1', '4': 'val2'}
        missing_key = 'key3'

        self.cf.insert(key1, columns1)
        self.cf.insert(key2, columns2)
        rows = self.cf.multiget([key1, key2, missing_key])
        assert len(rows) == 2
        assert rows[key1] == columns1
        assert rows[key2] == columns2
        assert missing_key not in rows

    def test_insert_get_count(self):
        key = 'TestColumnFamily.test_insert_get_count'
        columns = {'1': 'val1', '2': 'val2'}
        self.cf.insert(key, columns)
        assert self.cf.get_count(key) == 2

        assert_equal(self.cf.get_count(key, column_start='1'), 2)
        assert_equal(self.cf.get_count(key, column_finish='2'), 2)
        assert_equal(
            self.cf.get_count(key, column_start='1', column_finish='2'), 2)
        assert_equal(
            self.cf.get_count(key, column_start='1', column_finish='1'), 1)
        assert_equal(self.cf.get_count(key, columns=['1', '2']), 2)
        assert_equal(self.cf.get_count(key, columns=['1']), 1)

    def test_insert_multiget_count(self):
        keys = [
            'TestColumnFamily.test_insert_multiget_count1',
            'TestColumnFamily.test_insert_multiget_count2',
            'TestColumnFamily.test_insert_multiget_count3'
        ]
        columns = {'1': 'val1', '2': 'val2'}
        for key in keys:
            self.cf.insert(key, columns)
        result = self.cf.multiget_count(keys)
        assert_equal(result[keys[0]], 2)
        assert_equal(result[keys[1]], 2)
        assert_equal(result[keys[2]], 2)

        result = self.cf.multiget_count(keys, column_start='1')
        assert_equal(len(result), 3)
        assert_equal(result[keys[0]], 2)

        result = self.cf.multiget_count(keys, column_finish='2')
        assert_equal(len(result), 3)
        assert_equal(result[keys[0]], 2)

        result = self.cf.multiget_count(keys,
                                        column_start='1',
                                        column_finish='2')
        assert_equal(len(result), 3)
        assert_equal(result[keys[0]], 2)

        result = self.cf.multiget_count(keys,
                                        column_start='1',
                                        column_finish='1')
        assert_equal(len(result), 3)
        assert_equal(result[keys[0]], 1)

        result = self.cf.multiget_count(keys, columns=['1', '2'])
        assert_equal(len(result), 3)
        assert_equal(result[keys[0]], 2)

        result = self.cf.multiget_count(keys, columns=['1'])
        assert_equal(len(result), 3)
        assert_equal(result[keys[0]], 1)

    def test_insert_get_range(self):
        keys = [
            'TestColumnFamily.test_insert_get_range%s' % i for i in xrange(5)
        ]
        columns = {'1': 'val1', '2': 'val2'}
        for key in keys:
            self.cf.insert(key, columns)

        rows = list(self.cf.get_range(start=keys[0], finish=keys[-1]))
        assert len(rows) == len(keys)
        for i, (k, c) in enumerate(rows):
            assert k == keys[i]
            assert c == columns

    def test_insert_get_indexed_slices(self):
        indexed_cf = ColumnFamily(self.client, 'Indexed1')

        columns = {'birthdate': 1L}

        key = 'key1'
        indexed_cf.insert(key,
                          columns,
                          write_consistency_level=ConsistencyLevel.ONE)

        key = 'key2'
        indexed_cf.insert(key,
                          columns,
                          write_consistency_level=ConsistencyLevel.ONE)

        key = 'key3'
        indexed_cf.insert(key,
                          columns,
                          write_consistency_level=ConsistencyLevel.ONE)

        expr = index.create_index_expression(column_name='birthdate', value=1L)
        clause = index.create_index_clause([expr])
        result = indexed_cf.get_indexed_slices(clause)
        assert len(result) == 3
        assert result.get('key1') == columns
        assert result.get('key2') == columns
        assert result.get('key3') == columns

    def test_remove(self):
        key = 'TestColumnFamily.test_remove'
        columns = {'1': 'val1', '2': 'val2'}
        self.cf.insert(key, columns)

        self.cf.remove(key, columns=['2'])
        del columns['2']
        assert self.cf.get(key) == {'1': 'val1'}

        self.cf.remove(key)
        assert_raises(NotFoundException, self.cf.get, key)

    def test_dict_class(self):
        key = 'TestColumnFamily.test_dict_class'
        self.cf.insert(key, {'1': 'val1'})
        assert isinstance(self.cf.get(key), TestDict)
Exemple #6
0
class TestColumnFamily:
    def setUp(self):
        credentials = {'username': '******', 'password': '******'}
        self.client = connect('Keyspace1', credentials=credentials)
        self.cf = ColumnFamily(self.client, 'Standard2',
                               write_consistency_level=ConsistencyLevel.ONE,
                               buffer_size=2, timestamp=self.timestamp,
                               dict_class=TestDict)
        try:
            self.timestamp_n = int(self.cf.get('meta')['timestamp'])
        except NotFoundException:
            self.timestamp_n = 0
        self.clear()

    def tearDown(self):
        self.cf.insert('meta', {'timestamp': str(self.timestamp_n)})

    # Since the timestamp passed to Cassandra will be in the same second
    # with the default timestamp function, causing problems with removing
    # and inserting (Cassandra doesn't know which is later), we supply our own
    def timestamp(self):
        self.timestamp_n += 1
        return self.timestamp_n

    def clear(self):
        for key, columns in self.cf.get_range(include_timestamp=True):
            for value, timestamp in columns.itervalues():
                self.timestamp_n = max(self.timestamp_n, timestamp)
            self.cf.remove(key)

    def test_empty(self):
        key = 'TestColumnFamily.test_empty'
        assert_raises(NotFoundException, self.cf.get, key)
        assert len(self.cf.multiget([key])) == 0
        for key, columns in self.cf.get_range():
            assert len(columns) == 0

    def test_insert_get(self):
        key = 'TestColumnFamily.test_insert_get'
        columns = {'1': 'val1', '2': 'val2'}
        assert_raises(NotFoundException, self.cf.get, key)
        self.cf.insert(key, columns)
        assert self.cf.get(key) == columns

    def test_insert_multiget(self):
        key1 = 'TestColumnFamily.test_insert_multiget1'
        columns1 = {'1': 'val1', '2': 'val2'}
        key2 = 'test_insert_multiget1'
        columns2 = {'3': 'val1', '4': 'val2'}
        missing_key = 'key3'

        self.cf.insert(key1, columns1)
        self.cf.insert(key2, columns2)
        rows = self.cf.multiget([key1, key2, missing_key])
        assert len(rows) == 2
        assert rows[key1] == columns1
        assert rows[key2] == columns2
        assert missing_key not in rows

    def test_insert_get_count(self):
        key = 'TestColumnFamily.test_insert_get_count'
        columns = {'1': 'val1', '2': 'val2'}
        self.cf.insert(key, columns)
        assert self.cf.get_count(key) == 2

        assert_equal(self.cf.get_count(key, column_start='1'), 2)
        assert_equal(self.cf.get_count(key, column_finish='2'), 2)
        assert_equal(self.cf.get_count(key, column_start='1', column_finish='2'), 2)
        assert_equal(self.cf.get_count(key, column_start='1', column_finish='1'), 1)
        assert_equal(self.cf.get_count(key, columns=['1','2']), 2)
        assert_equal(self.cf.get_count(key, columns=['1']), 1)

    def test_insert_multiget_count(self):
        keys = ['TestColumnFamily.test_insert_multiget_count1',
               'TestColumnFamily.test_insert_multiget_count2',
               'TestColumnFamily.test_insert_multiget_count3']
        columns = {'1': 'val1', '2': 'val2'}
        for key in keys:
            self.cf.insert(key, columns)
        result = self.cf.multiget_count(keys)
        assert_equal(result[keys[0]], 2)
        assert_equal(result[keys[1]], 2)
        assert_equal(result[keys[2]], 2)

        result = self.cf.multiget_count(keys, column_start='1')
        assert_equal(len(result), 3)
        assert_equal(result[keys[0]], 2)

        result = self.cf.multiget_count(keys, column_finish='2')
        assert_equal(len(result), 3)
        assert_equal(result[keys[0]], 2)

        result = self.cf.multiget_count(keys, column_start='1', column_finish='2')
        assert_equal(len(result), 3)
        assert_equal(result[keys[0]], 2)

        result = self.cf.multiget_count(keys, column_start='1', column_finish='1')
        assert_equal(len(result), 3)
        assert_equal(result[keys[0]], 1)

        result = self.cf.multiget_count(keys, columns=['1','2'])
        assert_equal(len(result), 3)
        assert_equal(result[keys[0]], 2)

        result = self.cf.multiget_count(keys, columns=['1'])
        assert_equal(len(result), 3)
        assert_equal(result[keys[0]], 1)

    def test_insert_get_range(self):
        keys = ['TestColumnFamily.test_insert_get_range%s' % i for i in xrange(5)]
        columns = {'1': 'val1', '2': 'val2'}
        for key in keys:
            self.cf.insert(key, columns)

        rows = list(self.cf.get_range(start=keys[0], finish=keys[-1]))
        assert len(rows) == len(keys)
        for i, (k, c) in enumerate(rows):
            assert k == keys[i]
            assert c == columns

    def test_get_range_batching(self):
        self.cf.truncate()

        keys = []
        columns = {'c': 'v'}
        for i in range(100, 201):
            keys.append('key%d' % i)
            self.cf.insert('key%d' % i, columns)

        for i in range(201, 301):
            self.cf.insert('key%d' % i, columns)

        count = 0
        for (k,v) in self.cf.get_range(row_count=100, buffer_size=10):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 100)

        count = 0
        for (k,v) in self.cf.get_range(row_count=100, buffer_size=1000):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 100)

        count = 0
        for (k,v) in self.cf.get_range(row_count=100, buffer_size=150):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 100)

        count = 0
        for (k,v) in self.cf.get_range(row_count=100, buffer_size=7):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 100)

        count = 0
        for (k,v) in self.cf.get_range(row_count=100, buffer_size=2):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 100)

        # Put the remaining keys in our list
        for i in range(201, 301):
            keys.append('key%d' % i)

        count = 0
        for (k,v) in self.cf.get_range(row_count=10000, buffer_size=2):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 201)

        count = 0
        for (k,v) in self.cf.get_range(row_count=10000, buffer_size=7):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 201)

        count = 0
        for (k,v) in self.cf.get_range(row_count=10000, buffer_size=200):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 201)

        count = 0
        for (k,v) in self.cf.get_range(row_count=10000, buffer_size=10000):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 201)

        # Don't give a row count
        count = 0
        for (k,v) in self.cf.get_range(buffer_size=2):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 201)

        count = 0
        for (k,v) in self.cf.get_range(buffer_size=77):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 201)

        count = 0
        for (k,v) in self.cf.get_range(buffer_size=200):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 201)

        count = 0
        for (k,v) in self.cf.get_range(buffer_size=10000):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 201)

        self.cf.truncate()

    def insert_insert_get_indexed_slices(self):
        indexed_cf = ColumnFamily(self.client, 'Indexed1')

        columns = {'birthdate': 1L}

        keys = []
        for i in range(1,4):
            indexed_cf.insert('key%d' % i, columns)
            keys.append('key%d')

        expr = index.create_index_expression(column_name='birthdate', value=1L)
        clause = index.create_index_clause([expr])

        count = 0
        for key,cols in indexed_cf.get_indexed_slices(clause):
            assert cols == columns
            assert key in keys
            count += 1
        assert_equal(count, 3)

    def test_get_indexed_slices_batching(self):
        indexed_cf = ColumnFamily(self.client, 'Indexed1')

        columns = {'birthdate': 1L}

        for i in range(200):
            indexed_cf.insert('key%d' % i, columns)

        expr = index.create_index_expression(column_name='birthdate', value=1L)
        clause = index.create_index_clause([expr], count=10)

        result = list(indexed_cf.get_indexed_slices(clause, buffer_size=2))
        assert_equal(len(result), 10)
        result = list(indexed_cf.get_indexed_slices(clause, buffer_size=10))
        assert_equal(len(result), 10)
        result = list(indexed_cf.get_indexed_slices(clause, buffer_size=77))
        assert_equal(len(result), 10)
        result = list(indexed_cf.get_indexed_slices(clause, buffer_size=200))
        assert_equal(len(result), 10)
        result = list(indexed_cf.get_indexed_slices(clause, buffer_size=1000))
        assert_equal(len(result), 10)

        clause = index.create_index_clause([expr], count=250)

        result = list(indexed_cf.get_indexed_slices(clause, buffer_size=2))
        assert_equal(len(result), 200)
        result = list(indexed_cf.get_indexed_slices(clause, buffer_size=10))
        assert_equal(len(result), 200)
        result = list(indexed_cf.get_indexed_slices(clause, buffer_size=77))
        assert_equal(len(result), 200)
        result = list(indexed_cf.get_indexed_slices(clause, buffer_size=200))
        assert_equal(len(result), 200)
        result = list(indexed_cf.get_indexed_slices(clause, buffer_size=1000))
        assert_equal(len(result), 200)

    def test_remove(self):
        key = 'TestColumnFamily.test_remove'
        columns = {'1': 'val1', '2': 'val2'}
        self.cf.insert(key, columns)

        self.cf.remove(key, columns=['2'])
        del columns['2']
        assert self.cf.get(key) == {'1': 'val1'}

        self.cf.remove(key)
        assert_raises(NotFoundException, self.cf.get, key)

    def test_dict_class(self):
        key = 'TestColumnFamily.test_dict_class'
        self.cf.insert(key, {'1': 'val1'})
        assert isinstance(self.cf.get(key), TestDict)
class TestColumnFamily:
    def setUp(self):
        self.client = connect()
        self.client.login('Keyspace1', {'username': '******', 'password': '******'})
        self.cf = ColumnFamily(self.client, 'Keyspace1', 'Standard2',
                               write_consistency_level=ConsistencyLevel.ONE,
                               buffer_size=2, timestamp=self.timestamp,
                               dict_class=TestDict)
        try:
            self.timestamp_n = int(self.cf.get('meta')['timestamp'])
        except NotFoundException:
            self.timestamp_n = 0
        self.clear()

    def tearDown(self):
        self.cf.insert('meta', {'timestamp': str(self.timestamp_n)})

    # Since the timestamp passed to Cassandra will be in the same second
    # with the default timestamp function, causing problems with removing
    # and inserting (Cassandra doesn't know which is later), we supply our own
    def timestamp(self):
        self.timestamp_n += 1
        return self.timestamp_n

    def clear(self):
        for key, columns in self.cf.get_range(include_timestamp=True):
            for value, timestamp in columns.itervalues():
                self.timestamp_n = max(self.timestamp_n, timestamp)
            self.cf.remove(key)

    def test_empty(self):
        key = 'TestColumnFamily.test_empty'
        assert_raises(NotFoundException, self.cf.get, key)
        assert len(self.cf.multiget([key])) == 0
        for key, columns in self.cf.get_range():
            assert len(columns) == 0

    def test_insert_get(self):
        key = 'TestColumnFamily.test_insert_get'
        columns = {'1': 'val1', '2': 'val2'}
        assert_raises(NotFoundException, self.cf.get, key)
        self.cf.insert(key, columns)
        assert self.cf.get(key) == columns

    def test_insert_multiget(self):
        key1 = 'TestColumnFamily.test_insert_multiget1'
        columns1 = {'1': 'val1', '2': 'val2'}
        key2 = 'test_insert_multiget1'
        columns2 = {'3': 'val1', '4': 'val2'}
        missing_key = 'key3'

        self.cf.insert(key1, columns1)
        self.cf.insert(key2, columns2)
        rows = self.cf.multiget([key1, key2, missing_key])
        assert len(rows) == 2
        assert rows[key1] == columns1
        assert rows[key2] == columns2
        assert missing_key not in rows

    def test_insert_get_count(self):
        key = 'TestColumnFamily.test_insert_get_count'
        columns = {'1': 'val1', '2': 'val2'}
        self.cf.insert(key, columns)
        assert self.cf.get_count(key) == 2

    def test_insert_get_range(self):
        keys = ['TestColumnFamily.test_insert_get_range%s' % i for i in xrange(5)]
        columns = {'1': 'val1', '2': 'val2'}
        for key in keys:
            self.cf.insert(key, columns)

        rows = list(self.cf.get_range(start=keys[0], finish=keys[-1]))
        assert len(rows) == len(keys)
        for i, (k, c) in enumerate(rows):
            assert k == keys[i]
            assert c == columns

    def test_remove(self):
        key = 'TestColumnFamily.test_remove'
        columns = {'1': 'val1', '2': 'val2'}
        self.cf.insert(key, columns)

        self.cf.remove(key, columns=['2'])
        del columns['2']
        assert self.cf.get(key) == {'1': 'val1'}

        self.cf.remove(key)
        assert_raises(NotFoundException, self.cf.get, key)

    def test_dict_class(self):
        key = 'TestColumnFamily.test_dict_class'
        self.cf.insert(key, {'1': 'val1'})
        assert isinstance(self.cf.get(key), TestDict)
Exemple #8
0
    def create_cfs(self):
        """
        Creates the Cassandra Column Families (if not exist)
        """
        sys_mgr = None
        pool = None
        try:
            sys_mgr = SystemManager()
            pool = ConnectionPool(settings.KEYSPACE,
                                  server_list=settings.CASSANDRA_HOSTS)

            try:
                cf = ColumnFamily(pool, CF_LOGS)
            except:
                logger.info("create_cfs(): Creating column family %s", CF_LOGS)
                #========================================
                # Column key -> CompositeType
                #========================================
                # 1. UUID + Timestamp
                # 2. Host / Origin
                # 3. Application
                # 4. Severiry
                comparator = CompositeType(TimeUUIDType(), UTF8Type(),
                                           UTF8Type(), UTF8Type())
                sys_mgr.create_column_family(settings.KEYSPACE,
                                             CF_LOGS,
                                             comparator_type=comparator)
                cf = ColumnFamily(pool, CF_LOGS)
                # cf.get_count(str(uuid.uuid4()))

            try:
                cf = ColumnFamily(pool, CF_METADATA)
            except:
                logger.info("create_cfs(): Creating column family %s",
                            CF_METADATA)
                sys_mgr.create_column_family(settings.KEYSPACE,
                                             CF_METADATA,
                                             comparator_type=UTF8Type())
                cf = ColumnFamily(pool, CF_METADATA)
                cf.get_count(str(uuid.uuid4()))

            try:
                cf = ColumnFamily(pool, CF_TIMESTAMP_BITMAP)
            except:
                logger.info("create_cfs(): Creating column family %s",
                            CF_TIMESTAMP_BITMAP)
                sys_mgr.create_column_family(settings.KEYSPACE,
                                             CF_TIMESTAMP_BITMAP,
                                             comparator_type=IntegerType())
                cf = ColumnFamily(pool, CF_TIMESTAMP_BITMAP)

            try:
                cf = ColumnFamily(pool, CF_MULTI_MESSAGELOGS)
            except:
                logger.info("create_cfs(): Creating column family %s",
                            CF_MULTI_MESSAGELOGS)
                sys_mgr.create_column_family(settings.KEYSPACE,
                                             CF_MULTI_MESSAGELOGS,
                                             comparator_type=UTF8Type())
                cf = ColumnFamily(pool, CF_MULTI_MESSAGELOGS)

                sys_mgr.create_index(settings.KEYSPACE,
                                     CF_MULTI_MESSAGELOGS,
                                     'meta:host',
                                     UTF8_TYPE,
                                     index_name='multimsg_host_index')
                sys_mgr.create_index(settings.KEYSPACE,
                                     CF_MULTI_MESSAGELOGS,
                                     'meta:application',
                                     UTF8_TYPE,
                                     index_name='multimsg_application_index')
                sys_mgr.create_index(settings.KEYSPACE,
                                     CF_MULTI_MESSAGELOGS,
                                     'meta:status',
                                     UTF8_TYPE,
                                     index_name='multimsg_finish_status_index')

        finally:
            if pool:
                pool.dispose()
            if sys_mgr:
                sys_mgr.close()
Exemple #9
0
    def create_cfs(self):
        """
        Creates the Cassandra Column Families (if not exist)
        """
        sys_mgr = None
        pool = None
        try:
            sys_mgr = SystemManager()
            pool = ConnectionPool(settings.KEYSPACE, server_list=settings.CASSANDRA_HOSTS)

            try:
                cf = ColumnFamily(pool, CF_LOGS)
            except:
                logger.info("create_cfs(): Creating column family %s", CF_LOGS)
                #========================================
                # Column key -> CompositeType
                #========================================
                # 1. UUID + Timestamp
                # 2. Host / Origin
                # 3. Application
                # 4. Severiry
                comparator = CompositeType(
                    TimeUUIDType(),
                    UTF8Type(),
                    UTF8Type(),
                    UTF8Type()
                )
                sys_mgr.create_column_family(settings.KEYSPACE,
                    CF_LOGS, comparator_type=comparator)
                cf = ColumnFamily(pool, CF_LOGS)
                # cf.get_count(str(uuid.uuid4()))

            try:
                cf = ColumnFamily(pool, CF_METADATA)
            except:
                logger.info("create_cfs(): Creating column family %s", CF_METADATA)
                sys_mgr.create_column_family(settings.KEYSPACE,
                    CF_METADATA, comparator_type=UTF8Type())
                cf = ColumnFamily(pool, CF_METADATA)
                cf.get_count(str(uuid.uuid4()))

            try:
                cf = ColumnFamily(pool, CF_TIMESTAMP_BITMAP)
            except:
                logger.info("create_cfs(): Creating column family %s", CF_TIMESTAMP_BITMAP)
                sys_mgr.create_column_family(settings.KEYSPACE,
                    CF_TIMESTAMP_BITMAP, comparator_type=IntegerType())
                cf = ColumnFamily(pool, CF_TIMESTAMP_BITMAP)

            try:
                cf = ColumnFamily(pool, CF_MULTI_MESSAGELOGS)
            except:
                logger.info("create_cfs(): Creating column family %s", CF_MULTI_MESSAGELOGS)
                sys_mgr.create_column_family(settings.KEYSPACE,
                    CF_MULTI_MESSAGELOGS, comparator_type=UTF8Type())
                cf = ColumnFamily(pool, CF_MULTI_MESSAGELOGS)

                sys_mgr.create_index(settings.KEYSPACE, CF_MULTI_MESSAGELOGS,
                    'meta:host', UTF8_TYPE, index_name='multimsg_host_index')
                sys_mgr.create_index(settings.KEYSPACE, CF_MULTI_MESSAGELOGS,
                    'meta:application', UTF8_TYPE, index_name='multimsg_application_index')
                sys_mgr.create_index(settings.KEYSPACE, CF_MULTI_MESSAGELOGS,
                    'meta:status', UTF8_TYPE, index_name='multimsg_finish_status_index')

        finally:
            if pool:
                pool.dispose()
            if sys_mgr:
                sys_mgr.close()
Exemple #10
0
 def get_count(self, colfam, key):
     cf = ColumnFamily(self.db, colfam)
     return cf.get_count(key)
Exemple #11
0
class TestColumnFamily(unittest.TestCase):

    def setUp(self):
        credentials = {'username': '******', 'password': '******'}
        self.pool = ConnectionPool(keyspace='Keyspace1', credentials=credentials)
        self.cf = ColumnFamily(self.pool, 'Standard2', dict_class=TestDict)

    def tearDown(self):
        for key, columns in self.cf.get_range():
            self.cf.remove(key)

    def test_empty(self):
        key = 'TestColumnFamily.test_empty'
        assert_raises(NotFoundException, self.cf.get, key)
        assert_equal(len(self.cf.multiget([key])), 0)
        for key, columns in self.cf.get_range():
            assert_equal(len(columns), 0)

    def test_insert_get(self):
        key = 'TestColumnFamily.test_insert_get'
        columns = {'1': 'val1', '2': 'val2'}
        assert_raises(NotFoundException, self.cf.get, key)
        self.cf.insert(key, columns)
        assert_equal(self.cf.get(key), columns)

    def test_insert_multiget(self):
        key1 = 'TestColumnFamily.test_insert_multiget1'
        columns1 = {'1': 'val1', '2': 'val2'}
        key2 = 'test_insert_multiget1'
        columns2 = {'3': 'val1', '4': 'val2'}
        missing_key = 'key3'

        self.cf.insert(key1, columns1)
        self.cf.insert(key2, columns2)
        rows = self.cf.multiget([key1, key2, missing_key])
        assert_equal(len(rows), 2)
        assert_equal(rows[key1], columns1)
        assert_equal(rows[key2], columns2)
        assert_true(missing_key not in rows)

    def test_insert_get_count(self):
        key = 'TestColumnFamily.test_insert_get_count'
        columns = {'1': 'val1', '2': 'val2'}
        self.cf.insert(key, columns)
        assert_equal(self.cf.get_count(key), 2)

        assert_equal(self.cf.get_count(key, column_start='1'), 2)
        assert_equal(self.cf.get_count(key, column_finish='2'), 2)
        assert_equal(self.cf.get_count(key, column_start='1', column_finish='2'), 2)
        assert_equal(self.cf.get_count(key, column_start='1', column_finish='1'), 1)
        assert_equal(self.cf.get_count(key, columns=['1','2']), 2)
        assert_equal(self.cf.get_count(key, columns=['1']), 1)

    def test_insert_multiget_count(self):
        keys = ['TestColumnFamily.test_insert_multiget_count1',
               'TestColumnFamily.test_insert_multiget_count2',
               'TestColumnFamily.test_insert_multiget_count3']
        columns = {'1': 'val1', '2': 'val2'}
        for key in keys:
            self.cf.insert(key, columns)
        result = self.cf.multiget_count(keys)
        assert_equal(result[keys[0]], 2)
        assert_equal(result[keys[1]], 2)
        assert_equal(result[keys[2]], 2)

        result = self.cf.multiget_count(keys, column_start='1')
        assert_equal(len(result), 3)
        assert_equal(result[keys[0]], 2)

        result = self.cf.multiget_count(keys, column_finish='2')
        assert_equal(len(result), 3)
        assert_equal(result[keys[0]], 2)

        result = self.cf.multiget_count(keys, column_start='1', column_finish='2')
        assert_equal(len(result), 3)
        assert_equal(result[keys[0]], 2)

        result = self.cf.multiget_count(keys, column_start='1', column_finish='1')
        assert_equal(len(result), 3)
        assert_equal(result[keys[0]], 1)

        result = self.cf.multiget_count(keys, columns=['1','2'])
        assert_equal(len(result), 3)
        assert_equal(result[keys[0]], 2)

        result = self.cf.multiget_count(keys, columns=['1'])
        assert_equal(len(result), 3)
        assert_equal(result[keys[0]], 1)

    def test_insert_get_range(self):
        keys = ['TestColumnFamily.test_insert_get_range%s' % i for i in xrange(5)]
        columns = {'1': 'val1', '2': 'val2'}
        for key in keys:
            self.cf.insert(key, columns)

        rows = list(self.cf.get_range(start=keys[0], finish=keys[-1]))
        assert_equal(len(rows), len(keys))
        for i, (k, c) in enumerate(rows):
            assert_equal(k, keys[i])
            assert_equal(c, columns)

    def test_get_range_batching(self):
        self.cf.truncate()

        keys = []
        columns = {'c': 'v'}
        for i in range(100, 201):
            keys.append('key%d' % i)
            self.cf.insert('key%d' % i, columns)

        for i in range(201, 301):
            self.cf.insert('key%d' % i, columns)

        count = 0
        for (k,v) in self.cf.get_range(row_count=100, buffer_size=10):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 100)

        count = 0
        for (k,v) in self.cf.get_range(row_count=100, buffer_size=1000):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 100)

        count = 0
        for (k,v) in self.cf.get_range(row_count=100, buffer_size=150):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 100)

        count = 0
        for (k,v) in self.cf.get_range(row_count=100, buffer_size=7):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 100)

        count = 0
        for (k,v) in self.cf.get_range(row_count=100, buffer_size=2):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 100)

        # Put the remaining keys in our list
        for i in range(201, 301):
            keys.append('key%d' % i)

        count = 0
        for (k,v) in self.cf.get_range(row_count=10000, buffer_size=2):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 201)

        count = 0
        for (k,v) in self.cf.get_range(row_count=10000, buffer_size=7):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 201)

        count = 0
        for (k,v) in self.cf.get_range(row_count=10000, buffer_size=200):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 201)

        count = 0
        for (k,v) in self.cf.get_range(row_count=10000, buffer_size=10000):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 201)

        # Don't give a row count
        count = 0
        for (k,v) in self.cf.get_range(buffer_size=2):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 201)

        count = 0
        for (k,v) in self.cf.get_range(buffer_size=77):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 201)

        count = 0
        for (k,v) in self.cf.get_range(buffer_size=200):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 201)

        count = 0
        for (k,v) in self.cf.get_range(buffer_size=10000):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 201)

        self.cf.truncate()

    def insert_insert_get_indexed_slices(self):
        indexed_cf = ColumnFamily(self.pool, 'Indexed1')

        columns = {'birthdate': 1L}

        keys = []
        for i in range(1,4):
            indexed_cf.insert('key%d' % i, columns)
            keys.append('key%d')

        expr = index.create_index_expression(column_name='birthdate', value=1L)
        clause = index.create_index_clause([expr])

        count = 0
        for key,cols in indexed_cf.get_indexed_slices(clause):
            assert_equal(cols, columns)
            assert key in keys
            count += 1
        assert_equal(count, 3)

    def test_get_indexed_slices_batching(self):
        indexed_cf = ColumnFamily(self.pool, 'Indexed1')

        columns = {'birthdate': 1L}

        for i in range(200):
            indexed_cf.insert('key%d' % i, columns)

        expr = index.create_index_expression(column_name='birthdate', value=1L)
        clause = index.create_index_clause([expr], count=10)

        result = list(indexed_cf.get_indexed_slices(clause, buffer_size=2))
        assert_equal(len(result), 10)
        result = list(indexed_cf.get_indexed_slices(clause, buffer_size=10))
        assert_equal(len(result), 10)
        result = list(indexed_cf.get_indexed_slices(clause, buffer_size=77))
        assert_equal(len(result), 10)
        result = list(indexed_cf.get_indexed_slices(clause, buffer_size=200))
        assert_equal(len(result), 10)
        result = list(indexed_cf.get_indexed_slices(clause, buffer_size=1000))
        assert_equal(len(result), 10)

        clause = index.create_index_clause([expr], count=250)

        result = list(indexed_cf.get_indexed_slices(clause, buffer_size=2))
        assert_equal(len(result), 200)
        result = list(indexed_cf.get_indexed_slices(clause, buffer_size=10))
        assert_equal(len(result), 200)
        result = list(indexed_cf.get_indexed_slices(clause, buffer_size=77))
        assert_equal(len(result), 200)
        result = list(indexed_cf.get_indexed_slices(clause, buffer_size=200))
        assert_equal(len(result), 200)
        result = list(indexed_cf.get_indexed_slices(clause, buffer_size=1000))
        assert_equal(len(result), 200)

    def test_remove(self):
        key = 'TestColumnFamily.test_remove'
        columns = {'1': 'val1', '2': 'val2'}
        self.cf.insert(key, columns)

        self.cf.remove(key, columns=['2'])
        del columns['2']
        assert_equal(self.cf.get(key), {'1': 'val1'})

        self.cf.remove(key)
        assert_raises(NotFoundException, self.cf.get, key)

    def test_dict_class(self):
        key = 'TestColumnFamily.test_dict_class'
        self.cf.insert(key, {'1': 'val1'})
        assert isinstance(self.cf.get(key), TestDict)