コード例 #1
0
ファイル: test_columnfamily.py プロジェクト: trhowe/pycassa
class TestSuperColumnFamily:
    """Round-trip tests against the 'Super2' column family of 'Keyspace1'.

    Every test writes one row, reads it back through ``get``, ``multiget``
    and ``get_range``, and relies on ``tearDown`` to delete it again.
    """

    def setUp(self):
        """Open a connection pool and bind the 'Super2' column family."""
        credentials = {'username': '******', 'password': '******'}
        self.pool = ConnectionPool(keyspace='Keyspace1', credentials=credentials)
        self.cf = ColumnFamily(self.pool, 'Super2')

    def tearDown(self):
        """Delete every row in the column family and release the pool."""
        try:
            for key, columns in self.cf.get_range():
                self.cf.remove(key)
        finally:
            # setUp opens a new pool for every test; dispose of it even when
            # the cleanup loop fails so connections do not pile up.
            self.pool.dispose()

    def test_super(self):
        """A whole super-column row can be inserted and read back."""
        key = 'TestSuperColumnFamily.test_super'
        columns = {'1': {'sub1': 'val1', 'sub2': 'val2'}, '2': {'sub3': 'val3', 'sub4': 'val4'}}
        # The row must not exist before the insert.
        assert_raises(NotFoundException, self.cf.get, key)
        self.cf.insert(key, columns)
        assert_equal(self.cf.get(key), columns)
        assert_equal(self.cf.multiget([key]), {key: columns})
        assert_equal(list(self.cf.get_range(start=key, finish=key)), [(key, columns)])

    def test_super_column_argument(self):
        """``super_column=`` narrows every read to a single super column."""
        key = 'TestSuperColumnFamily.test_super_columns_argument'
        sub12 = {'sub1': 'val1', 'sub2': 'val2'}
        sub34 = {'sub3': 'val3', 'sub4': 'val4'}
        columns = {'1': sub12, '2': sub34}
        self.cf.insert(key, columns)
        assert_equal(self.cf.get(key, super_column='1'), sub12)
        # Super column '3' was never written.
        assert_raises(NotFoundException, self.cf.get, key, super_column='3')
        assert_equal(self.cf.multiget([key], super_column='1'), {key: sub12})
        assert_equal(list(self.cf.get_range(start=key, finish=key, super_column='1')), [(key, sub12)])
コード例 #2
0
ファイル: test_columnfamily.py プロジェクト: dln/pycassa
class TestSuperColumnFamily:
    def setUp(self):
        credentials = {'username': '******', 'password': '******'}
        self.client = connect_thread_local('Keyspace1', credentials=credentials)
        self.cf = ColumnFamily(self.client, 'Super2',
                               write_consistency_level=ConsistencyLevel.ONE,
                               buffer_size=2, timestamp=self.timestamp,
                               super=True)

        try:
            self.timestamp_n = int(self.cf.get('meta')['meta']['timestamp'])
        except NotFoundException:
            self.timestamp_n = 0
        self.clear()

    def tearDown(self):
        self.cf.insert('meta', {'meta': {'timestamp': str(self.timestamp_n)}})

    # Since the timestamp passed to Cassandra will be in the same second
    # with the default timestamp function, causing problems with removing
    # and inserting (Cassandra doesn't know which is later), we supply our own
    def timestamp(self):
        self.timestamp_n += 1
        return self.timestamp_n

    def clear(self):
        for key, columns in self.cf.get_range(include_timestamp=True):
            for subcolumns in columns.itervalues():
                for value, timestamp in subcolumns.itervalues():
                    self.timestamp_n = max(self.timestamp_n, timestamp)
            self.cf.remove(key)

    def test_super(self):
        key = 'TestSuperColumnFamily.test_super'
        columns = {'1': {'sub1': 'val1', 'sub2': 'val2'}, '2': {'sub3': 'val3', 'sub4': 'val4'}}
        assert_raises(NotFoundException, self.cf.get, key)
        self.cf.insert(key, columns)
        assert self.cf.get(key) == columns
        assert self.cf.multiget([key]) == {key: columns}
        assert list(self.cf.get_range(start=key, finish=key)) == [(key, columns)]

    def test_super_column_argument(self):
        key = 'TestSuperColumnFamily.test_super_columns_argument'
        sub12 = {'sub1': 'val1', 'sub2': 'val2'}
        sub34 = {'sub3': 'val3', 'sub4': 'val4'}
        columns = {'1': sub12, '2': sub34}
        self.cf.insert(key, columns)
        assert self.cf.get(key, super_column='1') == sub12
        assert_raises(NotFoundException, self.cf.get, key, super_column='3')
        assert self.cf.multiget([key], super_column='1') == {key: sub12}
        assert list(self.cf.get_range(start=key, finish=key, super_column='1')) == [(key, sub12)]
コード例 #3
0
class TestSuperColumnFamily:
    def setUp(self):
        self.client = connect_thread_local()
        self.client.login('Keyspace1', {'username': '******', 'password': '******'})
        self.cf = ColumnFamily(self.client, 'Keyspace1', 'Super2',
                               write_consistency_level=ConsistencyLevel.ONE,
                               buffer_size=2, timestamp=self.timestamp,
                               super=True)

        try:
            self.timestamp_n = int(self.cf.get('meta')['meta']['timestamp'])
        except NotFoundException:
            self.timestamp_n = 0
        self.clear()

    def tearDown(self):
        self.cf.insert('meta', {'meta': {'timestamp': str(self.timestamp_n)}})

    # Since the timestamp passed to Cassandra will be in the same second
    # with the default timestamp function, causing problems with removing
    # and inserting (Cassandra doesn't know which is later), we supply our own
    def timestamp(self):
        self.timestamp_n += 1
        return self.timestamp_n

    def clear(self):
        for key, columns in self.cf.get_range(include_timestamp=True):
            for subcolumns in columns.itervalues():
                for value, timestamp in subcolumns.itervalues():
                    self.timestamp_n = max(self.timestamp_n, timestamp)
            self.cf.remove(key)

    def test_super(self):
        key = 'TestSuperColumnFamily.test_super'
        columns = {'1': {'sub1': 'val1', 'sub2': 'val2'}, '2': {'sub3': 'val3', 'sub4': 'val4'}}
        assert_raises(NotFoundException, self.cf.get, key)
        self.cf.insert(key, columns)
        assert self.cf.get(key) == columns
        assert self.cf.multiget([key]) == {key: columns}
        assert list(self.cf.get_range(start=key, finish=key)) == [(key, columns)]

    def test_super_column_argument(self):
        key = 'TestSuperColumnFamily.test_super_columns_argument'
        sub12 = {'sub1': 'val1', 'sub2': 'val2'}
        sub34 = {'sub3': 'val3', 'sub4': 'val4'}
        columns = {'1': sub12, '2': sub34}
        self.cf.insert(key, columns)
        assert self.cf.get(key, super_column='1') == sub12
        assert_raises(NotFoundException, self.cf.get, key, super_column='3')
        assert self.cf.multiget([key], super_column='1') == {key: sub12}
        assert list(self.cf.get_range(start=key, finish=key, super_column='1')) == [(key, sub12)]
コード例 #4
0
class TestSuperColumnFamilyMap:
    """Round-trip test for a ColumnFamilyMap built over the 'Super2' column family."""

    def setUp(self):
        """Open a pool, bind 'Super2' and wrap it in a map of ``TestUTF8``."""
        credentials = {'username': '******', 'password': '******'}
        self.pool = ConnectionPool(keyspace='Keyspace1', credentials=credentials)
        self.cf = ColumnFamily(self.pool, 'Super2')
        self.map = ColumnFamilyMap(TestUTF8, self.cf)

    def tearDown(self):
        """Delete every row in the column family and release the pool."""
        try:
            for key, columns in self.cf.get_range():
                self.cf.remove(key)
        finally:
            # setUp opens a new pool for every test; dispose of it even when
            # the cleanup loop fails so connections do not pile up.
            self.pool.dispose()

    def instance(self, key, super_column):
        """Build a populated ``TestUTF8`` for the given row key and super column.

        Datetimes are truncated to whole seconds before being stored on the
        instance.
        """
        instance = TestUTF8()
        instance.key = key
        instance.super_column = super_column
        instance.strcol = '1'
        instance.intcol = 2
        instance.floatcol = 3.5
        instance.datetimecol = datetime.now().replace(microsecond=0)
        instance.intstrcol = 8
        instance.floatstrcol = 4.6
        instance.datetimestrcol = datetime.now().replace(microsecond=0)

        return instance

    def test_super(self):
        """An inserted instance is returned by get, multiget and get_range."""
        instance = self.instance('TestSuperColumnFamilyMap.test_super', 'super1')
        # The row must not exist before the insert.
        assert_raises(NotFoundException, self.map.get, instance.key)
        self.map.insert(instance)
        res = self.map.get(instance.key)[instance.super_column]
        assert_equal(res, instance)
        assert_equal(self.map.multiget([instance.key])[instance.key][instance.super_column], instance)
        assert_equal(list(self.map.get_range(start=instance.key, finish=instance.key)), [{instance.super_column: instance}])
コード例 #5
0
class TestBigInt(unittest.TestCase):
    """Checks negative integer column names in an INT_TYPE-compared column family."""

    @classmethod
    def setup_class(cls):
        """Create the 'StdInteger' column family used by this class."""
        # Named ``manager`` rather than ``sys`` so the stdlib module is not
        # shadowed; the manager's connection is closed once the call is done.
        manager = SystemManager()
        try:
            manager.create_column_family(TEST_KS,
                                         'StdInteger',
                                         comparator_type=INT_TYPE)
        finally:
            manager.close()

    @classmethod
    def teardown_class(cls):
        """Drop the 'StdInteger' column family again."""
        manager = SystemManager()
        try:
            manager.drop_column_family(TEST_KS, 'StdInteger')
        finally:
            manager.close()

    def setUp(self):
        """Bind the column family and fix the row key used by the test."""
        self.key = 'TestBigInt'
        self.cf = ColumnFamily(pool, 'StdInteger')

    def tearDown(self):
        """Remove the row written by the test."""
        self.cf.remove(self.key)

    def test_negative_integers(self):
        """Each negative column name reads back paired with its own text form."""
        for number in (-1, -12342390, -255, -256, -257):
            self.cf.insert(self.key, {number: str(number)})
        for key, cols in self.cf.get_range():
            # Check every column of the row, not just the first one returned,
            # so a bad round trip of any of the inserted names is caught.
            for name, value in cols.items():
                self.assertEqual(str(name), value)
コード例 #6
0
ファイル: test_autopacking.py プロジェクト: trhowe/pycassa
class TestDefaultValidators(unittest.TestCase):
    def setUp(self):
        credentials = {"username": "******", "password": "******"}
        self.pool = ConnectionPool(pool_size=5, keyspace="Keyspace1", credentials=credentials)
        self.cf_def_valid = ColumnFamily(self.pool, "DefaultValidator")

    def tearDown(self):
        for key, cols in self.cf_def_valid.get_range():
            self.cf_def_valid.remove(key)
        self.pool.dispose()

    def test_default_validated_columns(self):

        key = "key1"

        col_cf = {"aaaaaa": 1L}
        col_cm = {"subcol": TIME1}
        col_ncf = {"aaaaaa": TIME1}
        col_ncm = {"subcol": 1L}

        # Both of these inserts work, as cf allows
        #  longs and cm for 'subcol' allows TIMEUUIDs.
        self.cf_def_valid.insert(key, col_cf)
        self.cf_def_valid.insert(key, col_cm)
        assert self.cf_def_valid.get(key) == {"aaaaaa": 1L, "subcol": TIME1}

        assert_raises(TypeError, self.cf_def_valid.insert, key, col_ncf)
        assert_raises(TypeError, self.cf_def_valid.insert, key, col_ncm)
コード例 #7
0
ファイル: test_autopacking.py プロジェクト: savinos/pycassa
class TestBigInt(unittest.TestCase):
    """Checks negative integer column names in an INT_TYPE-compared column family."""

    @classmethod
    def setup_class(cls):
        """Create the 'StdInteger' column family used by this class."""
        # Named ``manager`` rather than ``sys`` so the stdlib module is not
        # shadowed; the manager's connection is closed once the call is done.
        manager = SystemManager()
        try:
            manager.create_column_family(TEST_KS, 'StdInteger', comparator_type=INT_TYPE)
        finally:
            manager.close()

    @classmethod
    def teardown_class(cls):
        """Drop the 'StdInteger' column family again."""
        manager = SystemManager()
        try:
            manager.drop_column_family(TEST_KS, 'StdInteger')
        finally:
            manager.close()

    def setUp(self):
        """Bind the column family and fix the row key used by the test."""
        self.key = 'TestBigInt'
        self.cf = ColumnFamily(pool, 'StdInteger')

    def tearDown(self):
        """Remove the row written by the test."""
        self.cf.remove(self.key)

    def test_negative_integers(self):
        """Each negative column name reads back paired with its own text form."""
        for number in (-1, -12342390, -255, -256, -257):
            self.cf.insert(self.key, {number: str(number)})
        for key, cols in self.cf.get_range():
            # Check every column of the row, not just the first one returned,
            # so a bad round trip of any of the inserted names is caught.
            for name, value in cols.items():
                self.assertEqual(str(name), value)
コード例 #8
0
class TestSuperColumnFamilyMap:
    def setUp(self):
        credentials = {'username': '******', 'password': '******'}
        self.client = connect_thread_local('Keyspace1', credentials=credentials)
        self.cf = ColumnFamily(self.client, 'Super2',
                               write_consistency_level=ConsistencyLevel.ONE,
                               timestamp=self.timestamp,
                               super=True)
        self.map = ColumnFamilyMap(TestUTF8, self.cf)
        try:
            self.timestamp_n = int(self.cf.get('meta')['meta']['timestamp'])
        except NotFoundException:
            self.timestamp_n = 0
        self.clear()

    def tearDown(self):
        self.cf.insert('meta', {'meta': {'timestamp': str(self.timestamp_n)}})

    # Since the timestamp passed to Cassandra will be in the same second
    # with the default timestamp function, causing problems with removing
    # and inserting (Cassandra doesn't know which is later), we supply our own
    def timestamp(self):
        self.timestamp_n += 1
        return self.timestamp_n

    def clear(self):
        for key, columns in self.cf.get_range(include_timestamp=True):
            for subcolumns in columns.itervalues():
                for value, timestamp in subcolumns.itervalues():
                    self.timestamp_n = max(self.timestamp_n, timestamp)
            self.cf.remove(key)

    def instance(self, key, super_column):
        instance = TestUTF8()
        instance.key = key
        instance.super_column = super_column
        instance.strcol = '1'
        instance.intcol = 2
        instance.floatcol = 3.5
        instance.datetimecol = datetime.now().replace(microsecond=0)
        instance.intstrcol = 8
        instance.floatstrcol = 4.6
        instance.datetimestrcol = datetime.now().replace(microsecond=0)

        return instance

    def test_super(self):
        instance = self.instance('TestSuperColumnFamilyMap.test_super', 'super1')
        assert_raises(NotFoundException, self.map.get, instance.key)
        self.map.insert(instance)
        assert self.map.get(instance.key)[instance.super_column] == instance
        assert self.map.multiget([instance.key])[instance.key][instance.super_column] == instance
        assert list(self.map.get_range(start=instance.key, finish=instance.key)) == [{instance.super_column: instance}]
コード例 #9
0
class TestSuperColumnFamilyMap:
    def setUp(self):
        self.client = connect_thread_local()
        self.client.login('Keyspace1', {'username': '******', 'password': '******'})
        self.cf = ColumnFamily(self.client, 'Keyspace1', 'Super2',
                               write_consistency_level=ConsistencyLevel.ONE,
                               timestamp=self.timestamp,
                               super=True)
        self.map = ColumnFamilyMap(TestUTF8, self.cf)
        try:
            self.timestamp_n = int(self.cf.get('meta')['meta']['timestamp'])
        except NotFoundException:
            self.timestamp_n = 0
        self.clear()

    def tearDown(self):
        self.cf.insert('meta', {'meta': {'timestamp': str(self.timestamp_n)}})

    # Since the timestamp passed to Cassandra will be in the same second
    # with the default timestamp function, causing problems with removing
    # and inserting (Cassandra doesn't know which is later), we supply our own
    def timestamp(self):
        self.timestamp_n += 1
        return self.timestamp_n

    def clear(self):
        for key, columns in self.cf.get_range(include_timestamp=True):
            for subcolumns in columns.itervalues():
                for value, timestamp in subcolumns.itervalues():
                    self.timestamp_n = max(self.timestamp_n, timestamp)
            self.cf.remove(key)

    def instance(self, key, super_column):
        instance = TestUTF8()
        instance.key = key
        instance.super_column = super_column
        instance.strcol = '1'
        instance.intcol = 2
        instance.floatcol = 3.5
        instance.datetimecol = datetime.now().replace(microsecond=0)
        instance.intstrcol = 8
        instance.floatstrcol = 4.6
        instance.datetimestrcol = datetime.now().replace(microsecond=0)

        return instance

    def test_super(self):
        instance = self.instance('TestSuperColumnFamilyMap.test_super', 'super1')
        assert_raises(NotFoundException, self.map.get, instance.key)
        self.map.insert(instance)
        assert self.map.get(instance.key)[instance.super_column] == instance
        assert self.map.multiget([instance.key])[instance.key][instance.super_column] == instance
        assert list(self.map.get_range(start=instance.key, finish=instance.key)) == [{instance.super_column: instance}]
コード例 #10
0
ファイル: cassandradb.py プロジェクト: aaur0/migrateDB
 def exportdata(self, destination=None):
     ''' Export the rows of every non-system keyspace to pickle files.

     Rows are read per column family and written to uniquely named
     ``output/<uuid>.out`` files in batches of at most 100000 rows.  Each
     file holds a dict with the keys ``keyspace``, ``columnfamily`` and
     ``cols`` (the list of items yielded by ``get_range``).

     ``destination`` is accepted for interface compatibility but is not
     used; files always go to the ``output`` directory.
     '''

     logging.info("inside export data method")
     batch_size = 100000

     def flush(batch):
         # Write one batch to its own file; ``with`` closes the handle even
         # when pickling fails.
         filepath = "output/%s.out" % str(uuid1()).replace("-", "")
         with open(filepath, "w") as out:
             pickle.dump(batch, out, protocol=0)
         logging.debug("wrote %d rows to %s", len(batch['cols']), filepath)

     for keyspace in self.sysmanager.list_keyspaces():
         if keyspace == 'system':  # skip the system database
             continue
         pool = ConnectionPool(keyspace, [self.address])
         try:
             columnfamilies = self.sysmanager.get_keyspace_column_families(keyspace)
             for columnfamilyname in columnfamilies.keys():
                 colfamily = ColumnFamily(pool, columnfamilyname)
                 result = {'keyspace': keyspace,
                           'columnfamily': columnfamilyname,
                           'cols': []}
                 for col in colfamily.get_range(column_reversed=True):
                     result['cols'].append(col)
                     if len(result['cols']) == batch_size:
                         flush(result)
                         result['cols'] = []
                 # Write the remainder before moving on; otherwise the rows
                 # collected so far would be dropped when the next column
                 # family starts with a fresh list.
                 if result['cols']:
                     flush(result)
         finally:
             pool.dispose()
コード例 #11
0
class TestAutoPacking:
    def setUp(self):
        credentials = {'username': '******', 'password': '******'}
        self.client = connect_thread_local('Keyspace1',
                                           credentials=credentials)

        self.cf = ColumnFamily(self.client, 'Standard2')

        self.cf_long = ColumnFamily(self.client, 'StdLong')
        self.cf_int = ColumnFamily(self.client, 'StdInteger')
        self.cf_time = ColumnFamily(self.client, 'StdTimeUUID')
        self.cf_lex = ColumnFamily(self.client, 'StdLexicalUUID')
        self.cf_ascii = ColumnFamily(self.client, 'StdAscii')
        self.cf_utf8 = ColumnFamily(self.client, 'StdUTF8')
        self.cf_bytes = ColumnFamily(self.client, 'StdBytes')

        self.cf_suplong = ColumnFamily(self.client, 'SuperLong', super=True)
        self.cf_supint = ColumnFamily(self.client, 'SuperInt', super=True)
        self.cf_suptime = ColumnFamily(self.client, 'SuperTime', super=True)
        self.cf_suplex = ColumnFamily(self.client, 'SuperLex', super=True)
        self.cf_supascii = ColumnFamily(self.client, 'SuperAscii', super=True)
        self.cf_suputf8 = ColumnFamily(self.client, 'SuperUTF8', super=True)
        self.cf_supbytes = ColumnFamily(self.client, 'SuperBytes', super=True)

        self.cf_suplong_sublong = ColumnFamily(self.client,
                                               'SuperLongSubLong',
                                               super=True)
        self.cf_suplong_subint = ColumnFamily(self.client,
                                              'SuperLongSubInt',
                                              super=True)
        self.cf_suplong_subtime = ColumnFamily(self.client,
                                               'SuperLongSubTime',
                                               super=True)
        self.cf_suplong_sublex = ColumnFamily(self.client,
                                              'SuperLongSubLex',
                                              super=True)
        self.cf_suplong_subascii = ColumnFamily(self.client,
                                                'SuperLongSubAscii',
                                                super=True)
        self.cf_suplong_subutf8 = ColumnFamily(self.client,
                                               'SuperLongSubUTF8',
                                               super=True)
        self.cf_suplong_subbytes = ColumnFamily(self.client,
                                                'SuperLongSubBytes',
                                                super=True)

        self.cf_valid_long = ColumnFamily(self.client, 'ValidatorLong')
        self.cf_valid_int = ColumnFamily(self.client, 'ValidatorInt')
        self.cf_valid_time = ColumnFamily(self.client, 'ValidatorTime')
        self.cf_valid_lex = ColumnFamily(self.client, 'ValidatorLex')
        self.cf_valid_ascii = ColumnFamily(self.client, 'ValidatorAscii')
        self.cf_valid_utf8 = ColumnFamily(self.client, 'ValidatorUTF8')
        self.cf_valid_bytes = ColumnFamily(self.client, 'ValidatorBytes')

        self.cf_def_valid = ColumnFamily(self.client, 'DefaultValidator')

        self.cfs = [
            self.cf_long,
            self.cf_int,
            self.cf_time,
            self.cf_lex,
            self.cf_ascii,
            self.cf_utf8,
            self.cf_bytes,
            #
            self.cf_suplong,
            self.cf_supint,
            self.cf_suptime,
            self.cf_suplex,
            self.cf_supascii,
            self.cf_suputf8,
            self.cf_supbytes,
            #
            self.cf_suplong_subint,
            self.cf_suplong_subint,
            self.cf_suplong_subtime,
            self.cf_suplong_sublex,
            self.cf_suplong_subascii,
            self.cf_suplong_subutf8,
            self.cf_suplong_subbytes,
            #
            self.cf_valid_long,
            self.cf_valid_int,
            self.cf_valid_time,
            self.cf_valid_lex,
            self.cf_valid_ascii,
            self.cf_valid_utf8,
            self.cf_valid_bytes,
            #
            self.cf_def_valid,
        ]

        try:
            self.timestamp_n = int(self.cf.get('meta')['timestamp'])
        except NotFoundException:
            self.timestamp_n = 0
        self.clear()

    def tearDown(self):
        self.cf.insert('meta', {'timestamp': str(self.timestamp_n)})

    def timestamp(self):
        self.timestamp_n += 1
        return self.timestamp_n

    def clear(self):
        for key, columns in self.cf.get_range(include_timestamp=True):
            for value, timestamp in columns.itervalues():
                self.timestamp_n = max(self.timestamp_n, timestamp)
            self.cf.remove(key)

        for cf in self.cfs:
            for key, columns in cf.get_range():
                cf.remove(key)

    def test_basic_inserts(self):

        long_col = {1111111111111111L: VALS[0]}
        int_col = {1: VALS[0]}
        time_col = {TIME1: VALS[0]}
        lex_col = {uuid.UUID(bytes='abc abc abc abcd'): VALS[0]}
        ascii_col = {'foo': VALS[0]}
        utf8_col = {u'\u0020': VALS[0]}
        bytes_col = {'bytes': VALS[0]}

        self.cf_long.insert(KEYS[0], long_col)
        self.cf_int.insert(KEYS[0], int_col)
        self.cf_time.insert(KEYS[0], time_col)
        self.cf_lex.insert(KEYS[0], lex_col)
        self.cf_ascii.insert(KEYS[0], ascii_col)
        self.cf_utf8.insert(KEYS[0], utf8_col)
        self.cf_bytes.insert(KEYS[0], bytes_col)

        assert self.cf_long.get(KEYS[0]) == long_col
        assert self.cf_int.get(KEYS[0]) == int_col
        assert self.cf_time.get(KEYS[0]) == time_col
        assert self.cf_lex.get(KEYS[0]) == lex_col
        assert self.cf_ascii.get(KEYS[0]) == ascii_col
        assert self.cf_utf8.get(KEYS[0]) == utf8_col
        assert self.cf_bytes.get(KEYS[0]) == bytes_col

        self.cf_suplong.insert(KEYS[0], {123L: bytes_col})
コード例 #12
0
 def tearDown(self):
     """Delete every row of 'Standard1' in 'PycassaTestKeyspace'."""
     pool = ConnectionPool('PycassaTestKeyspace')
     try:
         cf = ColumnFamily(pool, 'Standard1')
         for key, cols in cf.get_range():
             cf.remove(key)
     finally:
         # This pool exists only for the cleanup; release its connections
         # instead of leaving one open pool behind per test.
         pool.dispose()
コード例 #13
0
    def db_export(self):
        """Dump the Cassandra keyspaces and the ZooKeeper tree to one JSON file.

        The output, written to ``self._args.export_to``, is a JSON object
        with two keys:

        * ``cassandra`` -- ``{keyspace: {column_family: {row_key: columns}}}``
          for each name in ``KEYSPACES`` that is not listed in
          ``omit_keyspaces`` and that exists on the server.  Columns are read
          with ``include_timestamp=True``.
        * ``zookeeper`` -- a JSON-encoded *string* holding a list of
          ``(path, zk.get(path))`` pairs for every leaf node below
          ``<cluster_id>/``.
        """
        db_contents = {'cassandra': {},
                       'zookeeper': {}}

        cassandra_contents = db_contents['cassandra']
        creds = None
        if self._api_args.cassandra_user and self._api_args.cassandra_password:
            creds = {'username': self._api_args.cassandra_user,
                     'password': self._api_args.cassandra_password}
        socket_factory = default_socket_factory
        if ('cassandra_use_ssl' in self._api_args and
                self._api_args.cassandra_use_ssl):
            socket_factory = self._make_ssl_socket_factory(
                self._api_args.cassandra_ca_certs, validate=False)
        sys_mgr = SystemManager(
            self._api_args.cassandra_server_list[0],
            credentials=creds,
            socket_factory=socket_factory)
        try:
            existing_keyspaces = sys_mgr.list_keyspaces()
            for ks_name in set(KEYSPACES) - set(self._args.omit_keyspaces or []):
                # Keyspaces are prefixed with the cluster id when one is set;
                # the dump is keyed by the unprefixed name.
                if self._api_args.cluster_id:
                    full_ks_name = '%s_%s' %(self._api_args.cluster_id, ks_name)
                else:
                    full_ks_name = ks_name
                if full_ks_name not in existing_keyspaces:
                    continue
                cassandra_contents[ks_name] = {}

                pool = ConnectionPool(
                    full_ks_name, self._api_args.cassandra_server_list,
                    pool_timeout=120, max_retries=-1, timeout=5,
                    socket_factory=socket_factory, credentials=creds)
                try:
                    for cf_name in sys_mgr.get_keyspace_column_families(full_ks_name):
                        rows = cassandra_contents[ks_name][cf_name] = {}
                        cf = ColumnFamily(pool, cf_name,
                                          buffer_size=self._args.buffer_size)
                        for r, c in cf.get_range(column_count=10000000,
                                                 include_timestamp=True):
                            rows[r] = c
                finally:
                    # One pool per keyspace; release it once the keyspace is read.
                    pool.dispose()
        finally:
            sys_mgr.close()
        logger.info("Cassandra DB dumped")

        def get_nodes(path):
            # Return (path, data) pairs for every leaf below ``path``.
            # ``path`` always ends with '/', so strip it before taking the
            # last component to test against the ignore list.
            if path[:-1].rpartition('/')[-1] in self._zk_ignore_list:
                return []

            try:
                # Fetch the children once and reuse them below, so a node
                # that disappears mid-walk is handled by this guard.
                children = zk.get_children(path)
                if not children:
                    return [(path, zk.get(path))]
            except kazoo.exceptions.NoNodeError:
                return []

            nodes = []
            for child in children:
                nodes.extend(get_nodes('%s%s/' %(path, child)))

            return nodes

        zk = kazoo.client.KazooClient(self._api_args.zk_server_ip)
        zk.start()
        try:
            nodes = get_nodes(self._api_args.cluster_id+'/')
        finally:
            # Stop the client even when the tree walk raises.
            zk.stop()
        db_contents['zookeeper'] = json.dumps(nodes)
        logger.info("Zookeeper DB dumped")

        with open(self._args.export_to, 'w') as f:
            f.write(json.dumps(db_contents))
        logger.info("DB dump wrote to file %s", self._args.export_to)
コード例 #14
0
class TestAutoPacking:

    def setUp(self):
        credentials = {'username': '******', 'password': '******'}
        self.client = connect_thread_local('Keyspace1', credentials=credentials)

        self.cf       = ColumnFamily(self.client, 'Standard2')

        self.cf_long  = ColumnFamily(self.client, 'StdLong')
        self.cf_int   = ColumnFamily(self.client, 'StdInteger')
        self.cf_time  = ColumnFamily(self.client, 'StdTimeUUID')
        self.cf_lex   = ColumnFamily(self.client, 'StdLexicalUUID')
        self.cf_ascii = ColumnFamily(self.client, 'StdAscii')
        self.cf_utf8  = ColumnFamily(self.client, 'StdUTF8')
        self.cf_bytes = ColumnFamily(self.client, 'StdBytes')

        self.cf_suplong  = ColumnFamily(self.client, 'SuperLong', super=True)
        self.cf_supint   = ColumnFamily(self.client, 'SuperInt', super=True)
        self.cf_suptime  = ColumnFamily(self.client, 'SuperTime', super=True)
        self.cf_suplex   = ColumnFamily(self.client, 'SuperLex', super=True)
        self.cf_supascii = ColumnFamily(self.client, 'SuperAscii', super=True)
        self.cf_suputf8  = ColumnFamily(self.client, 'SuperUTF8', super=True)
        self.cf_supbytes = ColumnFamily(self.client, 'SuperBytes', super=True)

        self.cf_suplong_sublong  = ColumnFamily(self.client, 'SuperLongSubLong', super=True)
        self.cf_suplong_subint   = ColumnFamily(self.client, 'SuperLongSubInt', super=True)
        self.cf_suplong_subtime  = ColumnFamily(self.client, 'SuperLongSubTime', super=True)
        self.cf_suplong_sublex   = ColumnFamily(self.client, 'SuperLongSubLex', super=True)
        self.cf_suplong_subascii = ColumnFamily(self.client, 'SuperLongSubAscii', super=True)
        self.cf_suplong_subutf8  = ColumnFamily(self.client, 'SuperLongSubUTF8', super=True)
        self.cf_suplong_subbytes = ColumnFamily(self.client, 'SuperLongSubBytes', super=True)

        self.cf_valid_long = ColumnFamily(self.client, 'ValidatorLong')
        self.cf_valid_int = ColumnFamily(self.client, 'ValidatorInt')
        self.cf_valid_time = ColumnFamily(self.client, 'ValidatorTime')
        self.cf_valid_lex = ColumnFamily(self.client, 'ValidatorLex')
        self.cf_valid_ascii = ColumnFamily(self.client, 'ValidatorAscii')
        self.cf_valid_utf8 = ColumnFamily(self.client, 'ValidatorUTF8')
        self.cf_valid_bytes = ColumnFamily(self.client, 'ValidatorBytes')

        self.cfs = [self.cf_long, self.cf_int, self.cf_time, self.cf_lex,
                    self.cf_ascii, self.cf_utf8, self.cf_bytes,
                    self.cf_suplong, self.cf_supint, self.cf_suptime,
                    self.cf_suplex, self.cf_supascii, self.cf_suputf8,
                    self.cf_supbytes,
                    self.cf_suplong_subint, self.cf_suplong_subint,
                    self.cf_suplong_subtime, self.cf_suplong_sublex,
                    self.cf_suplong_subascii, self.cf_suplong_subutf8,
                    self.cf_suplong_subbytes,
                    self.cf_valid_long, self.cf_valid_int, self.cf_valid_time,
                    self.cf_valid_lex, self.cf_valid_ascii, self.cf_valid_utf8,
                    self.cf_valid_bytes]

        try:
            self.timestamp_n = int(self.cf.get('meta')['timestamp'])
        except NotFoundException:
            self.timestamp_n = 0
        self.clear()

    def tearDown(self):
        self.cf.insert('meta', {'timestamp': str(self.timestamp_n)})

    def timestamp(self):
        self.timestamp_n += 1
        return self.timestamp_n

    def clear(self):
        for key, columns in self.cf.get_range(include_timestamp=True):
            for value, timestamp in columns.itervalues():
                self.timestamp_n = max(self.timestamp_n, timestamp)
            self.cf.remove(key)

        for cf in self.cfs:
            for key, columns in cf.get_range():
                cf.remove(key)

    def test_basic_inserts(self):

        long_col = {1111111111111111L: VALS[0]}
        int_col = {1: VALS[0]}
        time_col = {TIME1: VALS[0]}
        lex_col = {uuid.UUID(bytes='abc abc abc abcd'): VALS[0]}
        ascii_col = {'foo': VALS[0]}
        utf8_col = {u'\u0020': VALS[0]}
        bytes_col = {'bytes': VALS[0]}

        self.cf_long.insert(KEYS[0], long_col)
        self.cf_int.insert(KEYS[0], int_col)
        self.cf_time.insert(KEYS[0], time_col)
        self.cf_lex.insert(KEYS[0], lex_col)
        self.cf_ascii.insert(KEYS[0], ascii_col)
        self.cf_utf8.insert(KEYS[0], utf8_col)
        self.cf_bytes.insert(KEYS[0], bytes_col)

        assert self.cf_long.get(KEYS[0]) == long_col
        assert self.cf_int.get(KEYS[0]) == int_col
        assert self.cf_time.get(KEYS[0]) == time_col
        assert self.cf_lex.get(KEYS[0]) == lex_col
        assert self.cf_ascii.get(KEYS[0]) == ascii_col
        assert self.cf_utf8.get(KEYS[0]) == utf8_col
        assert self.cf_bytes.get(KEYS[0]) == bytes_col

        self.cf_suplong.insert(KEYS[0],  {123L: bytes_col})
コード例 #15
0
class TestColumnFamily:
    def setUp(self):
        self.client = connect()
        self.client.login('Keyspace1', {'username': '******', 'password': '******'})
        self.cf = ColumnFamily(self.client, 'Keyspace1', 'Standard2',
                               write_consistency_level=ConsistencyLevel.ONE,
                               buffer_size=2, timestamp=self.timestamp,
                               dict_class=TestDict)
        try:
            self.timestamp_n = int(self.cf.get('meta')['timestamp'])
        except NotFoundException:
            self.timestamp_n = 0
        self.clear()

    def tearDown(self):
        self.cf.insert('meta', {'timestamp': str(self.timestamp_n)})

    # Since the timestamp passed to Cassandra will be in the same second
    # with the default timestamp function, causing problems with removing
    # and inserting (Cassandra doesn't know which is later), we supply our own
    def timestamp(self):
        self.timestamp_n += 1
        return self.timestamp_n

    def clear(self):
        for key, columns in self.cf.get_range(include_timestamp=True):
            for value, timestamp in columns.itervalues():
                self.timestamp_n = max(self.timestamp_n, timestamp)
            self.cf.remove(key)

    def test_empty(self):
        key = 'TestColumnFamily.test_empty'
        assert_raises(NotFoundException, self.cf.get, key)
        assert len(self.cf.multiget([key])) == 0
        for key, columns in self.cf.get_range():
            assert len(columns) == 0

    def test_insert_get(self):
        key = 'TestColumnFamily.test_insert_get'
        columns = {'1': 'val1', '2': 'val2'}
        assert_raises(NotFoundException, self.cf.get, key)
        self.cf.insert(key, columns)
        assert self.cf.get(key) == columns

    def test_insert_multiget(self):
        key1 = 'TestColumnFamily.test_insert_multiget1'
        columns1 = {'1': 'val1', '2': 'val2'}
        key2 = 'test_insert_multiget1'
        columns2 = {'3': 'val1', '4': 'val2'}
        missing_key = 'key3'

        self.cf.insert(key1, columns1)
        self.cf.insert(key2, columns2)
        rows = self.cf.multiget([key1, key2, missing_key])
        assert len(rows) == 2
        assert rows[key1] == columns1
        assert rows[key2] == columns2
        assert missing_key not in rows

    def test_insert_get_count(self):
        key = 'TestColumnFamily.test_insert_get_count'
        columns = {'1': 'val1', '2': 'val2'}
        self.cf.insert(key, columns)
        assert self.cf.get_count(key) == 2

    def test_insert_get_range(self):
        keys = ['TestColumnFamily.test_insert_get_range%s' % i for i in xrange(5)]
        columns = {'1': 'val1', '2': 'val2'}
        for key in keys:
            self.cf.insert(key, columns)

        rows = list(self.cf.get_range(start=keys[0], finish=keys[-1]))
        assert len(rows) == len(keys)
        for i, (k, c) in enumerate(rows):
            assert k == keys[i]
            assert c == columns

    def test_remove(self):
        key = 'TestColumnFamily.test_remove'
        columns = {'1': 'val1', '2': 'val2'}
        self.cf.insert(key, columns)

        self.cf.remove(key, columns=['2'])
        del columns['2']
        assert self.cf.get(key) == {'1': 'val1'}

        self.cf.remove(key)
        assert_raises(NotFoundException, self.cf.get, key)

    def test_dict_class(self):
        key = 'TestColumnFamily.test_dict_class'
        self.cf.insert(key, {'1': 'val1'})
        assert isinstance(self.cf.get(key), TestDict)
コード例 #16
0
class CassandraImporter:
    """Command-line helper that copies rows of one column family between clusters.

    The same keyspace and column family names are used on the source and the
    destination. What is copied depends on the flags: a single key (-k), up
    to a given number of keys (-c), or every key (-a).
    """

    # A counted import gives up once more than this many reads have failed.
    _MAX_READ_ERRORS = 10
    # Pause, in seconds, after each failed read before trying the next key.
    _ERROR_BACKOFF_SECONDS = 0.2

    def __init__(self):
        """Parse the command line and open both column families.

        Exits the process with status 1 when a connection pool or column
        family cannot be set up.
        """
        parser = argparse.ArgumentParser(
            description='Copy rows of a Cassandra column family from a source '
                        'cluster to a destination cluster.')
        parser.add_argument('-s', '--source',
                            help='Generally the prod cassandra path, list of machines: \
                            localhost:9162 localhost:9163', nargs='*',
                            required=True)
        parser.add_argument('-d', '--destination',
                            help='Cassandra path where you need your data: \
                            localhost:9160 localhost:9161', nargs='*',
                            required=True)
        parser.add_argument('-ks', '--keyspace',
                            help='The keyspace: myks',
                            required=True)
        parser.add_argument('-cf', '--column_family',
                            help='The Column family: mycf',
                            required=True)
        parser.add_argument('-k', '--key',
                            help='A specific key to be imported',
                            required=False)
        # type=int lets argparse reject a non-numeric count with a usage
        # error instead of a ValueError traceback later on.
        parser.add_argument('-c', '--count',
                            help='Total count of keys to be imported',
                            type=int,
                            required=False)
        parser.add_argument('-a', '--all',
                            action='store_true',
                            help='Get all. Not recommended!',
                            required=False)
        args = vars(parser.parse_args())

        # Connection settings. The script is meant to sync data, so the
        # source and destination keyspace and column family should be the same.
        try:
            self.source_cf = self._open_raw_column_family(
                args["keyspace"], args["source"], args["column_family"])
            self.destination_cf = self._open_raw_column_family(
                args["keyspace"], args["destination"], args["column_family"])
        except Exception as e:
            print("ERROR: The keyspace or the column family does not exist or request is timing out!")
            # Surface the underlying error and signal failure to the caller.
            print("Cause: %r" % (e,))
            sys.exit(1)

        # Optional data
        self.count = args["count"]
        self.key = args["key"]
        self.all = args["all"]

    def _open_raw_column_family(self, keyspace, servers, name):
        """Open column family `name` with automatic packing switched off.

        Names, values and keys are not auto-packed and the default validation
        class is UTF8Type -- presumably so rows are copied without type
        conversion; TODO confirm.
        """
        pool = ConnectionPool(keyspace, servers)
        column_family = ColumnFamily(pool, name)
        column_family.autopack_names = False
        column_family.autopack_values = False
        column_family.autopack_keys = False
        column_family.default_validation_class = pycassa.types.UTF8Type()
        return column_family

    def importData(self):
        """Read the requested rows from the source and return {key: columns}.

        Returns an empty dict (after printing a hint) when none of -k, -c or
        -a was given. In count mode the process writes what it has read so
        far and exits with status 1 once too many reads have failed.
        """
        data = dict()
        # Get columns for a key
        if self.key:
            data[self.key] = self.source_cf.get(self.key)

        # Get the first `count` keys returned by the range scan and their columns
        elif self.count:
            counter = 0
            error_count = 0
            for value in self.source_cf.get_range(column_count=0,
                                                  filter_empty=False):
                if counter >= self.count:
                    break
                row_key = value[0]
                try:
                    data[row_key] = self.source_cf.get(row_key, column_count=100)
                    counter += 1
                except NotFoundException:
                    # Ignore keys with empty columns
                    pass
                except Exception:
                    error_count += 1
                    if error_count > self._MAX_READ_ERRORS:
                        # Write the read data
                        self.insertData(data)
                        print("ERROR: Remote cassandra is too slow to read, exiting after writing...")
                        sys.exit(1)

                    # Back off briefly after a failed read before continuing.
                    time.sleep(self._ERROR_BACKOFF_SECONDS)

        # Get All, Not recommended
        elif self.all:
            for value in self.source_cf.get_range(column_count=0,
                                                  filter_empty=False):
                row_key = value[0]
                data[row_key] = self.source_cf.get(row_key)
        else:
            print("Please pass -c or -k or -a arguments!")

        return data

    def insertData(self, data):
        """Insert every (key, columns) pair of `data` into the destination."""
        print("Writing " + str(len(data)) + " keys")
        for key in data:
            self.destination_cf.insert(key, data[key])

    def run(self):
        """Read from the source, write to the destination, reporting progress."""
        self.update_progress(0)
        data = self.importData()
        self.update_progress(50)

        self.insertData(data)
        self.update_progress(100)
        print("Import complete!")

    def update_progress(self, progress):
        """Print a ten-slot progress bar for `progress` given in percent."""
        # Floor division keeps the bar length an int on every interpreter.
        print('\r[{0}] {1}%'.format('#' * (progress // 10), progress))
コード例 #17
0
ファイル: test_columnfamily.py プロジェクト: dln/pycassa
class TestColumnFamily:
    def setUp(self):
        credentials = {'username': '******', 'password': '******'}
        self.client = connect('Keyspace1', credentials=credentials)
        self.cf = ColumnFamily(self.client, 'Standard2',
                               write_consistency_level=ConsistencyLevel.ONE,
                               buffer_size=2, timestamp=self.timestamp,
                               dict_class=TestDict)
        try:
            self.timestamp_n = int(self.cf.get('meta')['timestamp'])
        except NotFoundException:
            self.timestamp_n = 0
        self.clear()

    def tearDown(self):
        self.cf.insert('meta', {'timestamp': str(self.timestamp_n)})

    # Since the timestamp passed to Cassandra will be in the same second
    # with the default timestamp function, causing problems with removing
    # and inserting (Cassandra doesn't know which is later), we supply our own
    def timestamp(self):
        self.timestamp_n += 1
        return self.timestamp_n

    def clear(self):
        for key, columns in self.cf.get_range(include_timestamp=True):
            for value, timestamp in columns.itervalues():
                self.timestamp_n = max(self.timestamp_n, timestamp)
            self.cf.remove(key)

    def test_empty(self):
        key = 'TestColumnFamily.test_empty'
        assert_raises(NotFoundException, self.cf.get, key)
        assert len(self.cf.multiget([key])) == 0
        for key, columns in self.cf.get_range():
            assert len(columns) == 0

    def test_insert_get(self):
        key = 'TestColumnFamily.test_insert_get'
        columns = {'1': 'val1', '2': 'val2'}
        assert_raises(NotFoundException, self.cf.get, key)
        self.cf.insert(key, columns)
        assert self.cf.get(key) == columns

    def test_insert_multiget(self):
        key1 = 'TestColumnFamily.test_insert_multiget1'
        columns1 = {'1': 'val1', '2': 'val2'}
        key2 = 'test_insert_multiget1'
        columns2 = {'3': 'val1', '4': 'val2'}
        missing_key = 'key3'

        self.cf.insert(key1, columns1)
        self.cf.insert(key2, columns2)
        rows = self.cf.multiget([key1, key2, missing_key])
        assert len(rows) == 2
        assert rows[key1] == columns1
        assert rows[key2] == columns2
        assert missing_key not in rows

    def test_insert_get_count(self):
        key = 'TestColumnFamily.test_insert_get_count'
        columns = {'1': 'val1', '2': 'val2'}
        self.cf.insert(key, columns)
        assert self.cf.get_count(key) == 2

        assert_equal(self.cf.get_count(key, column_start='1'), 2)
        assert_equal(self.cf.get_count(key, column_finish='2'), 2)
        assert_equal(self.cf.get_count(key, column_start='1', column_finish='2'), 2)
        assert_equal(self.cf.get_count(key, column_start='1', column_finish='1'), 1)
        assert_equal(self.cf.get_count(key, columns=['1','2']), 2)
        assert_equal(self.cf.get_count(key, columns=['1']), 1)

    def test_insert_multiget_count(self):
        keys = ['TestColumnFamily.test_insert_multiget_count1',
               'TestColumnFamily.test_insert_multiget_count2',
               'TestColumnFamily.test_insert_multiget_count3']
        columns = {'1': 'val1', '2': 'val2'}
        for key in keys:
            self.cf.insert(key, columns)
        result = self.cf.multiget_count(keys)
        assert_equal(result[keys[0]], 2)
        assert_equal(result[keys[1]], 2)
        assert_equal(result[keys[2]], 2)

        result = self.cf.multiget_count(keys, column_start='1')
        assert_equal(len(result), 3)
        assert_equal(result[keys[0]], 2)

        result = self.cf.multiget_count(keys, column_finish='2')
        assert_equal(len(result), 3)
        assert_equal(result[keys[0]], 2)

        result = self.cf.multiget_count(keys, column_start='1', column_finish='2')
        assert_equal(len(result), 3)
        assert_equal(result[keys[0]], 2)

        result = self.cf.multiget_count(keys, column_start='1', column_finish='1')
        assert_equal(len(result), 3)
        assert_equal(result[keys[0]], 1)

        result = self.cf.multiget_count(keys, columns=['1','2'])
        assert_equal(len(result), 3)
        assert_equal(result[keys[0]], 2)

        result = self.cf.multiget_count(keys, columns=['1'])
        assert_equal(len(result), 3)
        assert_equal(result[keys[0]], 1)

    def test_insert_get_range(self):
        keys = ['TestColumnFamily.test_insert_get_range%s' % i for i in xrange(5)]
        columns = {'1': 'val1', '2': 'val2'}
        for key in keys:
            self.cf.insert(key, columns)

        rows = list(self.cf.get_range(start=keys[0], finish=keys[-1]))
        assert len(rows) == len(keys)
        for i, (k, c) in enumerate(rows):
            assert k == keys[i]
            assert c == columns

    def test_get_range_batching(self):
        self.cf.truncate()

        keys = []
        columns = {'c': 'v'}
        for i in range(100, 201):
            keys.append('key%d' % i)
            self.cf.insert('key%d' % i, columns)

        for i in range(201, 301):
            self.cf.insert('key%d' % i, columns)

        count = 0
        for (k,v) in self.cf.get_range(row_count=100, buffer_size=10):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 100)

        count = 0
        for (k,v) in self.cf.get_range(row_count=100, buffer_size=1000):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 100)

        count = 0
        for (k,v) in self.cf.get_range(row_count=100, buffer_size=150):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 100)

        count = 0
        for (k,v) in self.cf.get_range(row_count=100, buffer_size=7):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 100)

        count = 0
        for (k,v) in self.cf.get_range(row_count=100, buffer_size=2):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 100)

        # Put the remaining keys in our list
        for i in range(201, 301):
            keys.append('key%d' % i)

        count = 0
        for (k,v) in self.cf.get_range(row_count=10000, buffer_size=2):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 201)

        count = 0
        for (k,v) in self.cf.get_range(row_count=10000, buffer_size=7):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 201)

        count = 0
        for (k,v) in self.cf.get_range(row_count=10000, buffer_size=200):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 201)

        count = 0
        for (k,v) in self.cf.get_range(row_count=10000, buffer_size=10000):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 201)

        # Don't give a row count
        count = 0
        for (k,v) in self.cf.get_range(buffer_size=2):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 201)

        count = 0
        for (k,v) in self.cf.get_range(buffer_size=77):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 201)

        count = 0
        for (k,v) in self.cf.get_range(buffer_size=200):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 201)

        count = 0
        for (k,v) in self.cf.get_range(buffer_size=10000):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 201)

        self.cf.truncate()

    def insert_insert_get_indexed_slices(self):
        indexed_cf = ColumnFamily(self.client, 'Indexed1')

        columns = {'birthdate': 1L}

        keys = []
        for i in range(1,4):
            indexed_cf.insert('key%d' % i, columns)
            keys.append('key%d')

        expr = index.create_index_expression(column_name='birthdate', value=1L)
        clause = index.create_index_clause([expr])

        count = 0
        for key,cols in indexed_cf.get_indexed_slices(clause):
            assert cols == columns
            assert key in keys
            count += 1
        assert_equal(count, 3)

    def test_get_indexed_slices_batching(self):
        indexed_cf = ColumnFamily(self.client, 'Indexed1')

        columns = {'birthdate': 1L}

        for i in range(200):
            indexed_cf.insert('key%d' % i, columns)

        expr = index.create_index_expression(column_name='birthdate', value=1L)
        clause = index.create_index_clause([expr], count=10)

        result = list(indexed_cf.get_indexed_slices(clause, buffer_size=2))
        assert_equal(len(result), 10)
        result = list(indexed_cf.get_indexed_slices(clause, buffer_size=10))
        assert_equal(len(result), 10)
        result = list(indexed_cf.get_indexed_slices(clause, buffer_size=77))
        assert_equal(len(result), 10)
        result = list(indexed_cf.get_indexed_slices(clause, buffer_size=200))
        assert_equal(len(result), 10)
        result = list(indexed_cf.get_indexed_slices(clause, buffer_size=1000))
        assert_equal(len(result), 10)

        clause = index.create_index_clause([expr], count=250)

        result = list(indexed_cf.get_indexed_slices(clause, buffer_size=2))
        assert_equal(len(result), 200)
        result = list(indexed_cf.get_indexed_slices(clause, buffer_size=10))
        assert_equal(len(result), 200)
        result = list(indexed_cf.get_indexed_slices(clause, buffer_size=77))
        assert_equal(len(result), 200)
        result = list(indexed_cf.get_indexed_slices(clause, buffer_size=200))
        assert_equal(len(result), 200)
        result = list(indexed_cf.get_indexed_slices(clause, buffer_size=1000))
        assert_equal(len(result), 200)

    def test_remove(self):
        key = 'TestColumnFamily.test_remove'
        columns = {'1': 'val1', '2': 'val2'}
        self.cf.insert(key, columns)

        self.cf.remove(key, columns=['2'])
        del columns['2']
        assert self.cf.get(key) == {'1': 'val1'}

        self.cf.remove(key)
        assert_raises(NotFoundException, self.cf.get, key)

    def test_dict_class(self):
        key = 'TestColumnFamily.test_dict_class'
        self.cf.insert(key, {'1': 'val1'})
        assert isinstance(self.cf.get(key), TestDict)
コード例 #18
0
class TestColumnFamilyMap:
    def setUp(self):
        self.client = connect()
        self.client.login('Keyspace1', {'username': '******', 'password': '******'})
        self.cf = ColumnFamily(self.client, 'Keyspace1', 'Standard2',
                               write_consistency_level=ConsistencyLevel.ONE,
                               timestamp=self.timestamp)
        self.map = ColumnFamilyMap(TestUTF8, self.cf)
        self.empty_map = ColumnFamilyMap(TestEmpty, self.cf, raw_columns=True)
        try:
            self.timestamp_n = int(self.cf.get('meta')['timestamp'])
        except NotFoundException:
            self.timestamp_n = 0
        self.clear()

    def tearDown(self):
        self.cf.insert('meta', {'timestamp': str(self.timestamp_n)})

    # Since the timestamp passed to Cassandra will be in the same second
    # with the default timestamp function, causing problems with removing
    # and inserting (Cassandra doesn't know which is later), we supply our own
    def timestamp(self):
        self.timestamp_n += 1
        return self.timestamp_n

    def clear(self):
        for key, columns in self.cf.get_range(include_timestamp=True):
            for value, timestamp in columns.itervalues():
                self.timestamp_n = max(self.timestamp_n, timestamp)
            self.cf.remove(key)

    def instance(self, key):
        instance = TestUTF8()
        instance.key = key
        instance.strcol = '1'
        instance.intcol = 2
        instance.floatcol = 3.5
        instance.datetimecol = datetime.now().replace(microsecond=0)
        instance.intstrcol = 8
        instance.floatstrcol = 4.6
        instance.datetimestrcol = datetime.now().replace(microsecond=0)

        return instance

    def test_will_not_insert_none(self):
        for column in ('strcol', 'intcol', 'floatcol', 'datetimecol',
                       'intstrcol', 'floatstrcol', 'datetimestrcol'):
            instance = self.instance('TestColumnFamilyMap.test_will_not_insert_none')
            setattr(instance, column, None)
            assert_raises(TypeError, self.map.insert, instance)

    def test_empty(self):
        key = 'TestColumnFamilyMap.test_empty'
        assert_raises(NotFoundException, self.map.get, key)
        assert len(self.map.multiget([key])) == 0

    def test_insert_get(self):
        instance = self.instance('TestColumnFamilyMap.test_insert_get')
        assert_raises(NotFoundException, self.map.get, instance.key)
        self.map.insert(instance)
        assert self.map.get(instance.key) == instance
        assert self.empty_map.get(instance.key).raw_columns['intstrcol'] == str(instance.intstrcol)

    def test_insert_multiget(self):
        instance1 = self.instance('TestColumnFamilyMap.test_insert_multiget1')
        instance2 = self.instance('TestColumnFamilyMap.test_insert_multiget2')
        missing_key = 'TestColumnFamilyMap.test_insert_multiget3'

        self.map.insert(instance1)
        self.map.insert(instance2)
        rows = self.map.multiget([instance1.key, instance2.key, missing_key])
        assert len(rows) == 2
        assert rows[instance1.key] == instance1
        assert rows[instance2.key] == instance2
        assert missing_key not in rows
        assert self.empty_map.multiget([instance1.key])[instance1.key].raw_columns['intstrcol'] == str(instance1.intstrcol)

    def test_insert_get_count(self):
        instance = self.instance('TestColumnFamilyMap.test_insert_get_count')
        self.map.insert(instance)
        assert self.map.get_count(instance.key) == 7

    def test_insert_get_range(self):
        instances = []
        for i in xrange(5):
            instance = self.instance('TestColumnFamilyMap.test_insert_get_range%s' % i)
            instances.append(instance)

        for instance in instances:
            self.map.insert(instance)

        rows = list(self.map.get_range(start=instances[0].key, finish=instances[-1].key))
        assert len(rows) == len(instances)
        assert rows == instances
        assert list(self.empty_map.get_range(start=instances[0].key, finish=instances[0].key))[0].raw_columns['intstrcol'] == str(instances[0].intstrcol)

    def test_remove(self):
        instance = self.instance('TestColumnFamilyMap.test_remove')

        self.map.insert(instance)
        self.map.remove(instance)
        assert_raises(NotFoundException, self.map.get, instance.key)

    def test_does_not_insert_extra_column(self):
        instance = self.instance('TestColumnFamilyMap.test_does_not_insert_extra_column')
        instance.othercol = 'Test'

        self.map.insert(instance)

        get_instance = self.map.get(instance.key)
        assert get_instance.strcol == instance.strcol
        assert get_instance.intcol == instance.intcol
        assert get_instance.floatcol == instance.floatcol
        assert get_instance.datetimecol == instance.datetimecol
        assert_raises(AttributeError, getattr, get_instance, 'othercol')

    def test_has_defaults(self):
        key = 'TestColumnFamilyMap.test_has_defaults'
        self.cf.insert(key, {'strcol': '1'})
        instance = self.map.get(key)

        assert instance.intcol == TestUTF8.intcol.default
        assert instance.floatcol == TestUTF8.floatcol.default
        assert instance.datetimecol == TestUTF8.datetimecol.default
        assert instance.intstrcol == TestUTF8.intstrcol.default
        assert instance.floatstrcol == TestUTF8.floatstrcol.default
        assert instance.datetimestrcol == TestUTF8.datetimestrcol.default
コード例 #19
0
ファイル: test_columnfamily.py プロジェクト: trhowe/pycassa
class TestColumnFamily(unittest.TestCase):

    def setUp(self):
        credentials = {'username': '******', 'password': '******'}
        self.pool = ConnectionPool(keyspace='Keyspace1', credentials=credentials)
        self.cf = ColumnFamily(self.pool, 'Standard2', dict_class=TestDict)

    def tearDown(self):
        for key, columns in self.cf.get_range():
            self.cf.remove(key)

    def test_empty(self):
        key = 'TestColumnFamily.test_empty'
        assert_raises(NotFoundException, self.cf.get, key)
        assert_equal(len(self.cf.multiget([key])), 0)
        for key, columns in self.cf.get_range():
            assert_equal(len(columns), 0)

    def test_insert_get(self):
        key = 'TestColumnFamily.test_insert_get'
        columns = {'1': 'val1', '2': 'val2'}
        assert_raises(NotFoundException, self.cf.get, key)
        self.cf.insert(key, columns)
        assert_equal(self.cf.get(key), columns)

    def test_insert_multiget(self):
        key1 = 'TestColumnFamily.test_insert_multiget1'
        columns1 = {'1': 'val1', '2': 'val2'}
        key2 = 'test_insert_multiget1'
        columns2 = {'3': 'val1', '4': 'val2'}
        missing_key = 'key3'

        self.cf.insert(key1, columns1)
        self.cf.insert(key2, columns2)
        rows = self.cf.multiget([key1, key2, missing_key])
        assert_equal(len(rows), 2)
        assert_equal(rows[key1], columns1)
        assert_equal(rows[key2], columns2)
        assert_true(missing_key not in rows)

    def test_insert_get_count(self):
        key = 'TestColumnFamily.test_insert_get_count'
        columns = {'1': 'val1', '2': 'val2'}
        self.cf.insert(key, columns)
        assert_equal(self.cf.get_count(key), 2)

        assert_equal(self.cf.get_count(key, column_start='1'), 2)
        assert_equal(self.cf.get_count(key, column_finish='2'), 2)
        assert_equal(self.cf.get_count(key, column_start='1', column_finish='2'), 2)
        assert_equal(self.cf.get_count(key, column_start='1', column_finish='1'), 1)
        assert_equal(self.cf.get_count(key, columns=['1','2']), 2)
        assert_equal(self.cf.get_count(key, columns=['1']), 1)

    def test_insert_multiget_count(self):
        keys = ['TestColumnFamily.test_insert_multiget_count1',
               'TestColumnFamily.test_insert_multiget_count2',
               'TestColumnFamily.test_insert_multiget_count3']
        columns = {'1': 'val1', '2': 'val2'}
        for key in keys:
            self.cf.insert(key, columns)
        result = self.cf.multiget_count(keys)
        assert_equal(result[keys[0]], 2)
        assert_equal(result[keys[1]], 2)
        assert_equal(result[keys[2]], 2)

        result = self.cf.multiget_count(keys, column_start='1')
        assert_equal(len(result), 3)
        assert_equal(result[keys[0]], 2)

        result = self.cf.multiget_count(keys, column_finish='2')
        assert_equal(len(result), 3)
        assert_equal(result[keys[0]], 2)

        result = self.cf.multiget_count(keys, column_start='1', column_finish='2')
        assert_equal(len(result), 3)
        assert_equal(result[keys[0]], 2)

        result = self.cf.multiget_count(keys, column_start='1', column_finish='1')
        assert_equal(len(result), 3)
        assert_equal(result[keys[0]], 1)

        result = self.cf.multiget_count(keys, columns=['1','2'])
        assert_equal(len(result), 3)
        assert_equal(result[keys[0]], 2)

        result = self.cf.multiget_count(keys, columns=['1'])
        assert_equal(len(result), 3)
        assert_equal(result[keys[0]], 1)

    def test_insert_get_range(self):
        keys = ['TestColumnFamily.test_insert_get_range%s' % i for i in xrange(5)]
        columns = {'1': 'val1', '2': 'val2'}
        for key in keys:
            self.cf.insert(key, columns)

        rows = list(self.cf.get_range(start=keys[0], finish=keys[-1]))
        assert_equal(len(rows), len(keys))
        for i, (k, c) in enumerate(rows):
            assert_equal(k, keys[i])
            assert_equal(c, columns)

    def test_get_range_batching(self):
        self.cf.truncate()

        keys = []
        columns = {'c': 'v'}
        for i in range(100, 201):
            keys.append('key%d' % i)
            self.cf.insert('key%d' % i, columns)

        for i in range(201, 301):
            self.cf.insert('key%d' % i, columns)

        count = 0
        for (k,v) in self.cf.get_range(row_count=100, buffer_size=10):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 100)

        count = 0
        for (k,v) in self.cf.get_range(row_count=100, buffer_size=1000):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 100)

        count = 0
        for (k,v) in self.cf.get_range(row_count=100, buffer_size=150):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 100)

        count = 0
        for (k,v) in self.cf.get_range(row_count=100, buffer_size=7):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 100)

        count = 0
        for (k,v) in self.cf.get_range(row_count=100, buffer_size=2):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 100)

        # Put the remaining keys in our list
        for i in range(201, 301):
            keys.append('key%d' % i)

        count = 0
        for (k,v) in self.cf.get_range(row_count=10000, buffer_size=2):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 201)

        count = 0
        for (k,v) in self.cf.get_range(row_count=10000, buffer_size=7):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 201)

        count = 0
        for (k,v) in self.cf.get_range(row_count=10000, buffer_size=200):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 201)

        count = 0
        for (k,v) in self.cf.get_range(row_count=10000, buffer_size=10000):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 201)

        # Don't give a row count
        count = 0
        for (k,v) in self.cf.get_range(buffer_size=2):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 201)

        count = 0
        for (k,v) in self.cf.get_range(buffer_size=77):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 201)

        count = 0
        for (k,v) in self.cf.get_range(buffer_size=200):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 201)

        count = 0
        for (k,v) in self.cf.get_range(buffer_size=10000):
            assert_true(k in keys, 'key "%s" should be in keys' % k)
            count += 1
        assert_equal(count, 201)

        self.cf.truncate()

    def insert_insert_get_indexed_slices(self):
        indexed_cf = ColumnFamily(self.pool, 'Indexed1')

        columns = {'birthdate': 1L}

        keys = []
        for i in range(1,4):
            indexed_cf.insert('key%d' % i, columns)
            keys.append('key%d')

        expr = index.create_index_expression(column_name='birthdate', value=1L)
        clause = index.create_index_clause([expr])

        count = 0
        for key,cols in indexed_cf.get_indexed_slices(clause):
            assert_equal(cols, columns)
            assert key in keys
            count += 1
        assert_equal(count, 3)

    def test_get_indexed_slices_batching(self):
        indexed_cf = ColumnFamily(self.pool, 'Indexed1')

        columns = {'birthdate': 1L}

        for i in range(200):
            indexed_cf.insert('key%d' % i, columns)

        expr = index.create_index_expression(column_name='birthdate', value=1L)
        clause = index.create_index_clause([expr], count=10)

        result = list(indexed_cf.get_indexed_slices(clause, buffer_size=2))
        assert_equal(len(result), 10)
        result = list(indexed_cf.get_indexed_slices(clause, buffer_size=10))
        assert_equal(len(result), 10)
        result = list(indexed_cf.get_indexed_slices(clause, buffer_size=77))
        assert_equal(len(result), 10)
        result = list(indexed_cf.get_indexed_slices(clause, buffer_size=200))
        assert_equal(len(result), 10)
        result = list(indexed_cf.get_indexed_slices(clause, buffer_size=1000))
        assert_equal(len(result), 10)

        clause = index.create_index_clause([expr], count=250)

        result = list(indexed_cf.get_indexed_slices(clause, buffer_size=2))
        assert_equal(len(result), 200)
        result = list(indexed_cf.get_indexed_slices(clause, buffer_size=10))
        assert_equal(len(result), 200)
        result = list(indexed_cf.get_indexed_slices(clause, buffer_size=77))
        assert_equal(len(result), 200)
        result = list(indexed_cf.get_indexed_slices(clause, buffer_size=200))
        assert_equal(len(result), 200)
        result = list(indexed_cf.get_indexed_slices(clause, buffer_size=1000))
        assert_equal(len(result), 200)

    def test_remove(self):
        key = 'TestColumnFamily.test_remove'
        columns = {'1': 'val1', '2': 'val2'}
        self.cf.insert(key, columns)

        self.cf.remove(key, columns=['2'])
        del columns['2']
        assert_equal(self.cf.get(key), {'1': 'val1'})

        self.cf.remove(key)
        assert_raises(NotFoundException, self.cf.get, key)

    def test_dict_class(self):
        key = 'TestColumnFamily.test_dict_class'
        self.cf.insert(key, {'1': 'val1'})
        assert isinstance(self.cf.get(key), TestDict)
コード例 #20
0
class TestColumnFamily:
    def setUp(self):
        credentials = {'username': '******', 'password': '******'}
        self.client = connect('Keyspace1', credentials=credentials)
        self.cf = ColumnFamily(self.client,
                               'Standard2',
                               write_consistency_level=ConsistencyLevel.ONE,
                               buffer_size=2,
                               timestamp=self.timestamp,
                               dict_class=TestDict)
        try:
            self.timestamp_n = int(self.cf.get('meta')['timestamp'])
        except NotFoundException:
            self.timestamp_n = 0
        self.clear()

    def tearDown(self):
        self.cf.insert('meta', {'timestamp': str(self.timestamp_n)})

    # Since the timestamp passed to Cassandra will be in the same second
    # with the default timestamp function, causing problems with removing
    # and inserting (Cassandra doesn't know which is later), we supply our own
    def timestamp(self):
        self.timestamp_n += 1
        return self.timestamp_n

    def clear(self):
        for key, columns in self.cf.get_range(include_timestamp=True):
            for value, timestamp in columns.itervalues():
                self.timestamp_n = max(self.timestamp_n, timestamp)
            self.cf.remove(key)

    def test_empty(self):
        key = 'TestColumnFamily.test_empty'
        assert_raises(NotFoundException, self.cf.get, key)
        assert len(self.cf.multiget([key])) == 0
        for key, columns in self.cf.get_range():
            assert len(columns) == 0

    def test_insert_get(self):
        key = 'TestColumnFamily.test_insert_get'
        columns = {'1': 'val1', '2': 'val2'}
        assert_raises(NotFoundException, self.cf.get, key)
        self.cf.insert(key, columns)
        assert self.cf.get(key) == columns

    def test_insert_multiget(self):
        key1 = 'TestColumnFamily.test_insert_multiget1'
        columns1 = {'1': 'val1', '2': 'val2'}
        key2 = 'test_insert_multiget1'
        columns2 = {'3': 'val1', '4': 'val2'}
        missing_key = 'key3'

        self.cf.insert(key1, columns1)
        self.cf.insert(key2, columns2)
        rows = self.cf.multiget([key1, key2, missing_key])
        assert len(rows) == 2
        assert rows[key1] == columns1
        assert rows[key2] == columns2
        assert missing_key not in rows

    def test_insert_get_count(self):
        key = 'TestColumnFamily.test_insert_get_count'
        columns = {'1': 'val1', '2': 'val2'}
        self.cf.insert(key, columns)
        assert self.cf.get_count(key) == 2

        assert_equal(self.cf.get_count(key, column_start='1'), 2)
        assert_equal(self.cf.get_count(key, column_finish='2'), 2)
        assert_equal(
            self.cf.get_count(key, column_start='1', column_finish='2'), 2)
        assert_equal(
            self.cf.get_count(key, column_start='1', column_finish='1'), 1)
        assert_equal(self.cf.get_count(key, columns=['1', '2']), 2)
        assert_equal(self.cf.get_count(key, columns=['1']), 1)

    def test_insert_multiget_count(self):
        keys = [
            'TestColumnFamily.test_insert_multiget_count1',
            'TestColumnFamily.test_insert_multiget_count2',
            'TestColumnFamily.test_insert_multiget_count3'
        ]
        columns = {'1': 'val1', '2': 'val2'}
        for key in keys:
            self.cf.insert(key, columns)
        result = self.cf.multiget_count(keys)
        assert_equal(result[keys[0]], 2)
        assert_equal(result[keys[1]], 2)
        assert_equal(result[keys[2]], 2)

        result = self.cf.multiget_count(keys, column_start='1')
        assert_equal(len(result), 3)
        assert_equal(result[keys[0]], 2)

        result = self.cf.multiget_count(keys, column_finish='2')
        assert_equal(len(result), 3)
        assert_equal(result[keys[0]], 2)

        result = self.cf.multiget_count(keys,
                                        column_start='1',
                                        column_finish='2')
        assert_equal(len(result), 3)
        assert_equal(result[keys[0]], 2)

        result = self.cf.multiget_count(keys,
                                        column_start='1',
                                        column_finish='1')
        assert_equal(len(result), 3)
        assert_equal(result[keys[0]], 1)

        result = self.cf.multiget_count(keys, columns=['1', '2'])
        assert_equal(len(result), 3)
        assert_equal(result[keys[0]], 2)

        result = self.cf.multiget_count(keys, columns=['1'])
        assert_equal(len(result), 3)
        assert_equal(result[keys[0]], 1)

    def test_insert_get_range(self):
        keys = [
            'TestColumnFamily.test_insert_get_range%s' % i for i in xrange(5)
        ]
        columns = {'1': 'val1', '2': 'val2'}
        for key in keys:
            self.cf.insert(key, columns)

        rows = list(self.cf.get_range(start=keys[0], finish=keys[-1]))
        assert len(rows) == len(keys)
        for i, (k, c) in enumerate(rows):
            assert k == keys[i]
            assert c == columns

    def test_insert_get_indexed_slices(self):
        indexed_cf = ColumnFamily(self.client, 'Indexed1')

        columns = {'birthdate': 1L}

        key = 'key1'
        indexed_cf.insert(key,
                          columns,
                          write_consistency_level=ConsistencyLevel.ONE)

        key = 'key2'
        indexed_cf.insert(key,
                          columns,
                          write_consistency_level=ConsistencyLevel.ONE)

        key = 'key3'
        indexed_cf.insert(key,
                          columns,
                          write_consistency_level=ConsistencyLevel.ONE)

        expr = index.create_index_expression(column_name='birthdate', value=1L)
        clause = index.create_index_clause([expr])
        result = indexed_cf.get_indexed_slices(clause)
        assert len(result) == 3
        assert result.get('key1') == columns
        assert result.get('key2') == columns
        assert result.get('key3') == columns

    def test_remove(self):
        key = 'TestColumnFamily.test_remove'
        columns = {'1': 'val1', '2': 'val2'}
        self.cf.insert(key, columns)

        self.cf.remove(key, columns=['2'])
        del columns['2']
        assert self.cf.get(key) == {'1': 'val1'}

        self.cf.remove(key)
        assert_raises(NotFoundException, self.cf.get, key)

    def test_dict_class(self):
        key = 'TestColumnFamily.test_dict_class'
        self.cf.insert(key, {'1': 'val1'})
        assert isinstance(self.cf.get(key), TestDict)
コード例 #21
0
 # Dump the rows of every non-system keyspace into pickle files under
 # 'output/', starting a new file each time 100000 rows have been buffered.
 count = 0
 # Best-effort creation of the dump directory; an OSError (for example
 # because it already exists) is deliberately ignored.
 try:
   os.mkdir('output')
 except OSError:
   pass
 for keyspace in self.sysmanager.list_keyspaces():
   if (keyspace != 'system'):  # skip the system keyspace
     pool = ConnectionPool(keyspace, [self.address])
     columnfamilies = self.sysmanager.get_keyspace_column_families(keyspace)
     # Buffer that is pickled whenever the row counter hits the threshold.
     result = {}
     result['cols'] = []
     # iterate through all the column families
     for columnfamilyname in columnfamilies.keys():
       colfamily = ColumnFamily(pool, columnfamilyname)
       cols = colfamily.get_range(column_reversed=True)
       result['keyspace'] = keyspace
       result['columnfamily'] = columnfamilyname
       for col in cols:
         result['cols'].append(col)
         count = count + 1
         # once 100000 rows are buffered, flush them to a new file
         if (count == 100000):
           filename = "%s.out" % str(uuid1()).replace("-", "")
           filepath = "output/%s" % filename
           # protocol=-1 selects a binary pickle format, so open in "wb";
           # the with-block closes the file even if dump() raises
           with open(filepath, "wb") as dump_file:
             pickle.dump(result, dump_file, protocol=-1)
           print(sys.getsizeof(result))  # shallow size of the dict, in bytes
           # reset the buffer and the counter so later batches flush too
           result['cols'] = []
           count = 0
     # NOTE(review): rows buffered below the threshold are not written by
     # this fragment; confirm whether a final flush happens elsewhere.
コード例 #22
0
class CassandraImporter:
    """Command-line helper that copies rows of one column family between clusters.

    The same keyspace and column family names are used on the source and the
    destination. The rows to copy are chosen with -k (one key), -c (a number
    of keys) or -a (everything).
    """

    def __init__(self):
        """Parse the command line and open both column families.

        Exits the process with status 1 when the connections cannot be set up.
        """
        parser = argparse.ArgumentParser(
            description="Copy rows of a column family from one Cassandra cluster to another.")
        parser.add_argument(
            "-s",
            "--source",
            help="Generally the prod cassandra path, list of machines: \
                            localhost:9162 localhost:9163",
            nargs="*",
            required=True,
        )
        parser.add_argument(
            "-d",
            "--destination",
            help="Cassandra path where you need your data: \
                            localhost:9160 localhost:9161",
            nargs="*",
            required=True,
        )
        parser.add_argument("-ks", "--keyspace", help="The keyspace: myks", required=True)
        parser.add_argument("-cf", "--column_family", help="The Column family: mycf", required=True)
        parser.add_argument("-k", "--key", help="A specific key to be imported", required=False)
        # type=int lets argparse reject a non-numeric count with a usage error.
        parser.add_argument("-c", "--count", type=int, help="Total count of keys to be imported", required=False)
        parser.add_argument("-a", "--all", action="store_true", help="Get all. Not recommended!", required=False)
        args = vars(parser.parse_args())

        # Connection settings. The script is meant to sync data, so the source
        # and destination keyspace and column family should be the same.
        try:
            source_pool = ConnectionPool(args["keyspace"], args["source"])
            destination_pool = ConnectionPool(args["keyspace"], args["destination"])
            self.source_cf = ColumnFamily(source_pool, args["column_family"])
            self.source_cf.autopack_names = False
            self.source_cf.autopack_values = False
            self.source_cf.autopack_keys = False
            self.source_cf.default_validation_class = pycassa.types.UTF8Type()

            self.destination_cf = ColumnFamily(destination_pool, args["column_family"])
            self.destination_cf.autopack_names = False
            self.destination_cf.autopack_values = False
            self.destination_cf.autopack_keys = False
            self.destination_cf.default_validation_class = pycassa.types.UTF8Type()

        except Exception as e:
            print("ERROR: The keyspace or the column family does not exist or request is timing out!")
            print("Details: %s" % e)
            # Non-zero status so callers and shell scripts can detect the failure.
            sys.exit(1)

        # Optional data
        self.count = args["count"]
        self.key = args["key"]
        self.all = args["all"]

    def importData(self):
        """Read the selected rows from the source column family.

        Returns:
            A dict mapping each row key to the columns returned by
            ``source_cf.get``. It is empty when none of -k, -c or -a was given.
        """
        data = {}
        if self.key:
            # Columns for one specific key.
            data[self.key] = self.source_cf.get(self.key)

        elif self.count:
            # Up to ``count`` rows, in the order get_range yields their keys.
            counter = 0
            error_count = 0
            for value in self.source_cf.get_range(column_count=0, filter_empty=False):
                if counter >= self.count:
                    break
                try:
                    column_data = self.source_cf.get(value[0], column_count=100)
                    data[value[0]] = column_data
                    counter += 1
                except NotFoundException:
                    # Ignore keys with empty columns
                    pass
                except Exception:
                    error_count += 1
                    if error_count > 10:
                        # Write what has been read so far before giving up.
                        self.insertData(data)
                        print("ERROR: Remote cassandra is too slow to read, exiting after writing...")
                        sys.exit(1)

                    # Back off briefly after a failed read.
                    time.sleep(0.2)

        elif self.all:
            # Every row. Not recommended.
            for value in self.source_cf.get_range(column_count=0, filter_empty=False):
                key = value[0]
                data[key] = self.source_cf.get(key)
        else:
            print("Please pass -c or -k or -a arguments!")

        return data

    def insertData(self, data):
        """Insert every ``key -> columns`` pair of ``data`` into the destination."""
        print("Writing " + str(len(data)) + " keys")
        for key, value in data.items():
            self.destination_cf.insert(key, value)

    def run(self):
        """Read from the source, write to the destination, printing progress."""
        self.update_progress(0)
        data = self.importData()
        self.update_progress(50)

        self.insertData(data)
        self.update_progress(100)
        print("Import complete!")

    def update_progress(self, progress):
        """Print a bar with one '#' per ten percent, followed by the percentage."""
        # Floor division keeps the repeat count an integer on Python 2 and 3.
        print("\r[{0}] {1}%".format("#" * (progress // 10), progress))
コード例 #23
0
ファイル: test_autopacking.py プロジェクト: trhowe/pycassa
class TestTimeUUIDs(unittest.TestCase):
    """Tests that slice 'StdTimeUUID' columns using datetime and float bounds."""

    def setUp(self):
        """Open a pool on 'Keyspace1' and bind the 'StdTimeUUID' column family."""
        creds = {"username": "******", "password": "******"}
        self.pool = ConnectionPool(pool_size=5, keyspace="Keyspace1", credentials=creds)
        self.cf_time = ColumnFamily(self.pool, "StdTimeUUID")

    def tearDown(self):
        """Remove every row, then dispose of the pool."""
        for row_key, _cols in self.cf_time.get_range():
            self.cf_time.remove(row_key)
        self.pool.dispose()

    def test_datetime_to_uuid(self):
        """``datetime`` values are accepted as column_start / column_finish."""
        key = "key1"

        before = datetime.now()
        first_uuid = uuid1()
        first = {first_uuid: "0"}
        self.cf_time.insert(key, first)
        time.sleep(1)

        between = datetime.now()
        second_uuid = uuid1()
        second = {second_uuid: "1"}
        self.cf_time.insert(key, second)
        time.sleep(1)

        after = datetime.now()

        both = {first_uuid: "0", second_uuid: "1"}

        # (slice bounds, expected columns), checked in this order.
        checks = [
            ({"column_start": before}, both),
            ({"column_finish": after}, both),
            ({"column_start": before, "column_finish": after}, both),
            ({"column_start": before, "column_finish": after}, both),
            ({"column_start": before, "column_finish": between}, first),
            ({"column_start": between, "column_finish": after}, second),
        ]
        for bounds, expected in checks:
            assert_equal(self.cf_time.get(key, **bounds), expected)

    def test_time_to_uuid(self):
        """``time.time()`` floats are accepted as column_start / column_finish."""
        key = "key1"

        before = time.time()
        first_uuid = uuid1()
        first = {first_uuid: "0"}
        self.cf_time.insert(key, first)
        time.sleep(0.1)

        between = time.time()
        second_uuid = uuid1()
        second = {second_uuid: "1"}
        self.cf_time.insert(key, second)
        time.sleep(0.1)

        after = time.time()

        both = {first_uuid: "0", second_uuid: "1"}

        # (slice bounds, expected columns), checked in this order.
        checks = [
            ({"column_start": before}, both),
            ({"column_finish": after}, both),
            ({"column_start": before, "column_finish": after}, both),
            ({"column_start": before, "column_finish": after}, both),
            ({"column_start": before, "column_finish": between}, first),
            ({"column_start": between, "column_finish": after}, second),
        ]
        for bounds, expected in checks:
            assert_equal(self.cf_time.get(key, **bounds), expected)

    def test_auto_time_to_uuid1(self):
        """A float used as a column name reads back as a UUID for that time."""
        key = "key"

        inserted_at = time.time()
        self.cf_time.insert(key, {inserted_at: "foo"})
        stored_name = self.cf_time.get(key).keys()[0]
        recovered = convert_uuid_to_time(stored_name)
        assert_almost_equal(recovered, inserted_at, places=3)
コード例 #24
0
 def tearDown(self):
     """Remove every row from 'Standard1' in 'PycassaTestKeyspace'.

     A fresh pool is opened for the cleanup and disposed of afterwards so
     that repeated teardowns do not accumulate open pools.
     """
     pool = ConnectionPool('PycassaTestKeyspace')
     try:
         cf = ColumnFamily(pool, 'Standard1')
         for key, cols in cf.get_range():
             cf.remove(key)
     finally:
         # Release the pool even when the cleanup itself fails.
         pool.dispose()
0
class TestColumnFamilyMap:
    """Integration tests for ``ColumnFamilyMap`` over 'Standard2' in 'Keyspace1'.

    ``self.map`` maps rows to ``TestUTF8`` objects; ``self.empty_map`` maps
    the same column family to ``TestEmpty`` with ``raw_columns=True``.
    """

    def setUp(self):
        """Log in, build the column family and maps, restore the counter, wipe rows."""
        creds = {
            'username': '******',
            'password': '******'
        }
        self.client = connect()
        self.client.login('Keyspace1', creds)
        self.cf = ColumnFamily(
            self.client,
            'Keyspace1',
            'Standard2',
            write_consistency_level=ConsistencyLevel.ONE,
            timestamp=self.timestamp,
        )
        self.map = ColumnFamilyMap(TestUTF8, self.cf)
        self.empty_map = ColumnFamilyMap(TestEmpty, self.cf, raw_columns=True)
        # Resume from the counter saved by a previous tearDown, if any.
        try:
            meta_row = self.cf.get('meta')
            self.timestamp_n = int(meta_row['timestamp'])
        except NotFoundException:
            self.timestamp_n = 0
        self.clear()

    def tearDown(self):
        """Persist the current counter value in the 'meta' row."""
        saved = {'timestamp': str(self.timestamp_n)}
        self.cf.insert('meta', saved)

    # The default timestamp function yields values within the same second,
    # which causes problems when removing and inserting (Cassandra cannot
    # tell which came later), so a strictly increasing counter is used.
    def timestamp(self):
        """Advance the counter by one and return the new value."""
        self.timestamp_n = self.timestamp_n + 1
        return self.timestamp_n

    def clear(self):
        """Remove every row, raising the counter to the largest timestamp seen."""
        for row_key, cols in self.cf.get_range(include_timestamp=True):
            for _value, written_at in cols.itervalues():
                if written_at > self.timestamp_n:
                    self.timestamp_n = written_at
            self.cf.remove(row_key)

    def instance(self, key):
        """Return a ``TestUTF8`` with ``key`` and every column attribute set."""
        obj = TestUTF8()
        obj.key = key
        obj.strcol = '1'
        obj.intcol = 2
        obj.floatcol = 3.5
        obj.datetimecol = datetime.now().replace(microsecond=0)
        obj.intstrcol = 8
        obj.floatstrcol = 4.6
        obj.datetimestrcol = datetime.now().replace(microsecond=0)
        return obj

    def test_will_not_insert_none(self):
        """Inserting an object with any column set to None raises TypeError."""
        column_names = ('strcol', 'intcol', 'floatcol', 'datetimecol',
                        'intstrcol', 'floatstrcol', 'datetimestrcol')
        for name in column_names:
            obj = self.instance(
                'TestColumnFamilyMap.test_will_not_insert_none')
            setattr(obj, name, None)
            assert_raises(TypeError, self.map.insert, obj)

    def test_empty(self):
        """A missing key raises on get and is absent from multiget."""
        missing = 'TestColumnFamilyMap.test_empty'
        assert_raises(NotFoundException, self.map.get, missing)
        found = self.map.multiget([missing])
        assert len(found) == 0

    def test_insert_get(self):
        """An inserted object is returned by get; the raw map exposes string values."""
        obj = self.instance('TestColumnFamilyMap.test_insert_get')
        assert_raises(NotFoundException, self.map.get, obj.key)
        self.map.insert(obj)
        assert self.map.get(obj.key) == obj
        raw = self.empty_map.get(obj.key).raw_columns
        assert raw['intstrcol'] == str(obj.intstrcol)

    def test_insert_multiget(self):
        """multiget returns the objects that exist and omits the missing key."""
        first = self.instance('TestColumnFamilyMap.test_insert_multiget1')
        second = self.instance('TestColumnFamilyMap.test_insert_multiget2')
        absent_key = 'TestColumnFamilyMap.test_insert_multiget3'

        self.map.insert(first)
        self.map.insert(second)
        fetched = self.map.multiget([first.key, second.key, absent_key])
        assert len(fetched) == 2
        assert fetched[first.key] == first
        assert fetched[second.key] == second
        assert absent_key not in fetched
        raw = self.empty_map.multiget([first.key])[first.key].raw_columns
        assert raw['intstrcol'] == str(first.intstrcol)

    def test_insert_get_count(self):
        """An inserted object is stored as seven columns."""
        obj = self.instance('TestColumnFamilyMap.test_insert_get_count')
        self.map.insert(obj)
        assert self.map.get_count(obj.key) == 7

    def test_insert_get_range(self):
        """get_range between the first and last key yields the inserted objects."""
        objects = []
        for i in xrange(5):
            objects.append(self.instance(
                'TestColumnFamilyMap.test_insert_get_range%s' % i))

        for obj in objects:
            self.map.insert(obj)

        first_key = objects[0].key
        last_key = objects[-1].key
        rows = list(self.map.get_range(start=first_key, finish=last_key))
        assert len(rows) == len(objects)
        assert rows == objects
        raw_rows = list(
            self.empty_map.get_range(start=first_key, finish=first_key))
        assert raw_rows[0].raw_columns['intstrcol'] == str(
            objects[0].intstrcol)

    def test_remove(self):
        """An object that was removed can no longer be fetched."""
        obj = self.instance('TestColumnFamilyMap.test_remove')

        self.map.insert(obj)
        self.map.remove(obj)
        assert_raises(NotFoundException, self.map.get, obj.key)

    def test_does_not_insert_extra_column(self):
        """Attributes that are not mapped columns do not come back from get."""
        obj = self.instance(
            'TestColumnFamilyMap.test_does_not_insert_extra_column')
        obj.othercol = 'Test'

        self.map.insert(obj)

        fetched = self.map.get(obj.key)
        for name in ('strcol', 'intcol', 'floatcol', 'datetimecol'):
            assert getattr(fetched, name) == getattr(obj, name)
        assert_raises(AttributeError, getattr, fetched, 'othercol')

    def test_has_defaults(self):
        """Columns missing from the row take the defaults declared on ``TestUTF8``."""
        row_key = 'TestColumnFamilyMap.test_has_defaults'
        self.cf.insert(row_key, {'strcol': '1'})
        fetched = self.map.get(row_key)

        defaulted = ('intcol', 'floatcol', 'datetimecol',
                     'intstrcol', 'floatstrcol', 'datetimestrcol')
        for name in defaulted:
            assert getattr(fetched, name) == getattr(TestUTF8, name).default
コード例 #26
0
class TestColumnFamilyMap:
    def setUp(self):
        credentials = {'username': '******', 'password': '******'}
        self.pool = ConnectionPool(keyspace='Keyspace1', credentials=credentials)
        self.cf = ColumnFamily(self.pool, 'Standard2',
                               autopack_names=False,
                               autopack_values=False)
        self.indexed_cf = ColumnFamily(self.pool, 'Indexed1',
                                       autopack_names=False,
                                       autopack_values=False)
        self.map = ColumnFamilyMap(TestUTF8, self.cf)
        self.indexed_map = ColumnFamilyMap(TestIndex, self.indexed_cf)
        self.empty_map = ColumnFamilyMap(TestEmpty, self.cf, raw_columns=True)

    def tearDown(self):
        for key, columns in self.cf.get_range():
            self.cf.remove(key)
        for key, columns in self.indexed_cf.get_range():
            self.cf.remove(key)

    def instance(self, key):
        instance = TestUTF8()
        instance.key = key
        instance.strcol = '1'
        instance.intcol = 2
        instance.floatcol = 3.5
        instance.datetimecol = datetime.now().replace(microsecond=0)
        instance.intstrcol = 8
        instance.floatstrcol = 4.6
        instance.datetimestrcol = datetime.now().replace(microsecond=0)

        return instance

    def test_empty(self):
        key = 'TestColumnFamilyMap.test_empty'
        assert_raises(NotFoundException, self.map.get, key)
        assert_equal(len(self.map.multiget([key])), 0)

    def test_insert_get(self):
        instance = self.instance('TestColumnFamilyMap.test_insert_get')
        assert_raises(NotFoundException, self.map.get, instance.key)
        self.map.insert(instance)
        assert_equal(self.map.get(instance.key), instance)
        assert_equal(self.empty_map.get(instance.key).raw_columns['intstrcol'], str(instance.intstrcol))

    def test_insert_get_indexed_slices(self):
        instance = TestIndex()
        instance.key = 'key'
        instance.birthdate = 1L
        self.indexed_map.insert(instance)
        instance.key = 'key2'
        self.indexed_map.insert(instance)
        instance.key = 'key3'
        self.indexed_map.insert(instance)

        expr = index.create_index_expression(column_name='birthdate', value=1L)
        clause = index.create_index_clause([expr])
        result = self.indexed_map.get_indexed_slices(instance, index_clause=clause)
        assert_equal(len(result), 3)
        assert_equal(result.get('key3'), instance)

    def test_insert_multiget(self):
        instance1 = self.instance('TestColumnFamilyMap.test_insert_multiget1')
        instance2 = self.instance('TestColumnFamilyMap.test_insert_multiget2')
        missing_key = 'TestColumnFamilyMap.test_insert_multiget3'

        self.map.insert(instance1)
        self.map.insert(instance2)
        rows = self.map.multiget([instance1.key, instance2.key, missing_key])
        assert_equal(len(rows), 2)
        assert_equal(rows[instance1.key], instance1)
        assert_equal(rows[instance2.key], instance2)
        assert_true(missing_key not in rows)
        assert_equal(self.empty_map.multiget([instance1.key])[instance1.key].raw_columns['intstrcol'], str(instance1.intstrcol))

    def test_insert_get_count(self):
        instance = self.instance('TestColumnFamilyMap.test_insert_get_count')
        self.map.insert(instance)
        assert_equal(self.map.get_count(instance.key), 7)

    def test_insert_get_range(self):
        instances = []
        for i in xrange(5):
            instance = self.instance('TestColumnFamilyMap.test_insert_get_range%s' % i)
            instances.append(instance)

        for instance in instances:
            self.map.insert(instance)

        rows = list(self.map.get_range(start=instances[0].key, finish=instances[-1].key))
        assert_equal(len(rows), len(instances))
        assert_equal(rows, instances)
        assert_equal(list(self.empty_map.get_range(start=instances[0].key, finish=instances[0].key))[0].raw_columns['intstrcol'], str(instances[0].intstrcol))

    def test_remove(self):
        instance = self.instance('TestColumnFamilyMap.test_remove')

        self.map.insert(instance)
        self.map.remove(instance)
        assert_raises(NotFoundException, self.map.get, instance.key)

    def test_does_not_insert_extra_column(self):
        instance = self.instance('TestColumnFamilyMap.test_does_not_insert_extra_column')
        instance.othercol = 'Test'

        self.map.insert(instance)

        get_instance = self.map.get(instance.key)
        assert_equal(get_instance.strcol, instance.strcol)
        assert_equal(get_instance.intcol, instance.intcol)
        assert_equal(get_instance.floatcol, instance.floatcol)
        assert_equal(get_instance.datetimecol, instance.datetimecol)
        assert_raises(AttributeError, getattr, get_instance, 'othercol')

    def test_has_defaults(self):
        key = 'TestColumnFamilyMap.test_has_defaults'
        self.cf.insert(key, {'strcol': '1'})
        instance = self.map.get(key)

        assert_equal(instance.intcol, TestUTF8.intcol.default)
        assert_equal(instance.floatcol, TestUTF8.floatcol.default)
        assert_equal(instance.datetimecol, TestUTF8.datetimecol.default)
        assert_equal(instance.intstrcol, TestUTF8.intstrcol.default)
        assert_equal(instance.floatstrcol, TestUTF8.floatstrcol.default)
        assert_equal(instance.datetimestrcol, TestUTF8.datetimestrcol.default)
コード例 #27
0
ファイル: test_batch_mutation.py プロジェクト: trhowe/pycassa
class TestMutator(unittest.TestCase):
    def setUp(self):
        credentials = {"username": "******", "password": "******"}
        self.pool = ConnectionPool(keyspace="Keyspace1", credentials=credentials)
        self.cf = ColumnFamily(self.pool, "Standard2")
        self.scf = ColumnFamily(self.pool, "Super1")

    def tearDown(self):
        for key, cols in self.cf.get_range():
            self.cf.remove(key)
        for key, cols in self.scf.get_range():
            self.scf.remove(key)

    def test_insert(self):
        batch = self.cf.batch()
        for key, cols in ROWS.iteritems():
            batch.insert(key, cols)
        batch.send()
        for key, cols in ROWS.items():
            assert self.cf.get(key) == cols

    def test_insert_supercolumns(self):
        batch = self.scf.batch()
        batch.insert("one", ROWS)
        batch.insert("two", ROWS)
        batch.insert("three", ROWS)
        batch.send()
        assert self.scf.get("one") == ROWS
        assert self.scf.get("two") == ROWS
        assert self.scf.get("three") == ROWS

    def test_queue_size(self):
        batch = self.cf.batch(queue_size=2)
        batch.insert("1", ROWS["1"])
        batch.insert("2", ROWS["2"])
        batch.insert("3", ROWS["3"])
        assert self.cf.get("1") == ROWS["1"]
        assert_raises(NotFoundException, self.cf.get, "3")
        batch.send()
        for key, cols in ROWS.items():
            assert self.cf.get(key) == cols

    def test_remove_key(self):
        batch = self.cf.batch()
        batch.insert("1", ROWS["1"])
        batch.remove("1")
        batch.send()
        assert_raises(NotFoundException, self.cf.get, "1")

    def test_remove_columns(self):
        batch = self.cf.batch()
        batch.insert("1", {"a": "123", "b": "123"})
        batch.remove("1", ["a"])
        batch.send()
        assert self.cf.get("1") == {"b": "123"}

    def test_remove_supercolumns(self):
        batch = self.scf.batch()
        batch.insert("one", ROWS)
        batch.insert("two", ROWS)
        batch.insert("three", ROWS)
        batch.remove("two", ["b"], "2")
        batch.send()
        assert self.scf.get("one") == ROWS
        assert self.scf.get("two")["2"] == {"a": "234"}
        assert self.scf.get("three") == ROWS

    def test_chained(self):
        batch = self.cf.batch()
        batch.insert("1", ROWS["1"]).insert("2", ROWS["2"]).insert("3", ROWS["3"]).send()
        assert self.cf.get("1") == ROWS["1"]
        assert self.cf.get("2") == ROWS["2"]
        assert self.cf.get("3") == ROWS["3"]

    def test_contextmgr(self):
        if sys.version_info < (2, 5):
            raise SkipTest("No context managers in Python < 2.5")
        exec """with self.cf.batch(queue_size=2) as b:
    b.insert('1', ROWS['1'])
    b.insert('2', ROWS['2'])
    b.insert('3', ROWS['3'])
assert self.cf.get('3') == ROWS['3']"""

    def test_multi_column_family(self):
        batch = batch_mod.Mutator(self.pool)
        cf2 = self.cf
        batch.insert(self.cf, "1", ROWS["1"])
        batch.insert(self.cf, "2", ROWS["2"])
        batch.remove(cf2, "1", ROWS["1"])
        batch.send()
        assert self.cf.get("2") == ROWS["2"]
        assert_raises(NotFoundException, self.cf.get, "1")