def test_big_batched_writes():
    """
    Write batches of 1 MiB rows to a scratch keyspace on a local node.

    A per-user keyspace ('testkeyspace_<user>') holding the column family
    'testcf' is (re)created on 'localhost:9160'.  For each batch size from
    14 to 19 rows, that many rows are built -- each with a random UUID key
    and a single 1 MiB 'test' column -- and inserted through one batch
    mutator.  The keyspace is dropped again on the way out, whether or not
    the writes succeeded.
    """
    ## this is an m1.xlarge doing nothing but supporting this test
    server = 'localhost:9160'
    keyspace = 'testkeyspace_' + getpass.getuser().replace('-', '_')
    family = 'testcf'

    sm = SystemManager(server)
    try:
        try:
            sm.drop_keyspace(keyspace)
        except pycassa.InvalidRequestException:
            # best effort: presumably there was no leftover keyspace to drop
            pass
        sm.create_keyspace(keyspace, SIMPLE_STRATEGY, {'replication_factor': '1'})
        sm.create_column_family(keyspace, family, super=False,
                                key_validation_class=LEXICAL_UUID_TYPE,
                                default_validation_class=LEXICAL_UUID_TYPE,
                                column_name_class=ASCII_TYPE)
        sm.alter_column(keyspace, family, 'test', ASCII_TYPE)
    finally:
        # release the schema connection even when a schema call fails
        sm.close()

    # Everything from here on runs with the keyspace in place, so it all
    # lives inside the try block whose finally clause drops the keyspace.
    try:
        pool = ConnectionPool(keyspace, [server], max_retries=10,
                              pool_timeout=0, pool_size=10, timeout=120)
        pool.fill()
        pool.add_listener(Listener())

        ## assert that we are using framed transport
        conn = pool._q.get()
        try:
            assert isinstance(conn.transport,
                              thrift.transport.TTransport.TFramedTransport)
        finally:
            # hand the borrowed connection back even if the assert fails
            pool._q.put(conn)

        # Both of these are the same for every batch size, so build them once.
        one_mb = ' ' * 2**20
        testcf = pycassa.ColumnFamily(pool, family)

        for num_rows in range(14, 20):
            ## write some data to cassandra using increasing data sizes
            rows = [(uuid.uuid4(), dict(test=one_mb)) for _ in range(num_rows)]

            with testcf.batch() as batch:
                for (key, data_dict) in rows:
                    data_size = len(data_dict['test'])
                    logger.critical('adding %r with %.6f MB' % (key, float(data_size) / 2**20))
                    batch.insert(key, data_dict)

            logger.critical('%d rows written' % num_rows)

    finally:
        sm = SystemManager(server)
        try:
            sm.drop_keyspace(keyspace)
        except pycassa.InvalidRequestException:
            pass
        finally:
            sm.close()
        logger.critical('clearing test keyspace: %r' % keyspace)
def test_default_validated_columns(self): sys = SystemManager() sys.create_column_family(TEST_KS, 'DefaultValidator', default_validation_class=LongType()) sys.alter_column(TEST_KS, 'DefaultValidator', 'subcol', TimeUUIDType()) sys.close() cf = ColumnFamily(pool, 'DefaultValidator') key = 'key1' col_cf = {'aaaaaa': 1L} col_cm = {'subcol': TIME1} col_ncf = {'aaaaaa': TIME1} # Both of these inserts work, as cf allows # longs and cm for 'subcol' allows TIMEUUIDs. cf.insert(key, col_cf) cf.insert(key, col_cm) assert_equal(cf.get(key), {'aaaaaa': 1L, 'subcol': TIME1})
def _create_column_family(self, family, bytes_columns=None,
                          key_validation_class=TIME_UUID_TYPE):
    '''
    Creates a column family of the name 'family' and sets any of
    the names in the bytes_columns list to have the BYTES_TYPE.

    The column family is created in self.namespace through a
    SystemManager connected to a randomly chosen entry of
    self.server_list.  Column names use ASCII_TYPE and values default
    to TIME_UUID_TYPE.

    bytes_columns is an iterable of column names; None (the default)
    or an empty iterable means no column is altered.

    key_validation_class defaults to TIME_UUID_TYPE and could also
    be ASCII_TYPE for md5 hash keys, like we use for 'inbound'
    '''
    sm = SystemManager(random.choice(self.server_list))
    try:
        sm.create_column_family(self.namespace, family, super=False,
                                key_validation_class=key_validation_class,
                                default_validation_class=TIME_UUID_TYPE,
                                column_name_class=ASCII_TYPE)
        # "or ()" covers the None default without a shared mutable list
        for column in bytes_columns or ():
            sm.alter_column(self.namespace, family, column, BYTES_TYPE)
    finally:
        # close the connection even if one of the schema calls raised
        sm.close()
def _create_column_family(self, family, bytes_columns=None,
                          key_validation_class=TIME_UUID_TYPE):
    '''
    Creates a column family of the name 'family' and sets any of
    the names in the bytes_columns list to have the BYTES_TYPE.

    A SystemManager is opened against one server picked at random
    from self.server_list, and the family is created inside
    self.namespace with ASCII_TYPE column names and TIME_UUID_TYPE as
    the default value validator.

    bytes_columns may be any iterable of column names.  Leaving it as
    None, or passing an empty iterable, skips the alter step.

    key_validation_class defaults to TIME_UUID_TYPE and could also
    be ASCII_TYPE for md5 hash keys, like we use for 'inbound'
    '''
    sm = SystemManager(random.choice(self.server_list))
    try:
        sm.create_column_family(self.namespace, family, super=False,
                                key_validation_class=key_validation_class,
                                default_validation_class=TIME_UUID_TYPE,
                                column_name_class=ASCII_TYPE)
        # None default instead of a shared list object; treat it as empty
        for column in bytes_columns or ():
            sm.alter_column(self.namespace, family, column, BYTES_TYPE)
    finally:
        # always release the connection, including on a failed schema call
        sm.close()
def test_validation_with_packed_names(self):
    """
    Make sure that validated columns are packed correctly when the
    column names themselves must be packed.

    'Validators2' uses LongType both as comparator and as default
    validator, while the column named 1 is validated as TimeUUIDType.
    """
    cf_name = 'Validators2'

    schema = SystemManager()
    schema.create_column_family(TEST_KS, cf_name,
                                comparator_type=LongType(),
                                default_validation_class=LongType())
    schema.alter_column(TEST_KS, cf_name, 1, TimeUUIDType())
    schema.close()

    my_uuid = uuid.uuid1()
    cf = ColumnFamily(pool, cf_name)
    row_key = 'key'

    # Each step is (columns to insert, full row expected afterwards).
    steps = [
        ({0: 0}, {0: 0}),
        ({1: my_uuid}, {0: 0, 1: my_uuid}),
        ({0: 0, 1: my_uuid}, {0: 0, 1: my_uuid}),
    ]
    for columns, expected_row in steps:
        cf.insert(row_key, columns)
        assert_equal(cf.get(row_key), expected_row)
def handle_noargs(self, **options):
    """
    Create the 'APIConsumers' column family when the keyspace lacks it.

    Connects to the first server in settings.CASSANDRA_SERVERS and lists
    the column families of settings.CASSANDRA_KEYSPACE.  If 'APIConsumers'
    is not among them it is created with a UTF8 comparator, and its
    'consumer_key', 'consumer_secret' and 'username' columns are given
    UTF8 validators.  Progress is reported with print.

    **options is accepted but not used.
    """
    keyspace = settings.CASSANDRA_KEYSPACE
    manager = SystemManager(server=settings.CASSANDRA_SERVERS[0])
    try:
        # NOTE(review): the strategy is looked up but never used below.  The
        # lookup is kept because it raises AttributeError when the setting
        # does not name an attribute of pycassa.system_manager.
        REPLICATION_STRATEGY = getattr(pycassa.system_manager,
                                       settings.CASSANDRA_REPLICATION_STRATEGY)

        existing_cfs = manager.get_keyspace_column_families(keyspace)
        if 'APIConsumers' not in existing_cfs:
            print('Creating missing column family: APIConsumers')
            manager.create_column_family(keyspace, 'APIConsumers',
                                         comparator_type=UTF8_TYPE)
            for column in ('consumer_key', 'consumer_secret', 'username'):
                manager.alter_column(keyspace, 'APIConsumers', column, UTF8_TYPE)
    finally:
        # the original never closed this connection
        manager.close()

    print('All done!')
def test_validated_columns(self): sys = SystemManager() sys.create_column_family(TEST_KS, 'Validators',) sys.alter_column(TEST_KS, 'Validators', 'long', LongType()) sys.alter_column(TEST_KS, 'Validators', 'int', IntegerType()) sys.alter_column(TEST_KS, 'Validators', 'time', TimeUUIDType()) sys.alter_column(TEST_KS, 'Validators', 'lex', LexicalUUIDType()) sys.alter_column(TEST_KS, 'Validators', 'ascii', AsciiType()) sys.alter_column(TEST_KS, 'Validators', 'utf8', UTF8Type()) sys.alter_column(TEST_KS, 'Validators', 'bytes', BytesType()) sys.close() cf = ColumnFamily(pool, 'Validators') key = 'key1' col = {'long': 1L} cf.insert(key, col) assert_equal(cf.get(key)['long'], 1L) col = {'int': 1} cf.insert(key, col) assert_equal(cf.get(key)['int'], 1) col = {'time': TIME1} cf.insert(key, col) assert_equal(cf.get(key)['time'], TIME1) col = {'lex': uuid.UUID(bytes='aaa aaa aaa aaaa')} cf.insert(key, col) assert_equal(cf.get(key)['lex'], uuid.UUID(bytes='aaa aaa aaa aaaa')) col = {'ascii': 'aaa'} cf.insert(key, col) assert_equal(cf.get(key)['ascii'], 'aaa') col = {'utf8': u'a\u0020'} cf.insert(key, col) assert_equal(cf.get(key)['utf8'], u'a\u0020') col = {'bytes': 'aaa'} cf.insert(key, col) assert_equal(cf.get(key)['bytes'], 'aaa') cf.remove(key)
"""
Create the Cassandra database.

Sets up the 'drought' keyspace with the column families 'cmip5',
'zipcodes' and 'counties'.  In 'zipcodes' the ZIPCODE column is typed as
an integer and indexed, and the two centre coordinates are typed as floats.
"""

import pycassa
from pycassa.system_manager import SystemManager

KEYSPACE = 'drought'
ZIPCODES = 'zipcodes'

mgr = SystemManager()

# One replica of the data.
mgr.create_keyspace(KEYSPACE, strategy_options={'replication_factor': '1'})

mgr.create_column_family(KEYSPACE, 'cmip5')

mgr.create_column_family(KEYSPACE, ZIPCODES)
# The column is typed first and indexed second, as in the original order.
mgr.alter_column(KEYSPACE, ZIPCODES, 'ZIPCODE', pycassa.types.IntegerType())
mgr.create_index(KEYSPACE, ZIPCODES, 'ZIPCODE', pycassa.types.IntegerType())
for coordinate in ('CENTER_LATITUDE', 'CENTER_LONGITUDE'):
    mgr.alter_column(KEYSPACE, ZIPCODES, coordinate, pycassa.types.FloatType())

mgr.create_column_family(KEYSPACE, 'counties')

mgr.close()