def test_static_composite_slicing(self): cf = ColumnFamily(pool, "StaticComposite") u1 = uuid.uuid1() u4 = uuid.uuid4() col0 = (0, 1, u1, u4, "", "", "") col1 = (1, 1, u1, u4, "", "", "") col2 = (1, 2, u1, u4, "", "", "") col3 = (1, 3, u1, u4, "", "", "") col4 = (2, 1, u1, u4, "", "", "") cf.insert("key2", {col0: "", col1: "", col2: "", col3: "", col4: ""}) result = cf.get("key2", column_start=((1, True),), column_finish=((1, True),)) assert_equal(result, {col1: "", col2: "", col3: ""}) result = cf.get("key2", column_start=(1,), column_finish=((2, False),)) assert_equal(result, {col1: "", col2: "", col3: ""}) result = cf.get("key2", column_start=((1, True),), column_finish=((2, False),)) assert_equal(result, {col1: "", col2: "", col3: ""}) result = cf.get("key2", column_start=(1,), column_finish=((2, False),)) assert_equal(result, {col1: "", col2: "", col3: ""}) result = cf.get("key2", column_start=((0, False),), column_finish=((2, False),)) assert_equal(result, {col1: "", col2: "", col3: ""}) result = cf.get("key2", column_start=(1, 1), column_finish=(1, 3)) assert_equal(result, {col1: "", col2: "", col3: ""}) result = cf.get("key2", column_start=(1, 1), column_finish=(1, (3, True))) assert_equal(result, {col1: "", col2: "", col3: ""}) result = cf.get("key2", column_start=(1, (1, True)), column_finish=((2, False),)) assert_equal(result, {col1: "", col2: "", col3: ""})
def mass_insert(pool):
    cf_logs = ColumnFamily(pool, CF_LOGS)
    rnd_inst = random.Random()
    rnd_inst.seed(1)
    start = time.time()
    count = 0
    try:
        for item in log_generator(1):
            msg = item[0]
            app = item[1]
            # http://pycassa.github.com/pycassa/assorted/time_uuid.html
            # http://www.slideshare.net/jeremiahdjordan/pycon-2012-apache-cassandra
            # http://www.slideshare.net/rbranson/how-do-i-cassandra @ slide 80
            # https://github.com/pycassa/pycassa/issues/135
            cf_logs.insert(app, {
                uuid.uuid1(): msg,
            })
            count += 1
            if count % 100 == 0:
                logging.info("Inserted %d columns", count)
    except KeyboardInterrupt:
        logging.info("Stopping...")
    end = time.time()
    avg = float(count) / (end - start)
    logging.info("Avg: %f insert/sec", avg)
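# A minimal companion read sketch (not part of the original example): the insert
# above keys each row by application name and uses version 1 (time-based) UUIDs
# as column names, so, assuming CF_LOGS has a TimeUUID comparator, the newest
# messages can be read back with a reversed slice. The helper name
# `fetch_latest_logs` is hypothetical.
def fetch_latest_logs(pool, app, count=20):
    cf_logs = ColumnFamily(pool, CF_LOGS)
    # column_reversed=True walks the row from the newest TimeUUID backwards;
    # column_count limits how many columns the slice returns.
    columns = cf_logs.get(app, column_count=count, column_reversed=True)
    # Returns a list of (uuid.UUID, message) pairs, newest first.
    return list(columns.items())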
def store_file(self, filename, content, mtime=-1, compression_state=None,
               compressed_size=None):
    cf = ColumnFamily(self.pool, 'files')
    compression_state = compression_state or 'unknown'
    assert compression_state in ('none', 'unknown', 'gzip')
    sha1 = self.store_blob(filename, content)
    cols = {
        'version': 1,
        'sha1': sha1,
        'size': len(content),
        'mtime': mtime,
        'compression_state': compression_state,
    }
    if compressed_size is not None:
        cols['compressed_size'] = compressed_size
    cf.insert(filename, cols)
    indices = ColumnFamily(self.pool, 'simple_indices')
    indices.insert('files', {filename: ''})
def test_static_composite_slicing(self):
    cf = ColumnFamily(pool, 'StaticComposite')
    u1 = uuid.uuid1()
    u4 = uuid.uuid4()
    col0 = (0, 1, u1, u4, '', '', '')
    col1 = (1, 1, u1, u4, '', '', '')
    col2 = (1, 2, u1, u4, '', '', '')
    col3 = (1, 3, u1, u4, '', '', '')
    col4 = (2, 1, u1, u4, '', '', '')
    cf.insert('key2', {col0: '', col1: '', col2: '', col3: '', col4: ''})

    result = cf.get('key2', column_start=((1, True),), column_finish=((1, True),))
    assert_equal(result, {col1: '', col2: '', col3: ''})

    result = cf.get('key2', column_start=(1,), column_finish=((2, False),))
    assert_equal(result, {col1: '', col2: '', col3: ''})

    result = cf.get('key2', column_start=((1, True),), column_finish=((2, False),))
    assert_equal(result, {col1: '', col2: '', col3: ''})

    result = cf.get('key2', column_start=(1,), column_finish=((2, False),))
    assert_equal(result, {col1: '', col2: '', col3: ''})

    result = cf.get('key2', column_start=((0, False),), column_finish=((2, False),))
    assert_equal(result, {col1: '', col2: '', col3: ''})

    result = cf.get('key2', column_start=(1, 1), column_finish=(1, 3))
    assert_equal(result, {col1: '', col2: ''})

    result = cf.get('key2', column_start=(1, 1), column_finish=(1, (3, True)))
    assert_equal(result, {col1: '', col2: '', col3: ''})

    result = cf.get('key2', column_start=(1, (1, True)), column_finish=((2, False),))
    assert_equal(result, {col1: '', col2: '', col3: ''})
def loadData():
    con = util.getConnection()
    cf = ColumnFamily(con, 'videos')
    tagCF = ColumnFamily(con, 'tag_videos_composite')
    movies = util.readCSV('data/movies')
    for movie in movies:
        title = movie[0]
        uploader = movie[1]
        runtime = int(movie[2])  # convert to match column validator
        tags = movie[3]
        rowKey = title + ":" + uploader
        print "Inserting in videos: {}.".format(str(movie))
        row = {
            'title': title,
            'user_name': uploader,
            'runtime_in_sec': runtime,
            'tags_csv': tags
        }
        cf.insert(rowKey, row)
        print 'inserting tags: {}'.format(tags)
        for tag in tags.split(','):
            tagCF.insert(
                tag.strip().lower(),          # row key = tag name
                {
                    (uploader, rowKey): title  # (uploader, videoId) = title
                }
            )
    print 'finished insertion.'
    con.dispose()
def loadData():
    con = util.getConnection()
    cf = ColumnFamily(con, 'videos_denorm')
    tagCF = ColumnFamily(con, 'tag_videos_composite')
    movies = util.readCSV('data/movies')
    for movie in movies:
        title = movie[0]
        uploader = movie[1]
        runtime = int(movie[2])  # convert to match column validator
        tags = movie[3]
        rowKey = title + ":" + uploader
        print "Inserting in videos: {}.".format(str(movie))
        row = {
            'title': title,
            'user_name': uploader,
            'runtime_in_sec': runtime,
        }
        for tag in tags.split(','):
            print 'adding tag: {0} for movie: {1}'.format(tag, title)
            row['tag:{}'.format(tag.strip().lower())] = tag.strip()
        print 'inserting denorm: {}'.format(row)
        cf.insert(rowKey, row)
    print 'finished insertion.'
    con.dispose()
def _update_analytics_start_time(self, start_times):
    try:
        col_family = ColumnFamily(self._pool, SYSTEM_OBJECT_TABLE)
        col_family.insert(SYSTEM_OBJECT_ANALYTICS, start_times)
    except Exception as e:
        self._logger.error("Exception: update_analytics_start_time "
                           "Connection Failure %s" % e)
class TestCustomTypes(unittest.TestCase):

    class IntString(types.CassandraType):

        @staticmethod
        def pack(intval):
            return str(intval)

        @staticmethod
        def unpack(strval):
            return int(strval)

    class IntString2(types.CassandraType):

        def __init__(self, *args, **kwargs):
            self.pack = lambda val: str(val)
            self.unpack = lambda val: int(val)

    def test_staticmethod_funcs(self):
        self.cf = ColumnFamily(pool, 'Standard1')
        self.cf.key_validation_class = TestCustomTypes.IntString()
        self.cf.insert(1234, {'col': 'val'})
        assert_equal(self.cf.get(1234), {'col': 'val'})

    def test_constructor_lambdas(self):
        self.cf = ColumnFamily(pool, 'Standard1')
        self.cf.key_validation_class = TestCustomTypes.IntString2()
        self.cf.insert(1234, {'col': 'val'})
        assert_equal(self.cf.get(1234), {'col': 'val'})
def _update_analytics_start_time(self, start_time):
    if mockcassandra.use_cql():
        cluster = Cluster(['127.0.0.1'],
                          port=int(self.__class__.cassandra_port))
        session = cluster.connect(COLLECTOR_KEYSPACE_CQL)
        query = "INSERT INTO {0} (key, \"{1}\") VALUES ('{2}', {3})".format(
            SYSTEM_OBJECT_TABLE, SYSTEM_OBJECT_START_TIME,
            SYSTEM_OBJECT_ANALYTICS, start_time)
        try:
            session.execute(query)
        except Exception as e:
            logging.error("INSERT INTO %s: Key %s Column %s Value %d "
                          "FAILED: %s" % (SYSTEM_OBJECT_TABLE,
                                          SYSTEM_OBJECT_ANALYTICS,
                                          SYSTEM_OBJECT_START_TIME,
                                          start_time, str(e)))
            assert False
        else:
            cluster.shutdown()
    else:
        pool = ConnectionPool(
            COLLECTOR_KEYSPACE,
            ['127.0.0.1:%s' % (self.__class__.cassandra_port)])
        col_family = ColumnFamily(pool, SYSTEM_OBJECT_TABLE)
        col_family.insert(SYSTEM_OBJECT_ANALYTICS,
                          {SYSTEM_OBJECT_START_TIME: start_time})
def insert(self, data, return_id=False):
    """
    Creates a new entity to represent a model.

    :param data: Model object represented by a list of (field, value)
        pairs. Each value is prepared for the insert operation.
    :param return_id: Whether to return the id or key of the newly
        created entity.
    """
    pool = self.connection
    column_family_name = get_column_family()
    col_fam = CF(pool, column_family_name)
    col_fam_data = {}
    for field, value in data.iteritems():
        col_fam_data[field] = value
    key = data.get(pk_column)
    if not key:
        key = str(uuid4())
    try:
        col_fam.insert(key=key, columns=col_fam_data,
                       write_consistency_level=self.connection.write_consistency_level)
    except Exception, e:
        print str(e)
def loadData():
    con = util.getConnection()
    cf = ColumnFamily(con, 'videos')
    tagCF = ColumnFamily(con, 'tag_videos_sup')
    movies = util.readCSV('data/movies')
    for movie in movies:
        title = movie[0]
        uploader = movie[1]
        runtime = int(movie[2])  # convert to match column validator
        tags = movie[3]
        rowKey = title + ":" + uploader
        print "Inserting in videos: {}.".format(str(movie))
        cf.insert(rowKey, {
            'title': title,
            'user_name': uploader,
            'runtime_in_sec': runtime,
            'tags_csv': tags
        })
        for tag in tags.split(','):
            print 'adding tag: {0} for movie: {1}'.format(tag, title)
            tagCF.insert(
                tag.strip().lower(),       # row key = tag name
                {
                    uploader: {            # level 1 nesting = uploader name
                        rowKey: title      # level 2 nesting = videoId, value = title
                    }
                }
            )
    print 'finished insertion.'
    con.dispose()
class ClientCassandra():

    def __init__(self, keySpace):
        self.pool = ConnectionPool(keySpace, ['localhost:9160'])
        self.col_fam_page = ColumnFamily(self.pool, 'Page')
        self.col_fam_publication = ColumnFamily(self.pool, 'Publication')
        self.col_fam_company = ColumnFamily(self.pool, 'Company')
        self.col_fam_location = ColumnFamily(self.pool, 'Location')
        self.col_fam_category = ColumnFamily(self.pool, 'Category')
        # pycassaShell
        # SYSTEM_MANAGER.create_keyspace('BlwData', strategy_options={"replication_factor": "1"});
        # SYSTEM_MANAGER.create_column_family('BlwData', 'Page');
        # SYSTEM_MANAGER.create_column_family('BlwData', 'Publication');
        # SYSTEM_MANAGER.create_column_family('BlwData', 'Company');
        # SYSTEM_MANAGER.create_column_family('BlwData', 'Location');
        # SYSTEM_MANAGER.create_column_family('BlwData', 'Category');

    def insertPage(self, page):
        timestamp = self.col_fam_page.insert(page.getUrl(), page.toJson())
        print "sizeof category " + page.category.name + " is " + str(self.col_fam_category.get_count(page.category.name))
        self.col_fam_category.insert(page.category.name, {'url': page.getUrl()})
        print "sizeof category " + page.category.name + " is " + str(self.col_fam_category.get_count(page.category.name))
        return timestamp
        # should raise an exception for an immutable sequence
        # self.assertRaises(TypeError, random.shuffle, (1,2,3))

    def getPages(self, url, column):
        readData = self.col_fam_page.get(url, columns=[column])
        return readData

    def getCountCategory(self, category):
        return self.col_fam_category.get_count(category)
def get_row_key_id(domain):
    counter_column, counter_lock = domain_counter_map[domain]

    ## acquire lock before getting value of counter
    counter_lock.acquire()
    try:
        client = db_connection.get_client()
        cf = ColumnFamily(client, CONFIG_DOMAIN)

        ## get new key id
        id_key = cf.get(CONFIG_ROW, counter_column)[counter_column]

        ## increment value if not None
        if id_key:
            new_id_key = id_key + 1
            cf.insert(CONFIG_ROW, {counter_column: new_id_key},
                      write_consistency_level=ConsistencyLevel.ALL)
        return id_key
        """
        if id_key:
            str_id_key = str(id_key)
            str_id_key.zfill(MAX_PADDING_RANGE)
            return str_id_key
        else:
            return None
        """
    finally:
        ## release lock before returning from this function
        counter_lock.release()
def insert(self, data, return_id=False):
    """
    Creates a new entity to represent a model.

    :param data: Model object represented by a list of (field, value)
        pairs. Each value is prepared for the insert operation.
    :param return_id: Whether to return the id or key of the newly
        created entity.
    """
    pool = self.connection
    column_family_name = get_column_family()
    col_fam = CF(pool, column_family_name)
    col_fam_data = {}
    for field, value in data.iteritems():
        col_fam_data[field] = value
    key = data.get(pk_column)
    if not key:
        key = str(uuid4())
    try:
        col_fam.insert(key=key, columns=col_fam_data,
                       write_consistency_level=self.connection.write_consistency_level)
    except Exception, e:
        print str(e)
def _update_analytics_start_time(self, start_time):
    pool = ConnectionPool(
        COLLECTOR_KEYSPACE,
        ['127.0.0.1:%s' % (self.__class__.cassandra_port)])
    col_family = ColumnFamily(pool, SYSTEM_OBJECT_TABLE)
    col_family.insert(SYSTEM_OBJECT_ANALYTICS,
                      {SYSTEM_OBJECT_START_TIME: start_time})
def test_single_component_composite(self):
    sys = SystemManager()
    sys.create_column_family(TEST_KS, 'SingleComposite',
                             comparator_type=CompositeType(IntegerType()))
    cf = ColumnFamily(pool, 'SingleComposite')
    cf.insert('key', {(123456,): 'val'})
    assert_equal(cf.get('key'), {(123456,): 'val'})
def main(filename):
    data = file(filename)

    # Set up the connection pool
    pool = ConnectionPool('tuitterdb', ['localhost:9160'])

    # CF connections
    user_family = ColumnFamily(pool, 'user')
    tweet_family = ColumnFamily(pool, 'tweet')
    user_tweets_family = ColumnFamily(pool, 'userTweets')
    followers = ColumnFamily(pool, 'followers')
    followerTweets = ColumnFamily(pool, 'followsTweets')

    # Batch Definitions
    user_batch = user_family.batch(queue_size=1000)
    followers_batch = followers.batch(queue_size=500)
    user_tweets_batch = user_tweets_family.batch(queue_size=500)
    followerTweets_batch = followerTweets.batch(queue_size=500)

    while True:  # loop
        line = data.readline()
        if line == "":
            break  # This isn't handled properly
        else:
            tweet = tweet_get(line)
            try:
                tweet_data = get_tweet_data(tweet)
                if check_user(tweet[u"from_user_id_str"]) == False:  # check in script if user is there.
                    sender = get_sender(tweet)
                    # create user entry for sender
                    user_batch.insert(sender.user_id, {'user_name': sender.user_name,
                                                       'screen_name': sender.from_user})
                    # insert the whole tweet into a userTweets column header
                    user_tweets_batch.insert(sender.user_id, {line: ''})
                if tweet[u"to_user"] is not None and check_user(tweet[u"to_user_id"]) == False:
                    to_user = get_to_user(tweet)
                    user_batch.insert(to_user.user_id, {'user_name': to_user.user_name,
                                                        'screen_name': to_user.from_user})
                    followers_batch.insert(to_user.user_id, {sender.user_id: 'follower_id'})
                    # insert the whole tweet into a followeTweets column header for the to user.
                    followerTweets_batch.insert(to_user.user_id, {line: ''})
                if u"entities" in tweet:
                    # iterate over the users in mentions and add them to users and follows if necessary
                    if tweet[u"entities"][u"user_mentions"] != []:
                        user_mentions = get_mentions(tweet)
                        for obj in user_mentions:
                            if check_user(obj.user_id) == False:
                                user_batch.insert(obj.user_id, {'user_name': obj.user_name,
                                                                'screen_name': obj.from_user})
                                followers_batch.insert(obj.user_id, {'user_id': sender.user_id})
                                # insert the whole tweet to a followerTweet entry for the mentioned user
                                followerTweets_batch.insert(obj.user_id, {line: ''})
                    else:
                        continue
                tweet_family.insert(tweet_data.tweet_id, {'text': tweet_data.textbody,
                                                          'user_id': sender.user_id,
                                                          'timeanddate': tweet_data.timestamp})
            except Exception:
                err = sys.exc_info()
                # print the exception data with traceback and continue.
                print "Broken cos %s %s %s" % (err[0], err[1], traceback.print_tb(err[2]))
                continue

    # Pools Closed.
    pool.dispose()
def test_retrieve_with_custom_composite(self): cf_std = ColumnFamily(pool, "CustomComposite1") cf_cust = ColumnFamily(pool, "CustomComposite1") cf_cust.column_name_class = CompositeType(TestCustomComposite.IntDateType(), TestCustomComposite.IntString()) std_col = (20120312, "321") cust_col = (date(2012, 3, 12), 321) cf_std.insert("cust_insert_key_2", {std_col: "cust_insert_val_2"}) assert_equal(cf_cust.get("cust_insert_key_2"), {cust_col: "cust_insert_val_2"})
def main(filename):
    data = file(filename)

    # Set up the connection pool
    pool = ConnectionPool('tuitterdb', ['localhost:9160'])

    # CF connections
    user_family = ColumnFamily(pool, 'user')
    tweet_family = ColumnFamily(pool, 'tweet')
    user_tweets_family = ColumnFamily(pool, 'userTweets')
    #follows_tweets_family = ColumnFamily(pool, 'follows.tweets')
    followers = ColumnFamily(pool, 'followers')

    # Batch Definitions
    user_batch = user_family.batch(queue_size=1000)
    followers_batch = followers.batch(queue_size=500)
    user_tweets_batch = user_tweets_family.batch(queue_size=500)

    while True:
        line = data.readline()
        # readline() returns an empty string (never None) at end of file
        if line == "":
            break
        else:
            tweet = tweet_get(line)
            try:
                tweet_data = get_tweet_data(tweet)
                if check_user(tweet[u"from_user_id_str"]) == False:
                    sender = get_sender(tweet)
                    user_batch.insert(sender.user_id, {'user_name': sender.user_name,
                                                       'screen_name': sender.from_user})
                    user_tweets_batch.insert(sender.user_id, {tweet_data.tweet_id: tweet_data.timestamp})
                if tweet[u"to_user"] is not None and check_user(tweet[u"to_user_id"]) == False:
                    to_user = get_to_user(tweet)
                    user_batch.insert(to_user.user_id, {'user_name': to_user.user_name,
                                                        'screen_name': to_user.from_user})
                    followers_batch.insert(to_user.user_id, {'user_id': sender.user_id})
                if u"entities" in tweet:
                    if tweet[u"entities"][u"user_mentions"] != []:
                        user_mentions = get_mentions(tweet)
                        for obj in user_mentions:
                            user_batch.insert(obj.user_id, {'user_name': obj.user_name,
                                                            'screen_name': obj.from_user})
                            followers_batch.insert(obj.user_id, {'user_id': sender.user_id})
                    else:
                        continue
                tweet_family.insert(tweet_data.tweet_id, {'text': tweet_data.textbody,
                                                          'user_id': sender.user_id,
                                                          'timeanddate': tweet_data.timestamp})
            except Exception:
                err = sys.exc_info()
                print "Broken cos %s %s %s" % (err[0], err[1], traceback.print_tb(err[2]))
                continue

    # Pools Closed.
    pool.dispose()

#if __name__ == "__main__":
#    unittest.main()
def update_thread_status(self, thread_id):
    threads = ColumnFamily(self.conn, 'threads')
    dt = datetime.datetime.today()
    str_dt = dt.strftime('%Y-%m-%d %H:%M:%S')
    ret = threads.get(str(thread_id), columns=['post_count'])
    post_count = int(ret['post_count']) + 1
    # write back under the same string key used for the read above
    threads.insert(str(thread_id), {'post_count': str(post_count),
                                    'update_time': str_dt})
def insert(self, row):
    pool = ConnectionPool('HandsetLogKS',
                          ['batt1.nuance.com:9160', 'batt2.nuance.com:9160'])
    col_fam = ColumnFamily(pool, 'HandsetLogEntriesCF')
    if len(row) >= 1:
        print row
        key = self.hashString(row['s-ip'] + row['c-query'])
        col_fam.insert(key, row)
def test_static_composite(cls):
    sys = SystemManager()
    have_composites = sys._conn.version != CASSANDRA_07
    if not have_composites:
        raise SkipTest("Cassandra < 0.8 does not support composite types")

    sys.create_column_family(TEST_KS, 'StaticComposite',
                             comparator_type=CompositeType(LongType(),
                                                           IntegerType(),
                                                           TimeUUIDType(reversed=True),
                                                           LexicalUUIDType(reversed=False),
                                                           AsciiType(),
                                                           UTF8Type(),
                                                           BytesType()))
    cf = ColumnFamily(pool, 'StaticComposite')
    colname = (127312831239123123, 1, uuid.uuid1(), uuid.uuid4(),
               'foo', u'ba\u0254r', 'baz')
    cf.insert('key', {colname: 'val'})
    assert_equal(cf.get('key'), {colname: 'val'})

    u1 = uuid.uuid1()
    u4 = uuid.uuid4()
    col0 = (0, 1, u1, u4, '', '', '')
    col1 = (1, 1, u1, u4, '', '', '')
    col2 = (1, 2, u1, u4, '', '', '')
    col3 = (1, 3, u1, u4, '', '', '')
    col4 = (2, 1, u1, u4, '', '', '')
    cf.insert('key2', {col0: '', col1: '', col2: '', col3: '', col4: ''})

    result = cf.get('key2', column_start=((1, True),), column_finish=((1, True),))
    assert_equal(result, {col1: '', col2: '', col3: ''})

    result = cf.get('key2', column_start=(1,), column_finish=((2, False),))
    assert_equal(result, {col1: '', col2: '', col3: ''})

    result = cf.get('key2', column_start=((1, True),), column_finish=((2, False),))
    assert_equal(result, {col1: '', col2: '', col3: ''})

    result = cf.get('key2', column_start=(1,), column_finish=((2, False),))
    assert_equal(result, {col1: '', col2: '', col3: ''})

    result = cf.get('key2', column_start=((0, False),), column_finish=((2, False),))
    assert_equal(result, {col1: '', col2: '', col3: ''})

    result = cf.get('key2', column_start=(1, 1), column_finish=(1, 3))
    assert_equal(result, {col1: '', col2: ''})

    result = cf.get('key2', column_start=(1, 1), column_finish=(1, (3, True)))
    assert_equal(result, {col1: '', col2: '', col3: ''})

    result = cf.get('key2', column_start=(1, (1, True)), column_finish=((2, False),))
    assert_equal(result, {col1: '', col2: '', col3: ''})

    sys.drop_column_family(TEST_KS, 'StaticComposite')
def update_cassandra(self, conn, val, keys, table):
    l_id = val
    print l_id
    # updating the column last_logid with the increasing values of l_id
    col_fam = ColumnFamily(conn, table)
    for key in keys:
        col_fam.insert(str(key), {"logid": l_id})
        print l_id
        l_id += 1
    return l_id
def colum_family_insert(self, machine_id, keyspace_name, column_family_name, user_content):
    """Insert into a column family for a given keyspace """
    if (self.keyspace_contains(keyspace_name, column_family_name) == False):
        print "Error : Keyspace:column family could not be found."
        return False
    pool = ConnectionPool(keyspace=keyspace_name,
                          server_list=keyspace.server_ips,
                          prefill=False)
    col_fam = ColumnFamily(pool, column_family_name)
    for content in user_content:
        col_fam.insert(content, user_content[content])
    #col_fam.insert('Key2', {'name':'mayur', 'age':'23'})
    return True
def test_alter_column_non_bytes_type(self):
    sys.create_column_family(TEST_KS, 'LongCF', comparator_type=LONG_TYPE)
    sys.create_index(TEST_KS, 'LongCF', 3, LONG_TYPE)
    pool = ConnectionPool(TEST_KS)
    cf = ColumnFamily(pool, 'LongCF')
    cf.insert('key', {3: 3})
    assert_equal(cf.get('key')[3], 3)

    sys.alter_column(TEST_KS, 'LongCF', 2, LONG_TYPE)
    cf = ColumnFamily(pool, 'LongCF')
    cf.insert('key', {2: 2})
    assert_equal(cf.get('key')[2], 2)
def test_composite_slicing(self):
    cf_std = ColumnFamily(pool, 'CustomComposite1')
    cf_cust = ColumnFamily(pool, 'CustomComposite1')
    cf_cust.column_name_class = CompositeType(
        TestCustomComposite.IntDateType(),
        TestCustomComposite.IntString2())

    col0 = (20120101, '123')
    col1 = (20120102, '123')
    col2 = (20120102, '456')
    col3 = (20120102, '789')
    col4 = (20120103, '123')

    dt0 = date(2012, 1, 1)
    dt1 = date(2012, 1, 2)
    dt2 = date(2012, 1, 3)

    col0_cust = (dt0, 123)
    col1_cust = (dt1, 123)
    col2_cust = (dt1, 456)
    col3_cust = (dt1, 789)
    col4_cust = (dt2, 123)

    cf_std.insert('key2', {col0: '', col1: '', col2: '', col3: '', col4: ''})

    def check(column_start, column_finish, col_reversed=False):
        result = cf_cust.get('key2', column_start=column_start,
                             column_finish=column_finish,
                             column_reversed=col_reversed)
        assert_equal(result, {col1_cust: '', col2_cust: '', col3_cust: ''})

    # Defaults should be inclusive on both ends
    check((dt1,), (dt1,))
    check((dt1,), (dt1,), True)

    check(((dt1, True),), ((dt1, True),))
    check((dt1,), ((dt2, False),))
    check(((dt1, True),), ((dt2, False),))
    check(((dt0, False),), ((dt2, False),))
    check((dt1, 123), (dt1, 789))
    check((dt1, 123), (dt1, (789, True)))
    check((dt1, (123, True)), ((dt2, False),))

    # Test inclusive ends for reversed
    check(((dt1, True),), ((dt1, True),), True)
    check((dt1,), ((dt1, True),), True)
    check(((dt1, True),), (dt1,), True)

    # Test exclusive ends for reversed
    check(((dt2, False),), ((dt0, False),), True)
    check(((dt2, False),), (dt1,), True)
    check((dt1,), ((dt0, False),), True)
def test_uuid_composites(self):
    sys = SystemManager()
    sys.create_column_family(TEST_KS, 'UUIDComposite',
            comparator_type=CompositeType(IntegerType(reversed=True), TimeUUIDType()),
            key_validation_class=TimeUUIDType(),
            default_validation_class=UTF8Type())

    key, u1, u2 = uuid.uuid1(), uuid.uuid1(), uuid.uuid1()
    cf = ColumnFamily(pool, 'UUIDComposite')
    cf.insert(key, {(123123, u1): 'foo'})
    cf.insert(key, {(123123, u1): 'foo', (-1, u2): 'bar', (-123123123, u1): 'baz'})
    assert_equal(cf.get(key),
                 {(123123, u1): 'foo', (-1, u2): 'bar', (-123123123, u1): 'baz'})
def test_insert_with_custom_composite(self):
    cf_std = ColumnFamily(pool, 'CustomComposite1')
    cf_cust = ColumnFamily(pool, 'CustomComposite1')
    cf_cust.column_name_class = CompositeType(
        TestCustomComposite.IntDateType(),
        TestCustomComposite.IntString())

    std_col = (20120311, '321')
    cust_col = (date(2012, 3, 11), 321)
    cf_cust.insert('cust_insert_key_1', {cust_col: 'cust_insert_val_1'})
    assert_equal(cf_std.get('cust_insert_key_1'), {std_col: 'cust_insert_val_1'})
def test_column_validator(self):
    cf = ColumnFamily(pool, 'CompositeOverrideCF')
    cf.column_validators[('a', 'b')] = BooleanType()
    cf.insert('key', {('a', 'a'): 'foo', ('a', 'b'): True})
    assert_equal(cf.get('key'), {('a', 'a'): 'foo', ('a', 'b'): True})
    assert_equal(cf.column_validators[('a', 'b')].__class__, BooleanType)

    keys = cf.column_validators.keys()
    assert_equal(keys, [('a', 'b')])

    del cf.column_validators[('a', 'b')]
    assert_raises(KeyError, cf.column_validators.__getitem__, ('a', 'b'))
def test_column_validator(self): cf = ColumnFamily(pool, "CompositeOverrideCF") cf.column_validators[("a", "b")] = BooleanType() cf.insert("key", {("a", "a"): "foo", ("a", "b"): True}) assert_equal(cf.get("key"), {("a", "a"): "foo", ("a", "b"): True}) assert_equal(cf.column_validators[("a", "b")].__class__, BooleanType) keys = cf.column_validators.keys() assert_equal(keys, [("a", "b")]) del cf.column_validators[("a", "b")] assert_raises(KeyError, cf.column_validators.__getitem__, ("a", "b"))
def store_blob(self, key, content, chunk_size=DEFAULT_BLOB_CHUNK_SIZE):
    cf = ColumnFamily(self.pool, 'blobs')
    chunks = len(content) / chunk_size + 1
    sha1 = hashlib.sha1()
    sha1.update(content)
    offset = 0
    i = 1
    while True:
        b = content[offset:offset + chunk_size]
        # We prefix each part with "z" so the big chunks come at the end of
        # the row and our initial read for all the metadata doesn't span
        # excessive pages on disk.
        cf.insert(key, {'z:%04d' % i: b})
        if len(b) < chunk_size:
            break
        offset += chunk_size
        i += 1
    cf.insert(key, {
        'version': 1,
        'sha1': sha1.digest(),
        'size': len(content),
        'chunk_size': chunk_size,
        'chunk_count': chunks,
    })
    indices = ColumnFamily(self.pool, 'simple_indices')
    indices.insert('blobs', {key: ''})
    indices.insert('blob_size', {key: str(len(content))})
    return sha1.digest()
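# A hypothetical companion read path (not part of the original class, assumes
# the same 'blobs' row layout written by store_blob above): the metadata columns
# and the 'z:'-prefixed chunk columns share one row, so the blob can be
# reassembled by slicing only the chunk columns, which already sort in order
# by their zero-padded suffix.
def load_blob(self, key):
    cf = ColumnFamily(self.pool, 'blobs')
    # ';' sorts just after ':' in a bytes comparator, so this slice covers every
    # 'z:NNNN' chunk and nothing else; column_count assumes at most 10000 chunks.
    chunk_cols = cf.get(key, column_start='z:', column_finish='z;',
                        column_count=10000)
    return ''.join(chunk_cols.values())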
def test_get_indexed_slices(self):
    sys = SystemManager()
    for cf, keys in self.type_groups:
        sys.create_index(TEST_KS, cf.column_family, 'birthdate', LongType())
        cf = ColumnFamily(pool, cf.column_family)
        for key in keys:
            cf.insert(key, {'birthdate': 1})
        expr = create_index_expression('birthdate', 1)
        clause = create_index_clause([expr])
        rows = list(cf.get_indexed_slices(clause))
        assert_equal(len(rows), len(keys))
        for k, c in rows:
            assert_true(k in keys)
            assert_equal(c, {'birthdate': 1})
class TestBigInt(unittest.TestCase):

    @classmethod
    def setup_class(cls):
        sys = SystemManager()
        sys.create_column_family(TEST_KS, 'StdInteger', comparator_type=IntegerType())

    @classmethod
    def teardown_class(cls):
        sys = SystemManager()
        sys.drop_column_family(TEST_KS, 'StdInteger')

    def setUp(self):
        self.key = 'TestBigInt'
        self.cf = ColumnFamily(pool, 'StdInteger')

    def tearDown(self):
        self.cf.remove(self.key)

    def test_negative_integers(self):
        self.cf.insert(self.key, {-1: '-1'})
        self.cf.insert(self.key, {-12342390: '-12342390'})
        self.cf.insert(self.key, {-255: '-255'})
        self.cf.insert(self.key, {-256: '-256'})
        self.cf.insert(self.key, {-257: '-257'})
        for key, cols in self.cf.get_range():
            self.assertEquals(str(cols.keys()[0]), cols.values()[0])
def test_column_validators(self):
    validators = {'name': UTF8_TYPE, 'age': LONG_TYPE}
    sys.create_column_family(TEST_KS, 'ValidatedCF',
                             column_validation_classes=validators)
    pool = ConnectionPool(TEST_KS)
    cf = ColumnFamily(pool, 'ValidatedCF')
    cf.insert('key', {'name': 'John', 'age': 40})
    self.assertEquals(cf.get('key'), {'name': 'John', 'age': 40})

    validators = {'name': ASCII_TYPE, 'age': INT_TYPE}
    sys.alter_column_family(TEST_KS, 'ValidatedCF',
                            column_validation_classes=validators)
    cf.load_schema()
    self.assertEquals(cf.get('key'), {'name': 'John', 'age': 40})
class SimpleHandler(tornado.web.RequestHandler):
    """
    Handles /api/1/simple-handler
    """

    def initialize(self, cassandra_session):
        """
        Initializer of the received request.

        Args:
            cassandra_session(pycassa.pool.ConnectionPool)
        """
        self.column_family = ColumnFamily(
            cassandra_session,
            COLUMN_FAMILY,
        )

    def post(self, key):
        """
        Handle POST /simple-handler requests.

        Save the received data into Cassandra.

        Args:
            key(str) URL key where the data has to be stored
            data(json) posted data in format:
                `{
                    "first_value": "your value",
                    "second_value": "your value",
                    "third_value": "your value",
                }`
        """
        first_value = json.loads(self.request.body)['first_value']
        second_value = json.loads(self.request.body)['second_value']
        third_value = json.loads(self.request.body)['third_value']
        self.column_family.insert(
            key,
            {
                ('c1_first_value', 'c2_first_value'): str(first_value),
                ('c1_second_value', 'c2_second_value'): str(second_value),
                ('c1_third_value', 'c2_third_value'): str(third_value),
            }
        )
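    # A hypothetical companion GET handler (not part of the original class,
    # assumes the same COLUMN_FAMILY with a two-component composite comparator):
    # pycassa returns composite column names as tuples, so they are flattened
    # to "component1/component2" strings before being serialized as JSON.
    def get(self, key):
        columns = self.column_family.get(key)
        self.write(json.dumps({'%s/%s' % name: value
                               for name, value in columns.items()}))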
def colum_family_insert(self, machine_id, keyspace_name, column_family_name, user_content):
    """Insert into a column family for a given keyspace """
    if (self.keyspace_contains(keyspace_name, column_family_name) == False):
        print "Error : Keyspace:column family could not be found."
        return False
    pool = ConnectionPool(keyspace=keyspace_name,
                          server_list=keyspace.server_ips,
                          prefill=False)
    col_fam = ColumnFamily(pool, column_family_name)
    for content in user_content:
        col_fam.insert(content, user_content[content])
    #col_fam.insert('Key2', {'name':'mayur', 'age':'23'})
    return True
def insert(self, instance, columns=None, timestamp=None, ttl=None,
           write_consistency_level=None):
    """
    Insert or update stored instances.

    `instance` should be an instance of `cls` to store.

    The `columns` parameter allows you to specify which attributes of
    `instance` should be inserted or updated. If left as ``None``, all
    attributes will be inserted.
    """
    if columns is None:
        fields = self.fields
    else:
        fields = columns

    insert_dict = self._get_instance_as_dict(instance, columns=fields)
    return ColumnFamily.insert(self, instance.key, insert_dict,
                               timestamp=timestamp, ttl=ttl,
                               write_consistency_level=write_consistency_level)
def insert(self, instance, columns=None, timestamp=None, ttl=None,
           write_consistency_level=None):
    """
    Insert or update stored instances.

    `instance` should be an instance of `cls` to store.

    The `columns` parameter allows you to specify which attributes of
    `instance` should be inserted or updated. If left as ``None``, all
    attributes will be inserted.
    """
    if columns is None:
        fields = self.fields
    else:
        fields = columns

    insert_dict = {}
    for field in fields:
        val = getattr(instance, field, None)
        if val is not None and not isinstance(val, CassandraType):
            insert_dict[field] = val

    if self.super:
        insert_dict = {instance.super_column: insert_dict}

    return ColumnFamily.insert(self, instance.key, insert_dict,
                               timestamp=timestamp, ttl=ttl,
                               write_consistency_level=write_consistency_level)
class TestTypeErrors(unittest.TestCase):

    def test_packing_enabled(self):
        self.cf = ColumnFamily(pool, 'Standard1')
        self.cf.insert('key', {'col': 'val'})
        # assert_raises forwards extra positional arguments to the callable,
        # so pass the insert arguments positionally rather than as args=(...).
        assert_raises(TypeError, self.cf.insert, 'key', {123: 'val'})
        assert_raises(TypeError, self.cf.insert, 'key', {'col': 123})
        assert_raises(TypeError, self.cf.insert, 'key', {123: 123})
        self.cf.remove('key')

    def test_packing_disabled(self):
        self.cf = ColumnFamily(pool, 'Standard1', autopack_names=False,
                               autopack_values=False)
        self.cf.insert('key', {'col': 'val'})
        assert_raises(TypeError, self.cf.insert, 'key', {123: 'val'})
        assert_raises(TypeError, self.cf.insert, 'key', {'col': 123})
        assert_raises(TypeError, self.cf.insert, 'key', {123: 123})
        self.cf.remove('key')
def test_default_validated_columns(self):
    sys = SystemManager()
    sys.create_column_family(TEST_KS, 'DefaultValidator',
                             default_validation_class=LongType())
    sys.alter_column(TEST_KS, 'DefaultValidator', 'subcol', TimeUUIDType())
    sys.close()

    cf = ColumnFamily(pool, 'DefaultValidator')
    key = 'key1'

    col_cf = {'aaaaaa': 1L}
    col_cm = {'subcol': TIME1}
    col_ncf = {'aaaaaa': TIME1}
    col_ncm = {'subcol': 1L}

    # Both of these inserts work, as cf allows longs and
    # cm for 'subcol' allows TIMEUUIDs.
    cf.insert(key, col_cf)
    cf.insert(key, col_cm)
    assert_equal(cf.get(key), {'aaaaaa': 1L, 'subcol': TIME1})
def mass_insert(pool):
    cf_logs = ColumnFamily(pool, CF_LOGS)
    cf_logs_by_app = ColumnFamily(pool, CF_LOGS_BY_APP)
    cf_logs_by_host = ColumnFamily(pool, CF_LOGS_BY_HOST)
    cf_logs_by_severity = ColumnFamily(pool, CF_LOGS_BY_SEVERITY)
    rnd_inst = random.Random()
    rnd_inst.seed(1)
    start = time.time()
    count = 0
    try:
        for item in log_generator(1):
            msg = item[0]
            app = item[1]
            host = item[2]
            severity = item[3]
            # http://pycassa.github.com/pycassa/assorted/time_uuid.html
            # http://www.slideshare.net/jeremiahdjordan/pycon-2012-apache-cassandra
            # http://www.slideshare.net/rbranson/how-do-i-cassandra @ slide 80
            # https://github.com/pycassa/pycassa/issues/135

            # Save on <CF> CF_LOGS
            event_uuid = uuid.uuid1()
            row_key = ymd_from_uuid1(event_uuid)
            cf_logs.insert(str(row_key), {
                event_uuid: msg,
            })

            # Save on <CF> CF_LOGS_BY_APP
            cf_logs_by_app.insert(app, {
                event_uuid: msg,
            })

            # Save on <CF> CF_LOGS_BY_HOST
            cf_logs_by_host.insert(host, {
                event_uuid: msg,
            })

            # Save on <CF> CF_LOGS_BY_SEVERITY
            cf_logs_by_severity.insert(severity, {
                event_uuid: msg,
            })

            count += 4
            if count % 400 == 0:
                avg = float(count) / (time.time() - start)
                logging.info("Inserted %d columns, %f insert/sec", count, avg)
    except KeyboardInterrupt:
        logging.info("Stopping...")
    end = time.time()
    avg = float(count) / (end - start)
    logging.info("%d columns inserted. Avg: %f insert/sec", count, avg)
class Processor(Llama):

    def __init__(self, client, qname):
        super(Processor, self).__init__(client, qname)
        self.pool = ConnectionPool('processing_llama_Processor')
        self.trends = ColumnFamily(self.pool, 'Trend')

    def get_sleep_time(self):
        return 60

    def do_message(self, message):
        if not isinstance(message, tuple) or len(message) != 4:
            return
        woeid, as_of, trend_name, query = message
        try:
            trend = self.trends.get(trend_name, super_column=woeid)
            trend['lastseen'] = as_of
            trend['number_seen'] += 1
            trend = {woeid: trend}
            self.trends.insert(trend_name, trend)
        except ttypes.NotFoundException:
            self.trends.insert(trend_name, {woeid: {'firstseen': as_of,
                                                    'lastseen': as_of,
                                                    'number_seen': 1}})
            self.trends.insert(trend_name, {'data': {'query': query,
                                                     'tracking': "False"}})

    def has_many_woeids(self, x):
        key, values = x
        return len(values) > 2  # one woeid plus general data

    def do_action(self):
        try:
            for trend_name, country_specifics in filter(self.has_many_woeids,
                                                         self.trends.get_range()):
                if country_specifics['data']['tracking'] == "False":
                    self.track_trend(trend_name,
                                     country_specifics['data']['query'],
                                     filter(lambda x: x != "data",
                                            country_specifics.keys()))
        except:
            exc, value, exctraceback = sys.exc_info()
            logging.error("Error in processing_llama.processor.Processor.do_action:\n"
                          + traceback.format_exc())
            traceback.print_tb(exctraceback)

    def track_trend(self, trend_name, query, woeids):
        logging.info("Tracking %s from %s" % (trend_name, woeids))
        self.mark_tracking(trend_name)
        self.publish((trend_name, query, woeids), "trend_to_track")

    def mark_tracking(self, trend_name):
        try:
            trend = self.trends.get(trend_name, super_column='data')
            trend['tracking'] = "True"
            self.trends.insert(trend_name, {'data': trend})
        except ttypes.NotFoundException:
            pass
def do(self):
    if type == InsertCommand.INS_BASIC:
        ## I know that data for a basic insert is of this tuple type
        domain, row_key, basic_type_dict = self.data
        client = db_connection.get_client()
        cf = ColumnFamily(client, domain)
        cf.insert(row_key, basic_type_dict)
    elif type == InsertCommand.INS_OBJECT:
        ## call the save operation for the object
        if self.data:
            self.data.save()
    elif type == InsertCommand.INS_BATCH:
        ## Again, I know data for a batch insert will be of the following tuple type
        domain, basic_type_item_dict = self.data
        client = db_connection.get_client()
        cf = ColumnFamily(client, domain)
        b = cf.batch()
        for row_key in basic_type_item_dict.keys():
            b.insert(row_key, basic_type_item_dict[row_key])
        b.send()
def test_validation_with_packed_names(self):
    """
    Make sure that validated columns are packed correctly when the
    column names themselves must be packed
    """
    sys = SystemManager()
    sys.create_column_family(TEST_KS, 'Validators2',
                             comparator_type=LongType(),
                             default_validation_class=LongType())
    sys.alter_column(TEST_KS, 'Validators2', 1, TimeUUIDType())
    sys.close()

    my_uuid = uuid.uuid1()
    cf = ColumnFamily(pool, 'Validators2')

    cf.insert('key', {0: 0})
    assert_equal(cf.get('key'), {0: 0})

    cf.insert('key', {1: my_uuid})
    assert_equal(cf.get('key'), {0: 0, 1: my_uuid})

    cf.insert('key', {0: 0, 1: my_uuid})
    assert_equal(cf.get('key'), {0: 0, 1: my_uuid})
def test_static_composite_get_partial_composite(self):
    cf = ColumnFamily(pool, 'StaticComposite')
    cf.insert('key3', {(123123, 1): 'val'})
    assert_equal(cf.get('key3'), {(123123, 1): 'val'})
def test_static_composite_basic(self):
    cf = ColumnFamily(pool, 'StaticComposite')
    colname = (127312831239123123, 1, uuid.uuid1(), uuid.uuid4(),
               'foo', u'ba\u0254r', 'baz')
    cf.insert('key', {colname: 'val'})
    assert_equal(cf.get('key'), {colname: 'val'})
class Buyer(Llama):

    def __init__(self, client, qname, trend=5):
        super(Buyer, self).__init__(client, uuid.uuid4().hex)
        self.holdings = {}
        self.cash = 100000.0
        self.history = {}
        self.trend = trend
        self.pool = ConnectionPool('example_consumer_Buyer')
        self.stored_holdings = ColumnFamily(self.pool, 'Holdings')
        self.quote_history = ColumnFamily(self.pool, 'Quotes')
        self.stored_cash = ColumnFamily(self.pool, 'Cash')

        try:
            cash = self.stored_cash.get('current')
            self.cash = cash['amount']
        except ttypes.NotFoundException:
            self.stored_cash.insert('current', {'amount': self.cash})

        for symbol, columns in self.stored_holdings.get_range():
            self.holdings[symbol] = (columns['number_of_shares'],
                                     columns['price'],
                                     columns['cost'])

        date_expression = create_index_expression('timestamp',
                                                  datetime.date.today(), GT)
        date_clause = create_index_clause([date_expression], count=1000)
        for key, columns in self.quote_history.get_range():
            symbol = columns['symbol']
            price = columns['price']
            self.add_quote(symbol, price)

    def add_quote(self, symbol, price):
        if symbol not in self.history:
            self.history[symbol] = [price]
        else:
            self.history[symbol].append(price)

        if len(self.history[symbol]) >= self.trend:
            price_low = min(self.history[symbol][-self.trend:])
            price_max = max(self.history[symbol][-self.trend:])
            price_avg = sum(self.history[symbol][-self.trend:]) / self.trend
            #print "Recent history of %s is %s" % (symbol, self.history[symbol][-self.trend:])
        else:
            price_low, price_max, price_avg = (-1, -1, -1)
            print "%s quotes until we start deciding whether to buy or sell %s" % (
                self.trend - len(self.history[symbol]), symbol)
            #print "Recent history of %s is %s" % (symbol, self.history[symbol])
        return (price_low, price_max, price_avg)

    def do_message(self, quote):
        symbol, price, date, counter = quote
        #print "Thinking about whether to buy or sell %s at %s" % (symbol, price)
        price_low, price_max, price_avg = self.add_quote(symbol, price)
        self.save_quote(symbol, price)
        if price_low == -1:
            return
        #print "Trending minimum/avg/max of %s is %s-%s-%s" % (symbol, price_low, price_avg, price_max)
        #for symbol in self.holdings.keys():
        #    print "self.history[symbol][-1] = %s" % self.history[symbol][-1]
        #    print "self.holdings[symbol][0] = %s" % self.holdings[symbol][0]
        #    print "Value of %s is %s" % (symbol, float(self.holdings[symbol][0])*self.history[symbol][-1])
        value = sum([self.holdings[symbol][0] * self.history[symbol][-1]
                     for symbol in self.holdings.keys()])
        print "Net worth is %s + %s = %s" % (self.cash, value, self.cash + value)

        if symbol not in self.holdings:
            if price < 1.01 * price_low:
                shares_to_buy = random.choice([10, 15, 20, 25, 30])
                print "I don't own any %s yet, and the price is below the trending minimum of %s so I'm buying %s shares." % (
                    symbol, price_low, shares_to_buy)
                cost = shares_to_buy * price
                print "Cost is %s, cash is %s" % (cost, self.cash)
                if cost < self.cash:
                    self.buy_holdings(symbol, shares_to_buy, price, cost)
                    self.update_cash(-cost)
                    print "Cash is now %s" % self.cash
                else:
                    print "Unfortunately, I don't have enough cash at this time."
        else:
            if price > self.holdings[symbol][1] and price > 0.99 * price_max:
                print "+++++++ Price of %s is higher than my holdings, so I'm going to sell!" % symbol
                sale_value = self.holdings[symbol][0] * price
                print "Sale value is %s" % sale_value
                print "Holdings value is %s" % self.holdings[symbol][2]
                print "Total net is %s" % (sale_value - self.holdings[symbol][2])
                self.update_cash(sale_value)
                print "Cash is now %s" % self.cash
                self.sell_holdings(symbol)

    def update_cash(self, change):
        self.cash += change
        cash = self.stored_cash.get('current')
        cash['amount'] = self.cash
        self.stored_cash.insert('current', cash)

    def buy_holdings(self, symbol, shares_to_buy, price, cost):
        self.holdings[symbol] = (shares_to_buy, price, cost)
        stored_holding = {'number_of_shares': shares_to_buy,
                          'price': price,
                          'cost': cost}
        self.stored_holdings.insert(symbol, stored_holding)

    def sell_holdings(self, symbol):
        del self.holdings[symbol]
        self.stored_holdings.remove(symbol)

    def save_quote(self, symbol, price):
        key = str(uuid.uuid4())
        self.quote_history.insert(key, {'symbol': symbol, 'price': price})
def fetch(key):
    pool = ConnectionPool(KEY_SPACE, [DB_URI])
    col_fam = ColumnFamily(pool, COLUMN_FAMILY)
    col_fam.insert('row_key', {'col_name': 'col_val'})
    return col_fam.get(str(key))
# connect to cassandra
pool = ConnectionPool('metrink')

# get the column family
col_fam = ColumnFamily(pool, 'metrics')

# you must create a Cursor object. It will let
# you execute all the queries you need
cur = db.cursor()

# Use all the SQL you like
cur.execute(
    'select company, client, device, groupName, name, time_stamp, value from metrics join metrics_devices on metrics.device_id = metrics_devices.device_id join metrics_groups on metrics.group_id = metrics_groups.group_id join metrics_names on metrics.name_id = metrics_names.name_id join metrics_owners on metrics.ownerId = metrics_owners.ownerId'
)

# walk every row returned by MySQL and copy it into Cassandra
for row in cur.fetchall():
    time = datetime.datetime.fromtimestamp(row[5] // 1000)
    time_str = str(time.strftime("%Y%m"))

    row_key = str(row[0]) + ":" + str(row[1]) + ":" + time_str + ":" + \
        str(row[2]) + ":" + str(row[3]) + ":" + str(row[4])
    print row_key

    col_fam.insert(row_key, {row[5]: row[6]})

# close our cassandra connection
pool.dispose()

# close our connection to mysql
db.close()
def test_validated_columns(self):
    sys = SystemManager()
    sys.create_column_family(TEST_KS, 'Validators')
    sys.alter_column(TEST_KS, 'Validators', 'long', LongType())
    sys.alter_column(TEST_KS, 'Validators', 'int', IntegerType())
    sys.alter_column(TEST_KS, 'Validators', 'time', TimeUUIDType())
    sys.alter_column(TEST_KS, 'Validators', 'lex', LexicalUUIDType())
    sys.alter_column(TEST_KS, 'Validators', 'ascii', AsciiType())
    sys.alter_column(TEST_KS, 'Validators', 'utf8', UTF8Type())
    sys.alter_column(TEST_KS, 'Validators', 'bytes', BytesType())
    sys.close()

    cf = ColumnFamily(pool, 'Validators')
    key = 'key1'

    col = {'long': 1L}
    cf.insert(key, col)
    assert_equal(cf.get(key)['long'], 1L)

    col = {'int': 1}
    cf.insert(key, col)
    assert_equal(cf.get(key)['int'], 1)

    col = {'time': TIME1}
    cf.insert(key, col)
    assert_equal(cf.get(key)['time'], TIME1)

    col = {'lex': uuid.UUID(bytes='aaa aaa aaa aaaa')}
    cf.insert(key, col)
    assert_equal(cf.get(key)['lex'], uuid.UUID(bytes='aaa aaa aaa aaaa'))

    col = {'ascii': 'aaa'}
    cf.insert(key, col)
    assert_equal(cf.get(key)['ascii'], 'aaa')

    col = {'utf8': u'a\u0020'}
    cf.insert(key, col)
    assert_equal(cf.get(key)['utf8'], u'a\u0020')

    col = {'bytes': 'aaa'}
    cf.insert(key, col)
    assert_equal(cf.get(key)['bytes'], 'aaa')

    cf.remove(key)
import csv
import sys

from pycassa.pool import ConnectionPool
from pycassa.columnfamily import ColumnFamily

# pycassa talks to Cassandra over Thrift, which listens on port 9160 by default
# (9042 is the CQL native-protocol port and will not work with pycassa).
pool = ConnectionPool('mykeyspace', ['localhost:9160'])
cf = ColumnFamily(pool, "NBD")

with open('bank-full.csv', 'rb') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        print(str(row))
        key = row['id']
        del row['id']
        cf.insert(key, row)

pool.dispose()
# Excerpt from inside a larger function: the enclosing def and the opening
# try: for the except below are not shown in the original snippet.
        cursor2.execute('select url from url limit ' + str(limit) + ', 5000')
        resultSet2 = cursor2.fetchall()
        for results in resultSet2:
            id = results[0]
            rts.append(id)
    except Exception, err:
        print 'Error get_pages:', err
    return rts

conn3 = MySQLdb.connect(host, user, passwd, db)
rt = get_urls_to_index(conn3, user)
if len(rt) > 0:
    return rt

pool = ConnectionPool('ubuntu13', ['localhost:9160'])
col_fam = ColumnFamily(pool, 'teste')
count = 0
while count <= total:
    #for results in get_urls('dbmy0031.whservidor.com', 'esyns1', 'acc159753', 'esyns1', count):
    for results in get_urls('dbmy0035.whservidor.com', 'esyns1_2', 'acc159753', 'esyns1_2', count):
    #for results in get_urls('dbmy0053.whservidor.com', 'esyns1_1', 'acc159753', 'esyns1_1', count):
        col_fam.insert(unicode(results, errors="ignore"),
                       {'url': unicode(results, errors="ignore")})
        count = count + 1
        if count % 5000 == 0:
            print "Count: ", count
import pycassa
from pycassa.pool import ConnectionPool
from pycassa.columnfamily import ColumnFamily

pool = ConnectionPool('keyspace2', ['localhost:9160'])
col_fam = ColumnFamily(pool, 'cassandraGroup')
col_fam.insert('Chad', {'middleInit': 'm', 'lastName': 'Tolleson'})