def columnfamily_dump(host, port, keyspace, columnfamily, columns, limit, outfile, header, delimiter):
    """Dump up to `limit` rows of a column family to `outfile` as delimited text.

    When `columns` (comma-separated) is empty, the column set is discovered
    by scanning the rows first.  A synthetic '<cf>_id' field carries the row key.
    """
    pool = ConnectionPool(keyspace, ['{}:{}'.format(host, port)], timeout=None)
    col_fam = ColumnFamily(pool, columnfamily)
    id_field = u'{}_id'.format(columnfamily)
    if columns:
        fieldnames = set(columns.split(u','))
    else:
        # No explicit column list: sample the rows to discover every column name.
        sampled = col_fam.get_range(row_count=limit)
        fieldnames = set(itertools.chain.from_iterable(
            row[1].iterkeys() for row in sampled))
    fieldnames.add(id_field)
    writer = csv.DictWriter(outfile, fieldnames, extrasaction=u'ignore',
                            delimiter=delimiter)
    if header:
        writer.writeheader()
    buffered = []
    for row_key, row_data in col_fam.get_range(columns=fieldnames, row_count=limit):
        record = {id_field: row_key}
        record.update(row_data)
        buffered.append(record)
        # Flush in chunks matching the column family's own buffer size.
        if len(buffered) >= col_fam.buffer_size:
            writer.writerows(buffered)
            buffered = []
    # Flush whatever is left after the loop (the original's for/else branch).
    writer.writerows(buffered)
def colum_family_content(self,machine_id,keyspace_name,column_family_name): """Returns content of column family of given keyspace """ print "->>>in column family content function" pool = ConnectionPool(keyspace_name, [machine_id]) col_fam = ColumnFamily(pool, column_family_name) result = col_fam.get_range(start='', finish='') return result
class CassandraRepository(object): def __init__(self, keyspace, column_family_name): self.pool = ConnectionPool(keyspace, cassandra_settings.NODE_POOL) self.cf = ColumnFamily(self.pool, column_family_name) self.batch = {} def add_batch(self, batch, start_time=None): """ :param batch: """ self.cf.batch_insert(batch) if start_time is not None: print 'time to insert batch: %s ms' % (int(time.time() * 1000) - start_time) def get(self, timestamp): return self.cf.get(str(timestamp)) def get_range(self, start, end): return list(self.cf.get_range(start=str(start), finish=str(end))) def close(self): self.sys.close()
def truncate_log_metadata(self):
    """Remove all stored log-parsing metadata."""
    # Timeline data can be truncated wholesale.
    for family_name in ['build_timelines']:
        ColumnFamily(self.pool, family_name).truncate()

    # The remaining families are shared, so delete only our keys.
    indices_cf = ColumnFamily(self.pool, 'indices')
    for key in LOG_METADATA_INDICES:
        indices_cf.remove(key)

    counters_cf = ColumnFamily(self.pool, 'counters')
    for key in LOG_METADATA_COUNTERS:
        counters_cf.remove(key)

    super_counters_cf = ColumnFamily(self.pool, 'super_counters')
    for key in LOG_METADATA_SUPER_COUNTERS:
        super_counters_cf.remove(key)

    # Remove log parsing state from builds.
    builds_cf = ColumnFamily(self.pool, 'builds')
    batch = builds_cf.batch()
    for key, cols in builds_cf.get_range(columns=['log_parsing_version']):
        if 'log_parsing_version' not in cols:
            continue
        batch.remove(key, ['log_parsing_version'])
    batch.send()
def slaves(self):
    """Obtain basic metadata about all slaves."""
    slaves_cf = ColumnFamily(self.pool, 'slaves')
    # Yield (row_key, slave name) pairs for every stored slave.
    for row_key, columns in slaves_cf.get_range(columns=['name']):
        yield row_key, columns['name']
class TestBigInt(unittest.TestCase):
    """Exercises round-tripping of negative integer column names."""

    @classmethod
    def setup_class(cls):
        manager = SystemManager()
        manager.create_column_family(TEST_KS, 'StdInteger',
                                     comparator_type=IntegerType())

    @classmethod
    def teardown_class(cls):
        manager = SystemManager()
        manager.drop_column_family(TEST_KS, 'StdInteger')

    def setUp(self):
        self.key = 'TestBigInt'
        self.cf = ColumnFamily(pool, 'StdInteger')

    def tearDown(self):
        self.cf.remove(self.key)

    def test_negative_integers(self):
        # Column name is the integer itself; value is its decimal string.
        for number in (-1, -12342390, -255, -256, -257):
            self.cf.insert(self.key, {number: str(number)})
        # Reading back, the (deserialized) column name must match the value.
        for key, cols in self.cf.get_range():
            self.assertEquals(str(cols.keys()[0]), cols.values()[0])
def colum_family_content(self, keyspace_name, column_family_name): """Returns content of column family of given keyspace """ keyspace.cf_result = [] keyspace.error = "Unknown error : May be one of node in your cluster is down please check?" if not self.keyspace_contains(keyspace.local_system, keyspace_name, column_family_name): keyspace.error = "Desired Keyspace,Column Family pair could not be found." return False try: pool = ConnectionPool(keyspace=keyspace_name, server_list=keyspace.server_ips, prefill=False) except Exception as e: print e return False try: col_fam = ColumnFamily(pool, column_family_name) except Exception as e: print e return False result = [] try: tmp_result = col_fam.get_range(start='', finish='', buffer_size=10) for i in tmp_result: result.append(i) except Exception as e: print e return False keyspace.cf_result = result keyspace.tempks = keyspace_name keyspace.tempcf = column_family_name print result return result
def query(pool):
    """Log a sample (the 20 newest columns) from each of the log CFs.

    IMPROVEMENT: the original repeated the same header+dump loop four
    times; it is factored into one helper with identical log output.
    """

    def _dump_newest(cf, row_key, limit=20):
        # Shared report shape: header, separator, then up to `limit`
        # values in reverse column order (newest first).
        logging.info("Querying for key %s", row_key)
        logging.info("-" * 120)  # ------------------------------
        remaining = limit
        for k, v in cf.get(row_key, column_reversed=True).iteritems():  # @UnusedVariable
            logging.info(v)
            remaining -= 1
            if remaining == 0:
                break

    cf_logs = ColumnFamily(pool, CF_LOGS)
    row_key = ymd_from_epoch()
    try:
        # Cheap existence check: fetch zero columns.
        cf_logs.get(row_key, column_count=0)
    except NotFoundException:
        # FIXME: this is extremely inefficient!
        row_key = cf_logs.get_range().next()[0]
    logging.info("-" * 120)  # ------------------------------
    _dump_newest(cf_logs, row_key)
    del cf_logs

    logging.info("-" * 120)  # ------------------------------
    _dump_newest(ColumnFamily(pool, CF_LOGS_BY_APP), EXAMPLE_APPS[0])

    logging.info("-" * 120)  # ------------------------------
    _dump_newest(ColumnFamily(pool, CF_LOGS_BY_HOST), EXAMPLE_HOSTS[0])

    logging.info("-" * 120)  # ------------------------------
    _dump_newest(ColumnFamily(pool, CF_LOGS_BY_SEVERITY), "WARN")
def query(pool):
    """Log the 20 newest column values from each log column family.

    IMPROVEMENT: the original contained four copy-pasted header+dump
    sections; they are collapsed into a single helper that produces the
    identical sequence of log lines.
    """

    def _report(cf, row_key, limit=20):
        logging.info("Querying for key %s", row_key)
        logging.info("-" * 120)  # ------------------------------
        count = limit
        for k, v in cf.get(row_key, column_reversed=True).iteritems():  #@UnusedVariable
            logging.info(v)
            count -= 1
            if count == 0:
                break

    cf_logs = ColumnFamily(pool, CF_LOGS)
    row_key = ymd_from_epoch()
    try:
        # Zero-column get is used purely to test the row's existence.
        cf_logs.get(row_key, column_count=0)
    except NotFoundException:
        # FIXME: this is extremely inefficient!
        row_key = cf_logs.get_range().next()[0]
    logging.info("-" * 120)  # ------------------------------
    _report(cf_logs, row_key)
    del cf_logs

    logging.info("-" * 120)  # ------------------------------
    _report(ColumnFamily(pool, CF_LOGS_BY_APP), EXAMPLE_APPS[0])

    logging.info("-" * 120)  # ------------------------------
    _report(ColumnFamily(pool, CF_LOGS_BY_HOST), EXAMPLE_HOSTS[0])

    logging.info("-" * 120)  # ------------------------------
    _report(ColumnFamily(pool, CF_LOGS_BY_SEVERITY), 'WARN')
def query(pool): logging.info("-" * 120) # ------------------------------ logging.info("-" * 120) # ------------------------------ cf_logs = ColumnFamily(pool, CF_LOGS) for obj in cf_logs.get_range(): #@UnusedVariable print "Key: {0}".format(obj[0]) # print dir(obj[1]) for k, v in obj[1].iteritems(): print " {0} -> {1}".format(k, v)
def simple_select(self, columnfamily, *args):
    """Render rows of `columnfamily` as an ASCII table.

    With a plain key argument, prints that single row's columns; with a
    'start:finish[:count]' slice argument (or none), prints a key-range
    grid of rows x columns.
    """
    # slice holds [start_key, finish_key, row_count] for the range query.
    slice = ['', '', self.max_rows]
    key = None
    # NOTE(review): `args[1]` looks suspicious — a one-argument call would
    # raise IndexError here; `args[0]` may have been intended.  Confirm
    # against this command handler's callers before changing.
    if args and args[1]:
        if ':' not in args[1]:
            key = args[1]
        # Split at most into start:finish:count and overwrite defaults.
        for i, part in enumerate(args[1].split(':', 2)):
            slice[i] = part
    try:
        cf = ColumnFamily(self.pool, columnfamily)
    except NotFoundException:
        # Unknown column family: fall back to the default command handler.
        return super(CCli, self).default(' '.join([columnfamily] + list(args)))
    if key:
        # Single-row mode: two-column table of column name / value.
        pt = PrettyTable()
        pt.field_names = ['Key', key]
        pt.align["Key"] = "l"
        pt.align[key] = 'r'
        for k, v in cf.get(key).items():
            # Long values are truncated to max_data_size with an ellipsis.
            pt.add_row([k, (v[:self.max_data_size - 3] + '...' if self.max_data_size and len(v) > self.max_data_size else v)])
        print pt.get_string(sortby='Key')
        return
    # Range mode: materialize the slice, then build the union of all
    # column names seen so every row renders against the same header.
    data = dict(cf.get_range(start=slice[0], finish=slice[1], row_count=int(slice[2])))
    columns = []
    for key, row in data.items():
        columns.extend(row.keys())
    columns = list(set(columns))
    columns.sort()
    pt = PrettyTable()
    pt.field_names = ['Key / Column'] + columns
    pt.align["Key / Column"] = "l"
    for column in columns:
        pt.align[column] = "r"
    for key, row in data.items():
        prow = [key]
        for column in columns:
            # '---' marks a column the row does not have.
            value = row.get(column, '---')
            if len(value) > self.max_data_size:
                value = value[:self.max_data_size - 3] + '...'
            prow.append(value)
        pt.add_row(prow)
    print pt.get_string(sortby='Key / Column')
def colum_family_content(self,machine_id,keyspace_name,column_family_name): """Returns content of column family of given keyspace """ if (self.keyspace_contains(keyspace_name,column_family_name) == False): print "Error : Keyspace:column family could not be found." return False pool = ConnectionPool(keyspace = keyspace_name, server_list = keyspace.server_ips, prefill = False) col_fam = ColumnFamily(pool, column_family_name) tmp_result = col_fam.get_range(start='', finish='',row_count=5,buffer_size=10) result = [] for i in tmp_result: result.append(i) #print sum(1 for _ in result) # for count return result
def columnfamily_dump(host, port, keyspace, columnfamily, columns, limit, outfile, header, delimiter):
    """Write up to `limit` rows of `columnfamily` to `outfile` as CSV.

    The row key is emitted under a synthetic '<cf>_id' column; when
    `columns` is empty the field set is the union of all column names.
    """
    pool = ConnectionPool(keyspace, ['{}:{}'.format(host, port)], timeout=None)
    cf = ColumnFamily(pool, columnfamily)
    row_id_column = u'{}_id'.format(columnfamily)
    if columns:
        header_fields = set(columns.split(u','))
    else:
        # Discover every column name by scanning the rows once.
        header_fields = set()
        for _, row_columns in cf.get_range(row_count=limit):
            header_fields.update(row_columns.iterkeys())
    header_fields.add(row_id_column)
    writer = csv.DictWriter(outfile, header_fields, extrasaction=u'ignore',
                            delimiter=delimiter)
    if header:
        writer.writeheader()
    pending = []
    for row_key, row_columns in cf.get_range(columns=header_fields, row_count=limit):
        # Row key first so a real column of the same name wins, as before.
        record = {row_id_column: row_key}
        record.update(row_columns)
        pending.append(record)
        if len(pending) >= cf.buffer_size:
            writer.writerows(pending)
            pending = []
    writer.writerows(pending)
class Processor(Llama):
    """Consumes trend messages and tracks per-WOEID trend statistics in
    the 'Trend' super column family."""

    def __init__(self, client, qname):
        super(Processor, self).__init__(client, qname)
        self.pool = ConnectionPool('processing_llama_Processor')
        self.trends = ColumnFamily(self.pool, 'Trend')

    def get_sleep_time(self):
        # BUG FIX: `self` was missing from the signature, so calling this
        # method on an instance raised TypeError.
        return 60

    def do_message(self, message):
        """Record one (woeid, as_of, trend_name, query) observation."""
        if not isinstance(message, tuple) or len(message) != 4:
            return
        woeid, as_of, trend_name, query = message
        try:
            trend = self.trends.get(trend_name, super_column=woeid)
            trend['lastseen'] = as_of
            # NOTE(review): assumes number_seen round-trips as an int —
            # confirm the column validator.
            trend['number_seen'] += 1
            self.trends.insert(trend_name, {woeid: trend})
        except ttypes.NotFoundException:
            # First sighting for this woeid: seed the stats and the
            # general 'data' super column.
            self.trends.insert(trend_name, {woeid: {'firstseen': as_of,
                                                    'lastseen': as_of,
                                                    'number_seen': 1}})
            self.trends.insert(trend_name, {'data': {'query': query,
                                                     'tracking': "False"}})

    def has_many_woeids(self, x):
        key, values = x
        return len(values) > 2  # one woeid plus general data

    def do_action(self):
        """Start tracking any untracked trend seen in multiple locations."""
        try:
            for trend_name, country_specifics in filter(self.has_many_woeids, self.trends.get_range()):
                if country_specifics['data']['tracking'] == "False":
                    self.track_trend(trend_name,
                                     country_specifics['data']['query'],
                                     filter(lambda x: x != "data", country_specifics.keys()))
        except:
            exc, value, exctraceback = sys.exc_info()
            logging.error("Error in processing_llama.processor.Processor.do_action:\n" + traceback.format_exc())
            traceback.print_tb(exctraceback)

    def track_trend(self, trend_name, query, woeids):
        """Publish a tracking request and flag the trend as tracked."""
        logging.info("Tracking %s from %s" % (trend_name, woeids))
        self.mark_tracking(trend_name)
        self.publish((trend_name, query, woeids), "trend_to_track")

    def mark_tracking(self, trend_name):
        """Flip the trend's 'tracking' flag to "True" if it has data."""
        try:
            trend = self.trends.get(trend_name, super_column='data')
            trend['tracking'] = "True"
            self.trends.insert(trend_name, {'data': trend})
        except ttypes.NotFoundException:
            pass
def get_range(self, *args, **kwargs):
    """
    Get an iterator over instances in a specified key range.

    Like :meth:`multiget()`, whether a single instance or multiple
    instances are returned per-row when the column family is a super
    column family depends on what parameters are passed.

    For an explanation of how :meth:`get_range` works and a description
    of the parameters, see :meth:`.ColumnFamily.get_range()`.

    Example usage with a standard column family:

    .. code-block:: python

        >>> pool = pycassa.ConnectionPool('Keyspace1')
        >>> usercf = pycassa.ColumnFamily(pool, 'Users')
        >>> cfmap = pycassa.ColumnFamilyMap(MyClass, usercf)
        >>> users = cfmap.get_range(row_count=2, columns=['name', 'age'])
        >>> for key, user in users:
        ...     print user.name, user.age
        Miles Davis 84
        Winston Smith 42
    """
    # Restrict the query to the mapped fields unless the caller asked for
    # specific columns, or the CF is super / keeps raw columns (in which
    # case the column names cannot be known up front).
    if 'columns' not in kwargs and not self.super and not self.raw_columns:
        kwargs['columns'] = self.fields

    for key, columns in ColumnFamily.get_range(self, *args, **kwargs):
        if self.super:
            if 'super_column' not in kwargs:
                # No specific super column requested: yield a mapping of
                # super column name -> instance for each row.
                vals = self.dict_class()
                for super_column, subcols in columns.iteritems():
                    combined = self.combine_columns(subcols)
                    vals[super_column] = create_instance(
                        self.cls, key=key, super_column=super_column,
                        **combined)
                yield vals
            else:
                # Specific super column: yield one instance per row.
                combined = self.combine_columns(columns)
                yield create_instance(self.cls, key=key,
                                      super_column=kwargs['super_column'],
                                      **combined)
        else:
            # Standard column family: one instance per row.
            combined = self.combine_columns(columns)
            yield create_instance(self.cls, key=key, **combined)
def add_new_post(self, thread_id, name, content):
    """Append a post to the thread's CF; False once the 1000-post cap is hit."""
    posts = ColumnFamily(self.conn, thread_id)
    existing = list(posts.get_range())
    if len(existing) > 1000:
        return False
    # Row keys are 1-based sequence numbers stored as strings.
    next_key = '%s' % (len(existing) + 1)
    timestamp = datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S')
    posts.insert(next_key, {'name': name,
                            'content': content,
                            'post_time': timestamp})
    return True
def do_query(opt, start, finish):
    """Graph per-minute counter rows from the crash-data column family.

    Each row key is expected to be a JSON document; rows whose key cannot
    be parsed (or is not a JSON object) are skipped.
    """
    pool = ConnectionPool('CrashData', ['localhost:9160'])
    cassandra = ColumnFamily(pool, opt.column_family_counter)
    graph = Pyasciigraph()
    for key, contents in cassandra.get_range(column_count=60, filter_empty=False):
        # get_range returns:
        # key: OrderedDict() [made of: (minute, count of instances) tuples]
        try:
            things = json.loads(key).items()
        except (ValueError, TypeError, AttributeError):
            # IMPROVEMENT: narrowed from a bare `except:` to the errors a
            # bad key can actually raise (invalid JSON, non-string key,
            # JSON that is not an object), instead of hiding every failure.
            continue
        for line in graph.graph(str(things), sorted(contents.items())):
            print(line)
def create_new_thread(self, thread_name):
    """Create a thread row plus its own CF, evicting the oldest thread
    once more than 99 exist."""
    threads = ColumnFamily(self.conn, 'threads')
    existing = list(threads.get_range())
    if len(existing) > 99:
        # Capacity reached: drop the least recently updated thread.
        oldest = self._get_oldest_thread()
        oldest_id = str(oldest['thread_id'])
        threads.remove(oldest_id)
        self._drop_cf(oldest_id)
    thread_id = '%s' % random.randint(1, sys.maxint)
    now = datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S')
    threads.insert(thread_id, {'thread_name': thread_name,
                               'post_count': '1',
                               'create_time': now,
                               'update_time': now})
    self._create_cf(thread_id)
    return thread_id
def get_range(self, *args, **kwargs):
    """
    Get an iterator over instances in a specified key range.

    Like :meth:`multiget()`, whether a single instance or multiple
    instances are returned per-row when the column family is a super
    column family depends on what parameters are passed.

    For an explanation of how :meth:`get_range` works and a description
    of the parameters, see :meth:`.ColumnFamily.get_range()`.

    Example usage with a standard column family:

    .. code-block:: python

        >>> pool = pycassa.ConnectionPool('Keyspace1')
        >>> usercf = pycassa.ColumnFamily(pool, 'Users')
        >>> cfmap = pycassa.ColumnFamilyMap(MyClass, usercf)
        >>> users = cfmap.get_range(row_count=2, columns=['name', 'age'])
        >>> for key, user in users:
        ...     print user.name, user.age
        Miles Davis 84
        Winston Smith 42
    """
    # Default the column list to the mapped fields, except when the
    # caller chose columns or the CF is super / keeps raw columns.
    if 'columns' not in kwargs and not self.super and not self.raw_columns:
        kwargs['columns'] = self.fields

    for key, columns in ColumnFamily.get_range(self, *args, **kwargs):
        if self.super:
            if 'super_column' not in kwargs:
                # Yield one instance per super column in the row.
                vals = self.dict_class()
                for super_column, subcols in columns.iteritems():
                    combined = self.combine_columns(subcols)
                    vals[super_column] = create_instance(self.cls, key=key,
                                                         super_column=super_column,
                                                         **combined)
                yield vals
            else:
                # Specific super column requested: one instance per row.
                combined = self.combine_columns(columns)
                yield create_instance(self.cls, key=key,
                                      super_column=kwargs['super_column'],
                                      **combined)
        else:
            # Standard column family: one instance per row.
            combined = self.combine_columns(columns)
            yield create_instance(self.cls, key=key, **combined)
def colum_family_content(self, machine_id, keyspace_name, column_family_name): """Returns content of column family of given keyspace """ if (self.keyspace_contains(keyspace_name, column_family_name) == False): print "Error : Keyspace:column family could not be found." return False pool = ConnectionPool(keyspace=keyspace_name, server_list=keyspace.server_ips, prefill=False) col_fam = ColumnFamily(pool, column_family_name) tmp_result = col_fam.get_range(start='', finish='', row_count=5, buffer_size=10) result = [] for i in tmp_result: result.append(i) #print sum(1 for _ in result) # for count return result
def get_all_posts_in_thread(self, thread_id):
    """Return every post in the thread, ordered by numeric row key."""
    posts = ColumnFamily(self.conn, str(thread_id))
    # Collect all row keys first, then bulk-fetch them in one round trip.
    row_keys = [key for key, _ in posts.get_range()]
    rows = posts.multiget(row_keys)
    result = []
    for key in row_keys:
        result.append({
            'key': int(key),
            'name': rows[key]['name'],
            'content': rows[key]['content'],
            'post_time': rows[key]['post_time'],
        })
    # IMPROVEMENT: replaced the deprecated cmp= sort with an equivalent
    # key= sort — same ordering, and compatible with Python 3.
    result.sort(key=lambda post: post['key'])
    return result
def _get_oldest_thread(self):
    """Return the thread row dict with the oldest update_time."""
    threads = ColumnFamily(self.conn, 'threads')
    row_keys = [key for key, _ in threads.get_range()]
    rows = threads.multiget(row_keys)
    result = []
    for key in row_keys:
        result.append({
            'thread_id': int(key),
            # BUG FIX: the original assigned int(key) here (a copy/paste
            # of the thread_id line) instead of the stored name column.
            'thread_name': rows[key]['thread_name'],
            'post_count': rows[key]['post_count'],
            'create_time': rows[key]['create_time'],
            'update_time': rows[key]['update_time'],
        })
    # key= sort on the timestamp string orders identically to the old
    # cmp= sort and also works under Python 3.
    result.sort(key=lambda row: row['update_time'])
    return result[0]
def get_all(columnFamily):
    "select values in a given column family"
    # Returns the pycassa range iterator over every row.
    return ColumnFamily(pool, columnFamily).get_range()
def builders(self):
    """Obtain info about all builders."""
    builders_cf = ColumnFamily(self.pool, 'builders')
    wanted = ['category', 'master', 'name']
    # Yield (key, name, category, master) for every stored builder.
    for row_key, cols in builders_cf.get_range(columns=wanted):
        yield row_key, cols['name'], cols['category'], cols['master']
#!/usr/bin/python #-*- coding:utf-8 -*- from pycassa.pool import ConnectionPool from pycassa.columnfamily import ColumnFamily pool = ConnectionPool('MyKeyspace') cf = ColumnFamily(pool, 'MyCF') #cf.insert('row_key', {'col_name': 'col_val'}) #cf.insert('row_key', {'col_name':'col_val', 'col_name2':'col_val2'}) #cf.batch_insert({'row1': {'name1': 'val1', 'name2': 'val2'},'row2': {'foo': 'bar'}}) print cf.get('row_key') print cf.get('row_key', columns=['col_name', 'col_name2']) #for i in xrange(10): # cf.insert('row_key', {str(i): 'val'}) print cf.get('row_key', column_start='5', column_finish='7') print cf.get('row_key', column_reversed=True, column_count=3) print cf.multiget(['row1', 'row2']) result = cf.get_range(start='row_key5', finish='row_key7') for key, columns in result: print key, '=>', columns
class Buyer(Llama): def __init__(self, client, qname, trend=5): super(Buyer, self).__init__(client, uuid.uuid4().hex) self.holdings = {} self.cash = 100000.0 self.history = {} self.trend = trend self.pool = ConnectionPool('example_consumer_Buyer') self.stored_holdings = ColumnFamily(self.pool, 'Holdings') self.quote_history = ColumnFamily(self.pool, 'Quotes') self.stored_cash = ColumnFamily(self.pool, 'Cash') try: cash = self.stored_cash.get('current') self.cash = cash['amount'] except ttypes.NotFoundException: self.stored_cash.insert('current', { 'amount': self.cash }) for symbol, columns in self.stored_holdings.get_range(): self.holdings[symbol] = (columns['number_of_shares'], columns['price'], columns['cost']) date_expression = create_index_expression('timestamp', datetime.date.today(), GT) date_clause = create_index_clause([date_expression], count=1000) for key, columns in self.quote_history.get_range(): symbol = columns['symbol'] price = columns['price'] self.add_quote(symbol, price) def add_quote(self, symbol, price): if symbol not in self.history: self.history[symbol] = [price] else: self.history[symbol].append(price) if len(self.history[symbol]) >= self.trend: price_low = min(self.history[symbol][-self.trend:]) price_max = max(self.history[symbol][-self.trend:]) price_avg = sum(self.history[symbol][-self.trend:])/self.trend #print "Recent history of %s is %s" % (symbol, self.history[symbol][-self.trend:]) else: price_low, price_max, price_avg = (-1, -1, -1) print "%s quotes until we start deciding whether to buy or sell %s" % (self.trend - len(self.history[symbol]), symbol) #print "Recent history of %s is %s" % (symbol, self.history[symbol]) return (price_low, price_max, price_avg) def do_message(self, quote): symbol, price, date, counter = quote #print "Thinking about whether to buy or sell %s at %s" % (symbol, price) price_low, price_max, price_avg = self.add_quote(symbol, price) self.save_quote(symbol, price) if price_low == -1: return #print "Trending 
minimum/avg/max of %s is %s-%s-%s" % (symbol, price_low, price_avg, price_max) #for symbol in self.holdings.keys(): # print "self.history[symbol][-1] = %s" % self.history[symbol][-1] # print "self.holdings[symbol][0] = %s" % self.holdings[symbol][0] # print "Value of %s is %s" % (symbol, float(self.holdings[symbol][0])*self.history[symbol][-1]) value = sum([self.holdings[symbol][0]*self.history[symbol][-1] for symbol in self.holdings.keys()]) print "Net worth is %s + %s = %s" % (self.cash, value, self.cash + value) if symbol not in self.holdings: if price < 1.01*price_low: shares_to_buy = random.choice([10, 15, 20, 25, 30]) print "I don't own any %s yet, and the price is below the trending minimum of %s so I'm buying %s shares." % (symbol, price_low, shares_to_buy) cost = shares_to_buy * price print "Cost is %s, cash is %s" % (cost, self.cash) if cost < self.cash: self.buy_holdings(symbol, shares_to_buy, price, cost) self.update_cash(-cost) print "Cash is now %s" % self.cash else: print "Unfortunately, I don't have enough cash at this time." else: if price > self.holdings[symbol][1] and price > 0.99*price_max: print "+++++++ Price of %s is higher than my holdings, so I'm going to sell!" 
% symbol sale_value = self.holdings[symbol][0] * price print "Sale value is %s" % sale_value print "Holdings value is %s" % self.holdings[symbol][2] print "Total net is %s" % (sale_value - self.holdings[symbol][2]) self.update_cash(sale_value) print "Cash is now %s" % self.cash self.sell_holdings(symbol) def update_cash(self, change): self.cash += change cash = self.stored_cash.get('current') cash['amount'] = self.cash self.stored_cash.insert('current', cash) def buy_holdings(self, symbol, shares_to_buy, price, cost): self.holdings[symbol] = (shares_to_buy, price, cost) stored_holding = {'number_of_shares': shares_to_buy, 'price': price, 'cost': cost} self.stored_holdings.insert(symbol, stored_holding) def sell_holdings(self, symbol): del self.holdings[symbol] self.stored_holdings.remove(symbol) def save_quote(self, symbol, price): key = str(uuid.uuid4()) self.quote_history.insert(key, { 'symbol': symbol, 'price': price })
# Get the colums for the row key and column key authors = author_cf.get('sacharya1', columns=['first_name', 'last_name']) print authors authors = author_cf.multiget('sacharya', 'sacharya1') print authors print "Printing the keys" keys = authors.keys() for k in keys: print authors.get(k) print "Keys printed" #authors = list(author.get_range().get_keys()) for value in author_cf.get_range(): print value[0] # Only if using OrderPreservingPartitioner. Default is RandomPartitioner, which # does md5 on the key #for value in author_cf.get_range(start='sacharya5', finish='sacharya10'): # print value[0] ################################# UPDATE ###################################### # UPDATE a column for an existing row author_cf.insert('sacharya1', {'first_name': 'sudarshan_updated'}) print "Updating first_name for row key sacharya1" authors = author_cf.get('sacharya1') print authors for author in authors:
def get_range(self, *args, **kwargs):
    """Delegate a key-range scan to this model's column family."""
    family = ColumnFamily(self.pool, self.__column_family__)
    return family.get_range(*args, **kwargs)
# Column families used by the queries below.
station_col_fam = ColumnFamily(pool, 'stations')
detector_col_fam = ColumnFamily(pool, 'detectors')
loop_col_fam = ColumnFamily(pool, 'loopdata_new')

timesFile = '/home/highway_data/csv_fies/2011_09_22_times.txt'

# Append a run-timestamp header to the results file.
query3_results = open('query3_results.txt', 'a')
query3_results.write('\nquery run time: ')
query3_results.write(str(time.ctime(int(time.time()))))
query3_results.write('\n')

fosterNBID = ''
fosterNBLength = 0.0  # length of station NB Foster

# Scan all stations, remembering every key and the 'Foster NB' station.
stationids = []
for key, columns in station_col_fam.get_range():
    stationids.append(key)
    if columns['locationtext'] == 'Foster NB':
        fosterNBID = key
        fosterNBLength = float(columns['length'])

# Collect the detectors belonging to the Foster NB station.
detectorids = []
for key, columns in detector_col_fam.get_range():
    if columns['stationid'] == fosterNBID:
        detectorids.append(key)
#print statuses try: numjobs = len(statuses) except: return 0.0 for status in statuses.values(): #if days == 7: print status['status'] if status['status'] == 0: statusSum += 1 #if days == 7: print statusSum, numjobs if statusSum != 0 and numjobs != 0: success = (float(statusSum) / float(numjobs)) * 100 else: success = 0.0 return success j = {} jobs = jl.get_range(column_count=0, filter_empty=False) for job in jobs: rk = job[0] # print rk last = getLast(rk) success7 = getSuccess(rk, 7) success14 = getSuccess(rk, 14) success30 = getSuccess(rk, 30) # print rk, last, success7, success14, success30 if success7 != None and success14 != None and success30 != None: j[rk] = {'last': last, 'success7': success7, 'success14': success14, 'success30': success30} print j
def execute(self):
    """Run this query against Cassandra.

    Returns a (success, result, error) triple: on success the second
    element holds the fetched rows; on failure the third holds an
    exception describing why.
    """
    client = db_connection.get_client()
    cf = ColumnFamily(client, self.domain)
    try:
        #### SELECT QUERY ####
        if self.op_type == CassandraQuery.OP_SELECT:
            if not self.where_node:
                ## treat this as a simple key get query
                if self.limit == 1:
                    result = cf.get(self.offset)
                    if result:
                        return (True, result, None)
                    else:
                        return (False, None, DatabaseError("No " + self.domain + "entry matching row_key: " + self.offset))
                else:
                    return (False, None, DatabaseError("Limit for SELECT operation must be 1"))
            else:
                ## treat this as an indexed_slices query
                if self.limit == 1:
                    ## we consider the assumption that there is only a single AND node with filtering children
                    index_expressions = []
                    for field_predicate, value in self.where_node.children:
                        # 'field__op' encodes the column and the operator;
                        # a bare field name defaults to equality.
                        field_predicate_list = field_predicate.split("__")
                        field = field_predicate_list[0]
                        predicate = EQ
                        if len(field_predicate_list) == 2:
                            try:
                                predicate = SelectManager.predicate_map[field_predicate_list[1]]
                            except:
                                predicate = EQ
                        index_exp = create_index_expression(field, value, predicate)
                        index_expressions.append(index_exp)
                    index_clause = create_index_clause(index_expressions, start_key=self.offset, count=self.limit)
                    result = cf.get_indexed_slices(index_clause)
                    if result:
                        return (True, result, None)
                    else:
                        return (False, None, DatabaseError("No " + self.domain + "entry matching query: " + self.where_node))
                else:
                    return (False, None, DatabaseError("Limit for SELECT operation must be 1"))
        #### FETCH QUERY ####
        elif self.op_type == CassandraQuery.OP_FETCH:
            if self.limit > SelectManager.MAX_FETCH_LIMIT:
                return (False, None, DatabaseError("LIMIT for FETCH operation exceeds MAX_FETCH_LIMIT(1000)"))
            if not self.where_node:
                ## Treat this as a key range query
                # Page through the key range REGULAR_FETCH_LIMIT rows at a
                # time until `limit` rows are gathered or the range ends.
                key_offset = self.offset
                limit = self.limit
                result = {}
                while True:
                    if limit < SelectManager.REGULAR_FETCH_LIMIT:
                        res = cf.get_range(key_offset, row_count=limit)
                        result.update(res)
                        break
                    else:
                        res = cf.get_range(key_offset, row_count=SelectManager.REGULAR_FETCH_LIMIT)
                        result.update(res)
                        # NOTE(review): pycassa get_range returns an
                        # iterator; len(res)/res.keys() after update()
                        # look like they expect a dict — confirm this
                        # paging branch has ever been exercised.
                        if len(res) < SelectManager.REGULAR_FETCH_LIMIT:
                            break
                        else:
                            max_key = sorted(res.keys(), reverse=True)[0]
                            key_offset = max_key + 1
                            limit -= SelectManager.REGULAR_FETCH_LIMIT
                return (True, result, None)
            else:
                ## Treat this as a fetch query
                ## first create index expressions
                index_expressions = []
                for field_predicate, value in self.where_node.children:
                    field_predicate_list = field_predicate.split("__")
                    field = field_predicate_list[0]
                    predicate = EQ
                    if len(field_predicate_list) == 2:
                        try:
                            predicate = SelectManager.predicate_map[field_predicate_list[1]]
                        except:
                            predicate = EQ
                    index_exp = create_index_expression(field, value, predicate)
                    index_expressions.append(index_exp)
                # Page through the indexed slices just like the key range
                # branch above.
                key_offset = self.offset
                limit = self.limit
                result = {}
                while True:
                    if limit < SelectManager.REGULAR_FETCH_LIMIT:
                        index_clause = create_index_clause(index_expressions, start_key=key_offset, count=limit)
                        res = cf.get_indexed_slices(index_clause)
                        result.update(res)
                        break
                    else:
                        index_clause = create_index_clause(index_expressions, start_key=key_offset, count=SelectManager.REGULAR_FETCH_LIMIT)
                        res = cf.get_indexed_slices(index_clause)
                        result.update(res)
                        if len(res) < SelectManager.REGULAR_FETCH_LIMIT:
                            break
                        else:
                            max_key = sorted(res.keys(), reverse=True)[0]
                            key_offset = max_key + 1
                            limit -= SelectManager.REGULAR_FETCH_LIMIT
                return (True, result, None)
    except Exception, ex:
        return (False, None, ex)
class CassandraDemo(object): def __init__(self, database, table): self.database = database self.table = table def create_connections(self): self.pool = ConnectionPool(self.database) self.cf = ColumnFamily(self.pool, self.table) def create_database_and_table(self): super_cf = False # consider super columns to be deprecated s = SystemManager() # create keyspace if it doesn't exist if database not in s.list_keyspaces(): s.create_keyspace(database, SIMPLE_STRATEGY, {'replication_factor': '1'}) # delete column family from the keyspace if it does exist. if table in s.get_keyspace_column_families(database): s.drop_column_family(database, table) # create coulmn family in the keyspace if table not in s.get_keyspace_column_families(database): print("table is creating...") s.create_column_family(database, table, super = super_cf, comparator_type = ASCII_TYPE) s.close() return True def insert_data(self): print '\nemployee data is inserting...' self.cf.insert('1', {'fn':'yogesh', 'ln':'kumar', 'ct': 'Ajmer', 'em': '*****@*****.**'}) self.cf.insert('2', {'fn':'amit', 'ln':'pandita', 'ct': 'Delhi', 'em': '*****@*****.**'}) self.cf.insert('3', {'fn':'sandeep', 'ln':'tak', 'ct': 'Ajmer', 'em': '*****@*****.**', 'mb': '8890467032'}) def get_data(self): print '\nemployee data is featching...' data1 = self.cf.get('1') data2 = self.cf.get('2', columns = ['fn', 'ln', 'em']) data3 = self.cf.get('3', column_start = 'ct', column_finish = 'fn') data4 = self.cf.get('1', column_reversed = False, column_count = 3) data5 = self.cf.get('1', column_reversed = True, column_count = 3) print data1 print data2 print data3 print data4 print data5 def get_multiple_data(self): print '\ngetting multiple employees data...' row_keys = ['1','2','3'] data = self.cf.multiget(row_keys) print data def get_data_by_range(self): ''' if you get an error don't worry about this, it's a Cassandra limitation Issue ''' print '\ngetting employees data by range...' 
start_row_key = '1' end_row_key = '3' data = self.cf.get_range(start = start_row_key, finish = end_row_key) for key, columns in data: print key,coulmns def get_count(self): print '\nget employee row\'s colunm count' print self.cf.get_count('1') print self.cf.get_count('1', columns = ['fn', 'ln']) print self.cf.get_count('1', column_start = 'em') def get_multi_count(self): print '\nget multiple employees row\'s colunm count' row_keys = ['1','2','3'] columns = ['fn', 'ln', 'mb'] column_start = 'ct' column_finish = 'fn' print self.cf.multiget_count(row_keys) print self.cf.multiget_count(row_keys, columns = columns) print self.cf.multiget_count(row_keys, column_start = column_start, column_finish = column_finish) def update_data(self): print '\nemployee data is updating...' self.cf.insert('1', {'pwd':'yoku@2010', 'ct':'Noida'}) def delete_data(self): print '\ndelete data from employee' row = '2' self.cf.remove(row) def get_all_rows(self): print '\ngetting rows name...' print [v[0] for v in self.cf.get_range()] def get_all_columns_of_row(self): print '\ngetting columns name of a row' row = '1' data = self.cf.get(row) print data.keys()
class Buyer(Llama):
    # Consumer that watches a stream of stock quotes and trades on a simple
    # trend heuristic, persisting its state (cash, holdings, quote history)
    # in Cassandra via pycassa column families.

    def __init__(self, client, qname, trend=5):
        # trend: number of recent quotes per symbol required before the
        # buy/sell heuristic activates.
        super(Buyer, self).__init__(client, uuid.uuid4().hex)
        self.holdings = {}
        self.cash = 100000.0
        self.history = {}
        self.trend = trend
        self.pool = ConnectionPool('example_consumer_Buyer')
        self.stored_holdings = ColumnFamily(self.pool, 'Holdings')
        self.quote_history = ColumnFamily(self.pool, 'Quotes')
        self.stored_cash = ColumnFamily(self.pool, 'Cash')
        try:
            # Restore the persisted cash balance if one exists...
            cash = self.stored_cash.get('current')
            # NOTE(review): depending on the CF's value validator this may
            # come back as a string, while update_cash() does arithmetic on
            # it — confirm the column type.
            self.cash = cash['amount']
        except ttypes.NotFoundException:
            # ...otherwise seed storage with the starting balance.
            self.stored_cash.insert('current', {'amount': self.cash})
        # Rebuild the in-memory holdings map from storage.
        for symbol, columns in self.stored_holdings.get_range():
            self.holdings[symbol] = (columns['number_of_shares'],
                                     columns['price'], columns['cost'])
        # NOTE(review): this index clause is built but never used — the loop
        # below scans the whole Quotes CF instead of calling
        # get_indexed_slices(date_clause). Confirm whether the filtered
        # query was intended.
        date_expression = create_index_expression('timestamp',
                                                  datetime.date.today(), GT)
        date_clause = create_index_clause([date_expression], count=1000)
        # Replay stored quotes to warm up the per-symbol price history.
        for key, columns in self.quote_history.get_range():
            symbol = columns['symbol']
            price = columns['price']
            self.add_quote(symbol, price)

    def add_quote(self, symbol, price):
        # Record one quote and return (low, max, avg) over the last
        # `trend` quotes for the symbol, or (-1, -1, -1) until enough
        # history has accumulated.
        if symbol not in self.history:
            self.history[symbol] = [price]
        else:
            self.history[symbol].append(price)
        if len(self.history[symbol]) >= self.trend:
            price_low = min(self.history[symbol][-self.trend:])
            price_max = max(self.history[symbol][-self.trend:])
            price_avg = sum(self.history[symbol][-self.trend:]) / self.trend
            #print "Recent history of %s is %s" % (symbol, self.history[symbol][-self.trend:])
        else:
            price_low, price_max, price_avg = (-1, -1, -1)
            print "%s quotes until we start deciding whether to buy or sell %s" % (
                self.trend - len(self.history[symbol]), symbol)
            #print "Recent history of %s is %s" % (symbol, self.history[symbol])
        return (price_low, price_max, price_avg)

    def do_message(self, quote):
        # Handle one incoming quote tuple; may buy or sell based on where
        # the price sits relative to the trending min/max.
        symbol, price, date, counter = quote
        #print "Thinking about whether to buy or sell %s at %s" % (symbol, price)
        price_low, price_max, price_avg = self.add_quote(symbol, price)
        self.save_quote(symbol, price)
        if price_low == -1:
            # Not enough history for this symbol yet.
            return
        #print "Trending minimum/avg/max of %s is %s-%s-%s" % (symbol, price_low, price_avg, price_max)
        #for symbol in self.holdings.keys():
        #    print "self.history[symbol][-1] = %s" % self.history[symbol][-1]
        #    print "self.holdings[symbol][0] = %s" % self.holdings[symbol][0]
        #    print "Value of %s is %s" % (symbol, float(self.holdings[symbol][0])*self.history[symbol][-1])
        # Mark-to-market the portfolio at each symbol's latest price.
        value = sum([
            self.holdings[symbol][0] * self.history[symbol][-1]
            for symbol in self.holdings.keys()
        ])
        print "Net worth is %s + %s = %s" % (self.cash, value, self.cash + value)
        if symbol not in self.holdings:
            # Buy when the price dips to within 1% of the trending minimum.
            if price < 1.01 * price_low:
                shares_to_buy = random.choice([10, 15, 20, 25, 30])
                print "I don't own any %s yet, and the price is below the trending minimum of %s so I'm buying %s shares." % (
                    symbol, price_low, shares_to_buy)
                cost = shares_to_buy * price
                print "Cost is %s, cash is %s" % (cost, self.cash)
                if cost < self.cash:
                    self.buy_holdings(symbol, shares_to_buy, price, cost)
                    self.update_cash(-cost)
                    print "Cash is now %s" % self.cash
                else:
                    print "Unfortunately, I don't have enough cash at this time."
        else:
            # Sell when the price beats our purchase price and is within 1%
            # of the trending maximum.
            if price > self.holdings[symbol][1] and price > 0.99 * price_max:
                print "+++++++ Price of %s is higher than my holdings, so I'm going to sell!" % symbol
                sale_value = self.holdings[symbol][0] * price
                print "Sale value is %s" % sale_value
                print "Holdings value is %s" % self.holdings[symbol][2]
                print "Total net is %s" % (sale_value - self.holdings[symbol][2])
                self.update_cash(sale_value)
                print "Cash is now %s" % self.cash
                self.sell_holdings(symbol)

    def update_cash(self, change):
        # Apply a delta to the balance and write the new value through to
        # the Cash column family.
        self.cash += change
        cash = self.stored_cash.get('current')
        cash['amount'] = self.cash
        self.stored_cash.insert('current', cash)

    def buy_holdings(self, symbol, shares_to_buy, price, cost):
        # Record a purchase in memory and persist it to Holdings.
        self.holdings[symbol] = (shares_to_buy, price, cost)
        stored_holding = {
            'number_of_shares': shares_to_buy,
            'price': price,
            'cost': cost
        }
        self.stored_holdings.insert(symbol, stored_holding)

    def sell_holdings(self, symbol):
        # Drop the position both in memory and from Holdings.
        del self.holdings[symbol]
        self.stored_holdings.remove(symbol)

    def save_quote(self, symbol, price):
        # Append the quote to the Quotes CF under a fresh random key.
        key = str(uuid.uuid4())
        self.quote_history.insert(key, {'symbol': symbol, 'price': price})
col_fam.insert('row_key', {'col_name':'col_val', 'col_name2':'col_val2'}) col_fam.batch_insert({'row1': {'name1': 'val1', 'name2': 'val2'},'row2': {'foo': 'bar'}}) #col_fam.insert('super_key', {'key':{'col_name':'col_val', 'col_name2':'col_val2'}}) print col_fam.get_count('row_key', columns=['foo', 'bar']) print col_fam.get_count('row_key', column_start='foo') print col_fam.multiget_count(['fib0', 'fib1', 'fib2', 'fib3', 'fib4']) print col_fam.multiget_count(['fib0', 'fib1', 'fib2', 'fib3', 'fib4'],columns=['col1', 'col2', 'col3']) print col_fam.multiget_count(['fib0', 'fib1', 'fib2', 'fib3', 'fib4'],column_start='col1', column_finish='col3') print col_fam.get_count('row_key') print col_fam.get('row_key') print col_fam.get('author') print col_fam.get('row_key', columns=['col_name', 'col_name2']) print col_fam.get('row_key', column_reversed=True, column_count=3) print col_fam.multiget(['row1', 'row2']) for i in range(1, 10): col_fam.insert('row_key', {str(i): 'val'}) print col_fam.get('row_key', column_start='5', column_finish='7') result = col_fam.get_range(start='row_key5', finish='row_key7') for key, columns in result: print key, '=>', columns #Supper column # col_fam = pycassa.ColumnFamily(pool, 'Super1') # col_fam.insert('row_key', {'supercol_name': {'col_name': 'col_val'}}) print col_fam.get('row_key') # col_fam = pycassa.ColumnFamily(pool, 'Letters') # col_fam.insert('row_key', {'super': {'a': '1', 'b': '2', 'c': '3'}}) # print col_fam.get('row_key', super_column='super') # print col_fam.get('row_key', super_column='super', columns=['a', 'b']) # print col_fam.get('row_key', super_column='super', column_start='b') # print col_fam.get('row_key',super_column='super',column_finish='b',column_reversed=True)
from pycassa.index import * from pycassa.cassandra import ttypes import json import datetime class JSONDateTimeEncoder(json.JSONEncoder): def default(self, obj): if isinstance(obj, (datetime.date, datetime.datetime)): return obj.isoformat() else: return json.JSONEncoder.default(self, obj) def has_many_woeids(x): key, values = x return len(values) > 1 pool = ConnectionPool('processing_llama_Processor') trends = ColumnFamily(pool, 'Trend') for trend_name, country_specifics in filter(has_many_woeids, trends.get_range()): print json.dumps(country_specifics, sort_keys=True, indent=4, separators=(',', ': '), cls=JSONDateTimeEncoder) #track_trend(trend_name, country_specifics['query'], country_specifics.keys)
from pycassa.pool import ConnectionPool from pycassa.columnfamily import ColumnFamily from pycassa.index import * from pycassa.cassandra import ttypes import json import datetime class JSONDateTimeEncoder(json.JSONEncoder): def default(self, obj): if isinstance(obj, (datetime.date, datetime.datetime)): return obj.isoformat() else: return json.JSONEncoder.default(self, obj) def has_many_woeids(x): key, values = x return len(values) > 1 pool = ConnectionPool('processing_llama_Processor') trends = ColumnFamily(pool, 'Trend') for trend_name, country_specifics in filter(has_many_woeids, trends.get_range()): print json.dumps(country_specifics, sort_keys=True, indent=4, separators=(',', ': '), cls=JSONDateTimeEncoder) #track_trend(trend_name, country_specifics['query'], country_specifics.keys)
# print user.age # # 18 # users = cfmap.multiget([key1, key2]) # print users[0].name # # "John" # for user in cfmap.get_range(): # print user.name # # "John" # # "Bob" # # "Alex" # cfmap.remove(user) # cfmap.get(user.key) # # cassandra.ttypes.NotFoundException: NotFoundException() #Key Slices readData = col_fam.get_range(start='', finish='') readData = list(readData) print len(readData) print readData #prettyprint.pp(readData) readData = col_fam.get_range(start='', finish='', row_count=2) readData = list(readData) print len(readData) #prettyprint.pp(readData) readData = col_fam.get_range(start=list(readData)[-1][0], finish='', row_count=2) readData = list(readData) print len(readData) #prettyprint.pp(readData) readData = col_fam.get_range(start=list(readData)[-1][0], finish='', row_count=2) readData = list(readData) print len(readData)