class CassandraRepository(object):
    """Thin repository wrapper around a single pycassa ColumnFamily.

    Owns a ConnectionPool for the given keyspace (node list taken from
    ``cassandra_settings.NODE_POOL``) and exposes simple insert/get helpers.
    """

    def __init__(self, keyspace, column_family_name):
        self.pool = ConnectionPool(keyspace, cassandra_settings.NODE_POOL)
        self.cf = ColumnFamily(self.pool, column_family_name)
        self.batch = {}  # scratch space for callers assembling a batch

    def add_batch(self, batch, start_time=None):
        """Insert ``batch`` (row_key -> {column: value}) in one round trip.

        :param batch: mapping of row keys to column dicts
        :param start_time: optional start stamp in epoch milliseconds; when
            given, the elapsed insert time is printed.
        """
        self.cf.batch_insert(batch)
        if start_time is not None:
            # print(...) form behaves identically on Python 2 and 3
            print('time to insert batch: %s ms' % (int(time.time() * 1000) - start_time))

    def get(self, timestamp):
        """Return the row stored under str(timestamp)."""
        return self.cf.get(str(timestamp))

    def get_range(self, start, end):
        """Return all rows with keys from str(start) through str(end) as a list."""
        return list(self.cf.get_range(start=str(start), finish=str(end)))

    def close(self):
        # BUG FIX: the original called self.sys.close(), but no ``sys``
        # attribute exists on this class (AttributeError at runtime).
        # Release the connection pool this repository actually owns.
        self.pool.dispose()
# NOTE(review): this class is defined twice in this file; the second
# definition silently replaces the first.  Consider removing one copy.
class CassandraRepository(object):
    """Thin repository wrapper around a single pycassa ColumnFamily.

    Owns a ConnectionPool for the given keyspace (node list taken from
    ``cassandra_settings.NODE_POOL``) and exposes simple insert/get helpers.
    """

    def __init__(self, keyspace, column_family_name):
        self.pool = ConnectionPool(keyspace, cassandra_settings.NODE_POOL)
        self.cf = ColumnFamily(self.pool, column_family_name)
        self.batch = {}  # scratch space for callers assembling a batch

    def add_batch(self, batch, start_time=None):
        """Insert ``batch`` (row_key -> {column: value}) in one round trip.

        :param batch: mapping of row keys to column dicts
        :param start_time: optional start stamp in epoch milliseconds; when
            given, the elapsed insert time is printed.
        """
        self.cf.batch_insert(batch)
        if start_time is not None:
            # print(...) form behaves identically on Python 2 and 3
            print('time to insert batch: %s ms' % (int(time.time() * 1000) - start_time))

    def get(self, timestamp):
        """Return the row stored under str(timestamp)."""
        return self.cf.get(str(timestamp))

    def get_range(self, start, end):
        """Return all rows with keys from str(start) through str(end) as a list."""
        return list(self.cf.get_range(start=str(start), finish=str(end)))

    def close(self):
        # BUG FIX: the original called self.sys.close(), but no ``sys``
        # attribute exists on this class (AttributeError at runtime).
        # Release the connection pool this repository actually owns.
        self.pool.dispose()
def batch_insert(self, instances, timestamp=None, ttl=None, write_consistency_level=None):
    """Insert or update stored instances in a single batch.

    ``instances`` is a list of mapped objects (instances of ``cls``); each
    is serialized via ``self._get_instance_as_dict`` and written under its
    ``key`` attribute.
    """
    rows = {obj.key: self._get_instance_as_dict(obj) for obj in instances}
    return ColumnFamily.batch_insert(self, rows, timestamp=timestamp,
                                     ttl=ttl,
                                     write_consistency_level=write_consistency_level)
# Standalone smoke-test of basic pycassa operations (insert / batch_insert /
# get / get_count / multiget / multiget_count / column slices) against the
# 'Users' column family of the 'Cassandra_Test' keyspace.
from pycassa.pool import ConnectionPool
from pycassa.columnfamilymap import ColumnFamilyMap
from pycassa.columnfamily import ColumnFamily

if __name__ == '__main__':
    # Alternate multi-node cluster list kept for reference:
    #['10.15.62.100:9160','10.15.62.101:9160','10.15.62.102:9160']
    pool = ConnectionPool('Cassandra_Test',['10.107.4.187:9160'])
    print pool
    # cf_map = ColumnFamilyMap(User, pool, 'Users')
    col_fam = ColumnFamily(pool, 'Users')
    # NOTE(review): assumes a row keyed 'author' already exists in 'Users';
    # otherwise pycassa raises NotFoundException -- confirm fixture data.
    print col_fam.get('author')
    print col_fam.get_count('author')
    # Single-column and multi-column inserts into the same row.
    col_fam.insert('row_key', {'col_name': 'col_val'})
    col_fam.insert('row_key', {'col_name':'col_val', 'col_name2':'col_val2'})
    # Two rows written in one round trip.
    col_fam.batch_insert({'row1': {'name1': 'val1', 'name2': 'val2'},'row2': {'foo': 'bar'}})
    #col_fam.insert('super_key', {'key':{'col_name':'col_val', 'col_name2':'col_val2'}})
    # Column counts: restricted to named columns, then from a start column.
    print col_fam.get_count('row_key', columns=['foo', 'bar'])
    print col_fam.get_count('row_key', column_start='foo')
    # Per-row column counts for several rows at once, with the same filters.
    print col_fam.multiget_count(['fib0', 'fib1', 'fib2', 'fib3', 'fib4'])
    print col_fam.multiget_count(['fib0', 'fib1', 'fib2', 'fib3', 'fib4'],columns=['col1', 'col2', 'col3'])
    print col_fam.multiget_count(['fib0', 'fib1', 'fib2', 'fib3', 'fib4'],column_start='col1', column_finish='col3')
    print col_fam.get_count('row_key')
    print col_fam.get('row_key')
    print col_fam.get('author')
    # Named-column selection, then a reversed slice of at most 3 columns.
    print col_fam.get('row_key', columns=['col_name', 'col_name2'])
    print col_fam.get('row_key', column_reversed=True, column_count=3)
    print col_fam.multiget(['row1', 'row2'])
    # Insert columns '1'..'9', then read back the contiguous slice '5'..'7'.
    for i in range(1, 10):
        col_fam.insert('row_key', {str(i): 'val'})
    print col_fam.get('row_key', column_start='5', column_finish='7')
#pool = ConnectionPool('entries', server_list=server_list, pool_size=20) pool = ConnectionPool('entries', server_list=server_list) ############################## Create Column Family ########################### sys.create_column_family('entries', 'Author', comparator_type=UTF8_TYPE) author_cf = ColumnFamily(pool, 'Author') ################################ INSERT ####################################### # Insert a row with a Column author_cf.insert('sacharya', {'first_name': 'Sudarshan'}) # Insert a row with multiple columns author_cf.insert('sacharya1', {'first_name': 'Sudarshan', 'last_name': 'Acharya'}) # Insert multiple rows author_cf.batch_insert({'rowkey1': {'first_name': 'Sudarshan', 'last_name': 'Acharya'}, 'rowkey2': {'first_name': 'Sudarshan', 'last_name': 'Acharya'}}) # Insert lots of individual rows for i in range(100): author_cf.insert('sacharya'+str(i), {'first_name': 'sudarshan'+ str(i)}) author_cf.insert('1000', {'1': '1'}) time.sleep(5) ################################### GET ####################################### # Get the row for the rowkey authors = author_cf.get('sacharya') print authors # Get value for column print "Get value for column"
import pycassa import time import json from elasticsearch import Elasticsearch from datetime import date from pprint import pprint from pycassa.pool import ConnectionPool from pycassa.columnfamily import ColumnFamily pool=ConnectionPool('Twitter',['localhost:9160']) col_fam=ColumnFamily(pool,'Tweets') #s={'article3': {'author': 'Monam ', 'data': 'ye kya bc kaat rhe hai hum opps BC!!!!!!','time_of_addition': time.strftime("%c") },'article4': { 'author': 'Kriti' ,'data' : 'yaar bakwass hai ye kuch ni hona iss project ka','time_of_addition':time.strftime("%c")}} #print s; print final[n]; s=final; print s[n]; col_fam.batch_insert(s) es=Elasticsearch(["localhost:9200", "localhost:9200"]) count=1; for key in s.keys(): #print key #print s[key]['data'] content1=s[key]['Tweet_Data']; # l=content1.split() #print l; for word in re.findall(r"\w+", content1): word=word.lower(); t={"name":"'"+word+"'",'docid': "'"+key+"'"} #print t # res=es.index(index="twitter", doc_type=word, id=count, body=t) os.system("curl -XPUT http://localhost:9200/twitter/tweet/"+str(count) +" -d '"+str(t)+"' >> out");
# --- continuation of a pycassa walkthrough: ``col_fam`` is a ColumnFamily
# created earlier in the file. ---
readData = col_fam.get('key3')
col_fam.insert('key3', {'Column5':'PycassaData2', 'Column6':'PycassaData3'})
readData = col_fam.get('key3')
from pycassa.types import *
# Client-side validators: values of these columns are (de)serialized as
# integers instead of raw strings.
col_fam.column_validators['IntColumn5'] = IntegerType()
col_fam.column_validators['IntColumn6'] = IntegerType()
col_fam.insert('intData', {'IntColumn5':5, 'IntColumn6':6})
print col_fam.get('intData')
# OrderedDict([('IntColumn5', 5), ('IntColumn6', 6)])
#Batch operations
# Two rows written in a single round trip.
col_fam.batch_insert({'key4': {'Column1': 'PycassaData4',
                               'Column2': 'PycassaData5',
                               'Column3': 'PycassaData6',
                               'Column4': 'PycassaData7',
                               'Column5': 'PycassaData8'},
                      'key5': {'Column7': 'PycassaData9'}})
# Fetch several rows at once; the second call restricts to named columns.
readData = col_fam.multiget(['key3', 'key4', 'key5'])
readData = col_fam.multiget(['key3', 'key4', 'key5'], columns=['Column1', 'Column7'])
#Column Slices
# Contiguous slice Column2..Column4, then the last 3 columns in reverse order.
readData = col_fam.get('key4', column_start='Column2', column_finish='Column4')
readData = col_fam.get('key4', column_reversed=True, column_count=3)
#Types
# from pycassa.types import *
# class User(object):
#     key = AsciiType()