Example #1
0
class CassandraRepository(object):
    """Small repository wrapper around one pycassa column family.

    The instance owns a ``ConnectionPool`` (``self.pool``) and a
    ``ColumnFamily`` bound to it (``self.cf``).  Call :meth:`close` when the
    repository is no longer needed so the pool can be released.
    """

    def __init__(self, keyspace, column_family_name):
        """Open a connection pool and bind the column family.

        :param keyspace: keyspace name handed to ``ConnectionPool``.
        :param column_family_name: column family name handed to
            ``ColumnFamily``.
        """
        self.pool = ConnectionPool(keyspace, cassandra_settings.NODE_POOL)

        self.cf = ColumnFamily(self.pool, column_family_name)

        # Not read by any method of this class; kept because it is a public
        # attribute that existing callers may rely on.
        self.batch = {}

    def add_batch(self, batch, start_time=None):
        """Insert ``batch`` through ``ColumnFamily.batch_insert``.

        :param batch: rows to insert, passed to ``batch_insert`` unchanged.
        :param start_time: optional reference time in epoch milliseconds.
            When given, the elapsed time since ``start_time`` is printed
            after the insert.
        """
        self.cf.batch_insert(batch)
        if start_time is not None:
            elapsed_ms = int(time.time() * 1000) - start_time
            # Single-argument parenthesised print behaves identically on
            # Python 2 and Python 3.
            print('time to  insert batch: %s ms' % (elapsed_ms,))

    def get(self, timestamp):
        """Return the row whose key is ``str(timestamp)``."""
        return self.cf.get(str(timestamp))

    def get_range(self, start, end):
        """Return a list of the rows between ``str(start)`` and ``str(end)``.

        The range iterator returned by ``ColumnFamily.get_range`` is fully
        consumed into a list before returning.
        """
        return list(self.cf.get_range(start=str(start), finish=str(end)))

    def close(self):
        """Release the resources held by this repository.

        The previous implementation called ``self.sys.close()``, but
        ``__init__`` never creates ``self.sys``, so ``close()`` always raised
        ``AttributeError`` and the connection pool was never released.  The
        pool is now disposed; a ``sys`` object is still closed when a caller
        has attached one to the instance.
        """
        sys_manager = getattr(self, 'sys', None)
        if sys_manager is not None:
            sys_manager.close()
        self.pool.dispose()
class CassandraRepository(object):
    """Repository giving keyed access to a single pycassa column family.

    Holds a ``ConnectionPool`` in ``self.pool`` and the ``ColumnFamily``
    built on it in ``self.cf``.  :meth:`close` releases the pool.
    """

    def __init__(self, keyspace, column_family_name):
        """Create the connection pool and the column family handle.

        :param keyspace: keyspace name given to ``ConnectionPool``.
        :param column_family_name: name given to ``ColumnFamily``.
        """
        self.pool = ConnectionPool(keyspace,
                                   cassandra_settings.NODE_POOL)

        self.cf = ColumnFamily(self.pool, column_family_name)

        # Unused inside this class; retained as part of the public surface.
        self.batch = {}

    def add_batch(self, batch, start_time=None):
        """Write ``batch`` with ``ColumnFamily.batch_insert``.

        :param batch: rows to insert; forwarded to ``batch_insert`` as is.
        :param start_time: optional start time in epoch milliseconds.  If
            provided, the milliseconds elapsed since then are printed once
            the insert has returned.
        """
        self.cf.batch_insert(batch)
        if start_time is not None:
            now_ms = int(time.time() * 1000)
            # One-argument parenthesised print is valid on Python 2 and 3.
            print('time to  insert batch: %s ms' % (now_ms - start_time,))

    def get(self, timestamp):
        """Fetch the row stored under the key ``str(timestamp)``."""
        return self.cf.get(str(timestamp))

    def get_range(self, start, end):
        """Return, as a list, the rows from ``str(start)`` to ``str(end)``."""
        return list(self.cf.get_range(start=str(start), finish=str(end)))

    def close(self):
        """Dispose of the connection pool owned by this repository.

        ``__init__`` never sets ``self.sys``, so the former body
        (``self.sys.close()``) failed with ``AttributeError`` and left the
        pool open.  The pool is disposed here instead; if a caller has set a
        ``sys`` attribute on the instance it is closed first, which keeps the
        old behaviour for that case.
        """
        attached_sys = getattr(self, 'sys', None)
        if attached_sys is not None:
            attached_sys.close()
        self.pool.dispose()
Example #3
0
    def batch_insert(self, instances, timestamp=None, ttl=None,
            write_consistency_level=None):
        """
        Store several instances in a single batch insert.

        `instances` is an iterable of objects to store.  Each one is
        converted with `self._get_instance_as_dict` and filed under its
        `key` attribute; if two instances share a key, the later one wins.

        `timestamp`, `ttl` and `write_consistency_level` are forwarded
        unchanged to `ColumnFamily.batch_insert`, whose return value is
        returned.
        """
        rows_by_key = {}
        for obj in instances:
            # Read the key before converting, matching the original
            # evaluation order.
            row_key = obj.key
            rows_by_key[row_key] = self._get_instance_as_dict(obj)

        return ColumnFamily.batch_insert(
            self,
            rows_by_key,
            timestamp=timestamp,
            ttl=ttl,
            write_consistency_level=write_consistency_level)
Example #4
0
from pycassa.pool import ConnectionPool
from pycassa.columnfamilymap import ColumnFamilyMap
from pycassa.columnfamily import ColumnFamily


# Demo script: connects to the 'Cassandra_Test' keyspace on one hard-coded
# node and runs a series of inserts, reads and counts against the 'Users'
# column family, printing each result.
if __name__ == '__main__':
    # Alternative (unused) node list:
    #['10.15.62.100:9160','10.15.62.101:9160','10.15.62.102:9160'] 
    pool = ConnectionPool('Cassandra_Test',['10.107.4.187:9160'])
    print pool
    # Disabled ColumnFamilyMap variant (`User` is not defined in this script):
#    cf_map = ColumnFamilyMap(User, pool, 'Users')
    col_fam =  ColumnFamily(pool, 'Users')
    # NOTE(review): the 'author' row is read before anything is inserted
    # below, so it presumably already exists in the cluster -- confirm.
    print col_fam.get('author')
    print col_fam.get_count('author')
    # Single-row inserts, first with one column and then with two.
    col_fam.insert('row_key', {'col_name': 'col_val'})
    col_fam.insert('row_key', {'col_name':'col_val', 'col_name2':'col_val2'})
    # Multi-row insert: outer keys are row keys, inner dicts are columns.
    col_fam.batch_insert({'row1': {'name1': 'val1', 'name2': 'val2'},'row2': {'foo': 'bar'}})
    # Disabled super-column style insert:
    #col_fam.insert('super_key', {'key':{'col_name':'col_val', 'col_name2':'col_val2'}})
    # Counts restricted to named columns / to a column slice.
    print col_fam.get_count('row_key', columns=['foo', 'bar'])
    print col_fam.get_count('row_key', column_start='foo') 
    # Counts across several row keys at once.
    # NOTE(review): the 'fib0'..'fib4' rows are not written by this script.
    print col_fam.multiget_count(['fib0', 'fib1', 'fib2', 'fib3', 'fib4'])
    print col_fam.multiget_count(['fib0', 'fib1', 'fib2', 'fib3', 'fib4'],columns=['col1', 'col2', 'col3'])
    print col_fam.multiget_count(['fib0', 'fib1', 'fib2', 'fib3', 'fib4'],column_start='col1', column_finish='col3')
    print col_fam.get_count('row_key')
    # Reads: whole row, selected columns, reversed slice limited to 3 columns.
    print col_fam.get('row_key')
    print col_fam.get('author')
    print col_fam.get('row_key', columns=['col_name', 'col_name2'])
    print col_fam.get('row_key', column_reversed=True, column_count=3)
    print col_fam.multiget(['row1', 'row2'])
    # Add columns named '1'..'9' to 'row_key' so the slice below has data.
    for i in range(1, 10):
        col_fam.insert('row_key', {str(i): 'val'})
    # Column slice from column '5' to column '7'.
    print col_fam.get('row_key', column_start='5', column_finish='7')
# Script fragment: creates the 'Author' column family in the 'entries'
# keyspace, inserts sample rows and reads one back.
# NOTE(review): `server_list`, `sys`, `UTF8_TYPE` and `time` are used below
# but are not defined in the visible part of this file. `sys` is used as an
# object with `create_column_family`, so it is presumably a pycassa system
# manager rather than the stdlib module -- confirm.

# Disabled variant with an explicit pool size:
#pool = ConnectionPool('entries', server_list=server_list, pool_size=20)
pool = ConnectionPool('entries', server_list=server_list)

############################## Create Column Family ###########################
sys.create_column_family('entries', 'Author', comparator_type=UTF8_TYPE)
author_cf = ColumnFamily(pool, 'Author')

################################ INSERT #######################################
# Insert a row with a single column
author_cf.insert('sacharya', {'first_name': 'Sudarshan'})
# Insert a row with multiple columns
author_cf.insert('sacharya1', {'first_name': 'Sudarshan', 'last_name':
'Acharya'})
# Insert multiple rows in one call
author_cf.batch_insert({'rowkey1': {'first_name': 'Sudarshan', 'last_name':
'Acharya'},
                'rowkey2': {'first_name': 'Sudarshan', 'last_name': 'Acharya'}})
# Insert 100 individual rows keyed 'sacharya0'..'sacharya99'
for i in range(100):
    author_cf.insert('sacharya'+str(i), {'first_name': 'sudarshan'+ str(i)})

author_cf.insert('1000', {'1': '1'})
# Pause for five seconds before reading.
# NOTE(review): the reason for the pause is not stated here -- confirm.
time.sleep(5)

################################### GET #######################################
# Get the row for the row key 'sacharya'
authors = author_cf.get('sacharya')
print authors

# Get value for column
print "Get value for column"
Example #6
0
import pycassa
import time
import json
from elasticsearch import Elasticsearch
from datetime import date
from pprint import pprint
from pycassa.pool import ConnectionPool
from pycassa.columnfamily import ColumnFamily
# Script fragment: stores a dict of tweets in the 'Tweets' column family of
# the 'Twitter' keyspace, then issues one Elasticsearch PUT (through curl)
# for every word found in each tweet's 'Tweet_Data' field.
# NOTE(review): `final`, `n`, `re` and `os` are used below but are not
# defined or imported in the visible part of this file -- confirm they are
# provided elsewhere.
pool=ConnectionPool('Twitter',['localhost:9160'])
col_fam=ColumnFamily(pool,'Tweets')
# (A commented-out hand-written sample value for `s` used to be here: two
# rows, each with 'author', 'data' and 'time_of_addition' columns.)
#print s;
print final[n];
s=final;
print s[n];
# Outer keys of `s` are used as row keys, inner dicts as columns.
col_fam.batch_insert(s)

# NOTE(review): `es` is only referenced by the commented-out `es.index` call
# below, and the same host is listed twice.
es=Elasticsearch(["localhost:9200", "localhost:9200"])
# Document id used in the PUT URL below.
# NOTE(review): `count` is never changed in the visible loop, so every PUT
# targets id 1 -- confirm whether it is incremented past the end of this
# excerpt.
count=1;
for key in s.keys():
	#print key
	#print s[key]['data']
	content1=s[key]['Tweet_Data'];
#	l=content1.split()
	#print l;
	# One document per run of word characters in the tweet text.
	for word in re.findall(r"\w+", content1):
		word=word.lower();
		# Values are wrapped in literal single quotes; together with the
		# single-quoted -d argument below this affects how the shell
		# re-quotes the payload, so do not change one without the other.
		t={"name":"'"+word+"'",'docid': "'"+key+"'"}
	#print t
#	res=es.index(index="twitter", doc_type=word, id=count, body=t)
		# NOTE(review): the shell command is built by string concatenation
		# from row keys and tweet words; curl's output is appended to the
		# file 'out'.
		os.system("curl -XPUT http://localhost:9200/twitter/tweet/"+str(count) +" -d '"+str(t)+"' >> out");
Example #7
0
    # Walkthrough fragment: the enclosing definition and the creation of
    # `col_fam` are outside the visible part of this file.

    # Read 'key3', add two more columns to it, then read it again.
    readData = col_fam.get('key3')    
    col_fam.insert('key3', {'Column5':'PycassaData2', 'Column6':'PycassaData3'})
    readData = col_fam.get('key3')

    # Register IntegerType validators for two columns, then store and read
    # back Python ints under those column names.
    from pycassa.types import *
    col_fam.column_validators['IntColumn5'] = IntegerType()
    col_fam.column_validators['IntColumn6'] = IntegerType()    
    col_fam.insert('intData', {'IntColumn5':5, 'IntColumn6':6})
    print col_fam.get('intData')
    # Expected output:
    # OrderedDict([('IntColumn5', 5), ('IntColumn6', 6)])
    
# Batch operations

    # Insert two rows in one call, then fetch several rows at once: first
    # with all columns, then restricted to 'Column1' and 'Column7'.
    col_fam.batch_insert({'key4': {'Column1': 'PycassaData4', 
                                   'Column2': 'PycassaData5',
                                   'Column3': 'PycassaData6',
                                   'Column4': 'PycassaData7',
                                   'Column5': 'PycassaData8'},
                          'key5': {'Column7': 'PycassaData9'}})
    readData = col_fam.multiget(['key3', 'key4', 'key5'])
    readData = col_fam.multiget(['key3', 'key4', 'key5'], columns=['Column1', 'Column7'])

# Column slices

    # Slice of 'key4' from 'Column2' to 'Column4', then a reversed read
    # limited to three columns.
    readData = col_fam.get('key4', column_start='Column2', column_finish='Column4')
    readData = col_fam.get('key4', column_reversed=True, column_count=3)    

#Types

#     from pycassa.types import *
#     class User(object):
#          key = AsciiType()