Example #1
import time

from pycassa.pool import ConnectionPool
from pycassa.columnfamily import ColumnFamily

# cassandra_settings is assumed to be a local settings module providing the
# NODE_POOL server list.
import cassandra_settings


class CassandraRepository(object):
    def __init__(self, keyspace, column_family_name):
        self.pool = ConnectionPool(keyspace, cassandra_settings.NODE_POOL)

        self.cf = ColumnFamily(self.pool, column_family_name)

        self.batch = {}

    def add_batch(self, batch, start_time=None):
        """
        Insert a batch of rows.

        :param batch: dict mapping row keys to column dicts
        :param start_time: optional epoch timestamp in milliseconds, used to
            report how long the insert took
        """
        self.cf.batch_insert(batch)
        if start_time is not None:
            print 'time to insert batch: %s ms' % (int(time.time() * 1000) -
                                                   start_time)

    def get(self, timestamp):
        return self.cf.get(str(timestamp))

    def get_range(self, start, end):
        return list(self.cf.get_range(start=str(start), finish=str(end)))

    def close(self):
        # The original called self.sys.close(), but no ``sys`` attribute is
        # ever set on this class; releasing the connection pool is the
        # intended cleanup.
        self.pool.dispose()
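
# A minimal usage sketch for the repository above; the keyspace and column
# family names are hypothetical, and cassandra_settings.NODE_POOL is assumed
# to point at a reachable cluster.
if __name__ == '__main__':
    repo = CassandraRepository('MyKeyspace', 'Events')
    repo.add_batch({'row1': {'col': 'val'}}, start_time=int(time.time() * 1000))
    print repo.get('row1')
    repo.close()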
Example #3
    def batch_insert(self, instances, timestamp=None, ttl=None,
            write_consistency_level=None):
        """
        Insert or update stored instances.

        `instances` should be a list containing instances of `cls` to store.
        """
        insert_dict = dict(
            [(instance.key, self._get_instance_as_dict(instance))
                for instance in instances]
        )
        return ColumnFamily.batch_insert(self, insert_dict,
                timestamp=timestamp, ttl=ttl,
                write_consistency_level=write_consistency_level)
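
# A hedged usage sketch for the ColumnFamilyMap-style batch_insert above; the
# keyspace, column family, and User model below are assumptions made for
# illustration only.
from pycassa.pool import ConnectionPool
from pycassa.columnfamilymap import ColumnFamilyMap
from pycassa.types import UTF8Type

class User(object):
    key = UTF8Type()
    name = UTF8Type()

pool = ConnectionPool('MyKeyspace', ['localhost:9160'])
users = ColumnFamilyMap(User, pool, 'Users')

u1, u2 = User(), User()
u1.key, u1.name = 'alice', 'Alice'
u2.key, u2.name = 'bob', 'Bob'

# Each instance is converted to a column dict and written in a single batch.
users.batch_insert([u1, u2])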
from pycassa.pool import ConnectionPool
from pycassa.columnfamilymap import ColumnFamilyMap
from pycassa.columnfamily import ColumnFamily


if __name__ == '__main__':
    #['10.15.62.100:9160','10.15.62.101:9160','10.15.62.102:9160'] 
    pool = ConnectionPool('Cassandra_Test', ['10.107.4.187:9160'])
    print pool
#    cf_map = ColumnFamilyMap(User, pool, 'Users')
    col_fam = ColumnFamily(pool, 'Users')
    # Read an existing row (raises pycassa.NotFoundException if the key is missing).
    print col_fam.get('author')
    print col_fam.get_count('author')
    # Single-row inserts.
    col_fam.insert('row_key', {'col_name': 'col_val'})
    col_fam.insert('row_key', {'col_name': 'col_val', 'col_name2': 'col_val2'})
    # Insert several rows in one batch.
    col_fam.batch_insert({'row1': {'name1': 'val1', 'name2': 'val2'},
                          'row2': {'foo': 'bar'}})
    #col_fam.insert('super_key', {'key': {'col_name': 'col_val', 'col_name2': 'col_val2'}})
    # Column counts for one row and for several rows at once.
    print col_fam.get_count('row_key', columns=['foo', 'bar'])
    print col_fam.get_count('row_key', column_start='foo')
    print col_fam.multiget_count(['fib0', 'fib1', 'fib2', 'fib3', 'fib4'])
    print col_fam.multiget_count(['fib0', 'fib1', 'fib2', 'fib3', 'fib4'],
                                 columns=['col1', 'col2', 'col3'])
    print col_fam.multiget_count(['fib0', 'fib1', 'fib2', 'fib3', 'fib4'],
                                 column_start='col1', column_finish='col3')
    print col_fam.get_count('row_key')
    # Reads: whole rows, selected columns, reversed slices, and multiget.
    print col_fam.get('row_key')
    print col_fam.get('author')
    print col_fam.get('row_key', columns=['col_name', 'col_name2'])
    print col_fam.get('row_key', column_reversed=True, column_count=3)
    print col_fam.multiget(['row1', 'row2'])
    # Insert numbered columns, then fetch a slice of them.
    for i in range(1, 10):
        col_fam.insert('row_key', {str(i): 'val'})
    print col_fam.get('row_key', column_start='5', column_finish='7')
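
    # pycassa also exposes a Mutator-style batch that queues writes and sends
    # them together; a brief sketch (queue_size is an arbitrary choice here).
    b = col_fam.batch(queue_size=10)
    b.insert('row_key', {'batched_col': 'batched_val'})
    b.insert('row2', {'another_col': 'another_val'})
    b.send()  # flush anything still queued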
# Assumed setup for this example: the imports, server list, and SystemManager
# handle below are placeholders.
import time

from pycassa.pool import ConnectionPool
from pycassa.columnfamily import ColumnFamily
from pycassa.system_manager import SystemManager, UTF8_TYPE

server_list = ['localhost:9160']      # assumed cluster address
sys = SystemManager(server_list[0])   # used below to create the column family

#pool = ConnectionPool('entries', server_list=server_list, pool_size=20)
pool = ConnectionPool('entries', server_list=server_list)

############################## Create Column Family ###########################
sys.create_column_family('entries', 'Author', comparator_type=UTF8_TYPE)
author_cf = ColumnFamily(pool, 'Author')

################################ INSERT #######################################
# Insert a row with a Column
author_cf.insert('sacharya', {'first_name': 'Sudarshan'})
# Insert a row with multiple columns
author_cf.insert('sacharya1', {'first_name': 'Sudarshan',
                               'last_name': 'Acharya'})
# Insert multiple rows
author_cf.batch_insert({'rowkey1': {'first_name': 'Sudarshan', 'last_name': 'Acharya'},
                        'rowkey2': {'first_name': 'Sudarshan', 'last_name': 'Acharya'}})
# Insert lots of individual rows
for i in range(100):
    author_cf.insert('sacharya'+str(i), {'first_name': 'sudarshan'+ str(i)})

author_cf.insert('1000', {'1': '1'})
time.sleep(5)

################################### GET #######################################
# Get the row for the rowkey
authors = author_cf.get('sacharya')
print authors

# Get value for column
print "Get value for column"
import os
import re
import time
import json

import pycassa
from elasticsearch import Elasticsearch
from datetime import date
from pprint import pprint
from pycassa.pool import ConnectionPool
from pycassa.columnfamily import ColumnFamily

pool = ConnectionPool('Twitter', ['localhost:9160'])
col_fam = ColumnFamily(pool, 'Tweets')
#s={'article3': {'author': 'Monam ', 'data': 'ye kya bc kaat rhe hai hum opps BC!!!!!!','time_of_addition': time.strftime("%c") },'article4': { 'author': 'Kriti' ,'data' : 'yaar  bakwass hai ye kuch ni hona iss project ka','time_of_addition':time.strftime("%c")}}
#print s;
# ``final`` (a dict of tweets keyed by row key, like the commented sample
# above) and ``n`` are assumed to be defined earlier in the original script.
print final[n]
s = final
print s[n]
# Write every tweet row to Cassandra in a single batch.
col_fam.batch_insert(s)

es = Elasticsearch(["localhost:9200", "localhost:9200"])
count = 1
for key in s.keys():
    #print key
    #print s[key]['data']
    content1 = s[key]['Tweet_Data']
#    l = content1.split()
    #print l
    # Index each word of the tweet so it can be searched back to its row key.
    for word in re.findall(r"\w+", content1):
        word = word.lower()
        t = {"name": word, "docid": key}
    #print t
#    res = es.index(index="twitter", doc_type=word, id=count, body=t)
        os.system("curl -XPUT http://localhost:9200/twitter/tweet/" + str(count) +
                  " -d '" + json.dumps(t) + "' >> out")
        count += 1
Example #7
    readData = col_fam.get('key3')
    col_fam.insert('key3', {'Column5': 'PycassaData2', 'Column6': 'PycassaData3'})
    readData = col_fam.get('key3')

    # Declare per-column value types so integers round-trip as ints.
    from pycassa.types import IntegerType
    col_fam.column_validators['IntColumn5'] = IntegerType()
    col_fam.column_validators['IntColumn6'] = IntegerType()
    col_fam.insert('intData', {'IntColumn5': 5, 'IntColumn6': 6})
    print col_fam.get('intData')
    # OrderedDict([('IntColumn5', 5), ('IntColumn6', 6)])
    
# Batch operations

    col_fam.batch_insert({'key4': {'Column1': 'PycassaData4', 
                                   'Column2': 'PycassaData5',
                                   'Column3': 'PycassaData6',
                                   'Column4': 'PycassaData7',
                                   'Column5': 'PycassaData8'},
                          'key5': {'Column7': 'PycassaData9'}})
    readData = col_fam.multiget(['key3', 'key4', 'key5'])
    readData = col_fam.multiget(['key3', 'key4', 'key5'], columns=['Column1', 'Column7'])

# Column Slices

    readData = col_fam.get('key4', column_start='Column2', column_finish='Column4')
    readData = col_fam.get('key4', column_reversed=True, column_count=3)    
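
    # For rows too wide to fetch with a single get(), pycassa can also stream
    # columns lazily; a brief sketch using xget on one of the rows above.
    for col_name, col_value in col_fam.xget('key4'):
        print col_name, col_value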

# Types

#     from pycassa.types import *
#     class User(object):
#          key = AsciiType()