def _connect():
    """Connect to the configured Cassandra node and keyspace.

    Connection settings are read from the module-level ``args`` object:
    ``ip``, ``port``, ``protocol_version``, ``username``, ``password``,
    ``keyspace`` and ``timeout``. Plain-text authentication is only set up
    when both a username and a password are given.

    Failures are reported through ``_incorrect``: an ``InvalidRequest`` is
    reported as a missing keyspace, any other exception as a host/port or
    credentials problem.

    :return: the object returned by ``Cluster.connect`` with its
        ``default_timeout`` set to ``int(args.timeout)``.
    """
    auth_provider = None
    if args.username is not None and args.password is not None:
        auth_provider = PlainTextAuthProvider(
            username=args.username, password=args.password)

    connection = Cluster([args.ip],
                         protocol_version=args.protocol_version,
                         auth_provider=auth_provider,
                         port=int(args.port))

    # connect to keyspace
    # NOTE(review): _incorrect presumably terminates the program; if it ever
    # returns, `connection` is still the Cluster object below -- confirm.
    try:
        connection = connection.connect(args.keyspace)
    except InvalidRequest:
        # Leading space before "does" keeps the keyspace name from running
        # straight into the rest of the sentence.
        _incorrect("Keyspace " + args.keyspace + " does not exist.")
    except Exception:
        _incorrect("Unable to connect to host " + args.ip +
                   " using port " + str(args.port) +
                   " or username/password is incorrect.")

    connection.default_timeout = int(args.timeout)
    return connection
def initialize_connection(self):
    """Copy per-timestamp results from the source table into ``self.dest``.

    Opens a pycassa-style connection pool / column family on
    ``self.host:self.thriftport`` and a driver session on
    ``self.host:self.port`` (both with a 30 second timeout), loads
    ``self.source`` through ``self.sc.cassandraTable`` and groups its
    ``key`` values by ``timestamp``.

    When ``self.get_key`` returns 1 every row is processed; otherwise only
    rows whose timestamp is greater than the value returned by
    ``self.get_timestamp`` are processed. Each group is handed to
    ``retrieve_count`` and the resulting fields are queued through
    ``batch_insert``; the batch is flushed every 100 groups and once more
    after the loop.

    :return: always 1.
    """
    thrift_endpoint = self.host + ':' + self.thriftport
    pool = ConnectionPool(self.keyspace, [thrift_endpoint], timeout=30)
    col_fam = ColumnFamily(pool, self.source)
    session = Cluster(contact_points=[self.host],
                      port=self.port).connect(keyspace=self.keyspace)
    session.default_timeout = 30

    # configuring spark with cassandra keyspace and columnfamily
    rdd = self.sc.cassandraTable(self.keyspace, self.source).cache()

    val = self.get_key(self.db, session)
    if val == 1:
        # collecting rows in rdd grouped by timestamp
        time_rdd = rdd.select("timestamp", "key").groupByKey().collect()
    else:
        # Only rows newer than the stored timestamp are collected.
        newest_seen = self.get_timestamp(session, self.dest, val)
        time_rdd = (rdd.select("timestamp", "key")
                    .filter(lambda row: row.timestamp > newest_seen)
                    .groupByKey()
                    .collect())

    self.create_table(self.dest, session)

    batch = BatchStatement()  # preparing a batchstatement
    # Stays None when there is nothing to process, so the final flush can be
    # skipped instead of failing on an unbound name.
    insert = None
    for row_count, timekey in enumerate(time_rdd, 1):
        print(row_count)
        process = retrieve_count(timekey[1], timekey[0], col_fam)
        fields = process.retrieve_fields()
        insert = batch_insert(batch, fields, self.dest, session)
        insert.batch_prepare()
        if row_count % 100 == 0:  # inserting in batches of 100
            insert.insert_fields()
            batch = BatchStatement()  # creating a fresh batch

    if insert is not None:
        insert.insert_fields()  # inserting the final batch
    return 1
def insert_cassandra(self, session, source, country, country_count):
    """Write the country lists onto every id found in ``main_count``.

    A dedicated cluster connection (timeout 100) is opened on
    ``self.host:self.port`` / ``self.keyspace`` to read the ids; the updates
    themselves are issued on the caller-supplied ``session``.

    :param session: session used to execute the UPDATE statements.
    :param source: name of the table to update; it is concatenated into the
        statement text, so it must come from trusted configuration.
    :param country: value stored in the ``country`` column.
    :param country_count: value stored in the ``country_count`` column.
    :return: always 1.
    """
    query = "SELECT id FROM main_count"
    print(query)

    id_cluster = Cluster(contact_points=[self.host], port=self.port)
    try:
        session2 = id_cluster.connect(keyspace=self.keyspace)
        session2.default_timeout = 100
        # The cluster stays open while iterating because the result set may
        # fetch further pages lazily.
        for row in session2.execute(query):
            uid = row[0]
            # retrieving the id from the source table and updating the
            # country_count (list) and country (list) columns
            print("%s %s" % (len(country), len(country_count)))
            session.execute(
                "UPDATE "+source+" SET country=%s, country_count=%s WHERE id=%s",
                parameters=[country, country_count, uid])
    finally:
        # Release the connections and threads of the helper cluster; the
        # original left a new cluster open on every call.
        id_cluster.shutdown()
    return 1
def initialize_connection(self):
    """Load host/id pairs from ``self.table`` and store their locations.

    Connects to ``self.host:self.port`` / ``self.keyspace`` with a default
    timeout of 100, reads the ``host`` and ``id`` columns of ``self.table``
    and keeps, for each row, the part of the stripped host value before the
    first comma. A running row number is printed for every row read. The
    collected hosts and ids are passed to ``retrieve_location.get_location``
    and the keys and values of its result are forwarded to
    ``self.insert_cassandra``.

    :return: always 1.
    """
    cluster = Cluster(contact_points=[self.host], port=self.port)
    session = cluster.connect(keyspace=self.keyspace)
    session.default_timeout = 100

    statement = SimpleStatement("SELECT host, id FROM " + self.table)
    rows = session.execute(statement)

    hosts = []
    id_val = []
    for seen, record in enumerate(rows, 1):
        host_field = str(record[0]).strip()
        id_val.append(record[1])
        # split() returns the whole string as its first element when there
        # is no comma, so one expression covers both cases.
        hosts.append(host_field.split(",")[0])
        print(seen)

    # create instance for the class retrieve_location
    locator = retrieve_location()
    count_list = locator.get_location(session, self.table, hosts, id_val)
    self.insert_cassandra(session, self.source,
                          count_list.keys(), count_list.values())
    return 1
import cassandra from cassandra.cluster import SimpleStatement, Cluster import timeit,sys cluster = Cluster(['10.1.0.104', '10.1.0.105', '127.0.0.1'], port=9233) cluster.default_timeout = None session = cluster.connect('group3alt') # keyspace should be our own indexsearch = False #turn this to true when you want to do looping queries for group by print cluster.metadata.cluster_name # should make sure this is group3 print cassandra.__version__,"\n" try: Consist_Level = int(sys.argv[1]) except: Consist_Level = 1 program_st = timeit.default_timer() #====================================================================== # QUERY 1 #====================================================================== query = SimpleStatement(""" SELECT count (*) as ten_atomic FROM cdr WHERE (MSC_CODE ,CITY_ID,SERVICE_NODE_ID ,RUM_DATA_NUM ,DUP_SEQ_NUM ,SEIZ_CELL_NUM ,FLOW_DATA_INC ,SUB_HOME_INT_PRI ,CON_OHM_NUM, SESS_SFC) > (10000,10000,10000,3,10000,1,10000,10000,10000,10000) AND (MSC_CODE ,CITY_ID,SERVICE_NODE_ID ,RUM_DATA_NUM ,DUP_SEQ_NUM ,SEIZ_CELL_NUM ,FLOW_DATA_INC ,SUB_HOME_INT_PRI ,CON_OHM_NUM, SESS_SFC)
def current_time(): return (datetime.now() - datetime(1970, 1, 1)).total_seconds() for lines in args.lines: for columns in args.columns: initial_size = 0 print("Deleting previous data") try: subprocess.check_output("rm " + args.queue_dir + " -R", shell=True) except subprocess.CalledProcessError: pass try: session = Cluster(['localhost']).connect('kairosdb') session.default_timeout = 1200 session.execute('drop keyspace kairosdb;') print("Data deleted.") except NoHostAvailable: print("No previous data exists. Skipping delete.") print("Starting Kairosdb") kairos = subprocess.Popen([args.kairos_path, "run"], stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL) time.sleep(4) while True: try: subprocess.check_output("curl localhost:8080", shell=True, stderr=subprocess.DEVNULL) except subprocess.CalledProcessError:
import logging import time from uuid import UUID from cassandra.cluster import Cluster import cassandra LOG = logging.getLogger(__name__) PTM = PhysicalTopologyManager("../topologies/mmm_physical_test_tracing.yaml") VTM = VirtualTopologyManager("../topologies/mmm_virtual_test_tracing.yaml") BM = BindingManager(PTM, VTM) cass_host = get_container_by_hostname("cassandra1").get_ip_address() cassandra = Cluster([cass_host]).connect() cassandra.default_timeout = 60.0 binding_multihost = { "description": "spanning across multiple MMs", "bindings": [ {"binding": {"device_name": "bridge-000-001", "port_id": 2, "host_id": 1, "interface_id": 1}}, {"binding": {"device_name": "bridge-000-002", "port_id": 2, "host_id": 2, "interface_id": 2}}, ], } def set_filters(router_name, inbound_filter_name, outbound_filter_name): """Sets in-/out-bound filters to a router.""" router = VTM.get_router(router_name) inbound_filter = None
import logging import time from uuid import UUID from cassandra.cluster import Cluster import cassandra LOG = logging.getLogger(__name__) PTM = PhysicalTopologyManager('../topologies/mmm_physical_test_tracing.yaml') VTM = VirtualTopologyManager('../topologies/mmm_virtual_test_tracing.yaml') BM = BindingManager(PTM, VTM) cass_host = get_container_by_hostname('cassandra1').get_ip_address() cassandra = Cluster([cass_host]).connect() cassandra.default_timeout = 60.0 binding_multihost = { 'description': 'spanning across multiple MMs', 'bindings': [ { 'binding': { 'device_name': 'bridge-000-001', 'port_id': 2, 'host_id': 1, 'interface_id': 1 } }, { 'binding': {