def try_connecting(username='', password=''):
    """
    Wait until we can connect to the cluster.

    When the cluster starts up there is a window during which it is not possible to
    connect to it, even though Cassandra is already listening on port 7000. Here we
    wait until we can actually issue a successful Cluster.connect() call.

    :param username: optional user name for connection
    :param password: optional password for connection
    :return: True if we can successfully connect to the cluster within 2 minutes, False otherwise
    """
    if username and password:
        ap = AuthenticationTests.get_authentication_provider(username, password)
    else:
        ap = None

    maxwait = 120  # in seconds
    sleeptime = 1

    wait_time = 0
    while wait_time < maxwait:
        try:
            cluster = Cluster(protocol_version=tests.integration.PROTOCOL_VERSION, auth_provider=ap)
            cluster.connect()
            log.debug("Can connect after %d seconds" % wait_time)
            return True
        except Exception:
            wait_time += sleeptime
            time.sleep(sleeptime)

    return False
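The AuthenticationTests.get_authentication_provider helper used above is not shown in this snippet; a minimal sketch of what such a factory might return, assuming it simply wraps the driver's built-in plain-text provider:

from cassandra.auth import PlainTextAuthProvider

def get_authentication_provider(username, password):
    # Assumption: plain-text auth against Cassandra's PasswordAuthenticator.
    return PlainTextAuthProvider(username=username, password=password)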
def test_pool_management(self):
    # Ensure that in_flight and request_ids quiesce after cluster operations
    cluster = Cluster(protocol_version=PROTOCOL_VERSION, idle_heartbeat_interval=0)  # no idle heartbeat here, pool management is tested in test_idle_heartbeat
    session = cluster.connect()
    session2 = cluster.connect()

    # prepare
    p = session.prepare("SELECT * FROM system.local WHERE key=?")
    self.assertTrue(session.execute(p, ('local',)))

    # simple
    self.assertTrue(session.execute("SELECT * FROM system.local WHERE key='local'"))

    # set keyspace
    session.set_keyspace('system')
    session.set_keyspace('system_traces')

    # use keyspace
    session.execute('USE system')
    session.execute('USE system_traces')

    # refresh schema
    cluster.refresh_schema_metadata()
    cluster.refresh_schema_metadata(max_schema_agreement_wait=0)

    # submit schema refresh
    future = cluster.submit_schema_refresh()
    future.result()

    assert_quiescent_pool_state(self, cluster)

    cluster.shutdown()
def test_submit_schema_refresh(self):
    """
    Ensure that new schema is refreshed after submit_schema_refresh()
    """
    cluster = Cluster(protocol_version=PROTOCOL_VERSION)
    cluster.connect()
    self.assertNotIn("newkeyspace", cluster.metadata.keyspaces)

    other_cluster = Cluster(protocol_version=PROTOCOL_VERSION)
    session = other_cluster.connect()
    session.execute(
        """
        CREATE KEYSPACE newkeyspace
        WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}
        """)

    future = cluster.submit_schema_refresh()
    future.result()

    self.assertIn("newkeyspace", cluster.metadata.keyspaces)

    session.execute("DROP KEYSPACE newkeyspace")
    cluster.shutdown()
    other_cluster.shutdown()
def connect_cassandra():
    error = False
    cluster = Cluster([config.get('cassandra', 'db_host')],
                      port=int(config.get('cassandra', 'db_port')),  # config.get() returns a string; the driver expects an int port
                      protocol_version=3,
                      idle_heartbeat_interval=120)
    try:
        LOG.info("Connecting to Cassandra..")
        return cluster.connect(config.get('cassandra', 'keyspace'))
    except NoHostAvailable:
        error = True
        LOG.info("ERROR: Check Cassandra connection settings in conf")
    except InvalidRequest:
        LOG.info("ERROR: Could not find existing Cassandra keyspace. Will create a new one.")
        try:
            db_connection = cluster.connect()
            CREATE_KEYSPACE = """
                CREATE KEYSPACE %s
                WITH replication = {'class': '%s', 'replication_factor': %s }
                """ % (config.get('cassandra', 'keyspace'),
                       config.get('cassandra', 'replication_strategy'),
                       config.get('cassandra', 'replication_factor'))
            db_connection.execute(CREATE_KEYSPACE)
            db_connection.set_keyspace(config.get('cassandra', 'keyspace'))
            LOG.info("Created and session set to new keyspace: %s" % config.get('cassandra', 'keyspace'))
            return db_connection
        except SyntaxException:
            error = True
            LOG.info("ERROR: couldn't create new keyspace. Check keyspace settings in conf. Exiting now.")
            raise
    except Exception:
        error = True
        LOG.info("ERROR: something wrong with Cassandra connection")
    finally:
        if error:
            LOG.info("Exiting..")
            sys.exit(0)
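For reference, a minimal stub of the [cassandra] config section this function reads; the option names are taken from the calls above, while the values are illustrative assumptions only:

from configparser import ConfigParser

# Illustrative only: the options connect_cassandra() expects to find under [cassandra].
config = ConfigParser()
config['cassandra'] = {
    'db_host': '127.0.0.1',
    'db_port': '9042',
    'keyspace': 'mykeyspace',
    'replication_strategy': 'SimpleStrategy',
    'replication_factor': '1',
}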
def getEntrys(page,keyspaceid,columnfamilyid): cluster = Cluster() session = cluster.connect('system') rows = session.execute('select * from schema_keyspaces') info = [] if int(keyspaceid) < (len(rows) + 1) and int(keyspaceid) > 0: info = {} info['name'] = rows[int(keyspaceid) -1][0] keyspacename = rows[int(keyspaceid) -1][0] rows = session.execute("SELECT * FROM schema_columnfamilies where keyspace_name='" + info['name'] + "'") columnfamilyname = rows[int(columnfamilyid)-1][1] session = cluster.connect(rows[int(keyspaceid)-1][0]) rows = session.execute("SELECT * FROM " + columnfamilyname) info = rows rows = session.execute("SELECT * FROM system.schema_columns WHERE keyspace_name = '" + keyspacename + "' AND columnfamily_name = '" + columnfamilyname + "'") fields = [] for i in rows: fields.append(i) temp = fields[len(rows) - 1] fields[len(rows) - 1] = fields[0] fields[0] = temp temp = fields[1] fields[1] = fields[2] fields[2] = temp else: return render_template('error.html',error="Not a valid keyspaceid") pages = info[(page-1)*PER_PAGE:PER_PAGE*page] if not pages and page != 1: abort(404) pagination = Pagination(page, PER_PAGE, len(info)) return render_template('listentrys.html',pagination=pagination,keyspaceid=keyspaceid,columnfamilyid=columnfamilyid,pages=pages,fields=fields,section = 'getEntrys')
def test_raise_error_on_control_connection_timeout(self):
    """
    Test for initial control connection timeout

    test_raise_error_on_control_connection_timeout tests that the driver times out after the set initial
    connection timeout. It first pauses node1, essentially making it unreachable. It then attempts to create
    a Cluster object via connecting to node1 with a timeout of 1 second, and ensures that a NoHostAvailable
    is raised, along with an OperationTimedOut for 1 second.

    @expected_errors NoHostAvailable When node1 is paused, and a connection attempt is made.
    @since 2.6.0
    @jira_ticket PYTHON-206
    @expected_result NoHostAvailable exception should be raised after 1 second.

    @test_category connection
    """
    get_node(1).pause()
    cluster = Cluster(contact_points=['127.0.0.1'], protocol_version=PROTOCOL_VERSION, connect_timeout=1)

    with self.assertRaisesRegexp(NoHostAvailable, "OperationTimedOut\('errors=Timed out creating connection \(1 seconds\)"):
        cluster.connect()
    cluster.shutdown()

    get_node(1).resume()
def getEntrysInfo(keyspaceid,columnfamilyid,entryname): cluster = Cluster() session = cluster.connect('system') rows = session.execute('select * from schema_keyspaces') info = [] if int(keyspaceid) < (len(rows) + 1) and int(keyspaceid) > 0: info = {} info['name'] = rows[int(keyspaceid) -1][0] keyspacename = rows[int(keyspaceid) -1][0] rows = session.execute("SELECT * FROM schema_columnfamilies where keyspace_name='" + info['name'] + "'") columnfamilyname = rows[int(columnfamilyid)-1][1] primarykey = rows[int(columnfamilyid)-1] session = cluster.connect(rows[int(keyspaceid)-1][0]) primarykey = primarykey[17][2:] primarykey = primarykey[:-2] query = "SELECT * FROM " + columnfamilyname + " WHERE " + primarykey + "='" + entryname + "'" rows = session.execute(query) info = rows query = "SELECT * FROM system.schema_columns WHERE keyspace_name='" + keyspacename + "' AND columnfamily_name = '" + columnfamilyname + "'" rows = session.execute(query) fields = [] for i in rows: fields.append(i) temp = fields[len(rows) - 1] fields[len(rows) - 1] = fields[0] fields[0] = temp temp = fields[1] fields[1] = fields[2] fields[2] = temp return render_template('entryinfo.html',info=info,fields=fields,keyspaceid=keyspaceid,columnfamilyid=columnfamilyid,entryname=entryname)
def test_pool_with_host_down(self):
    """
    Test to ensure that cluster.connect() doesn't return prior to pools being initialized.

    This test will figure out which host our pool logic will connect to first. It then shuts that server down.
    Previously cluster.connect() would return prior to the pools being initialized, and the first queries would
    return a no host exception.

    @since 3.7.0
    @jira_ticket PYTHON-617
    @expected_result query should complete successfully

    @test_category connection
    """
    # Find the first node we will try to create connections to, and shut it down.
    cluster = Cluster(protocol_version=PROTOCOL_VERSION)
    cluster.connect()
    hosts = cluster.metadata.all_hosts()
    address = hosts[0].address
    node_to_stop = int(address.split('.')[-1:][0])
    try:
        force_stop(node_to_stop)
        wait_for_down(cluster, node_to_stop)
        # Attempt a query against that node. It should complete
        cluster2 = Cluster(protocol_version=PROTOCOL_VERSION)
        session2 = cluster2.connect()
        session2.execute("SELECT * FROM system.local")
        cluster2.shutdown()
    finally:
        start(node_to_stop)
        wait_for_up(cluster, node_to_stop)
        cluster.shutdown()
def test_invalid_protocol_negotation(self):
    """
    Test for protocol negotiation when explicit versions are set

    If an explicit protocol version that is not compatible with the server version is set, an exception
    should be thrown. It should not attempt to negotiate.

    For reference, the protocol versions supported by each server version are as follows:
        1.2 -> 1
        2.0 -> 2, 1
        2.1 -> 3, 2, 1
        2.2 -> 4, 3, 2, 1
        3.X -> 4, 3

    @since 3.6.0
    @jira_ticket PYTHON-537
    @expected_result downgrading should not be allowed when explicit protocol versions are set.

    @test_category connection
    """
    upper_bound = get_unsupported_upper_protocol()
    if upper_bound is not None:
        cluster = Cluster(protocol_version=upper_bound)
        with self.assertRaises(NoHostAvailable):
            cluster.connect()
        cluster.shutdown()

    lower_bound = get_unsupported_lower_protocol()
    if lower_bound is not None:
        cluster = Cluster(protocol_version=lower_bound)
        with self.assertRaises(NoHostAvailable):
            cluster.connect()
        cluster.shutdown()
def makeConnection(): ip_address = findIP() notResolved = True while notResolved: notResolved=False try: userpass = findUserPass() ap = PlainTextAuthProvider(username=userpass[0], password=userpass[1]) bCluster=Cluster([ip_address],connection_class=AsyncoreConnection,auth_provider=ap) bSpace = bCluster.connect() except Exception as er: redFlag = ['AuthenticationFailed','username','password','incorrect'] test = filter(lambda x: x.lower() in str(er).lower(), redFlag) if len(test)==len(redFlag): #all redFlags words exists on message print 'provided username doesnt work. trying default:' ap = PlainTextAuthProvider(username='******', password='******') try: bCluster=Cluster([ip_address],connection_class=AsyncoreConnection,auth_provider=ap) bSpace=bCluster.connect() bSpace.execute("ALTER USER cassandra with password 'merogharanuwakotmaparchhatimrokahaparchha'") except Exception as er: print er ap = PlainTextAuthProvider(username='******', password='******') bCluster=Cluster([ip_address],connection_class=AsyncoreConnection,auth_provider=ap) bSpace=bCluster.connect() bSpace.execute("CREATE USER %s with password '%s' SUPERUSER" % (userpass[0],userpass[1])) print ('The username and password created. Now trying login again') bCluster.shutdown() notResolved=True else: raise return bCluster, bSpace
def test_can_register_udt_before_connecting(self): """ Test the registration of UDTs before session creation """ c = Cluster(protocol_version=PROTOCOL_VERSION) s = c.connect() s.execute( """ CREATE KEYSPACE udt_test_register_before_connecting WITH replication = { 'class' : 'SimpleStrategy', 'replication_factor': '1' } """ ) s.set_keyspace("udt_test_register_before_connecting") s.execute("CREATE TYPE user (age int, name text)") s.execute("CREATE TABLE mytable (a int PRIMARY KEY, b frozen<user>)") s.execute( """ CREATE KEYSPACE udt_test_register_before_connecting2 WITH replication = { 'class' : 'SimpleStrategy', 'replication_factor': '1' } """ ) s.set_keyspace("udt_test_register_before_connecting2") s.execute("CREATE TYPE user (state text, is_cool boolean)") s.execute("CREATE TABLE mytable (a int PRIMARY KEY, b frozen<user>)") # now that types are defined, shutdown and re-create Cluster c.shutdown() c = Cluster(protocol_version=PROTOCOL_VERSION) User1 = namedtuple("user", ("age", "name")) User2 = namedtuple("user", ("state", "is_cool")) c.register_user_type("udt_test_register_before_connecting", "user", User1) c.register_user_type("udt_test_register_before_connecting2", "user", User2) s = c.connect() s.set_keyspace("udt_test_register_before_connecting") s.execute("INSERT INTO mytable (a, b) VALUES (%s, %s)", (0, User1(42, "bob"))) result = s.execute("SELECT b FROM mytable WHERE a=0") self.assertEqual(1, len(result)) row = result[0] self.assertEqual(42, row.b.age) self.assertEqual("bob", row.b.name) self.assertTrue(type(row.b) is User1) # use the same UDT name in a different keyspace s.set_keyspace("udt_test_register_before_connecting2") s.execute("INSERT INTO mytable (a, b) VALUES (%s, %s)", (0, User2("Texas", True))) result = s.execute("SELECT b FROM mytable WHERE a=0") self.assertEqual(1, len(result)) row = result[0] self.assertEqual("Texas", row.b.state) self.assertEqual(True, row.b.is_cool) self.assertTrue(type(row.b) is User2) c.shutdown()
def test_cannot_connect_with_bad_client_auth(self):
    """
    Test to validate that we cannot connect with invalid client auth.

    This test will use bad keys/certs to perform client authentication. It will then attempt to connect
    to a server that has client authentication enabled.

    @since 2.7.0
    @expected_result The client will throw an exception on connect

    @test_category connection:ssl
    """
    # Setup absolute paths to key/cert files
    abs_path_ca_cert_path = os.path.abspath(CLIENT_CA_CERTS)
    abs_driver_keyfile = os.path.abspath(DRIVER_KEYFILE)
    abs_driver_certfile = os.path.abspath(DRIVER_CERTFILE_BAD)

    cluster = Cluster(protocol_version=PROTOCOL_VERSION,
                      ssl_options={'ca_certs': abs_path_ca_cert_path,
                                   'ssl_version': ssl.PROTOCOL_TLSv1,
                                   'keyfile': abs_driver_keyfile,
                                   'certfile': abs_driver_certfile})
    with self.assertRaises(NoHostAvailable) as context:
        cluster.connect()
    cluster.shutdown()
def copy_model(**kwargs): conf = Configuration('global').configuration cluster_source = Cluster(conf['cassandra']['hosts']) source = cluster_source.connect(conf['cassandra']['keyspace']) source.row_factory = dict_factory cluster_dest = Cluster(conf['new_cassandra']['hosts']) dest = cluster_dest.connect(conf['new_cassandra']['keyspace']) table = kwargs['model'].lower() fetch_size = kwargs.get('fetch_size', 100) query = "SELECT * FROM {0}".format(table) if 'where' in kwargs and kwargs['where']: query = "{0} WHERE {1} ALLOW FILTERING".format(query, kwargs['where']) statement = SimpleStatement(query, fetch_size=fetch_size) insert_query = "INSERT INTO {0} ({1}) VALUES ({2})" cpt = 0 insert = None for row in source.execute(statement): if cpt == 0: columns = ['"{}"'.format(x) for x in row.keys()] binds = ['?' for x in range(0, len(columns))] insert_str = insert_query.format(table, ','.join(columns), ','.join(binds)) insert = dest.prepare(insert_str) bound = insert.bind(row.values()) dest.execute(bound) cpt += 1 print('Copy of {} records from {}'.format(cpt, table)) return cpt
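A usage sketch for copy_model above; the model, where and fetch_size keyword names are taken from the function body, while the table name and filter value are illustrative assumptions:

# Hypothetical call: copy the rows of the 'user' table, filtered server-side,
# from the conf['cassandra'] keyspace to the conf['new_cassandra'] keyspace.
copied = copy_model(model='User', where="user_id = 42", fetch_size=200)
print('{} rows copied'.format(copied))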
def test_session_no_cluster(self): """ Test session context without cluster context. @since 3.4 @jira_ticket PYTHON-521 @expected_result session should be created correctly. Session should shutdown correctly outside of context @test_category configuration """ cluster = Cluster(**self.cluster_kwargs) unmanaged_session = cluster.connect() with cluster.connect() as session: self.assertFalse(cluster.is_shutdown) self.assertFalse(session.is_shutdown) self.assertFalse(unmanaged_session.is_shutdown) self.assertTrue(session.execute('select release_version from system.local')[0]) self.assertTrue(session.is_shutdown) self.assertFalse(cluster.is_shutdown) self.assertFalse(unmanaged_session.is_shutdown) unmanaged_session.shutdown() self.assertTrue(unmanaged_session.is_shutdown) self.assertFalse(cluster.is_shutdown) cluster.shutdown() self.assertTrue(cluster.is_shutdown)
def test_for_schema_disagreement_attribute(self):
    """
    Tests to ensure that schema disagreement is properly surfaced on the response future.

    Creates and destroys keyspaces/tables with various schema agreement timeouts set. The first part runs
    cql create/drop cmds with schema agreement set in such a way that it will be impossible for agreement
    to occur during the timeout. It then validates that the correct value is set on the result. The second
    part ensures that when schema agreement occurs, the result set reflects that appropriately.

    @since 3.1.0
    @jira_ticket PYTHON-458
    @expected_result is_schema_agreed is set appropriately on the response future

    @test_category schema
    """
    # This should yield a schema disagreement
    cluster = Cluster(protocol_version=PROTOCOL_VERSION, max_schema_agreement_wait=0.001)
    session = cluster.connect(wait_for_all_pools=True)

    rs = session.execute("CREATE KEYSPACE test_schema_disagreement WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1}")
    self.check_and_wait_for_agreement(session, rs, False)
    rs = session.execute("CREATE TABLE test_schema_disagreement.cf (key int PRIMARY KEY, value int)")
    self.check_and_wait_for_agreement(session, rs, False)
    rs = session.execute("DROP KEYSPACE test_schema_disagreement")
    self.check_and_wait_for_agreement(session, rs, False)
    cluster.shutdown()

    # These should have schema agreement
    cluster = Cluster(protocol_version=PROTOCOL_VERSION, max_schema_agreement_wait=100)
    session = cluster.connect()

    rs = session.execute("CREATE KEYSPACE test_schema_disagreement WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1}")
    self.check_and_wait_for_agreement(session, rs, True)
    rs = session.execute("CREATE TABLE test_schema_disagreement.cf (key int PRIMARY KEY, value int)")
    self.check_and_wait_for_agreement(session, rs, True)
    rs = session.execute("DROP KEYSPACE test_schema_disagreement")
    self.check_and_wait_for_agreement(session, rs, True)
    cluster.shutdown()
class ConnectionTest(BaseCassEngTestCase):

    @classmethod
    def setUpClass(cls):
        cls.original_cluster = connection.get_cluster()
        cls.keyspace1 = 'ctest1'
        cls.keyspace2 = 'ctest2'
        super(ConnectionTest, cls).setUpClass()
        cls.setup_cluster = Cluster(protocol_version=PROTOCOL_VERSION)
        cls.setup_session = cls.setup_cluster.connect()
        ddl = "CREATE KEYSPACE {0} WITH replication = {{'class': 'SimpleStrategy', 'replication_factor': '{1}'}}".format(cls.keyspace1, 1)
        execute_with_long_wait_retry(cls.setup_session, ddl)
        ddl = "CREATE KEYSPACE {0} WITH replication = {{'class': 'SimpleStrategy', 'replication_factor': '{1}'}}".format(cls.keyspace2, 1)
        execute_with_long_wait_retry(cls.setup_session, ddl)

    @classmethod
    def tearDownClass(cls):
        execute_with_long_wait_retry(cls.setup_session, "DROP KEYSPACE {0}".format(cls.keyspace1))
        execute_with_long_wait_retry(cls.setup_session, "DROP KEYSPACE {0}".format(cls.keyspace2))
        models.DEFAULT_KEYSPACE = DEFAULT_KEYSPACE
        cls.original_cluster.shutdown()
        cls.setup_cluster.shutdown()
        setup_connection(DEFAULT_KEYSPACE)
        models.DEFAULT_KEYSPACE

    def setUp(self):
        self.c = Cluster(protocol_version=PROTOCOL_VERSION)
        self.session1 = self.c.connect(keyspace=self.keyspace1)
        self.session1.row_factory = dict_factory
        self.session2 = self.c.connect(keyspace=self.keyspace2)
        self.session2.row_factory = dict_factory

    def tearDown(self):
        self.c.shutdown()

    def test_connection_session_switch(self):
        """
        Test to ensure that when the default keyspace is changed in a session, and that session is set in the
        connection class, the new default keyspace is honored.

        @since 3.1
        @jira_ticket PYTHON-486
        @expected_result CQLENGINE adopts whatever keyspace is passed in via the set_session method as default

        @test_category object_mapper
        """
        connection.set_session(self.session1)
        sync_table(TestConnectModel)
        TCM1 = TestConnectModel.create(id=1, keyspace=self.keyspace1)
        connection.set_session(self.session2)
        sync_table(TestConnectModel)
        TCM2 = TestConnectModel.create(id=1, keyspace=self.keyspace2)
        connection.set_session(self.session1)
        self.assertEqual(1, TestConnectModel.objects.count())
        self.assertEqual(TestConnectModel.objects.first(), TCM1)
        connection.set_session(self.session2)
        self.assertEqual(1, TestConnectModel.objects.count())
        self.assertEqual(TestConnectModel.objects.first(), TCM2)
def connect(self):
    """
    Connect to the Cassandra cluster and open sessions for the
    sensor-data and analytics keyspaces.
    """
    cluster = Cluster()
    self.sensor_data_session = cluster.connect(SENSOR_DATA_KEYSPACE)
    self.analytics_session = cluster.connect(ANALYTICS_KEYSPACE)
def test_non_existing_types(self):
    c = Cluster(protocol_version=PROTOCOL_VERSION)
    c.connect()
    User = namedtuple('user', ('age', 'name'))
    self.assertRaises(UserTypeDoesNotExist, c.register_user_type, "some_bad_keyspace", "user", User)
    self.assertRaises(UserTypeDoesNotExist, c.register_user_type, "system", "user", User)
    c.shutdown()
def test_idle_heartbeat(self):
    interval = 1
    cluster = Cluster(protocol_version=PROTOCOL_VERSION, idle_heartbeat_interval=interval)
    if PROTOCOL_VERSION < 3:
        cluster.set_core_connections_per_host(HostDistance.LOCAL, 1)
    session = cluster.connect()

    # This test relies on impl details of connection req id management to see if heartbeats
    # are being sent. May need update if impl is changed
    connection_request_ids = {}
    for h in cluster.get_connection_holders():
        for c in h.get_connections():
            # make sure none are idle (should have startup messages)
            self.assertFalse(c.is_idle)
            with c.lock:
                connection_request_ids[id(c)] = deque(c.request_ids)  # copy of request ids

    # let two heartbeat intervals pass (first one had startup messages in it)
    time.sleep(2 * interval + interval / 10.)

    connections = [c for holders in cluster.get_connection_holders() for c in holders.get_connections()]

    # make sure requests were sent on all connections
    for c in connections:
        expected_ids = connection_request_ids[id(c)]
        expected_ids.rotate(-1)
        with c.lock:
            self.assertListEqual(list(c.request_ids), list(expected_ids))

    # assert idle status
    self.assertTrue(all(c.is_idle for c in connections))

    # send messages on all connections
    statements_and_params = [("SELECT release_version FROM system.local", ())] * len(cluster.metadata.all_hosts())
    results = execute_concurrent(session, statements_and_params)
    for success, result in results:
        self.assertTrue(success)

    # assert not idle status
    self.assertFalse(any(c.is_idle if not c.is_control_connection else False for c in connections))

    # holders include session pools and cc
    holders = cluster.get_connection_holders()
    self.assertIn(cluster.control_connection, holders)
    self.assertEqual(len(holders), len(cluster.metadata.all_hosts()) + 1)  # hosts pools, 1 for cc

    # include additional sessions
    session2 = cluster.connect()

    holders = cluster.get_connection_holders()
    self.assertIn(cluster.control_connection, holders)
    self.assertEqual(len(holders), 2 * len(cluster.metadata.all_hosts()) + 1)  # 2 sessions' hosts pools, 1 for cc

    cluster._idle_heartbeat.stop()
    cluster._idle_heartbeat.join()
    assert_quiescent_pool_state(self, cluster)

    cluster.shutdown()
def test_can_shutdown_asyncoreconnection_subclass(self):
    start_and_prime_singledc()

    class ExtendedConnection(AsyncoreConnection):
        pass

    cluster = Cluster(contact_points=["127.0.0.2"], connection_class=ExtendedConnection)
    cluster.connect()
    cluster.shutdown()
def getclustsess(self, keyspace=None):
    """
    Return a Cluster instance and a session object
    """
    cluster = Cluster([self.chost])  # e.g. Cluster(['192.168.1.1', '192.168.1.2'])
    if keyspace:
        session = cluster.connect()
        session.set_keyspace(keyspace)
    else:
        session = cluster.connect()
    return cluster, session
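A short usage sketch for getclustsess above; here obj stands for any instance whose chost attribute points at a reachable node (an assumption, since the enclosing class is not shown), and the caller remains responsible for shutting the cluster down:

# Hypothetical usage of the helper above.
cluster, session = obj.getclustsess(keyspace='mykeyspace')
try:
    for row in session.execute("SELECT release_version FROM system.local"):
        print(row.release_version)
finally:
    cluster.shutdown()  # also closes the session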
def test_export_schema(self):
    """
    Test export schema functionality
    """
    cluster = Cluster()
    cluster.connect()

    self.assertIsInstance(cluster.metadata.export_schema_as_string(), unicode)
def test_export_schema(self):
    """
    Test export schema functionality
    """
    cluster = Cluster(protocol_version=PROTOCOL_VERSION)
    cluster.connect()

    self.assertIsInstance(cluster.metadata.export_schema_as_string(), six.string_types)
def test_token(self):
    expected_node_count = len(get_cluster().nodes)

    cluster = Cluster(protocol_version=PROTOCOL_VERSION)
    cluster.connect()
    tmap = cluster.metadata.token_map
    self.assertTrue(issubclass(tmap.token_class, Token))
    self.assertEqual(expected_node_count, len(tmap.ring))
    cluster.shutdown()
def test_token(self):
    expected_node_count = len(get_cluster().nodes)

    cluster = Cluster()
    cluster.connect()
    tmap = cluster.metadata.token_map
    self.assertTrue(issubclass(tmap.token_class, Token))
    self.assertEqual(expected_node_count, len(tmap.ring))
    self.assertEqual(expected_node_count, len(tmap.tokens_to_hosts))
    cluster.shutdown()
def __init__(self):
    cluster = Cluster(["panoptes-cassandra.zooniverse.org"])

    try:
        self.cassandra_session = cluster.connect("active_weather")
    except InvalidRequest as e:
        # keyspace does not exist yet: create it, then reconnect to it
        print(e)
        self.cassandra_session = cluster.connect()
        self.cassandra_session.execute("CREATE KEYSPACE active_weather WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor' : 2 }")
        self.cassandra_session = cluster.connect('active_weather')
def _create_session(self):
    cassandra_ip = os.environ['CASSANDRA_PORT_9042_TCP_ADDR']
    cassandra_port = os.environ['CASSANDRA_PORT_9042_TCP_PORT']

    # Sometimes Cassandra is still starting, so we might need to wait
    for _ in range(10):
        try:
            cluster = Cluster([cassandra_ip], port=cassandra_port)
            return cluster.connect()
        except cassandra.cluster.NoHostAvailable:
            time.sleep(20)

    cluster = Cluster([cassandra_ip], port=cassandra_port)
    return cluster.connect()
def test_export_keyspace_schema(self):
    """
    Test export keyspace schema functionality
    """
    cluster = Cluster(protocol_version=PROTOCOL_VERSION)
    cluster.connect()

    for keyspace in cluster.metadata.keyspaces:
        keyspace_metadata = cluster.metadata.keyspaces[keyspace]
        self.assertIsInstance(keyspace_metadata.export_as_string(), six.string_types)
        self.assertIsInstance(keyspace_metadata.as_cql_query(), six.string_types)
def test_export_keyspace_schema(self):
    """
    Test export keyspace schema functionality
    """
    cluster = Cluster()
    cluster.connect()

    for keyspace in cluster.metadata.keyspaces:
        keyspace_metadata = cluster.metadata.keyspaces[keyspace]
        self.assertIsInstance(keyspace_metadata.export_as_string(), unicode)
        self.assertIsInstance(keyspace_metadata.as_cql_query(), unicode)
def test_metrics_per_cluster(self):
    """
    Test to validate that metrics can be scoped to individual clusters
    @since 3.6.0
    @jira_ticket PYTHON-561
    @expected_result metrics should be scoped to a cluster level

    @test_category metrics
    """

    cluster2 = Cluster(metrics_enabled=True, protocol_version=PROTOCOL_VERSION,
                       default_retry_policy=FallthroughRetryPolicy())
    cluster2.connect(self.ks_name, wait_for_all_pools=True)

    self.assertEqual(len(cluster2.metadata.all_hosts()), 3)

    query = SimpleStatement("SELECT * FROM {0}.{0}".format(self.ks_name), consistency_level=ConsistencyLevel.ALL)
    self.session.execute(query)

    # Pause node so it shows as unreachable to coordinator
    get_node(1).pause()

    try:
        # Test write
        query = SimpleStatement("INSERT INTO {0}.{0} (k, v) VALUES (2, 2)".format(self.ks_name), consistency_level=ConsistencyLevel.ALL)
        with self.assertRaises(WriteTimeout):
            self.session.execute(query, timeout=None)
    finally:
        get_node(1).resume()

    # Change the scales stats_name of the cluster2
    cluster2.metrics.set_stats_name('cluster2-metrics')

    stats_cluster1 = self.cluster.metrics.get_stats()
    stats_cluster2 = cluster2.metrics.get_stats()

    # Test direct access to stats
    self.assertEqual(1, self.cluster.metrics.stats.write_timeouts)
    self.assertEqual(0, cluster2.metrics.stats.write_timeouts)

    # Test direct access to a child stats
    self.assertNotEqual(0.0, self.cluster.metrics.request_timer['mean'])
    self.assertEqual(0.0, cluster2.metrics.request_timer['mean'])

    # Test access via metrics.get_stats()
    self.assertNotEqual(0.0, stats_cluster1['request_timer']['mean'])
    self.assertEqual(0.0, stats_cluster2['request_timer']['mean'])

    # Test access by stats_name
    self.assertEqual(0.0, scales.getStats()['cluster2-metrics']['request_timer']['mean'])

    cluster2.shutdown()
#newRating = dataOutput[YColumn] #join updated rating updatedEmailList = pd.concat([centralEmailList, dataOutput[["responseRating"]]], axis=1) if updatedEmailList["responseRating"].isnull().any(): raise ValueError("we have missed some ratings, which is not expected") if updatedEmailList.shape[0] != centralEmailList.shape[0]: raise ValueError("we have some missing values, which is not expected") #print updatedEmailList.head(10) updatedEmailList = updatedEmailList.sort([YColumn],ascending=[False]) #print updatedEmailList.head(5) return(updatedEmailList) #connect to cassandra print "connecting to cassandra for local mode" cluster = Cluster() session = cluster.connect('marketingApp') session.row_factory = dict_factory #define the email rating paramters ratingParameters = {"no":0,"open":1,"click":2,"sold":3} minValidResponse = 10 #load the sent email list (the most recent) from cassandra print "retrieving the most recent sent email list as training data from cassandra" rawEmailList = session.execute(""" select * from "sentEmailList" """) #convert paged results to a list then a dataframe sentEmailList = pd.DataFrame(list(rawEmailList)) #pre-check and summarize all responses
from cassandra.cluster import Cluster
from hdt import HDTDocument
from cassandra.policies import DCAwareRoundRobinPolicy
from cassandra.query import BatchStatement
from cassandra.query import SimpleStatement
import time
import datetime
from cassandra.util import uuid_from_time, datetime_from_uuid1

cluster = Cluster(
    ['172.16.134.144', '172.16.134.142', '172.16.134.143'],
    load_balancing_policy=DCAwareRoundRobinPolicy(local_dc='dc1'))
# cluster = Cluster()
session = cluster.connect()

# In this script we insert rows under a composite primary key so that we can
# later run queries against our tokens.

# Creating keyspace
session.execute(
    """
    CREATE KEYSPACE IF NOT EXISTS pkspo
    WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor' : 1 }
    """
)

# switch to the right KEYSPACE
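The snippet breaks off at the comment above; a plausible continuation, assumed here only because the pkspo keyspace was just created, would be:

# Assumed next step: switch the session to the keyspace created above.
session.set_keyspace('pkspo')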
""" Real-time processing """ from pyleus.storm import SimpleBolt import simplejson as json import datetime import time from cassandra.cluster import Cluster from cassandra.query import BatchStatement, PreparedStatement cluster = Cluster(['172.31.1.44', '172.31.1.45', '172.31.1.46']) session = cluster.connect('flashback') cql_query = "INSERT INTO rt_reddit (secslot,subreddit,author,created_utc,body) VALUES (?,?,?,?,?)" cql_reddit_stmt = session.prepare(cql_query) def extract_json(json_line): """ simple json is slightly faster to use to load jsons than the default json """ try: item = json.loads(json_line) except: return None reddit = {} reddit['author'] = item['author'] reddit['subreddit'] = item['subreddit'] reddit['body'] = item['body'] reddit['created_utc'] = item['created_utc'] return reddit
class QueryPagingTests(unittest.TestCase): def setUp(self): if PROTOCOL_VERSION < 2: raise unittest.SkipTest( "Protocol 2.0+ is required for Paging state, currently testing against %r" % (PROTOCOL_VERSION, )) self.cluster = Cluster(protocol_version=PROTOCOL_VERSION) if PROTOCOL_VERSION < 3: self.cluster.set_core_connections_per_host(HostDistance.LOCAL, 1) self.session = self.cluster.connect(wait_for_all_pools=True) self.session.execute("TRUNCATE test3rf.test") def tearDown(self): self.cluster.shutdown() def test_paging(self): statements_and_params = zip( cycle(["INSERT INTO test3rf.test (k, v) VALUES (%s, 0)"]), [(i, ) for i in range(100)]) execute_concurrent(self.session, list(statements_and_params)) prepared = self.session.prepare("SELECT * FROM test3rf.test") for fetch_size in (2, 3, 7, 10, 99, 100, 101, 10000): self.session.default_fetch_size = fetch_size self.assertEqual( 100, len(list(self.session.execute("SELECT * FROM test3rf.test")))) statement = SimpleStatement("SELECT * FROM test3rf.test") self.assertEqual(100, len(list(self.session.execute(statement)))) self.assertEqual(100, len(list(self.session.execute(prepared)))) def test_paging_state(self): """ Test to validate paging state api @since 3.7.0 @jira_ticket PYTHON-200 @expected_result paging state should returned should be accurate, and allow for queries to be resumed. @test_category queries """ statements_and_params = zip( cycle(["INSERT INTO test3rf.test (k, v) VALUES (%s, 0)"]), [(i, ) for i in range(100)]) execute_concurrent(self.session, list(statements_and_params)) list_all_results = [] self.session.default_fetch_size = 3 result_set = self.session.execute("SELECT * FROM test3rf.test") while (result_set.has_more_pages): for row in result_set.current_rows: self.assertNotIn(row, list_all_results) list_all_results.extend(result_set.current_rows) page_state = result_set.paging_state result_set = self.session.execute("SELECT * FROM test3rf.test", paging_state=page_state) if (len(result_set.current_rows) > 0): list_all_results.append(result_set.current_rows) self.assertEqual(len(list_all_results), 100) def test_paging_verify_writes(self): statements_and_params = zip( cycle(["INSERT INTO test3rf.test (k, v) VALUES (%s, 0)"]), [(i, ) for i in range(100)]) execute_concurrent(self.session, statements_and_params) prepared = self.session.prepare("SELECT * FROM test3rf.test") for fetch_size in (2, 3, 7, 10, 99, 100, 101, 10000): self.session.default_fetch_size = fetch_size results = self.session.execute("SELECT * FROM test3rf.test") result_array = set() result_set = set() for result in results: result_array.add(result.k) result_set.add(result.v) self.assertEqual(set(range(100)), result_array) self.assertEqual(set([0]), result_set) statement = SimpleStatement("SELECT * FROM test3rf.test") results = self.session.execute(statement) result_array = set() result_set = set() for result in results: result_array.add(result.k) result_set.add(result.v) self.assertEqual(set(range(100)), result_array) self.assertEqual(set([0]), result_set) results = self.session.execute(prepared) result_array = set() result_set = set() for result in results: result_array.add(result.k) result_set.add(result.v) self.assertEqual(set(range(100)), result_array) self.assertEqual(set([0]), result_set) def test_paging_verify_with_composite_keys(self): ddl = ''' CREATE TABLE test3rf.test_paging_verify_2 ( k1 int, k2 int, v int, PRIMARY KEY(k1, k2) )''' self.session.execute(ddl) statements_and_params = zip( cycle([ "INSERT INTO test3rf.test_paging_verify_2 " "(k1, k2, v) 
VALUES (0, %s, %s)" ]), [(i, i + 1) for i in range(100)]) execute_concurrent(self.session, statements_and_params) prepared = self.session.prepare( "SELECT * FROM test3rf.test_paging_verify_2") for fetch_size in (2, 3, 7, 10, 99, 100, 101, 10000): self.session.default_fetch_size = fetch_size results = self.session.execute( "SELECT * FROM test3rf.test_paging_verify_2") result_array = [] value_array = [] for result in results: result_array.append(result.k2) value_array.append(result.v) self.assertSequenceEqual(range(100), result_array) self.assertSequenceEqual(range(1, 101), value_array) statement = SimpleStatement( "SELECT * FROM test3rf.test_paging_verify_2") results = self.session.execute(statement) result_array = [] value_array = [] for result in results: result_array.append(result.k2) value_array.append(result.v) self.assertSequenceEqual(range(100), result_array) self.assertSequenceEqual(range(1, 101), value_array) results = self.session.execute(prepared) result_array = [] value_array = [] for result in results: result_array.append(result.k2) value_array.append(result.v) self.assertSequenceEqual(range(100), result_array) self.assertSequenceEqual(range(1, 101), value_array) def test_async_paging(self): statements_and_params = zip( cycle(["INSERT INTO test3rf.test (k, v) VALUES (%s, 0)"]), [(i, ) for i in range(100)]) execute_concurrent(self.session, list(statements_and_params)) prepared = self.session.prepare("SELECT * FROM test3rf.test") for fetch_size in (2, 3, 7, 10, 99, 100, 101, 10000): self.session.default_fetch_size = fetch_size self.assertEqual( 100, len( list( self.session.execute_async( "SELECT * FROM test3rf.test").result()))) statement = SimpleStatement("SELECT * FROM test3rf.test") self.assertEqual( 100, len(list(self.session.execute_async(statement).result()))) self.assertEqual( 100, len(list(self.session.execute_async(prepared).result()))) def test_async_paging_verify_writes(self): ddl = ''' CREATE TABLE test3rf.test_async_paging_verify ( k1 int, k2 int, v int, PRIMARY KEY(k1, k2) )''' self.session.execute(ddl) statements_and_params = zip( cycle([ "INSERT INTO test3rf.test_async_paging_verify " "(k1, k2, v) VALUES (0, %s, %s)" ]), [(i, i + 1) for i in range(100)]) execute_concurrent(self.session, statements_and_params) prepared = self.session.prepare( "SELECT * FROM test3rf.test_async_paging_verify") for fetch_size in (2, 3, 7, 10, 99, 100, 101, 10000): self.session.default_fetch_size = fetch_size results = self.session.execute_async( "SELECT * FROM test3rf.test_async_paging_verify").result() result_array = [] value_array = [] for result in results: result_array.append(result.k2) value_array.append(result.v) self.assertSequenceEqual(range(100), result_array) self.assertSequenceEqual(range(1, 101), value_array) statement = SimpleStatement( "SELECT * FROM test3rf.test_async_paging_verify") results = self.session.execute_async(statement).result() result_array = [] value_array = [] for result in results: result_array.append(result.k2) value_array.append(result.v) self.assertSequenceEqual(range(100), result_array) self.assertSequenceEqual(range(1, 101), value_array) results = self.session.execute_async(prepared).result() result_array = [] value_array = [] for result in results: result_array.append(result.k2) value_array.append(result.v) self.assertSequenceEqual(range(100), result_array) self.assertSequenceEqual(range(1, 101), value_array) def test_paging_callbacks(self): """ Test to validate callback api @since 3.9.0 @jira_ticket PYTHON-733 @expected_result callbacks shouldn't 
be called twice per message and the fetch_size should be handled in a transparent way to the user @test_category queries """ statements_and_params = zip( cycle(["INSERT INTO test3rf.test (k, v) VALUES (%s, 0)"]), [(i, ) for i in range(100)]) execute_concurrent(self.session, list(statements_and_params)) prepared = self.session.prepare("SELECT * FROM test3rf.test") for fetch_size in (2, 3, 7, 10, 99, 100, 101, 10000): self.session.default_fetch_size = fetch_size future = self.session.execute_async("SELECT * FROM test3rf.test", timeout=20) event = Event() counter = count() number_of_calls = count() def handle_page(rows, future, counter, number_of_calls): next(number_of_calls) for row in rows: next(counter) if future.has_more_pages: future.start_fetching_next_page() else: event.set() def handle_error(err): event.set() self.fail(err) future.add_callbacks(callback=handle_page, callback_args=(future, counter, number_of_calls), errback=handle_error) event.wait() self.assertEqual(next(number_of_calls), 100 // fetch_size + 1) self.assertEqual(next(counter), 100) # simple statement future = self.session.execute_async( SimpleStatement("SELECT * FROM test3rf.test"), timeout=20) event.clear() counter = count() number_of_calls = count() future.add_callbacks(callback=handle_page, callback_args=(future, counter, number_of_calls), errback=handle_error) event.wait() self.assertEqual(next(number_of_calls), 100 // fetch_size + 1) self.assertEqual(next(counter), 100) # prepared statement future = self.session.execute_async(prepared, timeout=20) event.clear() counter = count() number_of_calls = count() future.add_callbacks(callback=handle_page, callback_args=(future, counter, number_of_calls), errback=handle_error) event.wait() self.assertEqual(next(number_of_calls), 100 // fetch_size + 1) self.assertEqual(next(counter), 100) def test_concurrent_with_paging(self): statements_and_params = zip( cycle(["INSERT INTO test3rf.test (k, v) VALUES (%s, 0)"]), [(i, ) for i in range(100)]) execute_concurrent(self.session, list(statements_and_params)) prepared = self.session.prepare("SELECT * FROM test3rf.test") for fetch_size in (2, 3, 7, 10, 99, 100, 101, 10000): self.session.default_fetch_size = fetch_size results = execute_concurrent_with_args(self.session, prepared, [None] * 10) self.assertEqual(10, len(results)) for (success, result) in results: self.assertTrue(success) self.assertEqual(100, len(list(result))) def test_fetch_size(self): """ Ensure per-statement fetch_sizes override the default fetch size. 
""" statements_and_params = zip( cycle(["INSERT INTO test3rf.test (k, v) VALUES (%s, 0)"]), [(i, ) for i in range(100)]) execute_concurrent(self.session, list(statements_and_params)) prepared = self.session.prepare("SELECT * FROM test3rf.test") self.session.default_fetch_size = 10 result = self.session.execute(prepared, []) self.assertTrue(result.has_more_pages) self.session.default_fetch_size = 2000 result = self.session.execute(prepared, []) self.assertFalse(result.has_more_pages) self.session.default_fetch_size = None result = self.session.execute(prepared, []) self.assertFalse(result.has_more_pages) self.session.default_fetch_size = 10 prepared.fetch_size = 2000 result = self.session.execute(prepared, []) self.assertFalse(result.has_more_pages) prepared.fetch_size = None result = self.session.execute(prepared, []) self.assertFalse(result.has_more_pages) prepared.fetch_size = 10 result = self.session.execute(prepared, []) self.assertTrue(result.has_more_pages) prepared.fetch_size = 2000 bound = prepared.bind([]) result = self.session.execute(bound, []) self.assertFalse(result.has_more_pages) prepared.fetch_size = None bound = prepared.bind([]) result = self.session.execute(bound, []) self.assertFalse(result.has_more_pages) prepared.fetch_size = 10 bound = prepared.bind([]) result = self.session.execute(bound, []) self.assertTrue(result.has_more_pages) bound.fetch_size = 2000 result = self.session.execute(bound, []) self.assertFalse(result.has_more_pages) bound.fetch_size = None result = self.session.execute(bound, []) self.assertFalse(result.has_more_pages) bound.fetch_size = 10 result = self.session.execute(bound, []) self.assertTrue(result.has_more_pages) s = SimpleStatement("SELECT * FROM test3rf.test", fetch_size=None) result = self.session.execute(s, []) self.assertFalse(result.has_more_pages) s = SimpleStatement("SELECT * FROM test3rf.test") result = self.session.execute(s, []) self.assertTrue(result.has_more_pages) s = SimpleStatement("SELECT * FROM test3rf.test") s.fetch_size = None result = self.session.execute(s, []) self.assertFalse(result.has_more_pages)
class DataProcess(Process): """ This process handles all data submissions is_database_raw is a bool, if True, will write data to raw-db, else to decoded-db) """ def __init__(self, is_database_raw, verbosity=0): """ Starts up the Data handling Process """ super(DataProcess, self).__init__() if is_database_raw: self.input_exchange = 'data-pipeline-in' self.queue = 'db-raw' self.statement = "INSERT INTO sensor_data_raw (node_id, date, plugin_name, plugin_version, plugin_instance, timestamp, parameter, data) VALUES (?, ?, ?, ?, ?, ?, ?, ?)" self.function_ExtractValuesFromMessage = self.ExtractValuesFromMessage_raw else: self.input_exchange = 'plugins-out' self.queue = 'db-decoded' self.statement = "INSERT INTO sensor_data_decoded (node_id, date, ingest_id, meta_id, timestamp, data_set, sensor, parameter, data, unit) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)" self.function_ExtractValuesFromMessage = self.ExtractValuesFromMessage_decoded logger.info("Initializing DataProcess") # Set up the Rabbit connection #self.connection = pika.BlockingConnection(pika.ConnectionParameters(host='localhost')) #Connect to rabbitMQ while True: try: self.connection = pika.BlockingConnection(pika_params) except Exception as e: logger.error( "QueueToDb: Could not connect to RabbitMQ server \"%s\": %s" % (pika_params.host, e)) time.sleep(1) continue break logger.info("Connected to RabbitMQ server \"%s\"" % (pika_params.host)) self.verbosity = verbosity self.numInserted = 0 self.numFailed = 0 self.session = None self.cluster = None self.prepared_statement = None self.cassandra_connect() self.channel = self.connection.channel() self.channel.basic_qos(prefetch_count=1) # Declare this process's queue self.channel.queue_declare(self.queue, durable=True) self.channel.queue_bind(exchange=self.input_exchange, queue=self.queue) try: self.channel.basic_consume(self.callback, queue=self.queue) except KeyboardInterrupt: logger.info("exiting.") sys.exit(0) except Exception as e: logger.error("error: %s" % (str(e))) def callback(self, ch, method, props, body): #TODO: this simply drops failed messages, might find a better solution!? Keeping them has the risk of spamming RabbitMQ if self.verbosity > 1: print('######################################') print('method = ', method) print('props = ', props) print('body = ', body) '''EXAMPLE: props = <BasicProperties(['app_id=coresense:3', 'content_type=b', 'delivery_mode=2', 'reply_to=0000001e06107d97', 'timestamp=1476135836151', 'type=frame'])> ''' try: for iValues, values in enumerate( self.function_ExtractValuesFromMessage(props, body)): # Send the data off to Cassandra if self.verbosity > 1: print('iValues =', iValues) print(' values =', values) self.cassandra_insert(values) except Exception as e: values = None self.numFailed += 1 logger.error("Error inserting data: %s" % (str(e))) logger.error(' method = {}'.format(repr(method))) logger.error(' props = {}'.format(repr(props))) logger.error(' body = {}'.format(repr(body))) ch.basic_ack(delivery_tag=method.delivery_tag) return ch.basic_ack(delivery_tag=method.delivery_tag) if values: self.numInserted += 1 if self.numInserted % 1000 == 0: logger.debug(' inserted {} / {} raw samples of data'.format( self.numInserted, self.numInserted + self.numFailed)) # Parse a message of sensor data and convert to the values to be inserted into a row in the db. NOTE: this is a generator - because the decoded messages produce multiple rows of data. 
def ExtractValuesFromMessage_raw(self, props, body): if self.verbosity > 0: print('props.app_id =', props.app_id) versionStrings = props.app_id.split(':') sampleDatetime = datetime.datetime.utcfromtimestamp( float(props.timestamp) / 1000.0) sampleDate = sampleDatetime.strftime('%Y-%m-%d') node_id = props.reply_to #ingest_id = props.ingest_id ##props.get('ingest_id', 0) #print('ingest_id: ', ingest_id) plugin_name = versionStrings[0] plugin_version = versionStrings[1] plugin_instance = '0' if ( len(versionStrings) < 3) else versionStrings[2] timestamp = int(props.timestamp) parameter = props.type data = str(binascii.hexlify(body)) values = (node_id, sampleDate, plugin_name, plugin_version, plugin_instance, timestamp, parameter, data) if self.verbosity > 0: print(' node_id = ', node_id) print(' date = ', sampleDate) #print(' ingest_id = ', ingest_id ) print(' plugin_name = ', plugin_name) print(' plugin_version = ', plugin_version) print(' plugin_instance = ', plugin_instance) print(' timestamp = ', timestamp) print(' parameter = ', parameter) print(' data = ', data) yield values def ExtractValuesFromMessage_decoded(self, props, body): #(node_id, date, meta_id, timestamp, data_set, sensor, parameter, data, unit) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)" dictData = json.loads(body.decode()) # same for each parameter:value pair sampleDatetime = datetime.datetime.utcfromtimestamp( float(props.timestamp) / 1000.0) node_id = props.reply_to sampleDate = sampleDatetime.strftime('%Y-%m-%d') ingest_id = 0 # props.ingest_id ##props.get('ingest_id', 0) #print('ingest_id: ', ingest_id) meta_id = 0 #props.meta_id timestamp = int(props.timestamp) data_set = props.app_id sensor = props.type unit = 'NO_UNIT' #props.unit for k in dictData.keys(): parameter = k data = str(dictData[k]) values = (node_id, sampleDate, ingest_id, meta_id, timestamp, data_set, sensor, parameter, data, unit) if self.verbosity > 0: print(' node_id = ', node_id) print(' date = ', sampleDate) print(' ingest_id = ', ingest_id) print(' meta_id = ', meta_id) print(' timestamp = ', timestamp) print(' data_set = ', data_set) print(' sensor = ', sensor) print(' parameter = ', parameter) print(' data = ', data) print(' unit = ', unit) yield values def cassandra_insert(self, values): if not self.session: self.cassandra_connect() if not self.prepared_statement: try: self.prepared_statement = self.session.prepare(self.statement) except Exception as e: logger.error("Error preparing statement: (%s) %s" % (type(e).__name__, str(e))) raise if self.verbosity > 1: logger.debug("inserting: %s" % (str(values))) try: bound_statement = self.prepared_statement.bind(values) except Exception as e: logger.error( "QueueToDb: Error binding cassandra cql statement:(%s) %s -- values was: %s" % (type(e).__name__, str(e), str(values))) raise connection_retry_delay = 1 while True: # this is long term storage try: self.session.execute(bound_statement) except TypeError as e: logger.error( "QueueToDb: (TypeError) Error executing cassandra cql statement: %s -- values was: %s" % (str(e), str(values))) break except Exception as e: logger.error( "QueueToDb: Error (type: %s) executing cassandra cql statement: %s -- values was: %s" % (type(e).__name__, str(e), str(values))) if "TypeError" in str(e): logger.debug( "detected TypeError, will ignore this message") break self.cassandra_connect() time.sleep(connection_retry_delay) if connection_retry_delay < 10: connection_retry_delay += 1 continue break def cassandra_connect(self): bDone = False iTry = 0 while not bDone and (iTry < 
5): if self.cluster: try: self.cluster.shutdown() except: pass self.cluster = Cluster(contact_points=[CASSANDRA_HOST]) self.session = None iTry2 = 0 while not bDone and (iTry2 < 5): iTry2 += 1 try: # Might not immediately connect. That's fine. It'll try again if/when it needs to. self.session = self.cluster.connect('waggle') if self.session: bDone = True except: logger.warning( "QueueToDb: WARNING: Cassandra connection to " + CASSANDRA_HOST + " failed.") logger.warning( "QueueToDb: The process will attempt to re-connect at a later time." ) if not bDone: time.sleep(3) def run(self): self.cassandra_connect() self.channel.start_consuming() def join(self): super(DataProcess, self).terminate() self.connection.close(0) if self.cluster: self.cluster.shutdown()
def token_aware(self, keyspace, use_prepared=False): use_singledc() cluster = Cluster(load_balancing_policy=TokenAwarePolicy( RoundRobinPolicy()), protocol_version=PROTOCOL_VERSION) session = cluster.connect() wait_for_up(cluster, 1, wait=False) wait_for_up(cluster, 2, wait=False) wait_for_up(cluster, 3) create_schema(session, keyspace, replication_factor=1) self._insert(session, keyspace) self._query(session, keyspace, use_prepared=use_prepared) self.coordinator_stats.assert_query_count_equals(self, 1, 0) self.coordinator_stats.assert_query_count_equals(self, 2, 12) self.coordinator_stats.assert_query_count_equals(self, 3, 0) self.coordinator_stats.reset_counts() self._query(session, keyspace, use_prepared=use_prepared) self.coordinator_stats.assert_query_count_equals(self, 1, 0) self.coordinator_stats.assert_query_count_equals(self, 2, 12) self.coordinator_stats.assert_query_count_equals(self, 3, 0) self.coordinator_stats.reset_counts() force_stop(2) wait_for_down(cluster, 2, wait=True) try: self._query(session, keyspace, use_prepared=use_prepared) self.fail() except Unavailable as e: self.assertEqual(e.consistency, 1) self.assertEqual(e.required_replicas, 1) self.assertEqual(e.alive_replicas, 0) self.coordinator_stats.reset_counts() start(2) wait_for_up(cluster, 2, wait=True) self._query(session, keyspace, use_prepared=use_prepared) self.coordinator_stats.assert_query_count_equals(self, 1, 0) self.coordinator_stats.assert_query_count_equals(self, 2, 12) self.coordinator_stats.assert_query_count_equals(self, 3, 0) self.coordinator_stats.reset_counts() stop(2) wait_for_down(cluster, 2, wait=True) try: self._query(session, keyspace, use_prepared=use_prepared) self.fail() except Unavailable: pass self.coordinator_stats.reset_counts() start(2) wait_for_up(cluster, 2, wait=True) decommission(2) wait_for_down(cluster, 2, wait=True) self._query(session, keyspace, use_prepared=use_prepared) results = set([ self.coordinator_stats.get_query_count(1), self.coordinator_stats.get_query_count(3) ]) self.assertEqual(results, set([0, 12])) self.coordinator_stats.assert_query_count_equals(self, 2, 0) cluster.shutdown()
kafka_broker = args.kafka_broker
cassandra_broker = args.cassandra_broker
keyspace = args.keyspace
table = args.table

# create kafka consumer
consumer = KafkaConsumer(  # ??? data structure python ???
    topic_name,
    bootstrap_servers=kafka_broker
)

# create a cassandra session
cassandra_cluster = Cluster(
    contact_points=cassandra_broker.split(',')
)
session = cassandra_cluster.connect()

# the user passes the keyspace and table as arguments;
# if the keyspace and table do not exist yet, create them
session.execute("CREATE KEYSPACE IF NOT EXISTS %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}" % keyspace)
session.set_keyspace(keyspace)
session.execute("CREATE TABLE IF NOT EXISTS %s (symbol text, trade_time timestamp, price float, PRIMARY KEY(symbol, trade_time))" % table)

atexit.register(shutdown_hook, consumer, session)

for msg in consumer:
    # logger.debug(msg)
    save_data(msg.value, session)
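The loop above relies on save_data and shutdown_hook helpers that are not part of this snippet; a rough sketch, assuming each Kafka message carries a JSON payload with symbol, trade_time (epoch seconds) and price fields, and reusing the table variable defined above:

import json
import logging
from datetime import datetime

logger = logging.getLogger(__name__)

def save_data(raw_message, session):
    # Assumption: JSON payload with symbol / trade_time / price fields.
    try:
        record = json.loads(raw_message)
        session.execute(
            "INSERT INTO %s (symbol, trade_time, price) VALUES (%%s, %%s, %%s)" % table,
            (record['symbol'],
             datetime.utcfromtimestamp(float(record['trade_time'])),
             float(record['price'])))
    except Exception:
        logger.exception("failed to persist message")

def shutdown_hook(consumer, session):
    # Close the Kafka consumer and tear down the Cassandra session on exit.
    consumer.close()
    session.shutdown()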
class Cassandra: def __init__(self): self.MORPHL_SERVER_IP_ADDRESS = getenv('MORPHL_SERVER_IP_ADDRESS') self.MORPHL_CASSANDRA_USERNAME = getenv('MORPHL_CASSANDRA_USERNAME') self.MORPHL_CASSANDRA_PASSWORD = getenv('MORPHL_CASSANDRA_PASSWORD') self.MORPHL_CASSANDRA_KEYSPACE = getenv('MORPHL_CASSANDRA_KEYSPACE') self.CASS_REQ_TIMEOUT = 3600.0 self.auth_provider = PlainTextAuthProvider( username=self.MORPHL_CASSANDRA_USERNAME, password=self.MORPHL_CASSANDRA_PASSWORD) self.cluster = Cluster([self.MORPHL_SERVER_IP_ADDRESS], auth_provider=self.auth_provider) self.session = self.cluster.connect(self.MORPHL_CASSANDRA_KEYSPACE) self.session.row_factory = dict_factory self.session.default_fetch_size = 100 self.prepare_statements() def prepare_statements(self): """ Prepare statements for database select queries """ self.prep_stmts = {'predictions': {}, 'models': {}, 'access_logs': {}} template_for_single_row = 'SELECT * FROM ga_chp_predictions WHERE client_id = ? LIMIT 1' template_for_multiple_rows = 'SELECT client_id, prediction FROM ga_chp_predictions_by_prediction_date WHERE prediction_date = ?' template_for_predictions_statistics = 'SELECT loyal, neutral, churning, lost FROM ga_chp_predictions_statistics WHERE prediction_date= ? LIMIT 1' template_for_models_rows = 'SELECT accuracy, loss, day_as_str FROM ga_chp_valid_models WHERE is_model_valid = True LIMIT 20 ALLOW FILTERING' template_for_access_log_insert = 'INSERT INTO ga_chp_predictions_access_logs (client_id, tstamp, prediction) VALUES (?,?,?)' self.prep_stmts['predictions']['single'] = self.session.prepare( template_for_single_row) self.prep_stmts['predictions']['multiple'] = self.session.prepare( template_for_multiple_rows) self.prep_stmts['predictions']['statistics'] = self.session.prepare( template_for_predictions_statistics) self.prep_stmts['models']['multiple'] = self.session.prepare( template_for_models_rows) self.prep_stmts['access_logs']['insert'] = self.session.prepare( template_for_access_log_insert) def retrieve_prediction(self, client_id): bind_list = [client_id] return self.session.execute( self.prep_stmts['predictions']['single'], bind_list, timeout=self.CASS_REQ_TIMEOUT)._current_rows def retrieve_predictions(self, paging_state, date): bind_list = [date] # Check if paginated request if paging_state is not None: try: # Convert page from hex format to bytes previous_paging_state = bytes.fromhex(paging_state) results = self.session.execute( self.prep_stmts['predictions']['multiple'], bind_list, paging_state=previous_paging_state, timeout=self.CASS_REQ_TIMEOUT) except (ValueError, ProtocolException): # If paging_state causes an error, return invalid request since the format was probably valid but the actual value was wrong return {'status': 0, 'error': 'Invalid pagination request.'} else: # If no page is set get first page of results results = self.session.execute( self.prep_stmts['predictions']['multiple'], bind_list, timeout=self.CASS_REQ_TIMEOUT) return { 'status': 1, 'predictions': results._current_rows, 'next_paging_state': results.paging_state.hex() if results.has_more_pages == True else 0 } def get_statistics(self, date): bind_list = [date] response = self.session.execute( self.prep_stmts['predictions']['statistics'], bind_list, timeout=self.CASS_REQ_TIMEOUT)._current_rows return {} if not response else response[0] def get_model_statistics(self): return self.session.execute( self.prep_stmts['models']['multiple'], timeout=self.CASS_REQ_TIMEOUT)._current_rows def insert_access_log(self, client_id, p): bind_list = [ client_id, 
datetime.now(), -1 if len(p) == 0 else p[0]['prediction'] ] return self.session.execute(self.prep_stmts['access_logs']['insert'], bind_list, timeout=self.CASS_REQ_TIMEOUT)
# Fragment: body of a Flask upload/predict view function (the enclosing def is not shown).
    if 'file' not in request.files:
        flash('No file part')
        return redirect(request.url)
    file = request.files['file']

    # In case the file has an empty name
    if file.filename == '':
        flash('No selected file')
        return redirect(request.url)

    # Everything is correct and we can run the prediction
    if file and allowed_file(file.filename):
        # save and read the uploaded image
        filename = secure_filename(file.filename)
        file.save(secure_filename(file.filename))
        image = Image.open(file.filename)
        # flatten_img = np.reshape(image, 784)
        app = Predict()
        x = app.predict(image)
        current_time = str(datetime.datetime.now())
        session.execute(
            """
            INSERT INTO demandtable (time, filename, result)
            VALUES (%s, %s, %s)
            """,
            (current_time, filename, x))
        return 'Recognition result: ' + x


if __name__ == '__main__':
    cluster = Cluster(contact_points=['127.0.0.1'], port=80)
    session = cluster.connect(KEYSPACE)
def getdata(searchname): searchname = parse.unquote( searchname) # Decode URL, e.g. turn %20 into space etc cluster = Cluster() # Connect to local host on default port 9042 session = cluster.connect('car_pricing') # Connect to car_pricing keyspace # Get adIDs into list related to our search name cql = 'SELECT DISTINCT searchname, advertid FROM car_pricing.searchdata;' prepStatement = session.prepare(cql) queryResults = session.execute(prepStatement) adIDs = [] for qr in queryResults: if qr[0] == searchname: # If this is a result from our desired search then add it to the list adIDs.append(qr[1]) # List columns we want to use as features (or to create features from) and build up cql query colListOther = [ 'advertid', 'plate', 'bodytype', 'transmission', 'fueltype', 'sellertype', 'make', 'model', 'dealername', 'location', 'searchcriteria', 'distancefromyou', 'features', 'adtitle', 'foundtime' ] colListPlottable = [ 'year', 'mileage', 'enginesize', 'bhp', 'price', 'averagempg', 'predictedprice' ] colListPlottableFriendly = [ 'Registration Year', 'Mileage (miles)', 'Engine Size (L)', 'Engine Power (BHP)', 'Price (£)', 'Avg. Fuel Consumpt. (mpg)', 'Predicted Price (£)' ] cql = 'SELECT ' + ','.join( colListPlottable + colListOther ) + ' FROM car_pricing.searchdata WHERE searchname = ? AND advertid = ? LIMIT 1;' prepStatement = session.prepare(cql) # Create data frame to store results df_D3data = pd.DataFrame(columns=(colListPlottable + colListOther)) for adID in adIDs: # Query to get the latest information (latest data gathering time) for each advert queryResults = session.execute(prepStatement, [searchname, adID]) #df_D3data = df_D3data.append(pd.DataFrame(data = [list(queryResults[0])], columns = (colListPlottable + colListOther))) # Note that list is embedded in another list df_D3data = df_D3data.append( pandas_factory((colListPlottable + colListOther), queryResults)) # Add advert age to the data frame df_D3data['advertage_days'] = df_D3data['advertid'].apply(compare_dates) colListPlottable += ['advertage_days'] colListPlottableFriendly += ['Advert Age (days)'] session.shutdown() cluster.shutdown() # Remove any points which are not valid, i.e. 
NaN, None, etc df_D3data['predictedprice'] = 0 df_D3data = df_D3data[df_D3data.notnull().all(axis=1)] # Predict price based on parameters and saved model X = pd.get_dummies( df_D3data, dummy_na=False, columns=['bodytype', 'fueltype', 'make', 'model', 'sellertype']) gbr_gscv = joblib.load('scraper/price_predictor.sav') dfColList = joblib.load('scraper/price_predictor_columns.sav') X = X.reindex( columns=dfColList, fill_value=0 ) # Fill all the one hot encoded columns with zero if they don't exist to ensure model is in correct shape to do predictions df_D3data['predictedprice'] = gbr_gscv.predict(X) # Calculate price difference and add to data frame and column lists df_D3data['pricediff'] = df_D3data['price'] - df_D3data['predictedprice'] colListPlottable += ['pricediff'] colListPlottableFriendly += ['Price Difference (£)'] # Add advert URL so you can open it directly in Autotrader df_D3data[ 'advertURL'] = 'https://www.autotrader.co.uk/classified/advert/' + df_D3data[ 'advertid'] # Required to generate index for DF so that it can be turned into JSON df_D3data = df_D3data.reset_index() # Prepare columns for output by sorting in alphabetical order and putting into dictionary for output colListPlottableFriendly, colListPlottable = ( list(x) for x in zip(*sorted(zip(colListPlottableFriendly, colListPlottable), key=lambda pair: pair[0])) ) # Taken from https://stackoverflow.com/questions/13668393/python-sorting-two-lists colOutputList = [{ 'name': n, 'friendly_name': fn } for n, fn in zip(colListPlottable, colListPlottableFriendly)] response = jsonify({ 'data': df_D3data.to_dict(orient='records'), 'plottable_columns': colOutputList }) return response
from cassandra.cluster import Cluster

# default cassandra docker is 127.0.0.1:9042
cluster = Cluster()
session = cluster.connect('student_keyspace')

student_insert_str = "INSERT INTO student_by_department (department, id, name) VALUES ('%s', %d, '%s');"
new_si_student = ['ilham', 'Sasa', 'Kevin']
for student_id, student_name in zip(range(len(new_si_student)), new_si_student):
    session.execute(student_insert_str % ('si', student_id, student_name))

si_students = session.execute("SELECT * FROM student_by_department WHERE department='si' ORDER BY id DESC;")
for student in si_students:
    print(student.department, student.id, student.name)
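# A minimal alternative sketch (not from the snippet above): the same inserts done with
# a prepared statement, so the driver handles quoting and typing instead of Python's
# "%" string interpolation. Keyspace and table names match the snippet above.
from cassandra.cluster import Cluster

cluster = Cluster()
session = cluster.connect('student_keyspace')

insert_stmt = session.prepare(
    "INSERT INTO student_by_department (department, id, name) VALUES (?, ?, ?)")
for student_id, student_name in enumerate(['ilham', 'Sasa', 'Kevin']):
    session.execute(insert_stmt, ('si', student_id, student_name))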
from cassandra.cluster import Cluster

cluster = Cluster()
connection = cluster.connect('max_connect')

# The second statement's feature_name value is a placeholder: the original batch breaks
# off after "..., 7, 3,".
batch = """
BEGIN BATCH
INSERT INTO max_connect.feature_remedy (feature_id, remedy_id, remedy_name_color)
VALUES (1, 'r2', {'mascara': 'dark blue'});
INSERT INTO max_connect.client_feature (client_id, client_info, age, colors, skin_condition, feature_id, feature_name)
VALUES (2, {first_name: 'Nastya', second_name: 'Gogol', email: '*****@*****.**'}, 25,
        {{'hair': 'auburn'}, {'eyes': 'brown'}, {'skin': 'light'}}, 7, 3, 'placeholder');
APPLY BATCH;
"""
import time from random import randint from sopel.module import commands, event, rule from cassandra.cluster import Cluster from cassandra.auth import PlainTextAuthProvider #Configuration contactpoints = ['1.2.3.4', '4.3.2.1'] auth_provider = PlainTextAuthProvider(username='******', password='******') keyspace = "whois" print "Connecting to cluster" cluster = Cluster(contact_points=contactpoints, auth_provider=auth_provider) session = cluster.connect(keyspace) NETWORK = "testnet" def send_whois(bot, nick): """ Sends the WHOIS command to the server for the specified nick. """ time.sleep(randint(10, 40)) bot.write(["WHOIS", nick]) #bot.say("whois sent: " + nick) def send_names(bot, channel):
for c in cnts: cv2.drawContours(image, [c], -1, (255,255,255), 2) # Remove vertical vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1,10)) detected_lines = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, vertical_kernel, iterations=2) cnts = cv2.findContours(detected_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) cnts = cnts[0] if len(cnts) == 2 else cnts[1] for c in cnts: cv2.drawContours(image, [c], -1, (255,255,255), 2) cv2.imwrite(img_for_box_extraction_path, image) from cassandra.cluster import Cluster cluster = Cluster(['192.168.0.131', '192.168.0.132','192.168.0.133']) session = cluster.connect('electionanalysis') selectStmtStartedProcessing= session.prepare("SELECT * FROM votelist_full_pdf_queue where STATUS='processing started'") selectStmtComplete= session.prepare("SELECT * FROM votelist_full_pdf_queue where STATUS='Complete'") selectStmtError= session.prepare("SELECT * FROM votelist_full_pdf_queue where STATUS='ERROR'") dfProcessingStarted = pd.DataFrame(list(session.execute(selectStmtStartedProcessing))) hostname= socket.gethostname() if dfProcessingStarted.empty==False: dfProcessingStarted = dfProcessingStarted.loc[(dfProcessingStarted['hostname']== str(hostname))] dfProcessingComplete = pd.DataFrame(list(session.execute(selectStmtComplete))) if dfProcessingComplete.empty==False: dfProcessingComplete = dfProcessingComplete.loc[(dfProcessingComplete['hostname']== str(hostname))] dfError = pd.DataFrame(list(session.execute(selectStmtError))) if dfError.empty==False: dfError = dfError.loc[(dfError['hostname']== str(hostname))] if dfProcessingStarted.empty==False and dfProcessingComplete.empty==False:
def test_dc_aware_roundrobin_one_remote_host(self): use_multidc([2, 2]) keyspace = 'test_dc_aware_roundrobin_one_remote_host' cluster = Cluster(load_balancing_policy=DCAwareRoundRobinPolicy( 'dc2', used_hosts_per_remote_dc=1), protocol_version=PROTOCOL_VERSION) session = cluster.connect() wait_for_up(cluster, 1, wait=False) wait_for_up(cluster, 2, wait=False) wait_for_up(cluster, 3, wait=False) wait_for_up(cluster, 4) create_schema(session, keyspace, replication_strategy=[2, 2]) self._insert(session, keyspace) self._query(session, keyspace) self.coordinator_stats.assert_query_count_equals(self, 1, 0) self.coordinator_stats.assert_query_count_equals(self, 2, 0) self.coordinator_stats.assert_query_count_equals(self, 3, 6) self.coordinator_stats.assert_query_count_equals(self, 4, 6) self.coordinator_stats.reset_counts() bootstrap(5, 'dc1') wait_for_up(cluster, 5) self._query(session, keyspace) self.coordinator_stats.assert_query_count_equals(self, 1, 0) self.coordinator_stats.assert_query_count_equals(self, 2, 0) self.coordinator_stats.assert_query_count_equals(self, 3, 6) self.coordinator_stats.assert_query_count_equals(self, 4, 6) self.coordinator_stats.assert_query_count_equals(self, 5, 0) self.coordinator_stats.reset_counts() decommission(3) decommission(4) wait_for_down(cluster, 3, wait=True) wait_for_down(cluster, 4, wait=True) self._query(session, keyspace) self.coordinator_stats.assert_query_count_equals(self, 3, 0) self.coordinator_stats.assert_query_count_equals(self, 4, 0) responses = set() for node in [1, 2, 5]: responses.add(self.coordinator_stats.get_query_count(node)) self.assertEqual(set([0, 0, 12]), responses) self.coordinator_stats.reset_counts() decommission(5) wait_for_down(cluster, 5, wait=True) self._query(session, keyspace) self.coordinator_stats.assert_query_count_equals(self, 3, 0) self.coordinator_stats.assert_query_count_equals(self, 4, 0) self.coordinator_stats.assert_query_count_equals(self, 5, 0) responses = set() for node in [1, 2]: responses.add(self.coordinator_stats.get_query_count(node)) self.assertEqual(set([0, 12]), responses) self.coordinator_stats.reset_counts() decommission(1) wait_for_down(cluster, 1, wait=True) self._query(session, keyspace) self.coordinator_stats.assert_query_count_equals(self, 1, 0) self.coordinator_stats.assert_query_count_equals(self, 2, 12) self.coordinator_stats.assert_query_count_equals(self, 3, 0) self.coordinator_stats.assert_query_count_equals(self, 4, 0) self.coordinator_stats.assert_query_count_equals(self, 5, 0) self.coordinator_stats.reset_counts() force_stop(2) try: self._query(session, keyspace) self.fail() except NoHostAvailable: pass cluster.shutdown()
def kmeans(query): from cassandra.cluster import Cluster from random import randint cluster = Cluster() session = cluster.connect('e08') STOP = 0 ittr_conv = 0 nb_clusters = 3 N = 0 query_trip1 = session.execute(query) #************************COUNT NUMBER OF ELEMENTS/ROWS RETURNED******************************# for row in query_trip1: N = N+1 #************************GET RANDOM INDEXES FOR CENTROIDS******************************# centroids_indx = [randint(0,N) for p in range(0,nb_clusters)] centroids_indx = sorted(centroids_indx, key=int) counter = 0 index = 0 init_centroids = [] query_trip1 = session.execute(query) for row in query_trip1: if counter == centroids_indx[index]: centroid = [row.startlong, row.startlat, row.endlong, row.endlat] init_centroids.append(centroid) index = index +1 if index == nb_clusters: break; counter = counter + 1 #************************INITIALIZE CENTROIDS *****************************# oldcentroids = [{'startlong':0,'startlat':0,'endlong':0,'endlat':0}, {'startlong':0,'startlat':0,'endlong':0,'endlat':0}, {'startlong':0,'startlat':0,'endlong':0,'endlat':0}] newcentroids = [{'startlong':init_centroids[0][0], 'startlat':init_centroids[0][1], 'endlong':init_centroids[0][2], 'endlat':init_centroids[0][3]}, {'startlong':init_centroids[1][0], 'startlat':init_centroids[1][1], 'endlong':init_centroids[1][2], 'endlat':init_centroids[1][3]}, {'startlong':init_centroids[2][0], 'startlat':init_centroids[2][1], 'endlong':init_centroids[2][2], 'endlat':init_centroids[2][3]}] #************************TIME TO STARTI ITTERATING WHILE NOT CONVERGING*****************************# while(STOP == 0): query_trip1 = session.execute(query) coord_sums = [{'startlong':0,'startlat':0,'endlong':0,'endlat':0 ,'nb':0},{'startlong':0,'startlat':0,'endlong':0,'endlat':0,'nb':0}, {'startlong':0,'startlat':0,'endlong':0,'endlat':0,'nb':0}] for row in query_trip1: distance1 = double_distance(newcentroids[0], row) distance2 = double_distance(newcentroids[1], row) distance3 = double_distance(newcentroids[2], row) distances = [distance1, distance2, distance3] min_indx = distances.index(min(distances)) coord_sums[min_indx]['startlong'] += row.startlong coord_sums[min_indx]['startlat'] += row.startlat coord_sums[min_indx]['endlong'] += row.endlong coord_sums[min_indx]['endlat'] += row.endlat coord_sums[min_indx]['nb'] += 1 #************************NEW CENTROIDS******************************# oldcentroids = newcentroids for i in range(nb_clusters): newcentroids[i]['startlong'] = coord_sums[i]['startlong']/coord_sums[i]['nb'] newcentroids[i]['startlat'] = coord_sums[i]['startlat']/coord_sums[i]['nb'] newcentroids[i]['endlong'] = coord_sums[i]['endlong']/coord_sums[i]['nb'] newcentroids[i]['endlat'] = coord_sums[i]['endlat']/coord_sums[i]['nb'] #************************SHOUDL WE STOP******************************# ittr_conv = ittr_conv + 1 if oldcentroids == newcentroids: STOP = 1 return(newcentroids)
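# Usage sketch (assumption: 'trip_data' is a hypothetical table in the 'e08' keyspace
# used above; the query only needs the startlong, startlat, endlong and endlat columns
# that kmeans() reads from each row).
centroids = kmeans("SELECT startlong, startlat, endlong, endlat FROM trip_data LIMIT 5000")
for c in centroids:
    print(c['startlong'], c['startlat'], c['endlong'], c['endlat'])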
from cassandra.cluster import Cluster
import copy
from datetime import datetime

cluster = Cluster()
session = cluster.connect("creditcard")


def get_by_query(table_name, args):
    q_string = dict(copy.copy(args))
    terms = []
    for k, v in q_string.items():
        terms.append(str(k) + "='" + str(v) + "'")
    if len(terms) > 0:
        wc = "WHERE " + " AND ".join(terms)
    else:
        wc = ""
    q = "SELECT * FROM " + table_name + " " + wc
    rows = session.execute(q)
    print("Query = " + q)
    return rows


def get_costumer_by_id(cc_num):
    args = {"cc_num": cc_num}
    row = get_by_query("customer", args)
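# Usage sketch (assumption: a "customer" table keyed by cc_num exists in the creditcard
# keyspace, as implied by get_costumer_by_id above; the card number below is made up).
rows = get_by_query("customer", {"cc_num": "4111111111111111"})
for row in rows:
    print(row)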
def __init__(self): hosts = [CASSANDRA_DNS] cluster = Cluster(hosts) self.session = cluster.connect() self.session.set_keyspace('birdfeed')
#!/usr/bin/env python2.7
# pip install kafka-python
# pip install cassandra-driver
import time
import datetime

from cassandra.cluster import Cluster
from kafka import KafkaConsumer

cluster = Cluster(['10.0.0.1', '10.0.0.2', '10.0.0.3'])
session = cluster.connect('kafka')

print('Bridge between Kafka and Cassandra, written in Python')

consumer = KafkaConsumer('topic2put', bootstrap_servers='10.0.0.5:32777')
for msg in consumer:
    ts = time.time()
    st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
    print(st + ':' + msg.value)
    session.execute(
        "insert into kafka.telemetry (topic, event_time, valore) values('topic2put', toTimestamp(now()), '"
        + msg.value + "') using ttl 20;")
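# A minimal sketch (same keyspace, table and TTL as above) of the insert done with a
# bound parameter instead of string concatenation, so payloads containing quotes cannot
# break the CQL. It could replace the session.execute(...) call inside the loop above.
insert_stmt = session.prepare(
    "INSERT INTO kafka.telemetry (topic, event_time, valore) "
    "VALUES ('topic2put', toTimestamp(now()), ?) USING TTL 20")


def store_telemetry(value):
    session.execute(insert_stmt, (value,))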
from cassandra.auth import PlainTextAuthProvider
from cassandra.cluster import Cluster
from cassandra.query import BatchStatement

cluster = Cluster(auth_provider=PlainTextAuthProvider(username='******', password='******'))
connection = cluster.connect()

connection.execute('''
    CREATE KEYSPACE IF NOT EXISTS lecture_vcs
    WITH replication = { 'class': 'SimpleStrategy', 'replication_factor': '1' };
''')

batch_list = [
    '''
    UPDATE lecture_vcs.users
    SET data = {'role': 'teacher', 'date_registered': '2019-10-10'},
        email = '*****@*****.**'
    WHERE user_id = 1;
    ''',
    '''
    UPDATE lecture_vcs.groups
    SET user_email = '*****@*****.**'
    WHERE group_id = 2 and group_name = 'km-62' and user_id = 1;
    '''
]


def execute_batch(statement_list):
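    # A plausible body for execute_batch (not part of the original snippet): raw CQL
    # strings can be added straight to a BatchStatement, which is then executed as a
    # single logged batch on the connection opened above.
    batch = BatchStatement()
    for statement in statement_list:
        batch.add(statement)
    connection.execute(batch)


# One possible invocation:
execute_batch(batch_list)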
def test_heart_beat_timeout(self): """ Test to ensure the hosts are marked as down after a OTO is received. Also to ensure this happens within the expected timeout @since 3.10 @jira_ticket PYTHON-762 @expected_result all the hosts have been marked as down at some point @test_category metadata """ number_of_dcs = 3 nodes_per_dc = 100 query_to_prime = "INSERT INTO test3rf.test (k, v) VALUES (0, 1);" idle_heartbeat_timeout = 5 idle_heartbeat_interval = 1 start_and_prime_cluster_defaults(number_of_dcs, nodes_per_dc, CASSANDRA_VERSION) self.addCleanup(stop_simulacron) listener = TrackDownListener() executor = ThreadTracker(max_workers=16) # We need to disable compression since it's not supported in simulacron cluster = Cluster( compression=False, idle_heartbeat_interval=idle_heartbeat_interval, idle_heartbeat_timeout=idle_heartbeat_timeout, executor_threads=16, execution_profiles={ EXEC_PROFILE_DEFAULT: ExecutionProfile(load_balancing_policy=RoundRobinPolicy()) }) self.addCleanup(cluster.shutdown) cluster.scheduler.shutdown() cluster.executor = executor cluster.scheduler = _Scheduler(executor) session = cluster.connect(wait_for_all_pools=True) cluster.register_listener(listener) log = logging.getLogger() log.setLevel('CRITICAL') self.addCleanup(log.setLevel, "DEBUG") prime_query(query_to_prime, then=NO_THEN) futures = [] for _ in range(number_of_dcs * nodes_per_dc): future = session.execute_async(query_to_prime) futures.append(future) for f in futures: f._event.wait() self.assertIsInstance(f._final_exception, OperationTimedOut) prime_request(PrimeOptions(then=NO_THEN)) # We allow from some extra time for all the hosts to be to on_down # The callbacks should start happening after idle_heartbeat_timeout + idle_heartbeat_interval time.sleep((idle_heartbeat_timeout + idle_heartbeat_interval) * 2) for host in cluster.metadata.all_hosts(): self.assertIn(host, listener.hosts_marked_down) # In this case HostConnection._replace shouldn't be called self.assertNotIn("_replace", executor.called_functions)
def addToCassandra(mean, movie):
    cluster = Cluster()
    session = cluster.connect('movie_reviews')
    session.execute(
        "CREATE TABLE IF NOT EXISTS movie_reviews.movie_sentiment "
        "( movie_name text, sentiment int, PRIMARY KEY (movie_name, sentiment) );")
    session.execute(
        "INSERT INTO movie_reviews.movie_sentiment (movie_name, sentiment) VALUES (%s, %s);",
        (movie, int(mean)))
def monthRetrieve( startTime, endTime=datetime.datetime.today(), fields1=[ 'trade_status', 'close', 'mkt_freeshares', 'mkt_cap_float', 'mfd_buyamt_d', 'mfd_sellamt_d', 'roa', 'pe', 'pb' ], option1="ruleType=8;unit=1;traderType=1;Period=M;Fill=Previous;PriceAdj=B", multi_mfd=True): # cassandra connect cluster = Cluster(['192.168.1.111']) session = cluster.connect('factors') # factors: factors_month # 启动Wind API w.start() # 获取可交易日 times = w.tdays(startTime, endTime, "Period=M").Times timeList = [] for i in range(len(times)): row = str(times[i]) row = row[:row.find(' ')] timeList.append(row) print(timeList) # # 【解耦:迁移至stock.py,定期更新】判断数据有效性 # 获取某个月份所有可交易的A股 (如此的话每次一支股票只拿一个数据,分多个时间点去拿,请求数目过多,改成批量拉取一支股票 # 所有因子 # stocks = w.wset("SectorConstituent", u"sector=全部A股;field=wind_code") # validStocks ={} # # Total stock: 3183 [2017-04-13] # print (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), "Total A stocks number: ", len(stocks.Data[0])) # # stock status update statement # updateStmt = session.prepare('''INSERT INTO stock_info(stock, ipo_date, trade_status) VALUES (?,?,?)''') # # #for stock in ["000852.SZ","603788.SH","603987.SH","603988.SH","603989.SH","603990.SH","603991.SH","603993.SH"]: # #for stock in ["000852.SZ","603788.SH","603990.SH","603991.SH","603993.SH"]: # for stock in stocks.Data[0]: # ipo_status = w.wsd(stock, "ipo_date, trade_status", datetime.datetime.today()) # #print (ipo_status) # try: # days = (datetime.datetime.today() - ipo_status.Data[0][0]).days # # trade_status 不能用一个变量表示,而是一个时序的因子,这里的0/1只能用区分IPO是否符合要求 # if days > 90 and ipo_status.Data[1][0] == "交易": # # if days > 90: # validStocks[stock] = ipo_status.Data[1][0] # session.execute(updateStmt, (stock, ipo_status.Data[0][0], '1')) # else: # # set status 0 # session.execute(updateStmt, (stock, ipo_status.Data[0][0], '0')) # print (" Set invalid data: ", stock, str(ipo_status.Data[0][0])) # except TypeError: # print (" -- Log TypeError at Stock: ", stock, " :\t", str(ipo_status.Data[0][0])) # Valid: 2819 [2017-04-13] # tradable stocks' collection rows = session.execute( '''SELECT stock, ipo_date FROM stock_info WHERE trade_status = '1' ALLOW FILTERING ''' ) validStocks = {} validStockCode = [] for row in rows: validStocks[row.stock] = row.ipo_date validStockCode.append(row.stock) validN = len(validStocks) print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), " valid stocks' number: ", validN) #print (validStocks) ## 拉取因子,分阶段拉取,拉完异步存DB if multi_mfd == True: columns = fields1 + [ 'mfd_buyamt_d2', 'mfd_sellamt_d2', 'mfd_buyamt_d4', 'mfd_sellamt_d4' ] else: columns = fields1 dataList = [] #创建数组 cnt = 0 #当前拉取了多少支股票 index = 0 #上一次dump的位置,主要目的是通过此索引找到该股票代码 CHUNK_SIZE = 300 #每一次异步dump的股票个数 preparedStmt = session.prepare( '''INSERT INTO factors_month(stock, factor, time, value) VALUES (?,?,?,?)''' ) print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), " ------ Starting to insert to DB") # 拉取交易状态便于之后数据过滤 hasTradeStatus = False if len(fields1) > 1 and fields1[0] == 'trade_status': hasTradeStatus = True ## 遍历所有股票 for stock, ipo_date in validStocks.items(): # 只取 IPO 之后的数据【需求变更,IPO之前的ROA也可能是有用的数据】 # start = startTime if startTime > ipo_date.date() else ipo_date.date() start = startTime # 同一个变量,参数不一样,需要分成几次拉取 wsd_data = w.wsd(stock, fields1, start, endTime, option1).Data if multi_mfd == True: fields2 = ['mfd_buyamt_d', 'mfd_sellamt_d'] option2 = "unit=1;traderType=2;Period=M;Fill=Previous;PriceAdj=B" wsd_data = wsd_data + w.wsd(stock, fields2, start, endTime, option2).Data option3 = 
"unit=1;traderType=4;Period=M;Fill=Previous;PriceAdj=B" wsd_data = wsd_data + w.wsd(stock, fields2, start, endTime, option3).Data ##【修改:计算动量模块单独移出来,为可扩展性】mmt = close_1 / close_2; 没有数据增长率为0 # mmt = [] # mmt.append(1) # for i in range(1, len(wsd_data[0])): # if wsd_data[0][i] is not None and wsd_data[0][i] != 0: # mmt.append(wsd_data[0][i] / wsd_data[0][i-1]) # else: # mmt.append(float('nan')) # wsd_data.append(mmt) dataList.append(wsd_data) cnt += 1 #阶段性异步导出 dump data asynchronously, 300 stocks / round if cnt % CHUNK_SIZE == 0: for s in range(index, cnt): for i in range(len(columns)): for j in range(len(dataList[s - index][i])): #print (validStocks[s],columns[i],timeList[j],dataList[s - index][i][j]) try: value = dataList[s - index][i][j] if hasTradeStatus == True and i == 0: # 交易 状态作为一个因子 if value is not None and value == "交易": value = 1 else: value = 0 elif value is not None: value = float(value) else: value = float('nan') except (ValueError, TypeError, KeyError) as e: value = float('nan') print("--Log ValueError in ", validStockCode[s], "\t", columns[i], "\t", str(timeList[j]), "\t", str(value)) print(e) print( "--------------------------------------------------------------------------" ) except IndexError as e: print( "--------------------------------------------------------------------------" ) print( "len s: %d, len i: %d, len j: %d ~ " % (cnt, len(columns), len(timeList)), (s - index, i, j)) print(e) session.execute_async(preparedStmt, (validStockCode[s], columns[i], timeList[j], value)) #记录上一次导出数据位置,清空buffer index = cnt dataList = [] print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), '------ Dump NO.%d end at stock %s \n' % (cnt, stock)) print("---- Last chunk size: ", len(dataList)) print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), '---------------- Pulling finished!\n') # 最后的剩余数据插入cassandra for s in range(index, cnt): for i in range(len(columns)): for j in range(len(dataList[s - index][i])): #print (validStocks[s],columns[i],timeList[j],dataList[s - index][i][j]) try: value = dataList[s - index][i][j] if hasTradeStatus == True and i == 0: if value is not None and value == "交易": value = 1 else: value = 0 elif value is not None: value = float(value) else: value = float('nan') except (ValueError, TypeError, KeyError) as e: value = float('nan') print("--Log ValueError in ", validStockCode[s], "\t", columns[i], "\t", str(timeList[j]), "\t", str(value)) print(e) print( "--------------------------------------------------------------------------" ) except IndexError as e: print( "--------------------------------------------------------------------------" ) print( "len s: %d, len i: %d, len j: %d ~ " % (cnt, len(columns), len(timeList)), (s - index, i, j)) print(e) session.execute_async( preparedStmt, (validStockCode[s], columns[i], timeList[j], value)) print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), '---------------- Persistion finished!\n') #result testing print("---------- Inserstion Testing: ") rows = session.execute( "select * from factors_month where stock='000852.SZ' and factor in ('roa', 'trade_status') and time > '2017-01-02'" ) for row in rows: print(row.stock, row.factor, row.time, row.value) # close connection with cassandra cluster.shutdown()
class HeartbeatTest(unittest.TestCase): """ Test to validate failing a heartbeat check doesn't mark a host as down @since 3.3 @jira_ticket PYTHON-286 @expected_result host should not be marked down when heartbeat fails @test_category connection heartbeat """ def setUp(self): self.cluster = Cluster(protocol_version=PROTOCOL_VERSION, idle_heartbeat_interval=1) self.session = self.cluster.connect(wait_for_all_pools=True) def tearDown(self): self.cluster.shutdown() @local def test_heart_beat_timeout(self): # Setup a host listener to ensure the nodes don't go down test_listener = TestHostListener() host = "127.0.0.1" node = get_node(1) initial_connections = self.fetch_connections(host, self.cluster) self.assertNotEqual(len(initial_connections), 0) self.cluster.register_listener(test_listener) # Pause the node try: node.pause() # Wait for connections associated with this host go away self.wait_for_no_connections(host, self.cluster) # Resume paused node finally: node.resume() # Run a query to ensure connections are re-established current_host = "" count = 0 while current_host != host and count < 100: rs = self.session.execute_async("SELECT * FROM system.local", trace=False) rs.result() current_host = str(rs._current_host) count += 1 time.sleep(.1) self.assertLess(count, 100, "Never connected to the first node") new_connections = self.wait_for_connections(host, self.cluster) self.assertIsNone(test_listener.host_down) # Make sure underlying new connections don't match previous ones for connection in initial_connections: self.assertFalse(connection in new_connections) def fetch_connections(self, host, cluster): # Given a cluster object and host grab all connection associated with that host connections = [] holders = cluster.get_connection_holders() for conn in holders: if host == str(getattr(conn, 'host', '')): if isinstance(conn, HostConnectionPool): if conn._connections is not None and len(conn._connections) > 0: connections.append(conn._connections) else: if conn._connection is not None: connections.append(conn._connection) return connections def wait_for_connections(self, host, cluster): retry = 0 while(retry < 300): retry += 1 connections = self.fetch_connections(host, cluster) if len(connections) is not 0: return connections time.sleep(.1) self.fail("No new connections found") def wait_for_no_connections(self, host, cluster): retry = 0 while(retry < 100): retry += 1 connections = self.fetch_connections(host, cluster) if len(connections) is 0: return time.sleep(.5) self.fail("Connections never cleared")
import csv

from cassandra.cluster import Cluster

# Write sentiment score to Cassandra
cluster = Cluster(['169.53.141.8'])
session = cluster.connect('msd_01')

# Open the csv file with tracks, sentiment score
with open('sentiment.csv', 'rb') as csvfile:
    reader = csv.reader(csvfile, delimiter=',')
    numRows = 0
    for csvrow in reader:
        if numRows % 5000 == 0:
            print numRows
        track_id = csvrow[0]
        sentiment = csvrow[1]
        # Write a query to update columns in the database
        update_query = """update songs set sentiment = %s where track_id = '%s'""" % (sentiment, track_id)
        session.execute(update_query)
        numRows += 1

cluster.shutdown()
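# A minimal alternative sketch (same table and columns as above, written for Python 3,
# and assuming sentiment is a numeric column): preparing the UPDATE once and binding
# values per row avoids re-parsing the statement and hand-quoting track_id.
import csv

from cassandra.cluster import Cluster

cluster = Cluster(['169.53.141.8'])
session = cluster.connect('msd_01')

update_stmt = session.prepare("UPDATE songs SET sentiment = ? WHERE track_id = ?")
with open('sentiment.csv', newline='') as csvfile:
    for row in csv.reader(csvfile):
        session.execute(update_stmt, (float(row[1]), row[0]))

cluster.shutdown()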
import mysql.connector from cassandra.cluster import Cluster cluster = Cluster(['34.196.59.158'], port=9042) session = cluster.connect('zubat') cnx = mysql.connector.connect( user='******', password='******', host='sensedb.cenacuetgbbz.us-east-1.rds.amazonaws.com', database='sense2') cursor = cnx.cursor() insert_company_table = ("INSERT INTO company(name,type) VALUES (%s,%s) ") insert_deploy_table = ( "INSERT INTO deploy(company_id, name,date_time_start,date_time_end,street_address,zip,city,state,internal_id) VALUES(%s,%s, %s,%s,%s,%s,%s,%s,%s) " ) insert_deploy_data_sense = ( "INSERT INTO deploy_data_sense(deploy_id, sense_time, week_days, holiday) VALUES (%s, %s, %s, %s)" ) insert_sensor_table = ( "INSERT INTO sensor(id, deploy_id, register_date, last_update, model, version) VALUES (%s, %s, %s, %s, %s, %s) " ) insert_sensor_data_sense_table = ( "INSERT INTO sensor_data_sense(sensor_id, name) VALUES ( %s, %s )") insert_whitelist_table = ( "INSERT INTO whitelist_sense(mac_address, deploy_id) VALUES ( %s, %s)") insert_whitelist_person_data_table = ( "INSERT INTO whitelist_person_data_sense(whitelist_id, keyword, content) VALUES (%s, %s, %s)" )
def main(): cluster = Cluster(['127.0.0.1'], port=9042) session = cluster.connect() log.info("creating keyspace...") session.execute(""" CREATE KEYSPACE IF NOT EXISTS %s WITH replication = { 'class': 'SimpleStrategy', 'replication_factor': '2' } """ % KEYSPACE) log.info("setting keyspace...") session.set_keyspace(KEYSPACE) session.execute("""DROP TABLE mytable""") log.info("creating table...") session.execute(""" CREATE TABLE IF NOT EXISTS mytable ( event_number text, date_time text, address_rounded_to_block_number_or_intersection text, patrol_beat text, incident_type text, incident_type_description text, priority int, time time, hour text, priority_hour text, PRIMARY KEY (event_number) ) """) query = SimpleStatement(""" INSERT INTO mytable (event_number, date_time, address_rounded_to_block_number_or_intersection, patrol_beat, incident_type, incident_type_description, priority, time, hour, priority_hour) VALUES (%(event_number)s, %(date_time)s, %(address_rounded_to_block_number_or_intersection)s, %(patrol_beat)s, %(incident_type)s, %(incident_type_description)s, %(priority)s, %(time)s, %(hour)s, %(priority_hour)s) """, consistency_level=ConsistencyLevel.ONE) prepared = session.prepare(""" INSERT INTO mytable (event_number, date_time, address_rounded_to_block_number_or_intersection, patrol_beat, incident_type, incident_type_description, priority, time, hour, priority_hour) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) """) import pandas as pd data = pd.read_csv('oak-crimes-for-cassandra.csv').dropna().sample(10000, random_state=42) from tqdm import tqdm for i, row in tqdm(data.iterrows()): # log.info("inserting row %d" % i) # log.info(tuple(row)) # session.execute(query, dict(key="key%d" % i, a='a', b='b')) session.execute(prepared, tuple(row)) future = session.execute_async("SELECT * FROM mytable") # log.info("key\tcol1\tcol2") # log.info("---\t----\t----") try: rows = future.result() except Exception: log.exception("Error reading rows:") return for row in rows: log.info(row)
# Creates a given number of threads (passed by argument) and assigns an equal portion
# of the workload to each one; the main thread waits actively until they finish.
from cassandra.cluster import Cluster
import threading
import sys
import itertools

cluster = Cluster(["minerva-5"])
session = cluster.connect("case18")

query = "SELECT * FROM case18.particle WHERE partid=?"
prepared = session.prepare(query)

num_keys = 10000
max_parallelism = int(sys.argv[1])
count = itertools.count()  # starts at 0
finished = False


def call(keys):
    global finished
    for k in keys:
        result = session.execute(prepared, [k])
        # next returns value and increments subsequently
        if next(count) == max_parallelism - 1:
            finished = True


ths = []
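# A plausible continuation (the original snippet ends at "ths = []" above): split the
# key range evenly across max_parallelism threads and start them, then busy-wait on the
# finished flag as described in the comment at the top. Keys are assumed to be the
# integers 0..num_keys-1, matching the partid=? query above.
chunk = num_keys // max_parallelism
for t in range(max_parallelism):
    start = t * chunk
    end = num_keys if t == max_parallelism - 1 else start + chunk
    th = threading.Thread(target=call, args=(range(start, end),))
    ths.append(th)
    th.start()

# Main thread waits actively until the workers signal completion
while not finished:
    pass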
from cassandra.auth import PlainTextAuthProvider
from cassandra.cluster import Cluster
from cassandra.query import SimpleStatement

import config

""" Find the number of speeds > 100 in the data set. """

ap = PlainTextAuthProvider(username=config.username, password=config.password)
node_ips = config.hosts

cluster = Cluster(node_ips, protocol_version=4, auth_provider=ap, port=config.port)
session = cluster.connect('part_3_version_0')

query = 'SELECT speed FROM loopdata_by_detector'
statement = SimpleStatement(query, fetch_size=5000)

count = 0
for row in session.execute(statement):
    if isinstance(row.speed, int) and row.speed > 100:
        count += 1

print("\nNumber of speeds > 100: " + str(count) + "\n")

cluster.shutdown()
import json

from kafka import KafkaConsumer
from cassandra.cluster import Cluster

address = 'localhost'
cluster = Cluster([address])
session = cluster.connect("emoji")

consumer = KafkaConsumer('France', bootstrap_servers=['localhost:9092'])
for message in consumer:
    print("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition,
                                         message.offset, message.key, message.value))
    # Bind the values instead of concatenating them into the CQL string: the topic is a
    # text value (it needs quoting) and message.value is bytes, so it is decoded first.
    session.execute(
        "INSERT INTO pays (nom_pays, emojis) VALUES (%s, %s);",
        (message.topic, [message.value.decode('utf-8')]))