def test_driver_recovers_nework_isolation(self):
    """
    Verify that when the simulated cluster becomes unreachable the driver
    marks every host down, and that it recovers on its own once
    connections are accepted again.
    """
    start_and_prime_singledc()

    idle_heartbeat_timeout = 3
    idle_heartbeat_interval = 1

    listener = TrackDownListener()
    cluster = Cluster(['127.0.0.1'],
                      load_balancing_policy=RoundRobinPolicy(),
                      idle_heartbeat_timeout=idle_heartbeat_timeout,
                      idle_heartbeat_interval=idle_heartbeat_interval,
                      executor_threads=16)
    session = cluster.connect(wait_for_all_pools=True)

    cluster.register_listener(listener)

    # Simulate total isolation: queries never answer and new connections
    # are rejected at startup, so heartbeats will start failing.
    prime_request(PrimeOptions(then=NO_THEN))
    prime_request(RejectConnections(RejectType.REJECT_STARTUP))

    # Give the failing heartbeats enough time to mark every host down.
    time.sleep((idle_heartbeat_timeout + idle_heartbeat_interval) * 2)

    for host in cluster.metadata.all_hosts():
        self.assertIn(host, listener.hosts_marked_down)

    # With all hosts down, queries must fail fast.
    self.assertRaises(NoHostAvailable, session.execute, "SELECT * from system.local")

    # Restore connectivity and check the driver reconnects by itself.
    clear_queries()
    prime_request(AcceptConnections())

    time.sleep(idle_heartbeat_timeout + idle_heartbeat_interval + 2)

    self.assertIsNotNone(session.execute("SELECT * from system.local"))
def test_retry_after_defunct(self):
    """
    We test cluster._retry is called if the connection is defunct
    in the middle of a query

    Finally we verify the driver recovers correctly in the event of a
    network partition

    @since 3.12
    @expected_result the driver is able to query even if a host is marked
    as down in the middle of the query, it will go to the next one if the
    timeout hasn't expired

    @test_category connection
    """
    number_of_dcs = 3
    nodes_per_dc = 2

    query_to_prime = "INSERT INTO test3rf.test (k, v) VALUES (0, 1);"

    idle_heartbeat_timeout = 1
    idle_heartbeat_interval = 5

    simulacron_cluster = start_and_prime_cluster_defaults(number_of_dcs,
                                                          nodes_per_dc)

    # Only the last DC is primed to answer the query successfully.
    dc_ids = sorted(simulacron_cluster.data_center_ids)
    last_host = dc_ids.pop()
    prime_query(query_to_prime,
                cluster_name="{}/{}".format(simulacron_cluster.cluster_name,
                                            last_host))

    roundrobin_lbp = OrderedRoundRobinPolicy()
    cluster = Cluster(
        compression=False,
        idle_heartbeat_interval=idle_heartbeat_interval,
        idle_heartbeat_timeout=idle_heartbeat_timeout,
        execution_profiles={
            EXEC_PROFILE_DEFAULT: ExecutionProfile(
                load_balancing_policy=roundrobin_lbp)
        })

    session = cluster.connect(wait_for_all_pools=True)
    self.addCleanup(cluster.shutdown)

    # This simulates we only have access to one DC
    for dc_id in dc_ids:
        datacenter_path = "{}/{}".format(simulacron_cluster.cluster_name, dc_id)
        prime_query(query_to_prime, then=NO_THEN, cluster_name=datacenter_path)
        prime_request(PrimeOptions(then=NO_THEN, cluster_name=datacenter_path))

    # Only the last datacenter will respond, therefore the first host won't.
    # We want to make sure the returned hosts are 127.0.0.1, 127.0.0.2,
    # ... 127.0.0.8
    roundrobin_lbp._position = 0

    # Once the heartbeat interval + timeout elapses the connection should be
    # marked down and another host retried before the request timeout expires.
    response_future = session.execute_async(
        query_to_prime,
        timeout=4 * idle_heartbeat_interval + idle_heartbeat_timeout)
    response_future.result()
    self.assertGreater(len(response_future.attempted_hosts), 1)

    # No error should be raised here since the hosts have been marked
    # as down and there's still 1 DC available
    for _ in range(10):
        session.execute(query_to_prime)

    # Might take some time to close the previous connections and reconnect
    time.sleep(10)
    assert_quiescent_pool_state(self, cluster)
    clear_queries()

    time.sleep(10)
    assert_quiescent_pool_state(self, cluster)
def tearDown(self):
    # Drop every primed query so tests stay isolated from one another.
    clear_queries()
def tearDown(self):
    # Drop every primed query, then stop the simulacron process so the
    # next test starts from a clean slate.
    clear_queries()
    stop_simulacron()
def test_retry_after_defunct(self):
    """
    We test cluster._retry is called if the connection is defunct
    in the middle of a query

    Finally we verify the driver recovers correctly in the event of a
    network partition

    @since 3.12
    @expected_result the driver is able to query even if a host is marked
    as down in the middle of the query, it will go to the next one if the
    timeout hasn't expired

    @test_category connection
    """
    number_of_dcs = 3
    nodes_per_dc = 2

    query_to_prime = "INSERT INTO test3rf.test (k, v) VALUES (0, 1);"

    idle_heartbeat_timeout = 1
    idle_heartbeat_interval = 5

    simulacron_cluster = start_and_prime_cluster_defaults(number_of_dcs,
                                                          nodes_per_dc)

    # Prime a successful response only on the last DC.
    dc_ids = sorted(simulacron_cluster.data_center_ids)
    last_host = dc_ids.pop()
    prime_query(query_to_prime,
                cluster_name="{}/{}".format(simulacron_cluster.cluster_name,
                                            last_host))

    roundrobin_lbp = OrderedRoundRobinPolicy()
    cluster = Cluster(
        compression=False,
        idle_heartbeat_interval=idle_heartbeat_interval,
        idle_heartbeat_timeout=idle_heartbeat_timeout,
        execution_profiles={
            EXEC_PROFILE_DEFAULT: ExecutionProfile(
                load_balancing_policy=roundrobin_lbp)
        })

    session = cluster.connect(wait_for_all_pools=True)
    self.addCleanup(cluster.shutdown)

    # This simulates we only have access to one DC
    for dc_id in dc_ids:
        datacenter_path = "{}/{}".format(simulacron_cluster.cluster_name, dc_id)
        prime_query(query_to_prime, then=NO_THEN, cluster_name=datacenter_path)
        prime_request(PrimeOptions(then=NO_THEN, cluster_name=datacenter_path))

    # Only the last datacenter will respond, therefore the first host won't.
    # We want to make sure the returned hosts are 127.0.0.1, 127.0.0.2,
    # ... 127.0.0.8
    roundrobin_lbp._position = 0

    # After heartbeat interval + timeout the connection should be marked
    # down and another host retried within the request timeout.
    response_future = session.execute_async(
        query_to_prime,
        timeout=4 * idle_heartbeat_interval + idle_heartbeat_timeout)
    response_future.result()
    self.assertGreater(len(response_future.attempted_hosts), 1)

    # No error should be raised here since the hosts have been marked
    # as down and there's still 1 DC available
    for _ in range(10):
        session.execute(query_to_prime)

    # Might take some time to close the previous connections and reconnect
    time.sleep(10)
    assert_quiescent_pool_state(self, cluster)
    clear_queries()

    time.sleep(10)
    assert_quiescent_pool_state(self, cluster)
def test_retry_policy_on_request_error(self):
    """
    Test to verify that on_request_error is called properly.

    @since 3.18
    @jira_ticket PYTHON-1064
    @expected_result the appropriate retry policy is called

    @test_category connection
    """
    overloaded_error = {
        "result": "overloaded",
        "message": "overloaded"
    }
    bootstrapping_error = {
        "result": "is_bootstrapping",
        "message": "isbootstrapping"
    }
    truncate_error = {
        "result": "truncate_error",
        "message": "truncate_error"
    }
    server_error = {
        "result": "server_error",
        "message": "server_error"
    }

    # Test the on_request_error call
    retry_policy = CounterRetryPolicy()
    self.set_cluster(retry_policy)

    for prime_error, exc in [
            (overloaded_error, OverloadedErrorMessage),
            (bootstrapping_error, IsBootstrappingErrorMessage),
            (truncate_error, TruncateError),
            (server_error, ServerError)]:
        clear_queries()
        query_to_prime = "SELECT * from simulacron_keyspace.simulacron_table;"
        prime_query(query_to_prime, then=prime_error,
                    rows=None, column_types=None)

        rf = self.session.execute_async(query_to_prime)
        with self.assertRaises(exc):
            rf.result()
        self.assertEqual(len(rf.attempted_hosts), 1)  # no retry

    # One on_request_error call per primed error above.
    self.assertEqual(next(retry_policy.request_error), 4)

    # Test that by default, retry on next host
    retry_policy = RetryPolicy()
    self.set_cluster(retry_policy)

    for e in [overloaded_error, bootstrapping_error,
              truncate_error, server_error]:
        clear_queries()
        query_to_prime = "SELECT * from simulacron_keyspace.simulacron_table;"
        prime_query(query_to_prime, then=e, rows=None, column_types=None)

        rf = self.session.execute_async(query_to_prime)
        with self.assertRaises(NoHostAvailable):
            rf.result()
        self.assertEqual(len(rf.attempted_hosts), 3)  # all 3 nodes failed