def setUpClass(cls):
    """
    Class-level fixture: calls start_and_prime_cluster_defaults and, when
    ``cls.connect`` is truthy, creates a driver ``Cluster`` and session.

    Nothing is started when both DSE_VERSION and SIMULACRON_JAR are None,
    or when CASSANDRA_VERSION is lower than 2.1.
    """
    # NOTE(review): presumably decorated with @classmethod outside this view.
    # `and` binds tighter than `or`, so the first two checks form one group;
    # the version comparison is only evaluated when that group is falsy.
    nothing_configured = DSE_VERSION is None and SIMULACRON_JAR is None
    if nothing_configured or CASSANDRA_VERSION < Version("2.1"):
        return

    cls.simulacron_cluster = start_and_prime_cluster_defaults(
        dse_version=DSE_VERSION,
        nodes_per_dc=cls.nodes_per_dc,
    )

    if not cls.connect:
        return

    driver_cluster = Cluster(
        protocol_version=PROTOCOL_VERSION,
        compression=False,
    )
    cls.cluster = driver_cluster
    cls.session = driver_cluster.connect(wait_for_all_pools=True)
def test_heart_beat_timeout(self):
    """
    Test to ensure the hosts are marked as down after an OperationTimedOut
    (OTO) is received. Also to ensure this happens within the expected timeout

    @since 3.10
    @jira_ticket PYTHON-762
    @expected_result all the hosts have been marked as down at some point

    @test_category metadata
    """
    number_of_dcs = 3
    nodes_per_dc = 20

    query_to_prime = "INSERT INTO test3rf.test (k, v) VALUES (0, 1);"

    idle_heartbeat_timeout = 5
    idle_heartbeat_interval = 1

    start_and_prime_cluster_defaults(number_of_dcs, nodes_per_dc)

    listener = TrackDownListener()
    executor = ThreadTracker(max_workers=8)

    # We need to disable compression since it's not supported in simulacron
    cluster = Cluster(
        compression=False,
        idle_heartbeat_interval=idle_heartbeat_interval,
        idle_heartbeat_timeout=idle_heartbeat_timeout,
        executor_threads=8,
        execution_profiles={
            EXEC_PROFILE_DEFAULT: ExecutionProfile(load_balancing_policy=RoundRobinPolicy())
        })
    self.addCleanup(cluster.shutdown)

    # Replace the cluster's executor and scheduler with the tracking executor
    # so executor.called_functions can be inspected at the end of the test.
    cluster.scheduler.shutdown()
    cluster.executor = executor
    cluster.scheduler = _Scheduler(executor)

    session = cluster.connect(wait_for_all_pools=True)
    cluster.register_listener(listener)

    # Silence the root logger for the duration of the test and restore the
    # level it had before, instead of forcing a hard-coded level afterwards.
    log = logging.getLogger()
    previous_level = log.level
    log.setLevel('CRITICAL')
    self.addCleanup(log.setLevel, previous_level)

    prime_query(query_to_prime, then=NO_THEN)

    # One request per node; each one is expected to time out.
    futures = [
        session.execute_async(query_to_prime)
        for _ in range(number_of_dcs * nodes_per_dc)
    ]

    for f in futures:
        f._event.wait()
        self.assertIsInstance(f._final_exception, OperationTimedOut)

    prime_request(PrimeOptions(then=NO_THEN))

    # We allow some extra time for all the hosts to be reported to on_down
    # The callbacks should start happening after idle_heartbeat_timeout + idle_heartbeat_interval
    time.sleep((idle_heartbeat_timeout + idle_heartbeat_interval) * 2.5)

    for host in cluster.metadata.all_hosts():
        self.assertIn(host, listener.hosts_marked_down)

    # In this case HostConnection._replace shouldn't be called
    self.assertNotIn("_replace", executor.called_functions)
def test_retry_after_defunct(self):
    """
    We test cluster._retry is called if the connection is defunct
    in the middle of a query

    Finally we verify the driver recovers correctly in the event
    of a network partition

    @since 3.12
    @expected_result the driver is able to query even if a host is marked
    as down in the middle of the query, it will go to the next one if the timeout
    hasn't expired

    @test_category connection
    """
    number_of_dcs = 3
    nodes_per_dc = 2

    query_to_prime = "INSERT INTO test3rf.test (k, v) VALUES (0, 1);"

    idle_heartbeat_timeout = 1
    idle_heartbeat_interval = 5

    simulacron_cluster = start_and_prime_cluster_defaults(number_of_dcs, nodes_per_dc)

    # The datacenter with the highest id is primed without then=NO_THEN;
    # the remaining ones are primed with NO_THEN further below.
    dc_ids = sorted(simulacron_cluster.data_center_ids)
    last_dc_id = dc_ids.pop()
    prime_query(query_to_prime,
                cluster_name="{}/{}".format(simulacron_cluster.cluster_name, last_dc_id))

    roundrobin_lbp = OrderedRoundRobinPolicy()
    cluster = Cluster(
        compression=False,
        idle_heartbeat_interval=idle_heartbeat_interval,
        idle_heartbeat_timeout=idle_heartbeat_timeout,
        execution_profiles={
            EXEC_PROFILE_DEFAULT: ExecutionProfile(load_balancing_policy=roundrobin_lbp)
        })
    # Register the shutdown before connecting so the cluster is still
    # released if connect() raises.
    self.addCleanup(cluster.shutdown)
    session = cluster.connect(wait_for_all_pools=True)

    # This simulates we only have access to one DC
    for dc_id in dc_ids:
        datacenter_path = "{}/{}".format(simulacron_cluster.cluster_name, dc_id)
        prime_query(query_to_prime, then=NO_THEN, cluster_name=datacenter_path)
        prime_request(PrimeOptions(then=NO_THEN, cluster_name=datacenter_path))

    # Only the last datacenter will respond, therefore the first host won't
    # We want to make sure the returned hosts are 127.0.0.1, 127.0.0.2, ... 127.0.0.8
    # NOTE(review): 3 DCs x 2 nodes suggests 6 hosts - confirm the last address.
    roundrobin_lbp._position = 0

    # The connection should be marked as down and another host retried; the
    # timeout covers four heartbeat intervals plus one heartbeat timeout.
    response_future = session.execute_async(
        query_to_prime,
        timeout=4 * idle_heartbeat_interval + idle_heartbeat_timeout)
    response_future.result()
    self.assertGreater(len(response_future.attempted_hosts), 1)

    # No error should be raised here since the hosts have been marked
    # as down and there's still 1 DC available
    for _ in range(10):
        session.execute(query_to_prime)

    # Might take some time to close the previous connections and reconnect
    time.sleep(10)
    assert_quiescent_pool_state(self, cluster)
    clear_queries()

    time.sleep(10)
    assert_quiescent_pool_state(self, cluster)
def test_heart_beat_timeout(self):
    """
    Test to ensure the hosts are marked as down after an OperationTimedOut
    (OTO) is received. Also to ensure this happens within the expected timeout

    @since 3.10
    @jira_ticket PYTHON-762
    @expected_result all the hosts have been marked as down at some point

    @test_category metadata
    """
    number_of_dcs = 3
    nodes_per_dc = 20

    query_to_prime = "INSERT INTO test3rf.test (k, v) VALUES (0, 1);"

    idle_heartbeat_timeout = 5
    idle_heartbeat_interval = 1

    start_and_prime_cluster_defaults(number_of_dcs, nodes_per_dc)

    listener = TrackDownListener()
    executor = ThreadTracker(max_workers=8)

    # We need to disable compression since it's not supported in simulacron
    cluster = Cluster(compression=False,
                      idle_heartbeat_interval=idle_heartbeat_interval,
                      idle_heartbeat_timeout=idle_heartbeat_timeout,
                      executor_threads=8,
                      execution_profiles={
                          EXEC_PROFILE_DEFAULT: ExecutionProfile(load_balancing_policy=RoundRobinPolicy())})
    self.addCleanup(cluster.shutdown)

    # Route the cluster's executor and scheduler through the tracking
    # executor; executor.called_functions is checked at the end of the test.
    cluster.scheduler.shutdown()
    cluster.executor = executor
    cluster.scheduler = _Scheduler(executor)

    session = cluster.connect(wait_for_all_pools=True)
    cluster.register_listener(listener)

    # Mute the root logger while the test runs, then put back whatever level
    # was configured before rather than a hard-coded one.
    log = logging.getLogger()
    previous_level = log.level
    log.setLevel('CRITICAL')
    self.addCleanup(log.setLevel, previous_level)

    prime_query(query_to_prime, then=NO_THEN)

    # Issue one request per node; every one of them should time out.
    futures = [session.execute_async(query_to_prime)
               for _ in range(number_of_dcs * nodes_per_dc)]

    for f in futures:
        f._event.wait()
        self.assertIsInstance(f._final_exception, OperationTimedOut)

    prime_request(PrimeOptions(then=NO_THEN))

    # We allow some extra time for all the hosts to be reported to on_down
    # The callbacks should start happening after idle_heartbeat_timeout + idle_heartbeat_interval
    time.sleep((idle_heartbeat_timeout + idle_heartbeat_interval) * 2.5)

    for host in cluster.metadata.all_hosts():
        self.assertIn(host, listener.hosts_marked_down)

    # In this case HostConnection._replace shouldn't be called
    self.assertNotIn("_replace", executor.called_functions)
def test_retry_after_defunct(self):
    """
    We test cluster._retry is called if the connection is defunct
    in the middle of a query

    Finally we verify the driver recovers correctly in the event
    of a network partition

    @since 3.12
    @expected_result the driver is able to query even if a host is marked
    as down in the middle of the query, it will go to the next one if the timeout
    hasn't expired

    @test_category connection
    """
    number_of_dcs = 3
    nodes_per_dc = 2

    query_to_prime = "INSERT INTO test3rf.test (k, v) VALUES (0, 1);"

    idle_heartbeat_timeout = 1
    idle_heartbeat_interval = 5

    simulacron_cluster = start_and_prime_cluster_defaults(number_of_dcs, nodes_per_dc)

    # Prime the query normally on the datacenter with the highest id only;
    # the other datacenters get NO_THEN primes once the session is open.
    dc_ids = sorted(simulacron_cluster.data_center_ids)
    last_dc_id = dc_ids.pop()
    prime_query(query_to_prime,
                cluster_name="{}/{}".format(simulacron_cluster.cluster_name, last_dc_id))

    roundrobin_lbp = OrderedRoundRobinPolicy()
    cluster = Cluster(compression=False,
                      idle_heartbeat_interval=idle_heartbeat_interval,
                      idle_heartbeat_timeout=idle_heartbeat_timeout,
                      execution_profiles={
                          EXEC_PROFILE_DEFAULT: ExecutionProfile(load_balancing_policy=roundrobin_lbp)})
    # Cleanup is registered ahead of connect() so a failed connection
    # attempt does not leave the cluster running.
    self.addCleanup(cluster.shutdown)
    session = cluster.connect(wait_for_all_pools=True)

    # This simulates we only have access to one DC
    for dc_id in dc_ids:
        datacenter_path = "{}/{}".format(simulacron_cluster.cluster_name, dc_id)
        prime_query(query_to_prime, then=NO_THEN, cluster_name=datacenter_path)
        prime_request(PrimeOptions(then=NO_THEN, cluster_name=datacenter_path))

    # Only the last datacenter will respond, therefore the first host won't
    # We want to make sure the returned hosts are 127.0.0.1, 127.0.0.2, ... 127.0.0.8
    # NOTE(review): 3 DCs x 2 nodes suggests 6 hosts - confirm the last address.
    roundrobin_lbp._position = 0

    # The connection should be marked as down and another host retried
    # before the timeout (four heartbeat intervals plus one heartbeat
    # timeout) expires.
    response_future = session.execute_async(
        query_to_prime,
        timeout=4 * idle_heartbeat_interval + idle_heartbeat_timeout)
    response_future.result()
    self.assertGreater(len(response_future.attempted_hosts), 1)

    # No error should be raised here since the hosts have been marked
    # as down and there's still 1 DC available
    for _ in range(10):
        session.execute(query_to_prime)

    # Might take some time to close the previous connections and reconnect
    time.sleep(10)
    assert_quiescent_pool_state(self, cluster)
    clear_queries()

    time.sleep(10)
    assert_quiescent_pool_state(self, cluster)