def test_host_is_not_set_to_down_after_query_oto(self):
    """
    Verify that an OperationTimedOut on regular queries neither closes
    connections nor marks hosts as down.

    With the default configuration, only the heartbeat thread is supposed
    to react that way to an OperationTimedOut.

    @since 3.12
    @expected_result the connections aren't closed nor the hosts are set to down

    @test_category connection
    """
    start_and_prime_singledc()

    query_to_prime = "SELECT * FROM madeup_keyspace.madeup_table"
    # presumably NO_THEN leaves the query unanswered so that every request
    # ends in a client-side timeout -- confirm against the simulacron helpers
    prime_query(query_to_prime, then=NO_THEN)

    listener = TrackDownListener()
    cluster = Cluster(compression=False)
    # Registered before connecting so the cluster is shut down even if
    # connect() or one of the assertions below fails.
    self.addCleanup(cluster.shutdown)
    session = cluster.connect(wait_for_all_pools=True)
    cluster.register_listener(listener)

    futures = [session.execute_async(query_to_prime) for _ in range(10)]

    for future in futures:
        # session.execute would raise the timeout, so wait on the event and
        # inspect the stored exception instead.
        future._event.wait()
        self.assertIsInstance(future._final_exception, OperationTimedOut)

    self.assertEqual(listener.hosts_marked_down, [])
    assert_quiescent_pool_state(self, cluster)
def test_retry_policy_ignores_and_rethrows(self):
    """
    Check that :class:`~cassandra.protocol.WriteTimeoutErrorMessage` is
    decoded correctly and that the
    :attr:`.~cassandra.policies.RetryPolicy.RETHROW` and
    :attr:`.~cassandra.policies.RetryPolicy.IGNORE` decisions are honored.

    @since 3.12
    @jira_ticket PYTHON-812
    @expected_result the retry policy functions as expected

    @test_category connection
    """
    simple_query = "SELECT * from simulacron_keyspace.simple"
    cdc_query = "SELECT * from simulacron_keyspace.cdc"

    write_timeout = {
        "result": "write_timeout",
        "delay_in_ms": 0,
        "consistency_level": "LOCAL_QUORUM",
        "received": 1,
        "block_for": 2,
        "write_type": "SIMPLE"
    }
    prime_query(simple_query, then=write_timeout)

    # Same primed error for the second query, only the write type differs.
    write_timeout["write_type"] = "CDC"
    prime_query(cdc_query, then=write_timeout)

    # The SIMPLE write timeout must surface to the caller.
    self.assertRaises(WriteTimeout, self.session.execute, simple_query)

    # CDC should be ignored
    self.session.execute(cdc_query)
def test_writetimeout(self):
    """
    Prime a write_timeout error and check that the raised WriteTimeout
    exposes the primed values, both as attributes and in its string form.
    """
    write_type = "UNLOGGED_BATCH"
    consistency = "LOCAL_QUORUM"
    received_responses = 1
    required_responses = 4

    query_to_prime_simple = "SELECT * from simulacron_keyspace.simple"
    primed_error = {
        "result": "write_timeout",
        "delay_in_ms": 0,
        "consistency_level": consistency,
        "received": received_responses,
        "block_for": required_responses,
        "write_type": write_type,
        "ignore_on_prepare": True
    }
    prime_query(query_to_prime_simple, then=primed_error, rows=None, column_types=None)

    with self.assertRaises(WriteTimeout) as raised:
        self.session.execute(query_to_prime_simple)
    error = raised.exception

    # Structured fields of the exception.
    self.assertEqual(error.write_type, WriteType.name_to_value[write_type])
    self.assertEqual(error.consistency, ConsistencyLevel.name_to_value[consistency])
    self.assertEqual(error.received_responses, received_responses)
    self.assertEqual(error.required_responses, required_responses)

    # Human-readable message must mention every primed value.
    for fragment in (write_type, consistency, str(received_responses), str(required_responses)):
        self.assertIn(fragment, str(error))
def test_speculative_and_timeout(self):
    """
    Check that the request timeout is honored while speculative execution
    is in use.

    @since 3.10
    @jira_ticket PYTHON-750
    @expected_result speculative retries be schedule every fixed period, during the maximum
    period of the timeout.

    @test_category metadata
    """
    query_to_prime = "INSERT INTO testkeyspace.testtable (k, v) VALUES (0, 1);"
    prime_query(query_to_prime, then=NO_THEN)

    idempotent_statement = SimpleStatement(query_to_prime, is_idempotent=True)

    # The OperationTimedOut ends up stored on the response future;
    # session.execute would raise it, so we use execute_async and wait
    # on the future's event directly.
    future = self.session.execute_async(
        idempotent_statement,
        execution_profile='spec_ep_brr_lim',
        timeout=14
    )
    future._event.wait(16)
    self.assertIsInstance(future._final_exception, OperationTimedOut)

    # This is because 14 / 4 + 1 = 4
    attempted = len(future.attempted_hosts)
    self.assertEqual(attempted, 4)
def test_host_is_not_set_to_down_after_query_oto(self):
    """
    Verify that an OperationTimedOut on regular queries neither closes
    connections nor marks hosts as down.

    With the default configuration, only the heartbeat thread is supposed
    to react that way to an OperationTimedOut.

    @since 3.12
    @expected_result the connections aren't closed nor the hosts are set to down

    @test_category connection
    """
    start_and_prime_singledc()

    query_to_prime = "SELECT * FROM madeup_keyspace.madeup_table"
    # presumably NO_THEN leaves the query unanswered so that every request
    # ends in a client-side timeout -- confirm against the simulacron helpers
    prime_query(query_to_prime, then=NO_THEN)

    listener = TrackDownListener()
    cluster = Cluster(compression=False)
    # Registered before connecting so the cluster is shut down even if
    # connect() or one of the assertions below fails.
    self.addCleanup(cluster.shutdown)
    session = cluster.connect(wait_for_all_pools=True)
    cluster.register_listener(listener)

    futures = [session.execute_async(query_to_prime) for _ in range(10)]

    for future in futures:
        # session.execute would raise the timeout, so wait on the event and
        # inspect the stored exception instead.
        future._event.wait()
        self.assertIsInstance(future._final_exception, OperationTimedOut)

    self.assertEqual(listener.hosts_marked_down, [])
    assert_quiescent_pool_state(self, cluster)
def test_close_when_query(self):
    """
    Check the driver's behavior when the server closes the connection
    right as a query is being made.

    @since 3.12
    @expected_result NoHostAvailable is risen

    @test_category connection
    """
    start_and_prime_singledc()

    cluster = Cluster(protocol_version=PROTOCOL_VERSION, compression=False)
    session = cluster.connect()
    self.addCleanup(cluster.shutdown)

    query_to_prime = "SELECT * from testkesypace.testtable"
    close_types = ("disconnect", "shutdown_read", "shutdown_write")

    for close_type in close_types:
        # Re-prime the same query with each way of closing the connection.
        close_connection = {
            "result": "close_connection",
            "delay_in_ms": 0,
            "close_type": close_type,
            "scope": "connection"
        }
        prime_query(query_to_prime, then=close_connection, rows=None, column_types=None)

        with self.assertRaises(NoHostAvailable):
            session.execute(query_to_prime)
def test_callbacks_and_pool_when_oto(self):
    """
    Check that callbacks are invoked correctly and that the connection is
    returned to the pool when a query ends in an OperationTimedOut (OTO).

    @since 3.12
    @jira_ticket PYTHON-630
    @expected_result the connection is correctly returned to the pool
    after an OTO, also the only the errback is called and not the callback
    when the message finally arrives.

    @test_category metadata
    """
    start_and_prime_singledc()

    cluster = Cluster(protocol_version=PROTOCOL_VERSION, compression=False)
    session = cluster.connect()
    self.addCleanup(cluster.shutdown)

    query_to_prime = "SELECT * from testkesypace.testtable"

    server_delay = 2  # seconds
    delayed_response = {"delay_in_ms": server_delay * 1000}
    prime_query(query_to_prime, then=delayed_response)

    # The client timeout (1s) is shorter than the primed server delay (2s).
    future = session.execute_async(query_to_prime, timeout=1)
    callback = Mock(name='callback')
    errback = Mock(name='errback')
    future.add_callbacks(callback, errback)

    with self.assertRaises(OperationTimedOut):
        future.result()

    assert_quiescent_pool_state(self, cluster)

    # Sleep past the server delay so the late response has had time to arrive.
    time.sleep(server_delay + 1)

    # PYTHON-630 -- only the errback should be called
    errback.assert_called_once()
    callback.assert_not_called()
def test_retry_policy_ignores_and_rethrows(self):
    """
    Check that :class:`~cassandra.protocol.WriteTimeoutErrorMessage` is
    decoded correctly and that the
    :attr:`.~cassandra.policies.RetryPolicy.RETHROW` and
    :attr:`.~cassandra.policies.RetryPolicy.IGNORE` decisions are honored.

    @since 3.12
    @jira_ticket PYTHON-812
    @expected_result the retry policy functions as expected

    @test_category connection
    """
    self.set_cluster(CustomRetryPolicy())

    simple_query = "SELECT * from simulacron_keyspace.simple"
    cdc_query = "SELECT * from simulacron_keyspace.cdc"

    write_timeout = {
        "result": "write_timeout",
        "delay_in_ms": 0,
        "consistency_level": "LOCAL_QUORUM",
        "received": 1,
        "block_for": 2,
        "write_type": "SIMPLE",
        "ignore_on_prepare": True
    }
    prime_query(simple_query, then=write_timeout, rows=None, column_types=None)

    # Same primed error for the second query, only the write type differs.
    write_timeout["write_type"] = "CDC"
    prime_query(cdc_query, then=write_timeout, rows=None, column_types=None)

    # The SIMPLE write timeout must surface to the caller.
    self.assertRaises(WriteTimeout, self.session.execute, simple_query)

    # CDC should be ignored
    self.session.execute(cdc_query)
def test_callbacks_and_pool_when_oto(self):
    """
    Check that callbacks are invoked correctly and that the connection is
    returned to the pool when a query ends in an OperationTimedOut (OTO).

    @since 3.12
    @jira_ticket PYTHON-630
    @expected_result the connection is correctly returned to the pool
    after an OTO, also the only the errback is called and not the callback
    when the message finally arrives.

    @test_category metadata
    """
    start_and_prime_singledc()

    cluster = Cluster(protocol_version=PROTOCOL_VERSION, compression=False)
    session = cluster.connect()
    self.addCleanup(cluster.shutdown)

    query_to_prime = "SELECT * from testkesypace.testtable"

    server_delay = 2  # seconds
    delayed_response = {"delay_in_ms": server_delay * 1000}
    prime_query(query_to_prime, then=delayed_response)

    # The client timeout (1s) is shorter than the primed server delay (2s).
    future = session.execute_async(query_to_prime, timeout=1)
    callback = Mock(name='callback')
    errback = Mock(name='errback')
    future.add_callbacks(callback, errback)

    with self.assertRaises(OperationTimedOut):
        future.result()

    assert_quiescent_pool_state(self, cluster)

    # Sleep past the server delay so the late response has had time to arrive.
    time.sleep(server_delay + 1)

    # PYTHON-630 -- only the errback should be called
    errback.assert_called_once()
    callback.assert_not_called()
def test_close_when_query(self):
    """
    Check the driver's behavior when the server closes the connection
    right as a query is being made.

    @since 3.12
    @expected_result NoHostAvailable is risen

    @test_category connection
    """
    start_and_prime_singledc()

    cluster = Cluster(protocol_version=PROTOCOL_VERSION, compression=False)
    session = cluster.connect()
    self.addCleanup(cluster.shutdown)

    query_to_prime = "SELECT * from testkesypace.testtable"
    close_types = ("disconnect", "shutdown_read", "shutdown_write")

    for close_type in close_types:
        # Re-prime the same query with each way of closing the connection.
        close_connection = {
            "result": "close_connection",
            "delay_in_ms": 0,
            "close_type": close_type,
            "scope": "connection"
        }
        prime_query(query_to_prime, then=close_connection, rows=None, column_types=None)

        with self.assertRaises(NoHostAvailable):
            session.execute(query_to_prime)
def test_speculative_and_timeout(self):
    """
    Check that the request timeout is honored while speculative execution
    is in use.

    @since 3.10
    @jira_ticket PYTHON-750
    @expected_result speculative retries be schedule every fixed period, during the maximum
    period of the timeout.

    @test_category metadata
    """
    query_to_prime = "INSERT INTO testkeyspace.testtable (k, v) VALUES (0, 1);"
    prime_query(query_to_prime, then=NO_THEN)

    idempotent_statement = SimpleStatement(query_to_prime, is_idempotent=True)

    # The OperationTimedOut ends up stored on the response future;
    # session.execute would raise it, so we use execute_async and wait
    # on the future's event directly.
    future = self.session.execute_async(
        idempotent_statement,
        execution_profile='spec_ep_brr_lim',
        timeout=14
    )
    future._event.wait(16)
    self.assertIsInstance(future._final_exception, OperationTimedOut)

    # This is because 14 / 4 + 1 = 4
    attempted = len(future.attempted_hosts)
    self.assertEqual(attempted, 4)
def test_writetimeout(self):
    """
    Prime a write_timeout error and check that the raised WriteTimeout
    exposes the primed values, both as attributes and in its string form.
    """
    write_type = "UNLOGGED_BATCH"
    consistency = "LOCAL_QUORUM"
    received_responses = 1
    required_responses = 4

    query_to_prime_simple = "SELECT * from simulacron_keyspace.simple"
    primed_error = {
        "result": "write_timeout",
        "delay_in_ms": 0,
        "consistency_level": consistency,
        "received": received_responses,
        "block_for": required_responses,
        "write_type": write_type,
        "ignore_on_prepare": True
    }
    prime_query(query_to_prime_simple, then=primed_error, rows=None, column_types=None)

    with self.assertRaises(WriteTimeout) as raised:
        self.session.execute(query_to_prime_simple)
    error = raised.exception

    # Structured fields of the exception.
    self.assertEqual(error.write_type, WriteType.name_to_value[write_type])
    self.assertEqual(error.consistency, ConsistencyLevel.name_to_value[consistency])
    self.assertEqual(error.received_responses, received_responses)
    self.assertEqual(error.required_responses, required_responses)

    # Human-readable message must mention every primed value.
    for fragment in (write_type, consistency, str(received_responses), str(required_responses)):
        self.assertIn(fragment, str(error))
def test_retry_policy_on_request_error(self):
    """
    Test to verify that on_request_error is called properly.

    Four different server errors are primed in turn, first against a
    CounterRetryPolicy and then against the stock RetryPolicy, and the
    number of attempted hosts is checked for each query.

    @since 3.18
    @jira_ticket PYTHON-1064
    @expected_result the appropriate retry policy is called

    @test_category connection
    """
    query_to_prime = "SELECT * from simulacron_keyspace.simulacron_table;"

    overloaded_error = {"result": "overloaded", "message": "overloaded"}
    bootstrapping_error = {"result": "is_bootstrapping", "message": "isbootstrapping"}
    truncate_error = {"result": "truncate_error", "message": "truncate_error"}
    server_error = {"result": "server_error", "message": "server_error"}
    primed_errors = [overloaded_error, bootstrapping_error, truncate_error, server_error]

    def attempted_hosts_for(error):
        # Prime `error`, run the query once and report how many hosts were tried.
        prime_query(query_to_prime, then=error, rows=None, column_types=None)
        rf = self.session.execute_async(query_to_prime)
        try:
            rf.result()
        except Exception:
            # The query may fail with the primed error; only the number of
            # attempted hosts matters here. Exception (rather than a bare
            # except) keeps KeyboardInterrupt/SystemExit propagating.
            pass
        return len(rf.attempted_hosts)

    # Test the on_request_error call
    retry_policy = CounterRetryPolicy()
    self.set_cluster(retry_policy)

    for error in primed_errors:
        self.assertEqual(attempted_hosts_for(error), 1)  # no retry

    self.assertEqual(next(retry_policy.request_error), 4)

    # Test that by default, retry on next host
    retry_policy = RetryPolicy()
    self.set_cluster(retry_policy)

    for error in primed_errors:
        self.assertEqual(attempted_hosts_for(error), 3)  # all 3 nodes failed
def test_retry_policy_on_request_error(self):
    """
    Test to verify that on_request_error is called properly.

    Four different server errors are primed in turn, first against a
    CounterRetryPolicy and then against the stock RetryPolicy, and the
    number of attempted hosts is checked for each query.

    @since 3.18
    @jira_ticket PYTHON-1064
    @expected_result the appropriate retry policy is called

    @test_category connection
    """
    query_to_prime = "SELECT * from simulacron_keyspace.simulacron_table;"

    overloaded_error = {"result": "overloaded", "message": "overloaded"}
    bootstrapping_error = {"result": "is_bootstrapping", "message": "isbootstrapping"}
    truncate_error = {"result": "truncate_error", "message": "truncate_error"}
    server_error = {"result": "server_error", "message": "server_error"}
    primed_errors = [overloaded_error, bootstrapping_error, truncate_error, server_error]

    def attempted_hosts_for(error):
        # Prime `error`, run the query once and report how many hosts were tried.
        prime_query(query_to_prime, then=error, rows=None, column_types=None)
        rf = self.session.execute_async(query_to_prime)
        try:
            rf.result()
        except Exception:
            # The query may fail with the primed error; only the number of
            # attempted hosts matters here. Exception (rather than a bare
            # except) keeps KeyboardInterrupt/SystemExit propagating.
            pass
        return len(rf.attempted_hosts)

    # Test the on_request_error call
    retry_policy = CounterRetryPolicy()
    self.set_cluster(retry_policy)

    for error in primed_errors:
        self.assertEqual(attempted_hosts_for(error), 1)  # no retry

    self.assertEqual(next(retry_policy.request_error), 4)

    # Test that by default, retry on next host
    retry_policy = RetryPolicy()
    self.set_cluster(retry_policy)

    for error in primed_errors:
        self.assertEqual(attempted_hosts_for(error), 3)  # all 3 nodes failed
def test_speculative_execution(self):
    """
    Run the same delayed query under several execution profiles (different
    load balancing policies and ConstantSpeculativeExecutionPolicy settings)
    and check how many hosts each execution attempted.

    @since 3.7.0
    @jira_ticket PYTHON-218
    @expected_result speculative retries should honor max retries, idempotent state of queries, and underlying lbp.

    @test_category metadata
    """
    def attempts(result):
        # Number of hosts tried while producing `result`.
        return len(result.response_future.attempted_hosts)

    query_to_prime = "INSERT INTO test3rf.test (k, v) VALUES (0, 1);"
    prime_query(query_to_prime, then={"delay_in_ms": 10000})

    statement = SimpleStatement(query_to_prime, is_idempotent=True)
    statement_non_idem = SimpleStatement(query_to_prime, is_idempotent=False)

    # (statement, execute() keyword arguments, expected attempted hosts)
    scenarios = (
        # This LBP should repeat hosts up to around 30
        (statement, {"execution_profile": 'spec_ep_brr'}, 7),
        # This LBP should keep host list to 3
        (statement, {"execution_profile": 'spec_ep_rr'}, 3),
        # Spec_execution policy should limit retries to 1
        (statement, {"execution_profile": 'spec_ep_rr_lim'}, 2),
        # Spec_execution policy should not be used if the query is not idempotent
        (statement_non_idem, {"execution_profile": 'spec_ep_brr'}, 1),
        # Default policy with non_idem query
        (statement_non_idem, {"timeout": 12}, 1),
        # Should be able to run an idempotent query against default execution
        # policy with no speculative_execution_policy
        (statement, {"timeout": 12}, 1),
    )
    for stmt, execute_kwargs, expected_attempts in scenarios:
        result = self.session.execute(stmt, **execute_kwargs)
        self.assertEqual(expected_attempts, attempts(result))

    # Test timeout with spec_ex
    self.assertRaises(OperationTimedOut, self.session.execute, statement,
                      execution_profile='spec_ep_rr', timeout=.5)

    prepared_query_to_prime = "SELECT * FROM test3rf.test where k = ?"
    when = {"params": {"k": "0"}, "param_types": {"k": "ascii"}}
    prime_query(prepared_query_to_prime, when=when, then={"delay_in_ms": 4000})

    # PYTHON-736 Test speculation policy works with a prepared statement
    prepared_statement = self.session.prepare(prepared_query_to_prime)
    bound_values = ("0",)

    # non-idempotent
    result = self.session.execute(prepared_statement, bound_values, execution_profile='spec_ep_brr')
    self.assertEqual(1, attempts(result))

    # idempotent
    prepared_statement.is_idempotent = True
    result = self.session.execute(prepared_statement, bound_values, execution_profile='spec_ep_brr')
    self.assertLess(1, attempts(result))
def test_retry_policy_with_prepared(self):
    """
    Check that a retry policy set at the cluster level is consulted for
    simple, bound and prepared statements alike.

    @since 3.13
    @jira_ticket PYTHON-861
    @expected_result the appropriate retry policy is called

    @test_category connection
    """
    counter_policy = CounterRetryPolicy()
    self.set_cluster(counter_policy)

    def assert_single_write_timeout():
        # The policy's write_timeout counter must read 1 at this point.
        self.assertEqual(next(counter_policy.write_timeout), 1)

    write_timeout = {
        "result": "write_timeout",
        "delay_in_ms": 0,
        "consistency_level": "LOCAL_QUORUM",
        "received": 1,
        "block_for": 2,
        "write_type": "SIMPLE",
        "ignore_on_prepare": True
    }

    # 1) Plain query string
    query_to_prime = "SELECT * from simulacron_keyspace.simulacron_table"
    prime_query(query_to_prime, then=write_timeout, rows=None, column_types=None)
    self.session.execute(query_to_prime)
    assert_single_write_timeout()
    counter_policy.reset_counters()

    query_to_prime_prepared = "SELECT * from simulacron_keyspace.simulacron_table WHERE key = :key"
    when = {"params": {"key": "0"}, "param_types": {"key": "ascii"}}
    prime_query(query_to_prime_prepared, when=when, then=write_timeout, rows=None, column_types=None)

    prepared_stmt = self.session.prepare(query_to_prime_prepared)

    # 2) Bound statement
    bound_stm = prepared_stmt.bind({"key": "0"})
    self.session.execute(bound_stm)
    assert_single_write_timeout()
    counter_policy.reset_counters()

    # 3) Prepared statement with positional parameters
    self.session.execute(prepared_stmt, ("0",))
    assert_single_write_timeout()
def test_delay_can_be_0(self):
    """
    Check that ConstantSpeculativeExecutionPolicy accepts a delay of zero.

    @since 3.13
    @jira_ticket PYTHON-836
    @expected_result all the queries are executed immediately

    @test_category policy
    """
    query_to_prime = "INSERT INTO madeup_keyspace.madeup_table(k, v) VALUES (1, 2)"
    prime_query(query_to_prime, then={"delay_in_ms": 5000})

    number_of_requests = 4
    speculative_policy = ConstantSpeculativeExecutionPolicy(0, number_of_requests)
    spec = ExecutionProfile(
        load_balancing_policy=RoundRobinPolicy(),
        speculative_execution_policy=speculative_policy
    )

    cluster = Cluster()
    cluster.add_execution_profile("spec", spec)
    session = cluster.connect(wait_for_all_pools=True)
    self.addCleanup(cluster.shutdown)

    counter = count()
    original_send_request = ResponseFuture.send_request

    def counting_send_request(*args, **kwargs):
        # Record the call, then delegate to the real send_request.
        next(counter)
        print("patched")
        original_send_request(*args, **kwargs)

    # Register the restore first so the class is un-patched whatever happens.
    self.addCleanup(setattr, ResponseFuture, "send_request", original_send_request)
    ResponseFuture.send_request = counting_send_request

    stmt = SimpleStatement(query_to_prime)
    stmt.is_idempotent = True
    results = session.execute(stmt, execution_profile="spec")
    self.assertEqual(len(results.response_future.attempted_hosts), 3)

    # send_request is called number_of_requests times for the speculative request
    # plus one for the call from the main thread.
    self.assertEqual(next(counter), number_of_requests + 1)
def test_retry_policy_with_prepared(self):
    """
    Check that a retry policy set at the cluster level is consulted for
    simple, bound and prepared statements alike.

    @since 3.13
    @jira_ticket PYTHON-861
    @expected_result the appropriate retry policy is called

    @test_category connection
    """
    counter_policy = CounterRetryPolicy()
    self.set_cluster(counter_policy)

    def assert_single_write_timeout():
        # The policy's write_timeout counter must read 1 at this point.
        self.assertEqual(next(counter_policy.write_timeout), 1)

    write_timeout = {
        "result": "write_timeout",
        "delay_in_ms": 0,
        "consistency_level": "LOCAL_QUORUM",
        "received": 1,
        "block_for": 2,
        "write_type": "SIMPLE",
        "ignore_on_prepare": True
    }

    # 1) Plain query string
    query_to_prime = "SELECT * from simulacron_keyspace.simulacron_table"
    prime_query(query_to_prime, then=write_timeout, rows=None, column_types=None)
    self.session.execute(query_to_prime)
    assert_single_write_timeout()
    counter_policy.reset_counters()

    query_to_prime_prepared = "SELECT * from simulacron_keyspace.simulacron_table WHERE key = :key"
    when = {"params": {"key": "0"}, "param_types": {"key": "ascii"}}
    prime_query(query_to_prime_prepared, when=when, then=write_timeout, rows=None, column_types=None)

    prepared_stmt = self.session.prepare(query_to_prime_prepared)

    # 2) Bound statement
    bound_stm = prepared_stmt.bind({"key": "0"})
    self.session.execute(bound_stm)
    assert_single_write_timeout()
    counter_policy.reset_counters()

    # 3) Prepared statement with positional parameters
    self.session.execute(prepared_stmt, ("0",))
    assert_single_write_timeout()
def test_duplicate(self):
    """
    Prime the control connection's peers query with two rows and check
    that connecting logs exactly one warning containing
    'multiple hosts with the same endpoint' on the cluster module's logger.
    """
    mock_handler = MockLoggingHandler()
    logger = logging.getLogger(cassandra.cluster.__name__)
    logger.addHandler(mock_handler)
    # Detach the handler even when an assertion fails, so it cannot leak
    # into (and collect messages from) later tests.
    self.addCleanup(logger.removeHandler, mock_handler)

    # The name of the address column depends on the server version under test.
    address_column = "native_transport_address" if DSE_VERSION and DSE_VERSION > Version("6.0") else "rpc_address"
    rows = [
        {"peer": "127.0.0.1", "data_center": "dc", "host_id": "dontcare1", "rack": "rack1",
         "release_version": "3.11.4", address_column: "127.0.0.1", "schema_version": "dontcare", "tokens": "1"},
        {"peer": "127.0.0.2", "data_center": "dc", "host_id": "dontcare2", "rack": "rack1",
         "release_version": "3.11.4", address_column: "127.0.0.2", "schema_version": "dontcare", "tokens": "2"},
    ]
    prime_query(ControlConnection._SELECT_PEERS, rows=rows)

    cluster = Cluster(protocol_version=PROTOCOL_VERSION, compression=False)
    # Shut the cluster down even if connect() or an assertion fails.
    self.addCleanup(cluster.shutdown)
    cluster.connect(wait_for_all_pools=True)

    warnings = mock_handler.messages.get("warning")
    self.assertEqual(len(warnings), 1)
    self.assertIn('multiple hosts with the same endpoint', warnings[0])
def test_delay_can_be_0(self):
    """
    Check that ConstantSpeculativeExecutionPolicy accepts a delay of zero.

    @since 3.13
    @jira_ticket PYTHON-836
    @expected_result all the queries are executed immediately

    @test_category policy
    """
    query_to_prime = "INSERT INTO madeup_keyspace.madeup_table(k, v) VALUES (1, 2)"
    prime_query(query_to_prime, then={"delay_in_ms": 5000})

    number_of_requests = 4
    speculative_policy = ConstantSpeculativeExecutionPolicy(0, number_of_requests)
    spec = ExecutionProfile(
        load_balancing_policy=RoundRobinPolicy(),
        speculative_execution_policy=speculative_policy
    )

    cluster = Cluster()
    cluster.add_execution_profile("spec", spec)
    session = cluster.connect(wait_for_all_pools=True)
    self.addCleanup(cluster.shutdown)

    counter = count()
    original_send_request = ResponseFuture.send_request

    def counting_send_request(*args, **kwargs):
        # Record the call, then delegate to the real send_request.
        next(counter)
        print("patched")
        original_send_request(*args, **kwargs)

    # Register the restore first so the class is un-patched whatever happens.
    self.addCleanup(setattr, ResponseFuture, "send_request", original_send_request)
    ResponseFuture.send_request = counting_send_request

    stmt = SimpleStatement(query_to_prime)
    stmt.is_idempotent = True
    results = session.execute(stmt, execution_profile="spec")
    self.assertEqual(len(results.response_future.attempted_hosts), 3)

    # send_request is called number_of_requests times for the speculative request
    # plus one for the call from the main thread.
    self.assertEqual(next(counter), number_of_requests + 1)
def test_retry_after_defunct(self):
    """
    Check that cluster._retry is called when a connection becomes defunct
    in the middle of a query, and that the driver recovers correctly from
    a network partition.

    @since 3.12
    @expected_result the driver is able to query even if a host is marked
    as down in the middle of the query, it will go to the next one if the timeout
    hasn't expired

    @test_category connection
    """
    number_of_dcs = 3
    nodes_per_dc = 2

    query_to_prime = "INSERT INTO test3rf.test (k, v) VALUES (0, 1);"

    idle_heartbeat_timeout = 1
    idle_heartbeat_interval = 5

    simulacron_cluster = start_and_prime_cluster_defaults(number_of_dcs, nodes_per_dc)

    def dc_path(dc_id):
        # Path addressing a single datacenter of the simulated cluster.
        return "{}/{}".format(simulacron_cluster.cluster_name, dc_id)

    dc_ids = sorted(simulacron_cluster.data_center_ids)
    # The last datacenter is the only one that keeps answering.
    responsive_dc = dc_ids.pop()
    prime_query(query_to_prime, cluster_name=dc_path(responsive_dc))

    roundrobin_lbp = OrderedRoundRobinPolicy()
    default_profile = ExecutionProfile(load_balancing_policy=roundrobin_lbp)
    cluster = Cluster(
        compression=False,
        idle_heartbeat_interval=idle_heartbeat_interval,
        idle_heartbeat_timeout=idle_heartbeat_timeout,
        execution_profiles={EXEC_PROFILE_DEFAULT: default_profile}
    )
    session = cluster.connect(wait_for_all_pools=True)
    self.addCleanup(cluster.shutdown)

    # This simulates we only have access to one DC
    for dc_id in dc_ids:
        datacenter_path = dc_path(dc_id)
        prime_query(query_to_prime, then=NO_THEN, cluster_name=datacenter_path)
        prime_request(PrimeOptions(then=NO_THEN, cluster_name=datacenter_path))

    # Only the last datacenter will respond, therefore the first host won't
    # We want to make sure the returned hosts are 127.0.0.1, 127.0.0.2, ... 127.0.0.8
    roundrobin_lbp._position = 0

    # After 3 + 1 seconds the connection should be marked and down and another host retried
    query_timeout = 4 * idle_heartbeat_interval + idle_heartbeat_timeout
    response_future = session.execute_async(query_to_prime, timeout=query_timeout)
    response_future.result()
    self.assertGreater(len(response_future.attempted_hosts), 1)

    # No error should be raised here since the hosts have been marked
    # as down and there's still 1 DC available
    for _ in range(10):
        session.execute(query_to_prime)

    # Might take some time to close the previous connections and reconnect
    time.sleep(10)
    assert_quiescent_pool_state(self, cluster)
    clear_queries()

    time.sleep(10)
    assert_quiescent_pool_state(self, cluster)
def test_retry_after_defunct(self):
    """
    Check that cluster._retry is called when a connection becomes defunct
    in the middle of a query, and that the driver recovers correctly from
    a network partition.

    @since 3.12
    @expected_result the driver is able to query even if a host is marked
    as down in the middle of the query, it will go to the next one if the timeout
    hasn't expired

    @test_category connection
    """
    number_of_dcs = 3
    nodes_per_dc = 2

    query_to_prime = "INSERT INTO test3rf.test (k, v) VALUES (0, 1);"

    idle_heartbeat_timeout = 1
    idle_heartbeat_interval = 5

    simulacron_cluster = start_and_prime_cluster_defaults(number_of_dcs, nodes_per_dc)

    def dc_path(dc_id):
        # Path addressing a single datacenter of the simulated cluster.
        return "{}/{}".format(simulacron_cluster.cluster_name, dc_id)

    dc_ids = sorted(simulacron_cluster.data_center_ids)
    # The last datacenter is the only one that keeps answering.
    responsive_dc = dc_ids.pop()
    prime_query(query_to_prime, cluster_name=dc_path(responsive_dc))

    roundrobin_lbp = OrderedRoundRobinPolicy()
    default_profile = ExecutionProfile(load_balancing_policy=roundrobin_lbp)
    cluster = Cluster(
        compression=False,
        idle_heartbeat_interval=idle_heartbeat_interval,
        idle_heartbeat_timeout=idle_heartbeat_timeout,
        execution_profiles={EXEC_PROFILE_DEFAULT: default_profile}
    )
    session = cluster.connect(wait_for_all_pools=True)
    self.addCleanup(cluster.shutdown)

    # This simulates we only have access to one DC
    for dc_id in dc_ids:
        datacenter_path = dc_path(dc_id)
        prime_query(query_to_prime, then=NO_THEN, cluster_name=datacenter_path)
        prime_request(PrimeOptions(then=NO_THEN, cluster_name=datacenter_path))

    # Only the last datacenter will respond, therefore the first host won't
    # We want to make sure the returned hosts are 127.0.0.1, 127.0.0.2, ... 127.0.0.8
    roundrobin_lbp._position = 0

    # After 3 + 1 seconds the connection should be marked and down and another host retried
    query_timeout = 4 * idle_heartbeat_interval + idle_heartbeat_timeout
    response_future = session.execute_async(query_to_prime, timeout=query_timeout)
    response_future.result()
    self.assertGreater(len(response_future.attempted_hosts), 1)

    # No error should be raised here since the hosts have been marked
    # as down and there's still 1 DC available
    for _ in range(10):
        session.execute(query_to_prime)

    # Might take some time to close the previous connections and reconnect
    time.sleep(10)
    assert_quiescent_pool_state(self, cluster)
    clear_queries()

    time.sleep(10)
    assert_quiescent_pool_state(self, cluster)
def test_heartbeat_defunct_deadlock(self):
    """
    Ensure that there is no deadlock when request is in-flight and heartbeat defuncts connection

    The test subclasses the load balancing policy and the cluster locally
    to steer the query to one fixed host and to order/delay the heartbeat
    handling, then expects the in-flight query to fail with
    OperationTimedOut ("Connection defunct by heartbeat").

    @since 3.16
    @jira_ticket PYTHON-1044
    @expected_result an OperationTimeout is raised and no deadlock occurs

    @test_category connection
    """
    start_and_prime_singledc()

    # This is all about timing. We will need the QUERY response future to time out and the heartbeat to defunct
    # at the same moment. The latter will schedule a QUERY retry to another node in case the pool is not
    # already shut down.  If and only if the response future timeout falls in between the retry scheduling and
    # its execution the deadlock occurs. The odds are low, so we need to help fate a bit:
    # 1) Make one heartbeat messages be sent to every node
    # 2) Our QUERY goes always to the same host
    # 3) This host needs to defunct first
    # 4) Open a small time window for the response future timeout, i.e. block executor threads for retry
    #    execution and last connection to defunct
    query_to_prime = "SELECT * from testkesypace.testtable"
    query_host = "127.0.0.2"
    heartbeat_interval = 1
    heartbeat_timeout = 1
    # Base unit (seconds) for the artificial delays injected below.
    lag = 0.05
    # Delay (ms) used for primed responses that should effectively never arrive.
    never = 9999

    class PatchedRoundRobinPolicy(RoundRobinPolicy):
        # Send always to same host
        def make_query_plan(self, working_keyspace=None, query=None):
            # Only the primed test query is pinned to query_host; any other
            # request falls back to the parent's plan.
            if query and query.query_string == query_to_prime:
                return filter(lambda h: h == query_host, self._live_hosts)
            else:
                return super(PatchedRoundRobinPolicy, self).make_query_plan()

    class PatchedCluster(Cluster):
        # Make sure that QUERY connection will timeout first
        def get_connection_holders(self):
            holders = super(PatchedCluster, self).get_connection_holders()
            # Descending sort on a 0/1 key: holders whose connection targets
            # query_host come first.
            return sorted(holders, reverse=True, key=lambda v: int(v._connection.host == query_host))

        # Block executor thread like closing a dead socket could do
        def connection_factory(self, *args, **kwargs):
            conn = super(PatchedCluster, self).connection_factory(*args, **kwargs)
            # presumably `late` wraps the callable so that it runs after the
            # given delay -- confirm against the helper's definition
            conn.defunct = late(seconds=2 * lag)(conn.defunct)
            return conn

    cluster = PatchedCluster(
        protocol_version=PROTOCOL_VERSION,
        compression=False,
        idle_heartbeat_interval=heartbeat_interval,
        idle_heartbeat_timeout=heartbeat_timeout,
        load_balancing_policy=PatchedRoundRobinPolicy()
    )
    session = cluster.connect()
    self.addCleanup(cluster.shutdown)

    prime_query(query_to_prime, then={"delay_in_ms": never})

    # Make heartbeat due
    time.sleep(heartbeat_interval)

    # The query timeout is the heartbeat interval plus the heartbeat timeout
    # plus a small margin of 3 * lag.
    future = session.execute_async(query_to_prime, timeout=heartbeat_interval + heartbeat_timeout + 3 * lag)
    # Delay thread execution like kernel could do
    future._retry_task = late(seconds=4 * lag)(future._retry_task)

    # Prime every remaining request with a "no_result" response delayed by `never`.
    prime_request(PrimeOptions(then={"result": "no_result", "delay_in_ms": never}))
    prime_request(RejectConnections("unbind"))

    # NOTE(review): assertRaisesRegexp is a deprecated alias that was removed
    # in Python 3.12 (assertRaisesRegex is the replacement) -- confirm the
    # interpreter versions this suite must support before switching.
    self.assertRaisesRegexp(OperationTimedOut, "Connection defunct by heartbeat", future.result)
def test_speculative_execution(self):
    """
    Check that speculative execution respects the load balancing policy and
    retries the expected number of times.

    Several execution profiles (different LBPs and
    ConstantSpeculativeExecutionPolicy settings) are exercised and the number
    of attempted hosts is verified for each of them.

    @since 3.7.0
    @jira_ticket PYTHON-218
    @expected_result speculative retries should honor max retries, idempotent state of queries,
    and underlying lbp.

    @test_category metadata
    """
    def attempted_host_count(stmt, *args, **kwargs):
        # Run the statement synchronously and report how many hosts were tried.
        outcome = self.session.execute(stmt, *args, **kwargs)
        return len(outcome.response_future.attempted_hosts)

    insert_cql = "INSERT INTO test3rf.test (k, v) VALUES (0, 1);"
    prime_query(insert_cql, then={"delay_in_ms": 10000})

    idempotent_stmt = SimpleStatement(insert_cql, is_idempotent=True)
    non_idempotent_stmt = SimpleStatement(insert_cql, is_idempotent=False)

    # This LBP should repeat hosts up to around 30
    self.assertEqual(
        7, attempted_host_count(idempotent_stmt, execution_profile='spec_ep_brr'))

    # This LBP should keep host list to 3
    self.assertEqual(
        3, attempted_host_count(idempotent_stmt, execution_profile='spec_ep_rr'))

    # Spec_execution policy should limit retries to 1
    self.assertEqual(
        2, attempted_host_count(idempotent_stmt, execution_profile='spec_ep_rr_lim'))

    # Spec_execution policy should not be used if the query is not idempotent
    self.assertEqual(
        1, attempted_host_count(non_idempotent_stmt, execution_profile='spec_ep_brr'))

    # Default policy with non_idem query
    self.assertEqual(1, attempted_host_count(non_idempotent_stmt, timeout=12))

    # Should be able to run an idempotent query against default execution policy
    # with no speculative_execution_policy
    self.assertEqual(1, attempted_host_count(idempotent_stmt, timeout=12))

    # Test timeout with spec_ex
    with self.assertRaises(OperationTimedOut):
        self.session.execute(idempotent_stmt, execution_profile='spec_ep_rr', timeout=.5)

    select_cql = "SELECT * FROM test3rf.test where k = ?"
    bound_params_match = {"params": {"k": "0"}, "param_types": {"k": "ascii"}}
    prime_query(select_cql, when=bound_params_match, then={"delay_in_ms": 4000})

    # PYTHON-736 Test speculation policy works with a prepared statement
    prepared = self.session.prepare(select_cql)

    # non-idempotent
    self.assertEqual(
        1, attempted_host_count(prepared, ("0", ), execution_profile='spec_ep_brr'))

    # idempotent
    prepared.is_idempotent = True
    self.assertLess(
        1, attempted_host_count(prepared, ("0", ), execution_profile='spec_ep_brr'))
def test_heart_beat_timeout(self):
    """
    Test to ensure the hosts are marked as down after a OTO is received.
    Also to ensure this happens within the expected timeout

    @since 3.10
    @jira_ticket PYTHON-762
    @expected_result all the hosts have been marked as down at some point

    @test_category metadata
    """
    number_of_dcs = 3
    nodes_per_dc = 20

    query_to_prime = "INSERT INTO test3rf.test (k, v) VALUES (0, 1);"

    idle_heartbeat_timeout = 5
    idle_heartbeat_interval = 1

    start_and_prime_cluster_defaults(number_of_dcs, nodes_per_dc)

    listener = TrackDownListener()
    executor = ThreadTracker(max_workers=8)

    # We need to disable compression since it's not supported in simulacron
    cluster = Cluster(
        compression=False,
        idle_heartbeat_interval=idle_heartbeat_interval,
        idle_heartbeat_timeout=idle_heartbeat_timeout,
        executor_threads=8,
        execution_profiles={
            EXEC_PROFILE_DEFAULT: ExecutionProfile(load_balancing_policy=RoundRobinPolicy())
        })
    self.addCleanup(cluster.shutdown)

    # Swap in the tracking executor (and a scheduler built on it) so the
    # functions it runs can be inspected at the end of the test.
    cluster.scheduler.shutdown()
    cluster.executor = executor
    cluster.scheduler = _Scheduler(executor)

    session = cluster.connect(wait_for_all_pools=True)
    cluster.register_listener(listener)

    # Silence the root logger for the duration of the test, then put back
    # whatever level it had before instead of forcing a fixed one.
    log = logging.getLogger()
    previous_level = log.level
    log.setLevel('CRITICAL')
    self.addCleanup(log.setLevel, previous_level)

    prime_query(query_to_prime, then=NO_THEN)

    # One request per simulated node
    futures = [
        session.execute_async(query_to_prime)
        for _ in range(number_of_dcs * nodes_per_dc)
    ]

    for f in futures:
        f._event.wait()
        self.assertIsInstance(f._final_exception, OperationTimedOut)

    prime_request(PrimeOptions(then=NO_THEN))

    # We allow for some extra time for all the hosts to be set to on_down
    # The callbacks should start happening after idle_heartbeat_timeout + idle_heartbeat_interval
    time.sleep((idle_heartbeat_timeout + idle_heartbeat_interval) * 2.5)

    for host in cluster.metadata.all_hosts():
        self.assertIn(host, listener.hosts_marked_down)

    # In this case HostConnection._replace shouldn't be called
    self.assertNotIn("_replace", executor.called_functions)