def test_driver_recovers_nework_isolation(self):
        start_and_prime_singledc()

        idle_heartbeat_timeout = 3
        idle_heartbeat_interval = 1

        listener = TrackDownListener()

        cluster = Cluster(['127.0.0.1'],
                          load_balancing_policy=RoundRobinPolicy(),
                          idle_heartbeat_timeout=idle_heartbeat_timeout,
                          idle_heartbeat_interval=idle_heartbeat_interval,
                          executor_threads=16)
        session = cluster.connect(wait_for_all_pools=True)

        cluster.register_listener(listener)

        prime_request(PrimeOptions(then=NO_THEN))
        prime_request(RejectConnections(RejectType.REJECT_STARTUP))

        time.sleep((idle_heartbeat_timeout + idle_heartbeat_interval) * 2)

        for host in cluster.metadata.all_hosts():
            self.assertIn(host, listener.hosts_marked_down)

        self.assertRaises(NoHostAvailable, session.execute, "SELECT * from system.local")

        clear_queries()
        prime_request(AcceptConnections())

        time.sleep(idle_heartbeat_timeout + idle_heartbeat_interval + 2)

        self.assertIsNotNone(session.execute("SELECT * from system.local"))
    def test_driver_recovers_nework_isolation(self):
        start_and_prime_singledc()

        idle_heartbeat_timeout = 3
        idle_heartbeat_interval = 1

        listener = TrackDownListener()

        cluster = Cluster(['127.0.0.1'],
                          load_balancing_policy=RoundRobinPolicy(),
                          idle_heartbeat_timeout=idle_heartbeat_timeout,
                          idle_heartbeat_interval=idle_heartbeat_interval,
                          executor_threads=16)
        session = cluster.connect(wait_for_all_pools=True)

        cluster.register_listener(listener)

        prime_request(PrimeOptions(then=NO_THEN))
        prime_request(RejectConnections(RejectType.REJECT_STARTUP))

        time.sleep((idle_heartbeat_timeout + idle_heartbeat_interval) * 2)

        for host in cluster.metadata.all_hosts():
            self.assertIn(host, listener.hosts_marked_down)

        self.assertRaises(NoHostAvailable, session.execute,
                          "SELECT * from system.local")

        clear_queries()
        prime_request(AcceptConnections())

        time.sleep(idle_heartbeat_timeout + idle_heartbeat_interval + 2)

        self.assertIsNotNone(session.execute("SELECT * from system.local"))
    def test_retry_after_defunct(self):
        """
        We test cluster._retry is called if an the connection is defunct
        in the middle of a query

        Finally we verify the driver recovers correctly in the event
        of a network partition

        @since 3.12
        @expected_result the driver is able to query even if a host is marked
        as down in the middle of the query, it will go to the next one if the timeout
        hasn't expired

        @test_category connection
        """
        number_of_dcs = 3
        nodes_per_dc = 2

        query_to_prime = "INSERT INTO test3rf.test (k, v) VALUES (0, 1);"

        idle_heartbeat_timeout = 1
        idle_heartbeat_interval = 5

        simulacron_cluster = start_and_prime_cluster_defaults(
            number_of_dcs, nodes_per_dc)

        dc_ids = sorted(simulacron_cluster.data_center_ids)
        last_host = dc_ids.pop()
        prime_query(query_to_prime,
                    cluster_name="{}/{}".format(
                        simulacron_cluster.cluster_name, last_host))

        roundrobin_lbp = OrderedRoundRobinPolicy()
        cluster = Cluster(
            compression=False,
            idle_heartbeat_interval=idle_heartbeat_interval,
            idle_heartbeat_timeout=idle_heartbeat_timeout,
            execution_profiles={
                EXEC_PROFILE_DEFAULT:
                ExecutionProfile(load_balancing_policy=roundrobin_lbp)
            })

        session = cluster.connect(wait_for_all_pools=True)
        self.addCleanup(cluster.shutdown)

        # This simulates we only have access to one DC
        for dc_id in dc_ids:
            datacenter_path = "{}/{}".format(simulacron_cluster.cluster_name,
                                             dc_id)
            prime_query(query_to_prime,
                        then=NO_THEN,
                        cluster_name=datacenter_path)
            prime_request(
                PrimeOptions(then=NO_THEN, cluster_name=datacenter_path))

        # Only the last datacenter will respond, therefore the first host won't
        # We want to make sure the returned hosts are 127.0.0.1,  127.0.0.2, ... 127.0.0.8
        roundrobin_lbp._position = 0

        # After 3 + 1 seconds the connection should be marked and down and another host retried
        response_future = session.execute_async(
            query_to_prime,
            timeout=4 * idle_heartbeat_interval + idle_heartbeat_timeout)
        response_future.result()
        self.assertGreater(len(response_future.attempted_hosts), 1)

        # No error should be raised here since the hosts have been marked
        # as down and there's still 1 DC available
        for _ in range(10):
            session.execute(query_to_prime)

        # Might take some time to close the previous connections and reconnect
        time.sleep(10)
        assert_quiescent_pool_state(self, cluster)
        clear_queries()

        time.sleep(10)
        assert_quiescent_pool_state(self, cluster)
Exemple #4
0
 def tearDown(self):
     clear_queries()
Exemple #5
0
 def tearDown(self):
     clear_queries()
     stop_simulacron()
    def test_retry_after_defunct(self):
        """
        We test cluster._retry is called if an the connection is defunct
        in the middle of a query

        Finally we verify the driver recovers correctly in the event
        of a network partition

        @since 3.12
        @expected_result the driver is able to query even if a host is marked
        as down in the middle of the query, it will go to the next one if the timeout
        hasn't expired

        @test_category connection
        """
        number_of_dcs = 3
        nodes_per_dc = 2

        query_to_prime = "INSERT INTO test3rf.test (k, v) VALUES (0, 1);"

        idle_heartbeat_timeout = 1
        idle_heartbeat_interval = 5

        simulacron_cluster = start_and_prime_cluster_defaults(number_of_dcs, nodes_per_dc)

        dc_ids = sorted(simulacron_cluster.data_center_ids)
        last_host = dc_ids.pop()
        prime_query(query_to_prime,
                    cluster_name="{}/{}".format(simulacron_cluster.cluster_name, last_host))

        roundrobin_lbp = OrderedRoundRobinPolicy()
        cluster = Cluster(compression=False,
                          idle_heartbeat_interval=idle_heartbeat_interval,
                          idle_heartbeat_timeout=idle_heartbeat_timeout,
                          execution_profiles={
                              EXEC_PROFILE_DEFAULT: ExecutionProfile(load_balancing_policy=roundrobin_lbp)})

        session = cluster.connect(wait_for_all_pools=True)
        self.addCleanup(cluster.shutdown)

        # This simulates we only have access to one DC
        for dc_id in dc_ids:
            datacenter_path = "{}/{}".format(simulacron_cluster.cluster_name, dc_id)
            prime_query(query_to_prime, then=NO_THEN, cluster_name=datacenter_path)
            prime_request(PrimeOptions(then=NO_THEN, cluster_name=datacenter_path))

        # Only the last datacenter will respond, therefore the first host won't
        # We want to make sure the returned hosts are 127.0.0.1,  127.0.0.2, ... 127.0.0.8
        roundrobin_lbp._position = 0

        # After 3 + 1 seconds the connection should be marked and down and another host retried
        response_future = session.execute_async(query_to_prime, timeout=4 * idle_heartbeat_interval
                                                                        + idle_heartbeat_timeout)
        response_future.result()
        self.assertGreater(len(response_future.attempted_hosts), 1)

        # No error should be raised here since the hosts have been marked
        # as down and there's still 1 DC available
        for _ in range(10):
            session.execute(query_to_prime)

        # Might take some time to close the previous connections and reconnect
        time.sleep(10)
        assert_quiescent_pool_state(self, cluster)
        clear_queries()

        time.sleep(10)
        assert_quiescent_pool_state(self, cluster)
Exemple #7
0
 def tearDown(self):
     clear_queries()
     stop_simulacron()
 def tearDown(self):
     clear_queries()
Exemple #9
0
    def test_retry_policy_on_request_error(self):
        """
        Test to verify that on_request_error is called properly.

        @since 3.18
        @jira_ticket PYTHON-1064
        @expected_result the appropriate retry policy is called

        @test_category connection
        """
        overloaded_error = {"result": "overloaded", "message": "overloaded"}

        bootstrapping_error = {
            "result": "is_bootstrapping",
            "message": "isbootstrapping"
        }

        truncate_error = {
            "result": "truncate_error",
            "message": "truncate_error"
        }

        server_error = {"result": "server_error", "message": "server_error"}

        # Test the on_request_error call
        retry_policy = CounterRetryPolicy()
        self.set_cluster(retry_policy)

        for prime_error, exc in [(overloaded_error, OverloadedErrorMessage),
                                 (bootstrapping_error,
                                  IsBootstrappingErrorMessage),
                                 (truncate_error, TruncateError),
                                 (server_error, ServerError)]:

            clear_queries()
            query_to_prime = "SELECT * from simulacron_keyspace.simulacron_table;"
            prime_query(query_to_prime,
                        then=prime_error,
                        rows=None,
                        column_types=None)
            rf = self.session.execute_async(query_to_prime)

            with self.assertRaises(exc):
                rf.result()

            self.assertEqual(len(rf.attempted_hosts), 1)  # no retry

        self.assertEqual(next(retry_policy.request_error), 4)

        # Test that by default, retry on next host
        retry_policy = RetryPolicy()
        self.set_cluster(retry_policy)

        for e in [
                overloaded_error, bootstrapping_error, truncate_error,
                server_error
        ]:
            clear_queries()
            query_to_prime = "SELECT * from simulacron_keyspace.simulacron_table;"
            prime_query(query_to_prime, then=e, rows=None, column_types=None)
            rf = self.session.execute_async(query_to_prime)

            with self.assertRaises(NoHostAvailable):
                rf.result()

            self.assertEqual(len(rf.attempted_hosts), 3)  # all 3 nodes failed