def simultaneous_bootstrap_test(self):
        """
        Attempt to bootstrap two nodes at once, to assert the second bootstrapped node fails, and does not interfere.

        Start a one node cluster and run a stress write workload.
        Start up a second node, and wait for the first node to detect it has joined the cluster.
        While the second node is bootstrapping, start a third node. This should fail.

        @jira_ticket CASSANDRA-7069
        @jira_ticket CASSANDRA-9484
        """

        bootstrap_error = (
            "Other bootstrapping/leaving/moving nodes detected,"
            " cannot bootstrap while cassandra.consistent.rangemovement is true"
        )

        self.ignore_log_patterns.append(bootstrap_error)

        cluster = self.cluster
        cluster.populate(1)
        cluster.start(wait_for_binary_proto=True)

        node1, = cluster.nodelist()

        node1.stress([
            'write', 'n=500K', '-schema', 'replication(factor=1)', '-rate',
            'threads=10'
        ])

        node2 = new_node(cluster)
        node2.start(wait_other_notice=True)

        node3 = new_node(cluster, remote_debug_port='2003')
        process = node3.start(wait_other_notice=False)
        stdout, stderr = process.communicate()
        self.assertIn(bootstrap_error, stderr, msg=stderr)
        time.sleep(.5)
        self.assertFalse(node3.is_running(),
                         msg="Two nodes bootstrapped simultaneously")

        node2.watch_log_for("Starting listening for CQL clients")

        session = self.patient_exclusive_cql_connection(node2)

        # Repeat the select count(*) query, to help catch
        # bugs like 9484, where count(*) fails at higher
        # data loads.
        for _ in xrange(5):
            assert_one(session,
                       "SELECT count(*) from keyspace1.standard1", [500000],
                       cl=ConsistencyLevel.ONE)
    def decommissioned_wiped_node_can_gossip_to_single_seed_test(self):
        """
        @jira_ticket CASSANDRA-8072
        @jira_ticket CASSANDRA-8422
        Test that if we decommission a node, kill it and wipe its data, it can join a cluster with a single
        seed node.
        """
        cluster = self.cluster
        cluster.populate(1)
        cluster.start(wait_for_binary_proto=True)

        # Add a new node, bootstrap=True ensures that it is not a seed
        node2 = new_node(cluster, bootstrap=True)
        node2.start(wait_for_binary_proto=True, wait_other_notice=True)

        # Decommision the new node and kill it
        debug("Decommissioning & stopping node2")
        node2.decommission()
        node2.stop(wait_other_notice=False)

        # Wipe its data
        for data_dir in node2.data_directories():
            debug("Deleting {}".format(data_dir))
            shutil.rmtree(data_dir)

        commitlog_dir = os.path.join(node2.get_path(), 'commitlogs')
        debug("Deleting {}".format(commitlog_dir))
        shutil.rmtree(commitlog_dir)

        # Now start it, it should be allowed to join
        mark = node2.mark_log()
        debug("Restarting wiped node2")
        node2.start(wait_other_notice=False)
        node2.watch_log_for("JOINING:", from_mark=mark)
    def disk_balance_bootstrap_test(self):
        cluster = self.cluster
        # apparently we have legitimate errors in the log when bootstrapping (see bootstrap_test.py)
        self.allow_log_errors = True
        cluster.set_configuration_options(
            values={'allocate_tokens_for_keyspace': 'keyspace1'})
        cluster.populate(4).start(wait_for_binary_proto=True)
        node1 = cluster.nodes['node1']

        node1.stress([
            'write', 'n=10k', '-rate', 'threads=100', '-schema',
            'replication(factor=3)'
        ])
        cluster.flush()
        node5 = new_node(cluster)
        node5.start(wait_for_binary_proto=True)
        self.assert_balanced(node5)

        cluster.cleanup()

        self.assert_balanced(node5)

        if DISABLE_VNODES:
            for node in cluster.nodelist():
                node.nodetool('relocatesstables')
            self.assertTrue(
                len(
                    node5.grep_log(
                        "No sstables to RELOCATE for keyspace1.standard1")) > 0
            )

        for node in cluster.nodelist():
            self.assert_balanced(node)
    def simple_bootstrap_test(self):
        cluster = self.cluster
        tokens = cluster.balanced_tokens(2)
        cluster.set_configuration_options(values={'num_tokens': 1})

        debug("[node1, node2] tokens: %r" % (tokens, ))

        keys = 10000

        # Create a single node cluster
        cluster.populate(1)
        node1 = cluster.nodelist()[0]
        node1.set_configuration_options(values={'initial_token': tokens[0]})
        cluster.start(wait_other_notice=True)

        session = self.patient_cql_connection(node1)
        self.create_ks(session, 'ks', 1)
        self.create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})

        # record the size before inserting any of our own data
        empty_size = node1.data_size()
        debug("node1 empty size : %s" % float(empty_size))

        insert_statement = session.prepare(
            "INSERT INTO ks.cf (key, c1, c2) VALUES (?, 'value1', 'value2')")
        execute_concurrent_with_args(session, insert_statement,
                                     [['k%d' % k] for k in range(keys)])

        node1.flush()
        node1.compact()
        initial_size = node1.data_size()
        debug("node1 size before bootstrapping node2: %s" %
              float(initial_size))

        # Reads inserted data all during the bootstrap process. We shouldn't
        # get any error
        reader = self.go(lambda _: query_c1c2(
            session, random.randint(0, keys - 1), ConsistencyLevel.ONE))

        # Bootstrapping a new node
        node2 = new_node(cluster)
        node2.set_configuration_options(values={'initial_token': tokens[1]})
        node2.start(wait_for_binary_proto=True)
        node2.compact()

        reader.check()
        node1.cleanup()
        debug("node1 size after cleanup: %s" % float(node1.data_size()))
        node1.compact()
        debug("node1 size after compacting: %s" % float(node1.data_size()))
        time.sleep(.5)
        reader.check()

        debug("node2 size after compacting: %s" % float(node2.data_size()))

        size1 = float(node1.data_size())
        size2 = float(node2.data_size())
        assert_almost_equal(size1, size2, error=0.3)
        assert_almost_equal(float(initial_size - empty_size),
                            2 * (size1 - float(empty_size)))
    def read_from_bootstrapped_node_test(self):
        """Test bootstrapped node sees existing data, eg. CASSANDRA-6648"""
        cluster = self.cluster
        cluster.populate(3)
        version = cluster.version()
        cluster.start()

        node1 = cluster.nodes['node1']
        if version < "2.1":
            node1.stress(['-n', '10000'])
        else:
            node1.stress(['write', 'n=10000', '-rate', 'threads=8'])

        session = self.patient_cql_connection(node1)
        stress_table = 'keyspace1.standard1' if self.cluster.version(
        ) >= '2.1' else '"Keyspace1"."Standard1"'
        original_rows = list(
            session.execute("SELECT * FROM %s" % (stress_table, )))

        node4 = new_node(cluster)
        node4.start(wait_for_binary_proto=True)

        session = self.patient_exclusive_cql_connection(node4)
        new_rows = list(session.execute("SELECT * FROM %s" % (stress_table, )))
        self.assertEquals(original_rows, new_rows)
    def read_from_bootstrapped_node_test(self):
        """
        Test bootstrapped node sees existing data
        @jira_ticket CASSANDRA-6648
        """
        cluster = self.cluster
        cluster.populate(3)
        cluster.start()

        node1 = cluster.nodes['node1']
        node1.stress([
            'write', 'n=10K', 'no-warmup', '-rate', 'threads=8', '-schema',
            'replication(factor=2)'
        ])

        session = self.patient_cql_connection(node1)
        stress_table = 'keyspace1.standard1'
        original_rows = list(
            session.execute("SELECT * FROM %s" % (stress_table, )))

        node4 = new_node(cluster)
        node4.start(wait_for_binary_proto=True)

        session = self.patient_exclusive_cql_connection(node4)
        new_rows = list(session.execute("SELECT * FROM %s" % (stress_table, )))
        self.assertEquals(original_rows, new_rows)
    def disk_balance_bootstrap_test(self):
        cluster = self.cluster
        # apparently we have legitimate errors in the log when bootstrapping (see bootstrap_test.py)
        self.allow_log_errors = True
        cluster.set_configuration_options(values={'allocate_tokens_for_keyspace': 'keyspace1'})
        cluster.populate(4).start(wait_for_binary_proto=True)
        node1 = cluster.nodes['node1']

        node1.stress(['write', 'n=10k', '-rate', 'threads=100', '-schema', 'replication(factor=3)'])
        cluster.flush()
        node5 = new_node(cluster)
        node5.start(wait_for_binary_proto=True)
        self.assert_balanced(node5)

        cluster.cleanup()

        self.assert_balanced(node5)

        if DISABLE_VNODES:
            for node in cluster.nodelist():
                node.nodetool('relocatesstables')
            self.assertTrue(len(node5.grep_log("No sstables to RELOCATE for keyspace1.standard1")) > 0)

        for node in cluster.nodelist():
            self.assert_balanced(node)
    def manual_bootstrap_test(self):
        """
            Test adding a new node and bootstrapping it manually. No auto_bootstrap.
            This test also verify that all data are OK after the addition of the new node.
            @jira_ticket CASSANDRA-9022
        """
        cluster = self.cluster
        cluster.populate(2).start(wait_other_notice=True)
        (node1, node2) = cluster.nodelist()

        node1.stress([
            'write', 'n=1K', '-schema', 'replication(factor=2)', '-rate',
            'threads=1', '-pop', 'dist=UNIFORM(1..1000)'
        ])

        session = self.patient_exclusive_cql_connection(node2)
        stress_table = 'keyspace1.standard1'

        original_rows = list(session.execute("SELECT * FROM %s" %
                                             stress_table))

        # Add a new node
        node3 = new_node(cluster, bootstrap=False)
        node3.start(wait_for_binary_proto=True)
        node3.repair()
        node1.cleanup()

        current_rows = list(session.execute("SELECT * FROM %s" % stress_table))
        self.assertEquals(original_rows, current_rows)
Beispiel #9
0
    def decommissioned_wiped_node_can_join_test(self):
        """
        @jira_ticket CASSANDRA-9765
        Test that if we decommission a node and then wipe its data, it can join the cluster.
        """
        cluster = self.cluster
        cluster.populate(3)
        cluster.start(wait_for_binary_proto=True)

        stress_table = 'keyspace1.standard1'

        # write some data
        node1 = cluster.nodelist()[0]
        node1.stress(['write', 'n=10K', 'no-warmup', '-rate', 'threads=8'])

        session = self.patient_cql_connection(node1)
        original_rows = list(session.execute("SELECT * FROM {}".format(stress_table,)))

        # Add a new node, bootstrap=True ensures that it is not a seed
        node4 = new_node(cluster, bootstrap=True)
        node4.start(wait_for_binary_proto=True, wait_other_notice=True)

        session = self.patient_cql_connection(node4)
        self.assertEquals(original_rows, list(session.execute("SELECT * FROM {}".format(stress_table,))))

        # Decommission the new node and wipe its data
        node4.decommission()
        node4.stop()
        self._cleanup(node4)
        # Now start it, it should be allowed to join
        mark = node4.mark_log()
        node4.start(wait_other_notice=True)
        node4.watch_log_for("JOINING:", from_mark=mark)
    def decommissioned_wiped_node_can_join_test(self):
        """
        @jira_ticket CASSANDRA-9765
        Test that if we decommission a node and then wipe its data, it can join the cluster.
        """
        cluster = self.cluster
        cluster.populate(3)
        cluster.start(wait_for_binary_proto=True)

        stress_table = 'keyspace1.standard1'

        # write some data
        node1 = cluster.nodelist()[0]
        node1.stress(['write', 'n=10K', '-rate', 'threads=8'])

        session = self.patient_cql_connection(node1)
        original_rows = list(session.execute("SELECT * FROM {}".format(stress_table,)))

        # Add a new node, bootstrap=True ensures that it is not a seed
        node4 = new_node(cluster, bootstrap=True)
        node4.start(wait_for_binary_proto=True, wait_other_notice=True)

        session = self.patient_cql_connection(node4)
        self.assertEquals(original_rows, list(session.execute("SELECT * FROM {}".format(stress_table,))))

        # Decommission the new node and wipe its data
        node4.decommission()
        node4.stop()
        self._cleanup(node4)
        # Now start it, it should be allowed to join
        mark = node4.mark_log()
        node4.start(wait_other_notice=True)
        node4.watch_log_for("JOINING:", from_mark=mark)
    def _do_upgrade(self, login_keyspace=True):
        cluster = self.cluster
        node1 = cluster.nodelist()[0]

        node1.flush()
        time.sleep(.5)
        node1.stop(wait_other_notice=True)

        node1.set_install_dir(install_dir=self.default_install_dir)
        node1.start(wait_other_notice=True, wait_for_binary_proto=True)

        if self.bootstrap:
            cluster.set_install_dir(install_dir=self.default_install_dir)
            # Add a new node, bootstrap=True ensures that it is not a seed
            node2 = new_node(cluster, bootstrap=True)
            node2.start(wait_for_binary_proto=True, jvm_args=self.jvm_args)

            temp_files = self.glob_data_dirs(os.path.join('*', "tmp", "*.dat"))
            debug("temp files: " + str(temp_files))
            self.assertEquals(0, len(temp_files), "Temporary files were not cleaned up.")

        cursor = self.patient_cql_connection(node1)
        if login_keyspace:
            cursor.execute('USE ks')
        return cursor
Beispiel #12
0
    def _wiped_node_cannot_join_test(self, gently):
        """
        @jira_ticket CASSANDRA-9765
        Test that if we stop a node and wipe its data then the node cannot join
        when it is not a seed. Test both a nice shutdown or a forced shutdown, via
        the gently parameter.
        """
        cluster = self.cluster
        cluster.populate(3)
        cluster.start(wait_for_binary_proto=True)

        stress_table = 'keyspace1.standard1'

        # write some data
        node1 = cluster.nodelist()[0]
        node1.stress(['write', 'n=10K', 'no-warmup', '-rate', 'threads=8'])

        session = self.patient_cql_connection(node1)
        original_rows = list(session.execute("SELECT * FROM {}".format(stress_table,)))

        # Add a new node, bootstrap=True ensures that it is not a seed
        node4 = new_node(cluster, bootstrap=True)
        node4.start(wait_for_binary_proto=True)

        session = self.patient_cql_connection(node4)
        self.assertEquals(original_rows, list(session.execute("SELECT * FROM {}".format(stress_table,))))

        # Stop the new node and wipe its data
        node4.stop(gently=gently)
        self._cleanup(node4)
        # Now start it, it should not be allowed to join.
        mark = node4.mark_log()
        node4.start(no_wait=True, wait_other_notice=False)
        node4.watch_log_for("A node with address /127.0.0.4 already exists, cancelling join", from_mark=mark)
    def decommissioned_wiped_node_can_gossip_to_single_seed_test(self):
        """
        @jira_ticket CASSANDRA-8072
        @jira_ticket CASSANDRA-8422
        Test that if we decommission a node, kill it and wipe its data, it can join a cluster with a single
        seed node.
        """
        cluster = self.cluster
        cluster.populate(1)
        cluster.start(wait_for_binary_proto=True)

        # Add a new node, bootstrap=True ensures that it is not a seed
        node2 = new_node(cluster, bootstrap=True)
        node2.start(wait_for_binary_proto=True, wait_other_notice=True)

        # Decommision the new node and kill it
        debug("Decommissioning & stopping node2")
        node2.decommission()
        node2.stop(wait_other_notice=False)

        # Wipe its data
        for data_dir in node2.data_directories():
            debug("Deleting {}".format(data_dir))
            shutil.rmtree(data_dir)

        commitlog_dir = os.path.join(node2.get_path(), 'commitlogs')
        debug("Deleting {}".format(commitlog_dir))
        shutil.rmtree(commitlog_dir)

        # Now start it, it should be allowed to join
        mark = node2.mark_log()
        debug("Restarting wiped node2")
        node2.start(wait_other_notice=False)
        node2.watch_log_for("JOINING:", from_mark=mark)
    def add_node_after_mv_test(self):
        """Test that materialized views work as expected when adding a node."""

        session = self.prepare()

        session.execute("CREATE TABLE t (id int PRIMARY KEY, v int)")
        session.execute(("CREATE MATERIALIZED VIEW t_by_v AS SELECT * FROM t "
                         "WHERE v IS NOT NULL AND id IS NOT NULL PRIMARY KEY (v, id)"))

        for i in xrange(1000):
            session.execute("INSERT INTO t (id, v) VALUES ({v}, {v})".format(v=i))

        for i in xrange(1000):
            assert_one(session, "SELECT * FROM t_by_v WHERE v = {}".format(i), [i, i])

        node4 = new_node(self.cluster)
        node4.start(wait_for_binary_proto=True)

        session2 = self.patient_exclusive_cql_connection(node4)

        for i in xrange(1000):
            assert_one(session2, "SELECT * FROM ks.t_by_v WHERE v = {}".format(i), [i, i])

        for i in xrange(1000, 1100):
            session.execute("INSERT INTO t (id, v) VALUES ({v}, {v})".format(v=i))

        for i in xrange(1000, 1100):
            assert_one(session, "SELECT * FROM t_by_v WHERE v = {}".format(i), [i, i])
    def manual_bootstrap_test(self):
        """
            Test adding a new node and bootstrapping it manually. No auto_bootstrap.
            This test also verify that all data are OK after the addition of the new node.
            @jira_ticket CASSANDRA-9022
        """
        cluster = self.cluster
        cluster.populate(2).start(wait_other_notice=True)
        (node1, node2) = cluster.nodelist()

        node1.stress(['write', 'n=1K', '-schema', 'replication(factor=2)',
                      '-rate', 'threads=1', '-pop', 'dist=UNIFORM(1..1000)'])

        session = self.patient_exclusive_cql_connection(node2)
        stress_table = 'keyspace1.standard1'

        original_rows = list(session.execute("SELECT * FROM %s" % stress_table))

        # Add a new node
        node3 = new_node(cluster, bootstrap=False)
        node3.start(wait_for_binary_proto=True)
        node3.repair()
        node1.cleanup()

        current_rows = list(session.execute("SELECT * FROM %s" % stress_table))
        self.assertEquals(original_rows, current_rows)
Beispiel #16
0
    def _do_upgrade(self, login_keyspace=True):
        cluster = self.cluster
        node1 = cluster.nodelist()[0]

        node1.flush()
        time.sleep(.5)
        node1.stop(wait_other_notice=True)

        node1.set_install_dir(install_dir=self.default_install_dir)
        node1.start(wait_other_notice=True, wait_for_binary_proto=True)

        if self.bootstrap:
            cluster.set_install_dir(install_dir=self.default_install_dir)
            # Add a new node, bootstrap=True ensures that it is not a seed
            node2 = new_node(cluster, bootstrap=True)
            node2.start(wait_for_binary_proto=True, jvm_args=self.jvm_args)

            temp_files = self.glob_data_dirs(os.path.join('*', "tmp", "*.dat"))
            debug("temp files: " + str(temp_files))
            self.assertEquals(0, len(temp_files),
                              "Temporary files were not cleaned up.")

        cursor = self.patient_cql_connection(node1)
        if login_keyspace:
            cursor.execute('USE ks')
        return cursor
    def local_quorum_bootstrap_test(self):
        """
        Test that CL local_quorum works while a node is bootstrapping.
        @jira_ticket CASSANDRA-8058
        """

        cluster = self.cluster
        cluster.populate([1, 1])
        cluster.start()

        node1 = cluster.nodes['node1']
        yaml_config = """
        # Create the keyspace and table
        keyspace: keyspace1
        keyspace_definition: |
          CREATE KEYSPACE keyspace1 WITH replication = {'class': 'NetworkTopologyStrategy', 'dc1': 1, 'dc2': 1};
        table: users
        table_definition:
          CREATE TABLE users (
            username text,
            first_name text,
            last_name text,
            email text,
            PRIMARY KEY(username)
          ) WITH compaction = {'class':'SizeTieredCompactionStrategy'};
        insert:
          partitions: fixed(1)
          batchtype: UNLOGGED
        queries:
          read:
            cql: select * from users where username = ?
            fields: samerow
        """
        with tempfile.NamedTemporaryFile(mode='w+') as stress_config:
            stress_config.write(yaml_config)
            stress_config.flush()
            node1.stress([
                'user', 'profile=' + stress_config.name, 'n=2M', 'no-warmup',
                'ops(insert=1)', '-rate', 'threads=50'
            ])

        node3 = new_node(cluster, data_center='dc2')
        node3.start(no_wait=True)
        time.sleep(3)

        with tempfile.TemporaryFile(mode='w+') as tmpfile:
            node1.stress([
                'user', 'profile=' + stress_config.name, 'ops(insert=1)',
                'n=500K', 'no-warmup', 'cl=LOCAL_QUORUM', '-rate', 'threads=5',
                '-errors', 'retries=2'
            ],
                         stdout=tmpfile,
                         stderr=subprocess.STDOUT)
            tmpfile.seek(0)
            output = tmpfile.read()

        debug(output)
        regex = re.compile("Operation.+error inserting key.+Exception")
        failure = regex.search(output)
        self.assertIsNone(failure, "Error during stress while bootstrapping")
 def test_cleanup(self):
     """
     @jira_ticket CASSANDRA-11179
     Make sure we remove processed files during cleanup
     """
     cluster = self.cluster
     cluster.set_configuration_options(values={'concurrent_compactors': 4})
     cluster.populate(1)
     cluster.start(wait_for_binary_proto=True)
     node1, = cluster.nodelist()
     for x in xrange(0, 5):
         node1.stress([
             'write', 'n=100k', '-schema',
             'compaction(strategy=SizeTieredCompactionStrategy,enabled=false)',
             'replication(factor=1)', '-rate', 'threads=10'
         ])
         node1.flush()
     node2 = new_node(cluster)
     node2.start(wait_for_binary_proto=True, wait_other_notice=True)
     event = threading.Event()
     failed = threading.Event()
     jobs = 1
     thread = threading.Thread(
         target=self._monitor_datadir,
         args=(node1, event,
               len(node1.get_sstables("keyspace1",
                                      "standard1")), jobs, failed))
     thread.start()
     node1.nodetool("cleanup -j {} keyspace1 standard1".format(jobs))
     event.set()
     thread.join()
     self.assertFalse(failed.is_set())
    def _wiped_node_cannot_join_test(self, gently):
        """
        @jira_ticket CASSANDRA-9765
        Test that if we stop a node and wipe its data then the node cannot join
        when it is not a seed. Test both a nice shutdown or a forced shutdown, via
        the gently parameter.
        """
        cluster = self.cluster
        cluster.populate(3)
        cluster.start(wait_for_binary_proto=True)

        stress_table = 'keyspace1.standard1'

        # write some data
        node1 = cluster.nodelist()[0]
        node1.stress(['write', 'n=10K', '-rate', 'threads=8'])

        session = self.patient_cql_connection(node1)
        original_rows = list(session.execute("SELECT * FROM {}".format(stress_table,)))

        # Add a new node, bootstrap=True ensures that it is not a seed
        node4 = new_node(cluster, bootstrap=True)
        node4.start(wait_for_binary_proto=True)

        session = self.patient_cql_connection(node4)
        self.assertEquals(original_rows, list(session.execute("SELECT * FROM {}".format(stress_table,))))

        # Stop the new node and wipe its data
        node4.stop(gently=gently)
        self._cleanup(node4)
        # Now start it, it should not be allowed to join.
        mark = node4.mark_log()
        node4.start(no_wait=True, wait_other_notice=False)
        node4.watch_log_for("A node with address /127.0.0.4 already exists, cancelling join", from_mark=mark)
Beispiel #20
0
    def bootstrap_with_reset_bootstrap_state_test(self):
        """Test bootstrap with resetting bootstrap progress"""

        cluster = self.cluster
        cluster.set_configuration_options(values={'stream_throughput_outbound_megabits_per_sec': 1})
        cluster.populate(2).start(wait_other_notice=True)

        node1 = cluster.nodes['node1']
        node1.stress(['write', 'n=100K', '-schema', 'replication(factor=2)'])
        node1.flush()

        # kill node1 in the middle of streaming to let it fail
        t = InterruptBootstrap(node1)
        t.start()

        # start bootstrapping node3 and wait for streaming
        node3 = new_node(cluster)
        try:
            node3.start()
        except NodeError:
            pass  # node doesn't start as expected
        t.join()
        node1.start()

        # restart node3 bootstrap with resetting bootstrap progress
        node3.stop()
        mark = node3.mark_log()
        node3.start(jvm_args=["-Dcassandra.reset_bootstrap_progress=true"])
        # check if we reset bootstrap state
        node3.watch_log_for("Resetting bootstrap progress to start fresh", from_mark=mark)
        # wait for node3 ready to query
        node3.watch_log_for("Listening for thrift clients...", from_mark=mark)

        # check if 2nd bootstrap succeeded
        assert_bootstrap_state(self, node3, 'COMPLETED')
    def local_quorum_bootstrap_test(self):
        """
        Test that CL local_quorum works while a node is bootstrapping.
        @jira_ticket CASSANDRA-8058
        """

        cluster = self.cluster
        cluster.populate([1, 1])
        cluster.start()

        node1 = cluster.nodes['node1']
        yaml_config = """
        # Create the keyspace and table
        keyspace: keyspace1
        keyspace_definition: |
          CREATE KEYSPACE keyspace1 WITH replication = {'class': 'NetworkTopologyStrategy', 'dc1': 1, 'dc2': 1};
        table: users
        table_definition:
          CREATE TABLE users (
            username text,
            first_name text,
            last_name text,
            email text,
            PRIMARY KEY(username)
          ) WITH compaction = {'class':'SizeTieredCompactionStrategy'};
        insert:
          partitions: fixed(1)
          batchtype: UNLOGGED
        queries:
          read:
            cql: select * from users where username = ?
            fields: samerow
        """
        stress_config = tempfile.NamedTemporaryFile(mode='w+', delete=False)
        stress_config.write(yaml_config)
        stress_config.close()
        node1.stress(['user', 'profile=' + stress_config.name, 'n=2000000',
                      'ops(insert=1)', '-rate', 'threads=50'])

        node3 = new_node(cluster, data_center='dc2')
        node3.start(no_wait=True)
        time.sleep(3)

        with tempfile.TemporaryFile(mode='w+') as tmpfile:
            node1.stress(['user', 'profile=' + stress_config.name, 'ops(insert=1)',
                          'n=500K', 'cl=LOCAL_QUORUM',
                          '-rate', 'threads=5',
                          '-errors', 'retries=2'],
                         stdout=tmpfile, stderr=subprocess.STDOUT)
            os.unlink(stress_config.name)

            tmpfile.seek(0)
            output = tmpfile.read()

        debug(output)
        regex = re.compile("Operation.+error inserting key.+Exception")
        failure = regex.search(output)
        self.assertIsNone(failure, "Error during stress while bootstrapping")
    def simultaneous_bootstrap_test(self):
        """
        Attempt to bootstrap two nodes at once, to assert the second bootstrapped node fails, and does not interfere.

        Start a one node cluster and run a stress write workload.
        Start up a second node, and wait for the first node to detect it has joined the cluster.
        While the second node is bootstrapping, start a third node. This should fail.

        @jira_ticket CASSANDRA-7069
        @jira_ticket CASSANDRA-9484
        """

        bootstrap_error = ("Other bootstrapping/leaving/moving nodes detected,"
                           " cannot bootstrap while cassandra.consistent.rangemovement is true")

        self.ignore_log_patterns.append(bootstrap_error)

        cluster = self.cluster
        cluster.populate(1)
        cluster.start(wait_for_binary_proto=True)

        node1, = cluster.nodelist()

        node1.stress(['write', 'n=500K', '-schema', 'replication(factor=1)',
                      '-rate', 'threads=10'])

        node2 = new_node(cluster)
        node2.start(wait_other_notice=True)

        node3 = new_node(cluster, remote_debug_port='2003')
        process = node3.start(wait_other_notice=False)
        stdout, stderr = process.communicate()
        self.assertIn(bootstrap_error, stderr, msg=stderr)
        time.sleep(.5)
        self.assertFalse(node3.is_running(), msg="Two nodes bootstrapped simultaneously")

        node2.watch_log_for("Starting listening for CQL clients")

        session = self.patient_exclusive_cql_connection(node2)

        # Repeat the select count(*) query, to help catch
        # bugs like 9484, where count(*) fails at higher
        # data loads.
        for _ in xrange(5):
            assert_one(session, "SELECT count(*) from keyspace1.standard1", [500000], cl=ConsistencyLevel.ONE)
Beispiel #23
0
 def _bootstrap_new_node(self):
     # Check we can bootstrap a new node on the upgraded cluster:
     debug("Adding a node to the cluster")
     nnode = new_node(self.cluster, remote_debug_port=str(2000 + len(self.cluster.nodes)))
     nnode.start(use_jna=True, wait_other_notice=True, wait_for_binary_proto=True)
     self._write_values()
     self._increment_counters()
     self._check_values()
     self._check_counters()
 def _bootstrap_new_node(self):
     # Check we can bootstrap a new node on the upgraded cluster:
     debug("Adding a node to the cluster")
     nnode = new_node(self.cluster, remote_debug_port=str(2000 + len(self.cluster.nodes)))
     nnode.start(use_jna=True, wait_other_notice=True, wait_for_binary_proto=True)
     self._write_values()
     self._increment_counters()
     self._check_values()
     self._check_counters()
    def add_dc_after_mv_test(self):
        """Test that materialized views work as expected when adding a datacenter."""

        session = self.prepare()

        debug("Creating schema")
        session.execute("CREATE TABLE t (id int PRIMARY KEY, v int)")
        session.execute(("CREATE MATERIALIZED VIEW t_by_v AS SELECT * FROM t "
                         "WHERE v IS NOT NULL AND id IS NOT NULL PRIMARY KEY (v, id)"))

        debug("Writing 1k to base")
        for i in xrange(1000):
            session.execute("INSERT INTO t (id, v) VALUES ({v}, {v})".format(v=i))

        debug("Reading 1k from view")
        for i in xrange(1000):
            assert_one(session, "SELECT * FROM t_by_v WHERE v = {}".format(i), [i, i])

        debug("Reading 1k from base")
        for i in xrange(1000):
            assert_one(session, "SELECT * FROM t WHERE id = {}".format(i), [i, i])

        debug("Bootstrapping new node in another dc")
        node4 = new_node(self.cluster, data_center='dc2')
        node4.start(wait_other_notice=True, wait_for_binary_proto=True)

        debug("Bootstrapping new node in another dc")
        node5 = new_node(self.cluster, remote_debug_port='1414', data_center='dc2')
        node5.start()

        session2 = self.patient_exclusive_cql_connection(node4)

        debug("Verifying data from new node in view")
        for i in xrange(1000):
            assert_one(session2, "SELECT * FROM ks.t_by_v WHERE v = {}".format(i), [i, i])

        debug("Inserting 100 into base")
        for i in xrange(1000, 1100):
            session.execute("INSERT INTO t (id, v) VALUES ({v}, {v})".format(v=i))

        debug("Verify 100 in view")
        for i in xrange(1000, 1100):
            assert_one(session, "SELECT * FROM t_by_v WHERE v = {}".format(i), [i, i])
    def resumable_bootstrap_test(self):
        """
        Test resuming bootstrap after data streaming failure
        """

        cluster = self.cluster
        cluster.populate(2).start(wait_other_notice=True)

        node1 = cluster.nodes['node1']
        node1.stress(['write', 'n=100K', 'cl=TWO', '-schema', 'replication(factor=2)'])
        cluster.flush()

        # kill node1 in the middle of streaming to let it fail
        t = InterruptBootstrap(node1)
        t.start()

        # start bootstrapping node3 and wait for streaming
        node3 = new_node(cluster)
        node3.set_configuration_options(values={'stream_throughput_outbound_megabits_per_sec': 1})
        # keep timeout low so that test won't hang
        node3.set_configuration_options(values={'streaming_socket_timeout_in_ms': 1000})
        try:
            node3.start(wait_other_notice=False)
        except NodeError:
            pass  # node doesn't start as expected
        t.join()

        # wait for node3 ready to query
        node3.watch_log_for("Starting listening for CQL clients")
        mark = node3.mark_log()
        # check if node3 is still in bootstrap mode
        session = self.patient_exclusive_cql_connection(node3)
        rows = list(session.execute("SELECT bootstrapped FROM system.local WHERE key='local'"))
        assert len(rows) == 1
        assert rows[0][0] == 'IN_PROGRESS', rows[0][0]
        # bring back node1 and invoke nodetool bootstrap to resume bootstrapping
        node1.start(wait_other_notice=True)
        node3.nodetool('bootstrap resume')

        node3.watch_log_for("Resume complete", from_mark=mark)
        rows = list(session.execute("SELECT bootstrapped FROM system.local WHERE key='local'"))
        assert rows[0][0] == 'COMPLETED', rows[0][0]

        # cleanup to guarantee each node will only have sstables of its ranges
        cluster.cleanup()

        debug("Check data is present")
        # Let's check stream bootstrap completely transferred data
        stdout, stderr = node3.stress(['read', 'n=100k', "no-warmup", '-schema',
                                       'replication(factor=2)', '-rate', 'threads=8'],
                                      capture_output=True)

        if stdout and "FAILURE" in stdout:
            debug(stdout)
            assert False, "Cannot read inserted data after bootstrap"
    def simple_bootstrap_test(self):
        cluster = self.cluster
        tokens = cluster.balanced_tokens(2)
        cluster.set_configuration_options(values={'num_tokens': 1})

        debug("[node1, node2] tokens: %r" % (tokens,))

        keys = 10000

        # Create a single node cluster
        cluster.populate(1)
        node1 = cluster.nodelist()[0]
        node1.set_configuration_options(values={'initial_token': tokens[0]})
        cluster.start(wait_other_notice=True)

        session = self.patient_cql_connection(node1)
        self.create_ks(session, 'ks', 1)
        self.create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})

        # record the size before inserting any of our own data
        empty_size = node1.data_size()
        debug("node1 empty size : %s" % float(empty_size))

        insert_statement = session.prepare("INSERT INTO ks.cf (key, c1, c2) VALUES (?, 'value1', 'value2')")
        execute_concurrent_with_args(session, insert_statement, [['k%d' % k] for k in range(keys)])

        node1.flush()
        node1.compact()
        initial_size = node1.data_size()
        debug("node1 size before bootstrapping node2: %s" % float(initial_size))

        # Reads inserted data all during the bootstrap process. We shouldn't
        # get any error
        reader = self.go(lambda _: query_c1c2(session, random.randint(0, keys - 1), ConsistencyLevel.ONE))

        # Bootstrapping a new node
        node2 = new_node(cluster)
        node2.set_configuration_options(values={'initial_token': tokens[1]})
        node2.start(wait_for_binary_proto=True)
        node2.compact()

        reader.check()
        node1.cleanup()
        debug("node1 size after cleanup: %s" % float(node1.data_size()))
        node1.compact()
        debug("node1 size after compacting: %s" % float(node1.data_size()))
        time.sleep(.5)
        reader.check()

        debug("node2 size after compacting: %s" % float(node2.data_size()))

        size1 = float(node1.data_size())
        size2 = float(node2.data_size())
        assert_almost_equal(size1, size2, error=0.3)
        assert_almost_equal(float(initial_size - empty_size), 2 * (size1 - float(empty_size)))
    def resumable_bootstrap_test(self):
        """
        Test resuming bootstrap after data streaming failure
        """

        cluster = self.cluster
        cluster.set_configuration_options(
            values={'stream_throughput_outbound_megabits_per_sec': 1})
        cluster.populate(2).start(wait_other_notice=True)

        node1 = cluster.nodes['node1']
        node1.stress([
            'write', 'n=100K', 'no-warmup', 'cl=TWO', '-schema',
            'replication(factor=2)', '-rate', 'threads=50'
        ])
        cluster.flush()

        # kill node1 in the middle of streaming to let it fail
        t = InterruptBootstrap(node1)
        t.start()

        # start bootstrapping node3 and wait for streaming
        node3 = new_node(cluster)
        # keep timeout low so that test won't hang
        node3.set_configuration_options(
            values={'streaming_socket_timeout_in_ms': 1000})
        node3.start(wait_other_notice=False, wait_for_binary_proto=True)
        t.join()

        # wait for node3 ready to query
        node3.watch_log_for("Starting listening for CQL clients")
        mark = node3.mark_log()
        # check if node3 is still in bootstrap mode
        assert_bootstrap_state(self, node3, 'IN_PROGRESS')

        # bring back node1 and invoke nodetool bootstrap to resume bootstrapping
        node1.start(wait_other_notice=True)
        node3.nodetool('bootstrap resume')

        node3.watch_log_for("Resume complete", from_mark=mark)
        assert_bootstrap_state(self, node3, 'COMPLETED')

        # cleanup to guarantee each node will only have sstables of its ranges
        cluster.cleanup()

        debug("Check data is present")
        # Let's check stream bootstrap completely transferred data
        stdout, stderr = node3.stress([
            'read', 'n=100k', 'no-warmup', '-schema', 'replication(factor=2)',
            '-rate', 'threads=8'
        ],
                                      capture_output=True)

        if stdout is not None:
            self.assertNotIn("FAILURE", stdout)
Beispiel #29
0
    def consistent_reads_after_bootstrap_test(self):
        debug("Creating a ring")
        cluster = self.cluster
        cluster.set_configuration_options(values={
            'hinted_handoff_enabled':
            False,
            'write_request_timeout_in_ms':
            60000,
            'read_request_timeout_in_ms':
            60000,
            'dynamic_snitch_badness_threshold':
            0.0
        },
                                          batch_commitlog=True)

        cluster.populate(2).start()
        node1, node2 = cluster.nodelist()
        cluster.start(wait_for_binary_proto=True, wait_other_notice=True)

        debug("Set to talk to node 2")
        n2session = self.patient_cql_connection(node2)
        self.create_ks(n2session, 'ks', 2)
        create_c1c2_table(self, n2session)

        debug("Generating some data for all nodes")
        insert_c1c2(n2session,
                    keys=range(10, 20),
                    consistency=ConsistencyLevel.ALL)

        node1.flush()
        debug("Taking down node1")
        node1.stop(wait_other_notice=True)

        debug("Writing data to only node2")
        insert_c1c2(n2session,
                    keys=range(30, 1000),
                    consistency=ConsistencyLevel.ONE)
        node2.flush()

        debug("Restart node1")
        node1.start(wait_other_notice=True)

        debug("Bootstraping node3")
        node3 = new_node(cluster)
        node3.start(wait_for_binary_proto=True)

        n3session = self.patient_cql_connection(node3)
        n3session.execute("USE ks")
        debug("Checking that no data was lost")
        for n in xrange(10, 20):
            query_c1c2(n3session, n, ConsistencyLevel.ALL)

        for n in xrange(30, 1000):
            query_c1c2(n3session, n, ConsistencyLevel.ALL)
Beispiel #30
0
        def bootstrap_on_write_survey_and_join(cluster, token):
            node2 = new_node(cluster)
            node2.set_configuration_options(values={'initial_token': token})
            node2.start(jvm_args=["-Dcassandra.write_survey=true"], wait_for_binary_proto=True)

            self.assertTrue(len(node2.grep_log('Startup complete, but write survey mode is active, not becoming an active ring member.')))
            assert_bootstrap_state(self, node2, 'IN_PROGRESS')

            node2.nodetool("join")
            self.assertTrue(len(node2.grep_log('Leaving write survey mode and joining ring at operator request')))
            return node2
Beispiel #31
0
    def _bootstrap_test_with_replica_down(self, consistent_range_movement, rf=2):
        """
        Test to check consistent bootstrap will not succeed when there are insufficient replicas
        @jira_ticket CASSANDRA-11848
        """
        cluster = self.cluster

        cluster.populate(2)
        node1, node2 = cluster.nodelist()

        node3_token = None
        # Make token assignment deterministic
        if DISABLE_VNODES:
            cluster.set_configuration_options(values={'num_tokens': 1})
            tokens = cluster.balanced_tokens(3)
            debug("non-vnode tokens: %r" % (tokens,))
            node1.set_configuration_options(values={'initial_token': tokens[0]})
            node2.set_configuration_options(values={'initial_token': tokens[2]})
            node3_token = tokens[1]  # Add node 3 between node1 and node2

        cluster.start()

        node1.stress(['write', 'n=10K', 'no-warmup', '-rate', 'threads=8', '-schema', 'replication(factor={})'.format(rf)])

        # change system_auth keyspace to 2 (default is 1) to avoid
        # "Unable to find sufficient sources for streaming" warning
        if cluster.cassandra_version() >= '2.2.0':
            session = self.patient_cql_connection(node1)
            session.execute("""
                ALTER KEYSPACE system_auth
                    WITH replication = {'class':'SimpleStrategy', 'replication_factor':2};
            """)

        # Stop node2, so node3 will not be able to perform consistent range movement
        node2.stop(wait_other_notice=True)

        successful_bootstrap_expected = not consistent_range_movement

        node3 = new_node(cluster, token=node3_token)
        node3.start(wait_for_binary_proto=successful_bootstrap_expected, wait_other_notice=successful_bootstrap_expected,
                    jvm_args=["-Dcassandra.consistent.rangemovement={}".format(consistent_range_movement)])

        if successful_bootstrap_expected:
            # with rf=1 and cassandra.consistent.rangemovement=false, missing sources are ignored
            if not consistent_range_movement and rf == 1:
                node3.watch_log_for("Unable to find sufficient sources for streaming range")
            self.assertTrue(node3.is_running())
            assert_bootstrap_state(self, node3, 'COMPLETED')
        else:
            if consistent_range_movement:
                node3.watch_log_for("A node required to move the data consistently is down")
            else:
                node3.watch_log_for("Unable to find sufficient sources for streaming range")
            assert_not_running(node3)
    def _wiped_node_cannot_join_test(self, gently):
        """
        @jira_ticket CASSANDRA-9765
        Test that if we stop a node and wipe its data then the node cannot join
        when it is not a seed. Test both a nice shutdown or a forced shutdown, via
        the gently parameter.
        """
        cluster = self.cluster
        cluster.populate(3)
        cluster.start(wait_for_binary_proto=True)

        version = cluster.version()
        stress_table = 'keyspace1.standard1' if self.cluster.version(
        ) >= '2.1' else '"Keyspace1"."Standard1"'

        # write some data
        node1 = cluster.nodelist()[0]
        if version < "2.1":
            node1.stress(['-n', '10000'])
        else:
            node1.stress(['write', 'n=10000', '-rate', 'threads=8'])

        session = self.patient_cql_connection(node1)
        original_rows = list(
            session.execute("SELECT * FROM {}".format(stress_table, )))

        # Add a new node, bootstrap=True ensures that it is not a seed
        node2 = new_node(cluster, bootstrap=True)
        node2.start(wait_for_binary_proto=True)

        session = self.patient_cql_connection(node2)
        self.assertEquals(
            original_rows,
            list(session.execute("SELECT * FROM {}".format(stress_table, ))))

        # Stop the new node and wipe its data
        node2.stop(gently=gently)
        data_dir = os.path.join(node2.get_path(), 'data')
        commitlog_dir = os.path.join(node2.get_path(), 'commitlogs')
        debug("Deleting {}".format(data_dir))
        shutil.rmtree(data_dir)
        shutil.rmtree(commitlog_dir)

        # Now start it, it should not be allowed to join.
        mark = node2.mark_log()
        node2.start(no_wait=True)
        node2.watch_log_for(
            "A node with address /127.0.0.4 already exists, cancelling join",
            from_mark=mark)
    def failed_bootstap_wiped_node_can_join_test(self):
        """
        @jira_ticket CASSANDRA-9765
        Test that if a node fails to bootstrap, it can join the cluster even if the data is wiped.
        """
        cluster = self.cluster
        cluster.populate(1)
        cluster.start(wait_for_binary_proto=True)

        version = cluster.version()
        stress_table = 'keyspace1.standard1' if self.cluster.version(
        ) >= '2.1' else '"Keyspace1"."Standard1"'

        # write some data, enough for the bootstrap to fail later on
        node1 = cluster.nodelist()[0]
        if version < "2.1":
            node1.stress(['-n', '100000'])
        else:
            node1.stress(['write', 'n=100000', '-rate', 'threads=8'])
        node1.flush()

        session = self.patient_cql_connection(node1)
        original_rows = list(
            session.execute("SELECT * FROM {}".format(stress_table, )))

        # Add a new node, bootstrap=True ensures that it is not a seed
        node2 = new_node(cluster, bootstrap=True)
        node2.set_configuration_options(
            values={'stream_throughput_outbound_megabits_per_sec': 1})

        # kill node2 in the middle of bootstrap
        t = KillOnBootstrap(node2)
        t.start()

        node2.start()
        t.join()
        self.assertFalse(node2.is_running())

        # wipe any data for node2
        data_dir = os.path.join(node2.get_path(), 'data')
        commitlog_dir = os.path.join(node2.get_path(), 'commitlogs')
        debug("Deleting {}".format(data_dir))
        shutil.rmtree(data_dir)
        shutil.rmtree(commitlog_dir)

        # Now start it again, it should be allowed to join
        mark = node2.mark_log()
        node2.start(wait_other_notice=True)
        node2.watch_log_for("JOINING:", from_mark=mark)
    def disk_balance_bootstrap_test(self):
        cluster = self.cluster
        if not DISABLE_VNODES:
            cluster.set_configuration_options(values={'num_tokens': 256})
        # apparently we have legitimate errors in the log when bootstrapping (see bootstrap_test.py)
        self.allow_log_errors = True
        cluster.set_configuration_options(values={'allocate_tokens_for_keyspace': 'keyspace1'})
        cluster.populate(4).start(wait_for_binary_proto=True)
        node1 = cluster.nodes['node1']

        node1.stress(['write', 'n=50k', 'no-warmup', '-rate', 'threads=100', '-schema', 'replication(factor=3)', 'compaction(strategy=SizeTieredCompactionStrategy,enabled=false)'])
        cluster.flush()
        node5 = new_node(cluster)
        node5.start(wait_for_binary_proto=True)
        self.assert_balanced(node5)
    def resumable_bootstrap_test(self):
        """
        Test resuming bootstrap after data streaming failure
        """

        cluster = self.cluster
        cluster.set_configuration_options(values={'stream_throughput_outbound_megabits_per_sec': 1})
        cluster.populate(2).start(wait_other_notice=True)

        node1 = cluster.nodes['node1']
        node1.stress(['write', 'n=100K', 'no-warmup', 'cl=TWO', '-schema', 'replication(factor=2)', '-rate', 'threads=50'])
        cluster.flush()

        # kill node1 in the middle of streaming to let it fail
        t = InterruptBootstrap(node1)
        t.start()

        # start bootstrapping node3 and wait for streaming
        node3 = new_node(cluster)
        # keep timeout low so that test won't hang
        node3.set_configuration_options(values={'streaming_socket_timeout_in_ms': 1000})
        node3.start(wait_other_notice=False, wait_for_binary_proto=True)
        t.join()

        # wait for node3 ready to query
        node3.watch_log_for("Starting listening for CQL clients")
        mark = node3.mark_log()
        # check if node3 is still in bootstrap mode
        assert_bootstrap_state(self, node3, 'IN_PROGRESS')

        # bring back node1 and invoke nodetool bootstrap to resume bootstrapping
        node1.start(wait_other_notice=True)
        node3.nodetool('bootstrap resume')

        node3.watch_log_for("Resume complete", from_mark=mark)
        assert_bootstrap_state(self, node3, 'COMPLETED')

        # cleanup to guarantee each node will only have sstables of its ranges
        cluster.cleanup()

        debug("Check data is present")
        # Let's check stream bootstrap completely transferred data
        stdout, stderr = node3.stress(['read', 'n=100k', 'no-warmup', '-schema',
                                       'replication(factor=2)', '-rate', 'threads=8'],
                                      capture_output=True)

        if stdout is not None:
            self.assertNotIn("FAILURE", stdout)
    def simple_bootstrap_test_nodata(self):
        """
        @jira_ticket CASSANDRA-11010
        Test that bootstrap completes if streaming from nodes with no data
        """

        cluster = self.cluster
        # Create a two-node cluster
        cluster.populate(2)
        cluster.start(wait_other_notice=True)

        # Bootstrapping a new node
        node3 = new_node(cluster)
        node3.start(wait_for_binary_proto=True, wait_other_notice=True)

        assert_bootstrap_state(self, node3, 'COMPLETED')
    def simple_bootstrap_test_nodata(self):
        """
        @jira_ticket CASSANDRA-11010
        Test that bootstrap completes if streaming from nodes with no data
        """

        cluster = self.cluster
        # Create a two-node cluster
        cluster.populate(2)
        cluster.start(wait_other_notice=True)

        # Bootstrapping a new node
        node3 = new_node(cluster)
        node3.start(wait_for_binary_proto=True, wait_other_notice=True)

        assert_bootstrap_state(self, node3, 'COMPLETED')
    def failed_bootstap_wiped_node_can_join_test(self):
        """
        @jira_ticket CASSANDRA-9765
        Test that if a node fails to bootstrap, it can join the cluster even if the data is wiped.
        """
        cluster = self.cluster
        cluster.populate(1)
        cluster.start(wait_for_binary_proto=True)

        version = cluster.version()
        stress_table = 'keyspace1.standard1' if self.cluster.version() >= '2.1' else '"Keyspace1"."Standard1"'

        # write some data, enough for the bootstrap to fail later on
        node1 = cluster.nodelist()[0]
        if version < "2.1":
            node1.stress(['-n', '100000'])
        else:
            node1.stress(['write', 'n=100000', '-rate', 'threads=8'])
        node1.flush()

        session = self.patient_cql_connection(node1)
        original_rows = list(session.execute("SELECT * FROM {}".format(stress_table,)))

        # Add a new node, bootstrap=True ensures that it is not a seed
        node2 = new_node(cluster, bootstrap=True)
        node2.set_configuration_options(values={'stream_throughput_outbound_megabits_per_sec': 1})

        # kill node2 in the middle of bootstrap
        t = KillOnBootstrap(node2)
        t.start()

        node2.start()
        t.join()
        self.assertFalse(node2.is_running())

        # wipe any data for node2
        data_dir = os.path.join(node2.get_path(), 'data')
        commitlog_dir = os.path.join(node2.get_path(), 'commitlogs')
        debug("Deleting {}".format(data_dir))
        shutil.rmtree(data_dir)
        shutil.rmtree(commitlog_dir)

        # Now start it again, it should be allowed to join
        mark = node2.mark_log()
        node2.start(wait_other_notice=True)
        node2.watch_log_for("JOINING:", from_mark=mark)
    def decommissioned_wiped_node_can_join_test(self):
        """
        @jira_ticket CASSANDRA-9765
        Test that if we decommission a node and then wipe its data, it can join the cluster.
        """
        cluster = self.cluster
        cluster.populate(3)
        cluster.start(wait_for_binary_proto=True)

        version = cluster.version()
        stress_table = 'keyspace1.standard1' if self.cluster.version(
        ) >= '2.1' else '"Keyspace1"."Standard1"'

        # write some data
        node1 = cluster.nodelist()[0]
        if version < "2.1":
            node1.stress(['-n', '10000'])
        else:
            node1.stress(['write', 'n=10K', '-rate', 'threads=8'])

        session = self.patient_cql_connection(node1)
        original_rows = list(
            session.execute("SELECT * FROM {}".format(stress_table, )))

        # Add a new node, bootstrap=True ensures that it is not a seed
        node2 = new_node(cluster, bootstrap=True)
        node2.start(wait_for_binary_proto=True, wait_other_notice=True)

        session = self.patient_cql_connection(node2)
        self.assertEquals(
            original_rows,
            list(session.execute("SELECT * FROM {}".format(stress_table, ))))

        # Decommision the new node and wipe its data
        node2.decommission()
        node2.stop(wait_other_notice=True)
        data_dir = os.path.join(node2.get_path(), 'data')
        commitlog_dir = os.path.join(node2.get_path(), 'commitlogs')
        debug("Deleting {}".format(data_dir))
        shutil.rmtree(data_dir)
        shutil.rmtree(commitlog_dir)

        # Now start it, it should be allowed to join
        mark = node2.mark_log()
        node2.start(wait_other_notice=True)
        node2.watch_log_for("JOINING:", from_mark=mark)
    def resumable_bootstrap_test(self):
        """Test resuming bootstrap after data streaming failure"""

        cluster = self.cluster
        cluster.populate(2).start(wait_other_notice=True)

        node1 = cluster.nodes['node1']
        node1.stress(['write', 'n=100000', '-schema', 'replication(factor=2)'])
        node1.flush()

        # kill node1 in the middle of streaming to let it fail
        t = InterruptBootstrap(node1)
        t.start()

        # start bootstrapping node3 and wait for streaming
        node3 = new_node(cluster)
        node3.set_configuration_options(
            values={'stream_throughput_outbound_megabits_per_sec': 1})
        # keep timeout low so that test won't hang
        node3.set_configuration_options(
            values={'streaming_socket_timeout_in_ms': 1000})
        try:
            node3.start()
        except NodeError:
            pass  # node doesn't start as expected
        t.join()

        # wait for node3 ready to query
        node3.watch_log_for("Starting listening for CQL clients")
        mark = node3.mark_log()
        # check if node3 is still in bootstrap mode
        session = self.exclusive_cql_connection(node3)
        rows = session.execute(
            "SELECT bootstrapped FROM system.local WHERE key='local'")
        assert len(rows) == 1
        assert rows[0][0] == 'IN_PROGRESS', rows[0][0]
        # bring back node1 and invoke nodetool bootstrap to resume bootstrapping
        node1.start(wait_other_notice=True)
        node3.nodetool('bootstrap resume')
        # check if we skipped already retrieved ranges
        node3.watch_log_for("already available. Skipping streaming.")
        node3.watch_log_for("Resume complete", from_mark=mark)
        rows = session.execute(
            "SELECT bootstrapped FROM system.local WHERE key='local'")
        assert rows[0][0] == 'COMPLETED', rows[0][0]
    def simple_bootstrap_test_nodata(self):
        """
        @jira_ticket CASSANDRA-11010
        Test that bootstrap completes if streaming from nodes with no data
        """

        cluster = self.cluster
        # Create a two-node cluster
        cluster.populate(2)
        cluster.start(wait_other_notice=True)

        # Bootstrapping a new node
        node3 = new_node(cluster)
        node3.start(wait_for_binary_proto=True, wait_other_notice=True)

        session = self.patient_exclusive_cql_connection(node3)
        rows = session.execute("SELECT bootstrapped FROM system.local WHERE key='local'")
        self.assertEqual(rows[0][0], 'COMPLETED')
    def resumable_bootstrap_test(self):
        """
        Test resuming bootstrap after data streaming failure
        """

        cluster = self.cluster
        cluster.populate(2)

        node1 = cluster.nodes['node1']
        # set up byteman
        node1.byteman_port = '8100'
        node1.import_config_files()

        cluster.start(wait_other_notice=True)
        # kill stream to node3 in the middle of streaming to let it fail
        node1.byteman_submit(['./stream_failure.btm'])
        node1.stress(['write', 'n=1K', 'no-warmup', 'cl=TWO', '-schema', 'replication(factor=2)', '-rate', 'threads=50'])
        cluster.flush()

        # start bootstrapping node3 and wait for streaming
        node3 = new_node(cluster)
        node3.start(wait_other_notice=False, wait_for_binary_proto=True)

        # wait for node3 ready to query
        node3.watch_log_for("Starting listening for CQL clients")
        mark = node3.mark_log()
        # check if node3 is still in bootstrap mode
        assert_bootstrap_state(self, node3, 'IN_PROGRESS')

        # bring back node1 and invoke nodetool bootstrap to resume bootstrapping
        node3.nodetool('bootstrap resume')

        node3.watch_log_for("Resume complete", from_mark=mark)
        assert_bootstrap_state(self, node3, 'COMPLETED')

        # cleanup to guarantee each node will only have sstables of its ranges
        cluster.cleanup()

        debug("Check data is present")
        # Let's check stream bootstrap completely transferred data
        stdout, stderr, _ = node3.stress(['read', 'n=1k', 'no-warmup', '-schema', 'replication(factor=2)', '-rate', 'threads=8'])

        if stdout is not None:
            self.assertNotIn("FAILURE", stdout)
Beispiel #43
0
    def decommissioned_wiped_node_can_join_test(self):
        """
        @jira_ticket CASSANDRA-9765
        Test that if we decommission a node and then wipe its data, it can join the cluster.
        """
        cluster = self.cluster
        cluster.set_log_level('TRACE')
        cluster.populate(3)
        cluster.start(wait_for_binary_proto=True)

        version = cluster.version()
        stress_table = 'keyspace1.standard1' if self.cluster.version() >= '2.1' else '"Keyspace1"."Standard1"'

        # write some data
        node1 = cluster.nodelist()[0]
        if version < "2.1":
            node1.stress(['-n', '10000'])
        else:
            node1.stress(['write', 'n=10000', '-rate', 'threads=8'])

        session = self.patient_cql_connection(node1)
        original_rows = list(session.execute("SELECT * FROM {}".format(stress_table,)))

        # Add a new node, bootstrap=True ensures that it is not a seed
        node2 = new_node(cluster, bootstrap=True)
        node2.start(wait_for_binary_proto=True)

        session = self.patient_cql_connection(node2)
        self.assertEquals(original_rows, list(session.execute("SELECT * FROM {}".format(stress_table,))))
        session.shutdown()  # Ensure all sockets to node2 are released

        # Decommision the new node and wipe its data
        node2.decommission()
        node2.stop(gently=False)
        data_dir = os.path.join(node2.get_path(), 'data')
        commitlog_dir = os.path.join(node2.get_path(), 'commitlogs')
        debug("Deleting {}".format(data_dir))
        shutil.rmtree(data_dir)
        shutil.rmtree(commitlog_dir)

        # Now start it, it should be allowed to join
        mark = node2.mark_log()
        node2.start(wait_other_notice=True)
        node2.watch_log_for("JOINING:", from_mark=mark)
    def read_from_bootstrapped_node_test(self):
        """Test bootstrapped node sees existing data, eg. CASSANDRA-6648"""
        cluster = self.cluster
        cluster.populate(3)
        cluster.start()

        node1 = cluster.nodes['node1']
        node1.stress(['write', 'n=10000', '-rate', 'threads=8'])

        session = self.patient_cql_connection(node1)
        stress_table = 'keyspace1.standard1'
        original_rows = list(session.execute("SELECT * FROM %s" % (stress_table,)))

        node4 = new_node(cluster)
        node4.start(wait_for_binary_proto=True)

        session = self.patient_exclusive_cql_connection(node4)
        new_rows = list(session.execute("SELECT * FROM %s" % (stress_table,)))
        self.assertEquals(original_rows, new_rows)
    def simple_bootstrap_test_nodata(self):
        """
        @jira_ticket CASSANDRA-11010
        Test that bootstrap completes if streaming from nodes with no data
        """

        cluster = self.cluster
        # Create a two-node cluster
        cluster.populate(2)
        cluster.start(wait_other_notice=True)

        # Bootstrapping a new node
        node3 = new_node(cluster)
        node3.start(wait_for_binary_proto=True, wait_other_notice=True)

        session = self.patient_exclusive_cql_connection(node3)
        rows = session.execute(
            "SELECT bootstrapped FROM system.local WHERE key='local'")
        self.assertEqual(rows[0][0], 'COMPLETED')
    def resumable_bootstrap_test(self):
        """
        Test resuming bootstrap after data streaming failure
        """

        cluster = self.cluster
        cluster.populate(2).start(wait_other_notice=True)

        node1 = cluster.nodes['node1']
        node1.stress(['write', 'n=100000', '-schema', 'replication(factor=2)'])
        node1.flush()

        # kill node1 in the middle of streaming to let it fail
        t = InterruptBootstrap(node1)
        t.start()

        # start bootstrapping node3 and wait for streaming
        node3 = new_node(cluster)
        node3.set_configuration_options(values={'stream_throughput_outbound_megabits_per_sec': 1})
        # keep timeout low so that test won't hang
        node3.set_configuration_options(values={'streaming_socket_timeout_in_ms': 1000})
        try:
            node3.start()
        except NodeError:
            pass  # node doesn't start as expected
        t.join()

        # wait for node3 ready to query
        node3.watch_log_for("Starting listening for CQL clients")
        mark = node3.mark_log()
        # check if node3 is still in bootstrap mode
        session = self.exclusive_cql_connection(node3)
        rows = list(session.execute("SELECT bootstrapped FROM system.local WHERE key='local'"))
        assert len(rows) == 1
        assert rows[0][0] == 'IN_PROGRESS', rows[0][0]
        # bring back node1 and invoke nodetool bootstrap to resume bootstrapping
        node1.start(wait_other_notice=True)
        node3.nodetool('bootstrap resume')
        # check if we skipped already retrieved ranges
        node3.watch_log_for("already available. Skipping streaming.")
        node3.watch_log_for("Resume complete", from_mark=mark)
        rows = list(session.execute("SELECT bootstrapped FROM system.local WHERE key='local'"))
        assert rows[0][0] == 'COMPLETED', rows[0][0]
    def _wiped_node_cannot_join_test(self, gently):
        """
        @jira_ticket CASSANDRA-9765
        Test that if we stop a node and wipe its data then the node cannot join
        when it is not a seed. Test both a nice shutdown or a forced shutdown, via
        the gently parameter.
        """
        cluster = self.cluster
        cluster.populate(3)
        cluster.start(wait_for_binary_proto=True)

        version = cluster.version()
        stress_table = 'keyspace1.standard1' if self.cluster.version() >= '2.1' else '"Keyspace1"."Standard1"'

        # write some data
        node1 = cluster.nodelist()[0]
        if version < "2.1":
            node1.stress(['-n', '10000'])
        else:
            node1.stress(['write', 'n=10000', '-rate', 'threads=8'])

        session = self.patient_cql_connection(node1)
        original_rows = list(session.execute("SELECT * FROM {}".format(stress_table,)))

        # Add a new node, bootstrap=True ensures that it is not a seed
        node2 = new_node(cluster, bootstrap=True)
        node2.start(wait_for_binary_proto=True)

        session = self.patient_cql_connection(node2)
        self.assertEquals(original_rows, list(session.execute("SELECT * FROM {}".format(stress_table,))))

        # Stop the new node and wipe its data
        node2.stop(gently=gently)
        data_dir = os.path.join(node2.get_path(), 'data')
        commitlog_dir = os.path.join(node2.get_path(), 'commitlogs')
        debug("Deleting {}".format(data_dir))
        shutil.rmtree(data_dir)
        shutil.rmtree(commitlog_dir)

        # Now start it, it should not be allowed to join.
        mark = node2.mark_log()
        node2.start(no_wait=True)
        node2.watch_log_for("A node with address /127.0.0.4 already exists, cancelling join", from_mark=mark)
    def consistent_reads_after_bootstrap_test(self):
        debug("Creating a ring")
        cluster = self.cluster
        cluster.set_configuration_options(values={'hinted_handoff_enabled': False, 'write_request_timeout_in_ms': 60000,
                                          'read_request_timeout_in_ms': 60000, 'dynamic_snitch_badness_threshold': 0.0}, batch_commitlog=True)

        cluster.populate(2).start()
        node1, node2 = cluster.nodelist()
        cluster.start(wait_for_binary_proto=True, wait_other_notice=True)

        debug("Set to talk to node 2")
        n2session = self.patient_cql_connection(node2)
        self.create_ks(n2session, 'ks', 2)
        create_c1c2_table(self, n2session)

        debug("Generating some data for all nodes")
        for n in xrange(10, 20):
            insert_c1c2(n2session, n, ConsistencyLevel.ALL)

        node1.flush()
        debug("Taking down node1")
        node1.stop(wait_other_notice=True)

        debug("Writing data to only node2")
        for n in xrange(30, 1000):
            insert_c1c2(n2session, n, ConsistencyLevel.ONE)
        node2.flush()

        debug("Restart node1")
        node1.start(wait_other_notice=True)

        debug("Boostraping node3")
        node3 = new_node(cluster)
        node3.start(wait_for_binary_proto=True)

        n3session = self.patient_cql_connection(node3)
        n3session.execute("USE ks")
        debug("Checking that no data was lost")
        for n in xrange(10, 20):
            query_c1c2(n3session, n, ConsistencyLevel.ALL)

        for n in xrange(30, 1000):
            query_c1c2(n3session, n, ConsistencyLevel.ALL)
    def read_from_bootstrapped_node_test(self):
        """Test bootstrapped node sees existing data, eg. CASSANDRA-6648"""
        cluster = self.cluster
        cluster.populate(3)
        version = cluster.version()
        cluster.start()

        node1 = cluster.nodes['node1']
        if version < "2.1":
            node1.stress(['-n', '10000'])
        else:
            node1.stress(['write', 'n=10000', '-rate', 'threads=8'])

        node4 = new_node(cluster)
        node4.start()

        session = self.patient_cql_connection(node4)
        stress_table = 'keyspace1.standard1' if self.cluster.version() >= '2.1' else '"Keyspace1"."Standard1"'
        rows = session.execute('select * from %s limit 10' % stress_table)
        assert len(list(rows)) == 10
Beispiel #50
0
    def disk_balance_bootstrap_test(self):
        cluster = self.cluster
        if not DISABLE_VNODES:
            cluster.set_configuration_options(values={'num_tokens': 256})
        # apparently we have legitimate errors in the log when bootstrapping (see bootstrap_test.py)
        self.allow_log_errors = True
        cluster.set_configuration_options(
            values={'allocate_tokens_for_keyspace': 'keyspace1'})
        cluster.populate(4).start(wait_for_binary_proto=True)
        node1 = cluster.nodes['node1']

        node1.stress([
            'write', 'n=50k', '-rate', 'threads=100', '-schema',
            'replication(factor=3)',
            'compaction(strategy=SizeTieredCompactionStrategy,enabled=false)'
        ])
        cluster.flush()
        node5 = new_node(cluster)
        node5.start(wait_for_binary_proto=True)
        self.assert_balanced(node5)
    def failed_bootstrap_wiped_node_can_join_test(self):
        """
        @jira_ticket CASSANDRA-9765
        Test that if a node fails to bootstrap, it can join the cluster even if the data is wiped.
        """
        cluster = self.cluster
        cluster.populate(1)
        cluster.set_configuration_options(
            values={'stream_throughput_outbound_megabits_per_sec': 1})
        cluster.start(wait_for_binary_proto=True)

        stress_table = 'keyspace1.standard1'

        # write some data, enough for the bootstrap to fail later on
        node1 = cluster.nodelist()[0]
        node1.stress(['write', 'n=100K', 'no-warmup', '-rate', 'threads=8'])
        node1.flush()

        session = self.patient_cql_connection(node1)
        original_rows = list(
            session.execute("SELECT * FROM {}".format(stress_table, )))

        # Add a new node, bootstrap=True ensures that it is not a seed
        node2 = new_node(cluster, bootstrap=True)

        # kill node2 in the middle of bootstrap
        t = KillOnBootstrap(node2)
        t.start()

        node2.start()
        t.join()
        self.assertFalse(node2.is_running())

        # wipe any data for node2
        self._cleanup(node2)
        # Now start it again, it should be allowed to join
        mark = node2.mark_log()
        node2.start(wait_other_notice=True)
        node2.watch_log_for("JOINING:", from_mark=mark)
    def failed_bootstrap_wiped_node_can_join_test(self):
        """
        @jira_ticket CASSANDRA-9765
        Test that if a node fails to bootstrap, it can join the cluster even if the data is wiped.
        """
        cluster = self.cluster
        cluster.populate(1)
        cluster.set_configuration_options(values={'stream_throughput_outbound_megabits_per_sec': 1})
        cluster.start(wait_for_binary_proto=True)

        stress_table = 'keyspace1.standard1'

        # write some data, enough for the bootstrap to fail later on
        node1 = cluster.nodelist()[0]
        node1.stress(['write', 'n=100K', 'no-warmup', '-rate', 'threads=8'])
        node1.flush()

        session = self.patient_cql_connection(node1)
        original_rows = list(session.execute("SELECT * FROM {}".format(stress_table,)))

        # Add a new node, bootstrap=True ensures that it is not a seed
        node2 = new_node(cluster, bootstrap=True)

        # kill node2 in the middle of bootstrap
        t = KillOnBootstrap(node2)
        t.start()

        node2.start()
        t.join()
        self.assertFalse(node2.is_running())

        # wipe any data for node2
        self._cleanup(node2)
        # Now start it again, it should be allowed to join
        mark = node2.mark_log()
        node2.start(wait_other_notice=True)
        node2.watch_log_for("JOINING:", from_mark=mark)
    def manual_bootstrap_test(self):
        """Test adding a new node and bootstrappig it manually. No auto_bootstrap.
           This test also verify that all data are OK after the addition of the new node.
           eg. CASSANDRA-9022
        """
        cluster = self.cluster
        cluster.populate(2).start(wait_other_notice=True)
        (node1, node2) = cluster.nodelist()

        if cluster.version() < "2.1":
            node1.stress(['-o', 'insert', '-n', '1000', '-l', '2', '-t', '1'])
        else:
            node1.stress(['write', 'n=1000', '-schema', 'replication(factor=2)',
                          '-rate', 'threads=1', '-pop', 'dist=UNIFORM(1..1000)'])

        # Add a new node
        node3 = new_node(cluster, bootstrap=False)
        node3.start()
        node3.repair()
        node1.cleanup()

        # Verify the data
        with tempfile.TemporaryFile(mode='w+') as tmpfile:
            if cluster.version() < "2.1":
                node2.stress(['-o', 'read', '-n', '1000', '-e', 'ALL', '-t', '1'],
                             stdout=tmpfile, stderr=subprocess.STDOUT)
            else:
                node2.stress(['read', 'n=1000', 'cl=ALL', '-rate', 'threads=1',
                              '-pop', 'dist=UNIFORM(1..1000)'],
                             stdout=tmpfile, stderr=subprocess.STDOUT)

            tmpfile.seek(0)
            output = tmpfile.read()

        debug(output)
        failure = output.find("Data returned was not validated")
        self.assertEqual(failure, -1, "Stress failed to validate all data")
 def bootstrap(cluster, token):
     node2 = new_node(cluster)
     node2.set_configuration_options(values={'initial_token': token})
     node2.start(wait_for_binary_proto=True)
     return node2
    def decommission_test(self):
        cluster = self.cluster

        tokens = cluster.balanced_tokens(4)
        cluster.populate(4, tokens=tokens).start()
        node1, node2, node3, node4 = cluster.nodelist()

        session = self.patient_cql_connection(node1)
        self.create_ks(session, 'ks', 2)
        self.create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})

        insert_c1c2(session, n=10000, consistency=ConsistencyLevel.QUORUM)

        cluster.flush()
        sizes = [node.data_size() for node in cluster.nodelist() if node.is_running()]
        init_size = sizes[0]
        assert_almost_equal(*sizes)

        time.sleep(.5)
        node4.decommission()
        node4.stop()
        cluster.cleanup()
        time.sleep(.5)

        # Check we can get all the keys
        for n in xrange(0, 10000):
            query_c1c2(session, n, ConsistencyLevel.QUORUM)

        sizes = [node.data_size() for node in cluster.nodelist() if node.is_running()]
        three_node_sizes = sizes
        assert_almost_equal(sizes[0], sizes[1])
        assert_almost_equal((2.0 / 3.0) * sizes[0], sizes[2])
        assert_almost_equal(sizes[2], init_size)

        if cluster.version() <= '1.2':
            node3.stop(wait_other_notice=True)
            node1.removeToken(tokens[2])
            time.sleep(.5)
            cluster.cleanup()
            time.sleep(.5)

            # Check we can get all the keys
            for n in xrange(0, 10000):
                query_c1c2(session, n, ConsistencyLevel.QUORUM)

            sizes = [node.data_size() for node in cluster.nodelist() if node.is_running()]
            assert_almost_equal(*sizes)
            assert_almost_equal(sizes[0], 2 * init_size)

            node5 = new_node(cluster, token=(tokens[2] + 1)).start()
            time.sleep(.5)
            cluster.cleanup()
            time.sleep(.5)
            cluster.compact()
            time.sleep(.5)

            # Check we can get all the keys
            for n in xrange(0, 10000):
                query_c1c2(session, n, ConsistencyLevel.QUORUM)

            sizes = [node.data_size() for node in cluster.nodelist() if node.is_running()]
            # We should be back to the earlir 3 nodes situation
            for i in xrange(0, len(sizes)):
                assert_almost_equal(sizes[i], three_node_sizes[i])
    def multi_dc_replace_with_rf1_test(self):
        """
        Test that multi-dc replace works when rf=1 on each dc
        """
        cluster = self.cluster
        cluster.populate([1, 1])
        cluster.start()
        node1, node2 = cluster.nodelist()

        node1 = cluster.nodes['node1']
        yaml_config = """
        # Create the keyspace and table
        keyspace: keyspace1
        keyspace_definition: |
          CREATE KEYSPACE keyspace1 WITH replication = {'class': 'NetworkTopologyStrategy', 'dc1': 1, 'dc2': 1};
        table: users
        table_definition:
          CREATE TABLE users (
            username text,
            first_name text,
            last_name text,
            email text,
            PRIMARY KEY(username)
          ) WITH compaction = {'class':'SizeTieredCompactionStrategy'};
        insert:
          partitions: fixed(1)
          batchtype: UNLOGGED
        queries:
          read:
            cql: select * from users where username = ?
            fields: samerow
        """
        with tempfile.NamedTemporaryFile(mode='w+') as stress_config:
            stress_config.write(yaml_config)
            stress_config.flush()
            node1.stress([
                'user', 'profile=' + stress_config.name, 'n=10k', 'no-warmup',
                'ops(insert=1)', '-rate', 'threads=50'
            ])

        session = self.patient_cql_connection(node1)

        # change system_auth keyspace to 2 (default is 1) to avoid
        # "Unable to find sufficient sources for streaming" warning
        if cluster.cassandra_version() >= '2.2.0':
            session.execute("""
                ALTER KEYSPACE system_auth
                    WITH replication = {'class':'SimpleStrategy', 'replication_factor':2};
            """)

        # Save initial data
        stress_table = 'keyspace1.users'
        query = SimpleStatement('select * from %s LIMIT 1' % stress_table,
                                consistency_level=ConsistencyLevel.TWO)
        initial_data = rows_to_list(session.execute(query))

        # stop node to replace
        debug("Stopping node to replace.")
        node2.stop(wait_other_notice=True)

        node3 = new_node(cluster, data_center='dc2')
        node3.start(replace_address='127.0.0.2', wait_for_binary_proto=True)

        assert_bootstrap_state(self, node3, 'COMPLETED')

        # Check that keyspace was replicated from dc1 to dc2
        self.assertFalse(
            node3.grep_log(
                "Unable to find sufficient sources for streaming range"))

        # query should work again with node1 stopped
        node1.stop(wait_other_notice=True)
        debug("Verifying data on new node.")
        session = self.patient_exclusive_cql_connection(node3)
        assert_all(session,
                   'SELECT * from {} LIMIT 1'.format(stress_table),
                   expected=initial_data,
                   cl=ConsistencyLevel.LOCAL_ONE)
    def resumable_bootstrap_test(self):
        """
        Test resuming bootstrap after data streaming failure
        """

        cluster = self.cluster
        cluster.populate(2).start(wait_other_notice=True)

        node1 = cluster.nodes['node1']
        node1.stress([
            'write', 'n=100K', 'cl=TWO', '-schema', 'replication(factor=2)',
            '-rate', 'threads=50'
        ])
        cluster.flush()

        # kill node1 in the middle of streaming to let it fail
        t = InterruptBootstrap(node1)
        t.start()

        # start bootstrapping node3 and wait for streaming
        node3 = new_node(cluster)
        node3.set_configuration_options(
            values={'stream_throughput_outbound_megabits_per_sec': 1})
        # keep timeout low so that test won't hang
        node3.set_configuration_options(
            values={'streaming_socket_timeout_in_ms': 1000})
        try:
            node3.start(wait_other_notice=False)
        except NodeError:
            pass  # node doesn't start as expected
        t.join()

        # wait for node3 ready to query
        node3.watch_log_for("Starting listening for CQL clients")
        mark = node3.mark_log()
        # check if node3 is still in bootstrap mode
        session = self.patient_exclusive_cql_connection(node3)
        rows = list(
            session.execute(
                "SELECT bootstrapped FROM system.local WHERE key='local'"))
        self.assertEqual(len(rows), 1)
        self.assertEqual(rows[0][0], 'IN_PROGRESS')
        # bring back node1 and invoke nodetool bootstrap to resume bootstrapping
        node1.start(wait_other_notice=True)
        node3.nodetool('bootstrap resume')

        node3.watch_log_for("Resume complete", from_mark=mark)
        rows = list(
            session.execute(
                "SELECT bootstrapped FROM system.local WHERE key='local'"))
        self.assertEqual(rows[0][0], 'COMPLETED')

        # cleanup to guarantee each node will only have sstables of its ranges
        cluster.cleanup()

        debug("Check data is present")
        # Let's check stream bootstrap completely transferred data
        stdout, stderr = node3.stress([
            'read', 'n=100k', "no-warmup", '-schema', 'replication(factor=2)',
            '-rate', 'threads=8'
        ],
                                      capture_output=True)

        if stdout and "FAILURE" in stdout:
            debug(stdout)
            assert False, "Cannot read inserted data after bootstrap"