def test_simultaneous_bootstrap(self):
    """
    Attempt to bootstrap two nodes at once, to assert the second bootstrapped node fails, and does not interfere.

    Start a one node cluster and run a stress write workload.
    Start up a second node, and wait for the first node to detect it has joined the cluster.
    While the second node is bootstrapping, start a third node. This should fail.

    @jira_ticket CASSANDRA-7069
    @jira_ticket CASSANDRA-9484
    """
    bootstrap_error = "Other bootstrapping/leaving/moving nodes detected," \
                      " cannot bootstrap while cassandra.consistent.rangemovement is true"

    cluster = self.cluster
    cluster.set_environment_variable('CASSANDRA_TOKEN_PREGENERATION_DISABLED', 'True')
    cluster.populate(1)
    cluster.start()

    node1, = cluster.nodelist()
    node1.stress(['write', 'n=500K', 'no-warmup', '-schema', 'replication(factor=1)',
                  '-rate', 'threads=10'])

    node2 = new_node(cluster)
    node2.start()

    for _ in range(30):  # wait until node2 shows up
        ntout = node1.nodetool('status').stdout
        if re.search(r'UJ\s+' + node2.ip_addr, ntout):
            break
        time.sleep(0.1)

    node3 = new_node(cluster, remote_debug_port='2003')
    try:
        node3.start(wait_other_notice=False, verbose=False)
    except NodeError:
        pass  # node doesn't start as expected

    time.sleep(.5)
    node2.watch_log_for("Starting listening for CQL clients")
    node3.watch_log_for(bootstrap_error)

    session = self.patient_exclusive_cql_connection(node2)

    # Repeat the select count(*) query, to help catch
    # bugs like 9484, where count(*) fails at higher
    # data loads.
    for _ in range(5):
        assert_one(session, "SELECT count(*) from keyspace1.standard1", [500000], cl=ConsistencyLevel.ONE)
def simultaneous_bootstrap_test(self):
    """
    Attempt to bootstrap two nodes at once, to assert the second bootstrapped node fails, and does not interfere.

    Start a one node cluster and run a stress write workload.
    Start up a second node, and wait for the first node to detect it has joined the cluster.
    While the second node is bootstrapping, start a third node. This should fail.

    @jira_ticket CASSANDRA-7069
    @jira_ticket CASSANDRA-9484
    """
    bootstrap_error = ("Other bootstrapping/leaving/moving nodes detected,"
                       " cannot bootstrap while cassandra.consistent.rangemovement is true")
    self.ignore_log_patterns.append(bootstrap_error)

    cluster = self.cluster
    cluster.populate(1)
    cluster.start(wait_for_binary_proto=True)

    node1, = cluster.nodelist()
    node1.stress(['write', 'n=500K', 'no-warmup', '-schema', 'replication(factor=1)',
                  '-rate', 'threads=10'])

    node2 = new_node(cluster)
    node2.start(wait_other_notice=True)

    node3 = new_node(cluster, remote_debug_port='2003')
    process = node3.start(wait_other_notice=False)
    stdout, stderr = process.communicate()
    self.assertIn(bootstrap_error, stderr, msg=stderr)
    time.sleep(.5)
    self.assertFalse(node3.is_running(), msg="Two nodes bootstrapped simultaneously")

    node2.watch_log_for("Starting listening for CQL clients")

    session = self.patient_exclusive_cql_connection(node2)

    # Repeat the select count(*) query, to help catch
    # bugs like 9484, where count(*) fails at higher
    # data loads.
    for _ in xrange(5):
        assert_one(session, "SELECT count(*) from keyspace1.standard1", [500000], cl=ConsistencyLevel.ONE)
def test_bootstrap_waits_for_streaming_to_finish(self):
    """
    Test that bootstrap completes and is marked as such after streaming finishes.
    """
    cluster = self.cluster
    cluster.set_environment_variable('CASSANDRA_TOKEN_PREGENERATION_DISABLED', 'True')

    logger.debug("Create a cluster")
    cluster.populate(1)
    node1 = cluster.nodelist()[0]

    logger.debug("Start node 1")
    node1.start(wait_for_binary_proto=True)

    logger.debug("Insert 10k rows")
    node1.stress(['write', 'n=10K', 'no-warmup', '-rate', 'threads=8', '-schema', 'replication(factor=2)'])

    logger.debug("Bootstrap node 2 with delay")
    node2 = new_node(cluster, byteman_port='4200')
    node2.update_startup_byteman_script('./byteman/bootstrap_5s_sleep.btm')
    node2.start(wait_for_binary_proto=True)

    assert_bootstrap_state(self, node2, 'COMPLETED')
    assert node2.grep_log('Bootstrap completed', filename='debug.log')
def decommissioned_wiped_node_can_gossip_to_single_seed_test(self):
    """
    @jira_ticket CASSANDRA-8072
    @jira_ticket CASSANDRA-8422

    Test that if we decommission a node, kill it and wipe its data, it can join a cluster with a single seed node.
    """
    cluster = self.cluster
    cluster.populate(1)
    cluster.start(wait_for_binary_proto=True)

    # Add a new node, bootstrap=True ensures that it is not a seed
    node2 = new_node(cluster, bootstrap=True)
    node2.start(wait_for_binary_proto=True, wait_other_notice=True)

    # Decommission the new node and kill it
    debug("Decommissioning & stopping node2")
    node2.decommission()
    node2.stop(wait_other_notice=False)

    # Wipe its data
    for data_dir in node2.data_directories():
        debug("Deleting {}".format(data_dir))
        shutil.rmtree(data_dir)

    commitlog_dir = os.path.join(node2.get_path(), 'commitlogs')
    debug("Deleting {}".format(commitlog_dir))
    shutil.rmtree(commitlog_dir)

    # Now start it, it should be allowed to join
    mark = node2.mark_log()
    debug("Restarting wiped node2")
    node2.start(wait_other_notice=False)
    node2.watch_log_for("JOINING:", from_mark=mark)
def _do_upgrade(self, login_keyspace=True):
    cluster = self.cluster
    node1 = cluster.nodelist()[0]

    node1.flush()
    time.sleep(.5)
    node1.stop(wait_other_notice=True)

    node1.set_install_dir(install_dir=self.default_install_dir)
    node1.start(wait_other_notice=True, wait_for_binary_proto=True)

    if self.bootstrap:
        cluster.set_install_dir(install_dir=self.default_install_dir)
        # Add a new node, bootstrap=True ensures that it is not a seed
        node2 = new_node(cluster, bootstrap=True)
        node2.start(wait_for_binary_proto=True, jvm_args=self.jvm_args)

        temp_files = self.glob_data_dirs(os.path.join('*', "tmp", "*.dat"))
        debug("temp files: " + str(temp_files))
        self.assertEquals(0, len(temp_files), "Temporary files were not cleaned up.")

    cursor = self.patient_cql_connection(node1)
    if login_keyspace:
        cursor.execute('USE ks')
    return cursor
def test_cleanup(self):
    """
    @jira_ticket CASSANDRA-11179
    Make sure we remove processed files during cleanup
    """
    cluster = self.cluster
    cluster.set_configuration_options(values={'concurrent_compactors': 4})
    cluster.populate(1)
    cluster.start(wait_for_binary_proto=True)
    node1, = cluster.nodelist()
    for x in range(0, 5):
        node1.stress(['write', 'n=100k', 'no-warmup', '-schema',
                      'compaction(strategy=SizeTieredCompactionStrategy,enabled=false)',
                      'replication(factor=1)', '-rate', 'threads=10'])
        node1.flush()
    node2 = new_node(cluster)
    node2.start(wait_for_binary_proto=True, wait_other_notice=True)
    event = threading.Event()
    failed = threading.Event()
    jobs = 1
    thread = threading.Thread(target=self._monitor_datadir,
                              args=(node1, event, len(node1.get_sstables("keyspace1", "standard1")), jobs, failed))
    thread.setDaemon(True)
    thread.start()
    node1.nodetool("cleanup -j {} keyspace1 standard1".format(jobs))
    event.set()
    thread.join()
    assert not failed.is_set()
def _wiped_node_cannot_join_test(self, gently):
    """
    @jira_ticket CASSANDRA-9765
    Test that if we stop a node and wipe its data then the node cannot join
    when it is not a seed. Test both a clean shutdown and a forced shutdown,
    via the gently parameter.
    """
    cluster = self.cluster
    cluster.populate(3)
    cluster.start(wait_for_binary_proto=True)

    stress_table = 'keyspace1.standard1'

    # write some data
    node1 = cluster.nodelist()[0]
    node1.stress(['write', 'n=10K', 'no-warmup', '-rate', 'threads=8'])

    session = self.patient_cql_connection(node1)
    original_rows = list(session.execute("SELECT * FROM {}".format(stress_table,)))

    # Add a new node, bootstrap=True ensures that it is not a seed
    node4 = new_node(cluster, bootstrap=True)
    node4.start(wait_for_binary_proto=True)

    session = self.patient_cql_connection(node4)
    assert original_rows == list(session.execute("SELECT * FROM {}".format(stress_table,)))

    # Stop the new node and wipe its data
    node4.stop(gently=gently)
    self._cleanup(node4)

    # Now start it, it should not be allowed to join.
    mark = node4.mark_log()
    node4.start(no_wait=True, wait_other_notice=False)
    node4.watch_log_for("A node with address {} already exists, cancelling join".format(node4.address_for_current_version_slashy()), from_mark=mark)
def bootstrap_test(self):
    """
    Test repaired data remains in sync after a bootstrap
    """
    cluster = self.cluster
    cluster.set_configuration_options(values={'hinted_handoff_enabled': False,
                                              'commitlog_sync_period_in_ms': 500})
    cluster.populate(3).start()
    node1, node2, node3 = cluster.nodelist()

    session = self.patient_exclusive_cql_connection(node3)
    session.execute("CREATE KEYSPACE ks WITH REPLICATION={'class':'SimpleStrategy', 'replication_factor': 2}")
    session.execute("CREATE TABLE ks.tbl (k INT PRIMARY KEY, v INT)")

    # insert some data
    stmt = SimpleStatement("INSERT INTO ks.tbl (k,v) VALUES (%s, %s)")
    for i in range(1000):
        session.execute(stmt, (i, i))

    node1.repair(options=['ks'])

    for i in range(1000):
        v = i + 1000
        session.execute(stmt, (v, v))

    # everything should be in sync
    for node in [node1, node2, node3]:
        result = node.repair(options=['ks', '--validate'])
        self.assertIn("Repaired data is in sync", result.stdout)

    node4 = new_node(self.cluster)
    node4.start(wait_for_binary_proto=True)

    self.assertEqual(len(self.cluster.nodelist()), 4)

    # everything should still be in sync
    for node in self.cluster.nodelist():
        result = node.repair(options=['ks', '--validate'])
        self.assertIn("Repaired data is in sync", result.stdout)
def _do_upgrade(self, login_keyspace=True):
    cluster = self.cluster
    node1 = cluster.nodelist()[0]

    node1.flush()
    time.sleep(.5)
    node1.stop(wait_other_notice=True)

    node1.set_install_dir(install_dir=self.fixture_dtest_setup.default_install_dir)
    node1.start(wait_other_notice=True, wait_for_binary_proto=True)

    if self.fixture_dtest_setup.bootstrap:
        cluster.set_install_dir(install_dir=self.fixture_dtest_setup.default_install_dir)
        # Add a new node, bootstrap=True ensures that it is not a seed
        node2 = new_node(cluster, bootstrap=True)
        node2.start(wait_for_binary_proto=True, jvm_args=self.fixture_dtest_setup.jvm_args)

        temp_files = self.glob_data_dirs(os.path.join('*', "tmp", "*.dat"))
        logger.debug("temp files: " + str(temp_files))
        assert 0 == len(temp_files), "Temporary files were not cleaned up."

    cursor = self.patient_cql_connection(node1)
    if login_keyspace:
        cursor.execute('USE ks')
    return cursor
def test_manual_bootstrap(self):
    """
    Test adding a new node and bootstrapping it manually. No auto_bootstrap.
    This test also verifies that all data is OK after the addition of the new node.

    @jira_ticket CASSANDRA-9022
    """
    cluster = self.cluster
    cluster.populate(2).start(wait_other_notice=True)
    (node1, node2) = cluster.nodelist()

    node1.stress(['write', 'n=1K', 'no-warmup', '-schema', 'replication(factor=2)',
                  '-rate', 'threads=1', '-pop', 'dist=UNIFORM(1..1000)'])

    session = self.patient_exclusive_cql_connection(node2)
    stress_table = 'keyspace1.standard1'

    original_rows = list(session.execute("SELECT * FROM %s" % stress_table))

    # Add a new node
    node3 = new_node(cluster, bootstrap=False)
    node3.start(wait_for_binary_proto=True)
    node3.repair()
    node1.cleanup()

    current_rows = list(session.execute("SELECT * FROM %s" % stress_table))
    assert original_rows == current_rows
def test_bootstrap_with_reset_bootstrap_state(self):
    """Test bootstrap with resetting bootstrap progress"""
    cluster = self.cluster
    cluster.set_configuration_options(values={'stream_throughput_outbound_megabits_per_sec': 1})
    cluster.populate(2).start(wait_other_notice=True)

    node1 = cluster.nodes['node1']
    node1.stress(['write', 'n=100K', '-schema', 'replication(factor=2)'])
    node1.flush()

    # kill node1 in the middle of streaming to let it fail
    t = InterruptBootstrap(node1)
    t.start()

    # start bootstrapping node3 and wait for streaming
    node3 = new_node(cluster)
    try:
        node3.start()
    except NodeError:
        pass  # node doesn't start as expected
    t.join()
    node1.start()

    # restart node3 bootstrap with resetting bootstrap progress
    node3.stop(signal_event=signal.SIGKILL)
    mark = node3.mark_log()
    node3.start(jvm_args=["-Dcassandra.reset_bootstrap_progress=true"])
    # check if we reset bootstrap state
    node3.watch_log_for("Resetting bootstrap progress to start fresh", from_mark=mark)
    # wait for node3 ready to query
    node3.wait_for_binary_interface(from_mark=mark)

    # check if 2nd bootstrap succeeded
    assert_bootstrap_state(self, node3, 'COMPLETED')
def test_decommissioned_wiped_node_can_join(self):
    """
    @jira_ticket CASSANDRA-9765
    Test that if we decommission a node and then wipe its data, it can join the cluster.
    """
    cluster = self.cluster
    cluster.populate(3)
    cluster.start(wait_for_binary_proto=True)

    stress_table = 'keyspace1.standard1'

    # write some data
    node1 = cluster.nodelist()[0]
    node1.stress(['write', 'n=10K', 'no-warmup', '-rate', 'threads=8'])

    session = self.patient_cql_connection(node1)
    original_rows = list(session.execute("SELECT * FROM {}".format(stress_table,)))

    # Add a new node, bootstrap=True ensures that it is not a seed
    node4 = new_node(cluster, bootstrap=True)
    node4.start(wait_for_binary_proto=True, wait_other_notice=True)

    session = self.patient_cql_connection(node4)
    assert original_rows == list(session.execute("SELECT * FROM {}".format(stress_table,)))

    # Decommission the new node and wipe its data
    node4.decommission()
    node4.stop()
    self._cleanup(node4)

    # Now start it, it should be allowed to join
    mark = node4.mark_log()
    node4.start(wait_other_notice=True)
    node4.watch_log_for("JOINING:", from_mark=mark)
def test_read_from_bootstrapped_node(self):
    """
    Test bootstrapped node sees existing data
    @jira_ticket CASSANDRA-6648
    """
    cluster = self.cluster
    cluster.populate(3)
    cluster.start()

    node1 = cluster.nodes['node1']
    node1.stress(['write', 'n=10K', 'no-warmup', '-rate', 'threads=8', '-schema', 'replication(factor=2)'])

    session = self.patient_cql_connection(node1)
    stress_table = 'keyspace1.standard1'
    original_rows = list(session.execute("SELECT * FROM %s" % (stress_table,)))

    node4 = new_node(cluster)
    node4.start(wait_for_binary_proto=True)

    session = self.patient_exclusive_cql_connection(node4)
    new_rows = list(session.execute("SELECT * FROM %s" % (stress_table,)))
    assert original_rows == new_rows
def test_local_quorum_bootstrap(self):
    """
    Test that CL local_quorum works while a node is bootstrapping.
    @jira_ticket CASSANDRA-8058
    """
    cluster = self.cluster
    cluster.populate([1, 1])
    cluster.start()

    node1 = cluster.nodes['node1']
    yaml_config = """
    # Create the keyspace and table
    keyspace: keyspace1
    keyspace_definition: |
      CREATE KEYSPACE keyspace1 WITH replication = {'class': 'NetworkTopologyStrategy', 'dc1': 1, 'dc2': 1};
    table: users
    table_definition:
      CREATE TABLE users (
        username text,
        first_name text,
        last_name text,
        email text,
        PRIMARY KEY(username)
      ) WITH compaction = {'class':'SizeTieredCompactionStrategy'};
    insert:
      partitions: fixed(1)
      batchtype: UNLOGGED
    queries:
      read:
        cql: select * from users where username = ?
        fields: samerow
    """
    with tempfile.NamedTemporaryFile(mode='w+') as stress_config:
        stress_config.write(yaml_config)
        stress_config.flush()
        node1.stress(['user', 'profile=' + stress_config.name, 'n=200K', 'no-warmup',
                      'ops(insert=1)', '-rate', 'threads=10'])

        node3 = new_node(cluster, data_center='dc2')
        node3.start(jvm_args=["-Dcassandra.write_survey=true"], no_wait=True)
        time.sleep(5)

        ntout = node1.nodetool('status').stdout
        assert re.search(r'UJ\s+' + node3.ip_addr, ntout), ntout
        out, err, _ = node1.stress(['user', 'profile=' + stress_config.name, 'ops(insert=1)',
                                    'n=10k', 'no-warmup', 'cl=LOCAL_QUORUM',
                                    '-rate', 'threads=10', '-errors', 'retries=2'])
        ntout = node1.nodetool('status').stdout
        assert re.search(r'UJ\s+' + node3.ip_addr, ntout), ntout

    logger.debug(out)
    assert_stderr_clean(err)
    regex = re.compile("Operation.+error inserting key.+Exception")
    failure = regex.search(str(out))
    assert failure is None, "Error during stress while bootstrapping"
def _bootstrap_test_with_replica_down(self, consistent_range_movement, rf=2):
    """
    Test to check consistent bootstrap will not succeed when there are insufficient replicas
    @jira_ticket CASSANDRA-11848
    """
    cluster = self.cluster
    cluster.set_environment_variable('CASSANDRA_TOKEN_PREGENERATION_DISABLED', 'True')

    cluster.populate(2)
    node1, node2 = cluster.nodelist()

    node3_token = None
    # Make token assignment deterministic
    if not self.dtest_config.use_vnodes:
        cluster.set_configuration_options(values={'num_tokens': 1})
        tokens = cluster.balanced_tokens(3)
        logger.debug("non-vnode tokens: %r" % (tokens,))
        node1.set_configuration_options(values={'initial_token': tokens[0]})
        node2.set_configuration_options(values={'initial_token': tokens[2]})
        node3_token = tokens[1]  # Add node 3 between node1 and node2

    cluster.start()

    node1.stress(['write', 'n=10K', 'no-warmup', '-rate', 'threads=8',
                  '-schema', 'replication(factor={})'.format(rf)])

    # change system_auth keyspace to 2 (default is 1) to avoid
    # "Unable to find sufficient sources for streaming" warning
    if cluster.cassandra_version() >= '2.2.0':
        session = self.patient_cql_connection(node1)
        session.execute("""
            ALTER KEYSPACE system_auth
                WITH replication = {'class':'SimpleStrategy', 'replication_factor':2};
        """)

    # Stop node2, so node3 will not be able to perform consistent range movement
    node2.stop(wait_other_notice=True)

    successful_bootstrap_expected = not consistent_range_movement

    node3 = new_node(cluster, token=node3_token)
    node3.start(wait_for_binary_proto=successful_bootstrap_expected,
                wait_other_notice=successful_bootstrap_expected,
                jvm_args=["-Dcassandra.consistent.rangemovement={}".format(consistent_range_movement)])

    if successful_bootstrap_expected:
        # with rf=1 and cassandra.consistent.rangemovement=false, missing sources are ignored
        if not consistent_range_movement and rf == 1:
            node3.watch_log_for("Unable to find sufficient sources for streaming range")
        assert node3.is_running()
        assert_bootstrap_state(self, node3, 'COMPLETED')
    else:
        if consistent_range_movement:
            if cluster.version() < '4.0':
                node3.watch_log_for("A node required to move the data consistently is down")
            else:
                node3.watch_log_for("Necessary replicas for strict consistency were removed by source filters")
        else:
            node3.watch_log_for("Unable to find sufficient sources for streaming range")
        assert_not_running(node3)
def _disk_balance_after_boundary_change_test(self, lcs):
    """
    @jira_ticket CASSANDRA-13948

    - Creates a 1 node cluster with 5 disks and insert data with compaction disabled
    - Bootstrap a node2 to make disk boundary changes on node1
    - Enable compaction on node1 and check disks are balanced
    - Decommission node1 to make disk boundary changes on node2
    - Enable compaction on node2 and check disks are balanced
    """
    cluster = self.cluster
    if self.dtest_config.use_vnodes:
        cluster.set_configuration_options(values={'num_tokens': 1024})
    num_disks = 5
    cluster.set_datadir_count(num_disks)
    cluster.set_configuration_options(values={'concurrent_compactors': num_disks})

    logger.debug("Starting node1 with {} data dirs and concurrent_compactors".format(num_disks))
    cluster.populate(1).start(wait_for_binary_proto=True)
    [node1] = cluster.nodelist()

    session = self.patient_cql_connection(node1)
    # reduce system_distributed RF to 1 so we don't require forceful decommission
    session.execute("ALTER KEYSPACE system_distributed WITH REPLICATION = {'class':'SimpleStrategy', 'replication_factor':'1'};")
    session.execute("ALTER KEYSPACE system_traces WITH REPLICATION = {'class':'SimpleStrategy', 'replication_factor':'1'};")

    num_flushes = 10
    keys_per_flush = 10000
    keys_to_write = num_flushes * keys_per_flush

    compaction_opts = "LeveledCompactionStrategy,sstable_size_in_mb=1" if lcs else "SizeTieredCompactionStrategy"
    logger.debug("Writing {} keys in {} flushes (compaction_opts={})".format(keys_to_write, num_flushes, compaction_opts))

    total_keys = num_flushes * keys_per_flush
    current_keys = 0
    while current_keys < total_keys:
        start_key = current_keys + 1
        end_key = current_keys + keys_per_flush
        logger.debug("Writing keys {}..{} and flushing".format(start_key, end_key))
        node1.stress(['write', 'n={}'.format(keys_per_flush), "no-warmup", "cl=ALL",
                      "-pop", "seq={}..{}".format(start_key, end_key),
                      "-rate", "threads=1",
                      "-schema", "replication(factor=1)",
                      "compaction(strategy={},enabled=false)".format(compaction_opts)])
        node1.nodetool('flush keyspace1 standard1')
        current_keys = end_key

    # Add a new node, so disk boundaries will change
    logger.debug("Bootstrap node2 and flush")
    node2 = new_node(cluster, bootstrap=True)
    node2.start(wait_for_binary_proto=True,
                jvm_args=["-Dcassandra.migration_task_wait_in_seconds=10"],
                set_migration_task=False)
    node2.flush()

    self._assert_balanced_after_boundary_change(node1, total_keys, lcs)

    logger.debug("Decommissioning node1")
    node1.decommission()
    node1.stop()

    self._assert_balanced_after_boundary_change(node2, total_keys, lcs)
def test_simultaneous_bootstrap(self):
    """
    Attempt to bootstrap two nodes at once, to assert the second bootstrapped node fails, and does not interfere.

    Start a one node cluster and run a stress write workload.
    Start up a second node, and wait for the first node to detect it has joined the cluster.
    While the second node is bootstrapping, start a third node. This should fail.

    @jira_ticket CASSANDRA-7069
    @jira_ticket CASSANDRA-9484
    """
    bootstrap_error = "Other bootstrapping/leaving/moving nodes detected," \
                      " cannot bootstrap while cassandra.consistent.rangemovement is true"

    cluster = self.cluster
    cluster.populate(1)
    cluster.start(wait_for_binary_proto=True)

    node1, = cluster.nodelist()
    node1.stress(['write', 'n=500K', 'no-warmup', '-schema', 'replication(factor=1)',
                  '-rate', 'threads=10'])

    node2 = new_node(cluster)
    node2.start(wait_other_notice=True)

    node3 = new_node(cluster, remote_debug_port='2003')
    try:
        node3.start(wait_other_notice=False, verbose=False)
    except NodeError:
        pass  # node doesn't start as expected

    time.sleep(.5)
    node2.watch_log_for("Starting listening for CQL clients")
    node3.watch_log_for(bootstrap_error)

    session = self.patient_exclusive_cql_connection(node2)

    # Repeat the select count(*) query, to help catch
    # bugs like 9484, where count(*) fails at higher
    # data loads.
    for _ in range(5):
        assert_one(session, "SELECT count(*) from keyspace1.standard1", [500000], cl=ConsistencyLevel.ONE)
def bootstrap_with_compatibility_flag_on(cluster, token):
    node2 = new_node(cluster)
    node2.set_configuration_options(values={'initial_token': token})
    # cassandra.force_3_0_protocol_version parameter is needed to allow schema
    # changes during the bootstrapping for upgrades from 3.0.14+ to anything upwards for 3.0.x or 3.x clusters.
    # @jira_ticket CASSANDRA-13004 for detailed context on `cassandra.force_3_0_protocol_version` flag
    node2.start(jvm_args=["-Dcassandra.force_3_0_protocol_version=true"], wait_for_binary_proto=True)
    return node2
def test_remove(self):
    """Test a mix of ring change operations across a mix of transient and repaired/unrepaired data"""
    node4 = new_node(self.cluster, bootstrap=True, token='00040')
    patch_start(node4)
    node4.start(wait_for_binary_proto=True, wait_other_notice=True)
    main_session = self.patient_cql_connection(self.node1)
    nodes = [self.node1, self.node2, self.node3]

    # We want the node being removed to have no data on it so nodetool remove always gets all the necessary data
    # from survivors
    node4_id = node4.nodetool('info').stdout[25:61]
    node4.stop(wait_other_notice=True)

    for i in range(0, 40):
        print("Inserting " + str(i))
        self.insert_row(i, i, i, main_session)

    sessions = [self.exclusive_cql_connection(node) for node in [self.node1, self.node2, self.node3]]

    expected = [gen_expected(range(0, 11), range(21, 40)),
                gen_expected(range(0, 21), range(31, 40)),
                gen_expected(range(11, 31))]
    # Every node should have some of its fully replicated data and one and two should have some transient data
    self.check_expected(sessions, expected)

    nodes[0].nodetool('removenode ' + node4_id)

    # Give streaming time to occur, it's asynchronous from removenode completing at other nodes
    import time
    time.sleep(15)

    # Everyone should have everything
    expected = [gen_expected(range(0, 40)),
                gen_expected(range(0, 40)),
                gen_expected(range(0, 40))]

    self.check_replication(sessions, exactly=3)
    self.check_expected(sessions, expected)
    repair_nodes(nodes)
    cleanup_nodes(nodes)

    self.check_replication(sessions, exactly=2)
    expected = [gen_expected(range(0, 11), range(21, 40)),
                gen_expected(range(0, 21), range(31, 40)),
                gen_expected(range(11, 31))]
    self.check_expected(sessions, expected)
def _bootstrap_new_node(self):
    # Check we can bootstrap a new node on the upgraded cluster:
    logger.debug("Adding a node to the cluster")
    nnode = new_node(self.cluster, remote_debug_port=str(2000 + len(self.cluster.nodes)))
    nnode.start(use_jna=True, wait_other_notice=240, wait_for_binary_proto=True)
    self._write_values()
    self._increment_counters()
    self._check_values()
    self._check_counters()
def _bootstrap_new_node(self):
    # Check we can bootstrap a new node on the upgraded cluster:
    debug("Adding a node to the cluster")
    nnode = new_node(self.cluster, remote_debug_port=str(2000 + len(self.cluster.nodes)))
    nnode.start(use_jna=True, wait_other_notice=240, wait_for_binary_proto=True)
    self._write_values()
    self._increment_counters()
    self._check_values()
    self._check_counters()
def test_decommission(self):
    """Test decommissioning a node correctly streams out all the data"""
    node4 = new_node(self.cluster, bootstrap=True, token='00040')
    patch_start(node4)
    node4.start(wait_for_binary_proto=True, wait_other_notice=True)
    main_session = self.patient_cql_connection(self.node1)
    nodes = [self.node1, self.node2, self.node3, node4]

    for i in range(0, 40, 2):
        print("Inserting " + str(i))
        self.insert_row(i, i, i, main_session)

    # Make sure at least a little data is repaired
    repair_nodes(nodes)

    # Ensure that there is at least some transient data around; that way, if it's missing after bootstrap
    # we know we failed to get it from the transient replica that lost the range entirely
    nodes[1].stop(wait_other_notice=True)

    for i in range(1, 40, 2):
        print("Inserting " + str(i))
        self.insert_row(i, i, i, main_session)

    nodes[1].start(wait_for_binary_proto=True, wait_other_notice=True)
    sessions = [self.exclusive_cql_connection(node) for node in [self.node1, self.node2, self.node3, node4]]

    expected = [gen_expected(range(0, 11), range(31, 40)),
                gen_expected(range(0, 21, 2)),
                gen_expected(range(1, 11, 2), range(11, 31)),
                gen_expected(range(11, 20, 2), range(21, 40))]
    self.check_expected(sessions, expected)

    # node4 has transient data we want to see streamed out on decommission
    nodes[3].nodetool('decommission')

    nodes = nodes[:-1]
    sessions = sessions[:-1]

    expected = [gen_expected(range(0, 11), range(11, 21, 2), range(21, 40)),
                gen_expected(range(0, 21, 2), range(21, 30, 2), range(31, 40)),
                gen_expected(range(1, 11, 2), range(11, 31), range(31, 40, 2))]

    cleanup_nodes(nodes)
    self.check_replication(sessions, gte=2, lte=3)
    self.check_expected(sessions, expected)

    repair_nodes(nodes)

    # There should be no transient data anywhere
    expected = [gen_expected(range(0, 11), range(21, 40)),
                gen_expected(range(0, 21), range(31, 40)),
                gen_expected(range(11, 31))]
    self.check_expected(sessions, expected, nodes, cleanup=True)
    self.check_replication(sessions, exactly=2)
def _bootstrap_test_with_replica_down(self, consistent_range_movement, rf=2):
    """
    Test to check consistent bootstrap will not succeed when there are insufficient replicas
    @jira_ticket CASSANDRA-11848
    """
    cluster = self.cluster
    cluster.populate(2)
    node1, node2 = cluster.nodelist()

    node3_token = None
    # Make token assignment deterministic
    if not self.dtest_config.use_vnodes:
        cluster.set_configuration_options(values={'num_tokens': 1})
        tokens = cluster.balanced_tokens(3)
        logger.debug("non-vnode tokens: %r" % (tokens,))
        node1.set_configuration_options(values={'initial_token': tokens[0]})
        node2.set_configuration_options(values={'initial_token': tokens[2]})
        node3_token = tokens[1]  # Add node 3 between node1 and node2

    cluster.start()

    node1.stress(['write', 'n=10K', 'no-warmup', '-rate', 'threads=8',
                  '-schema', 'replication(factor={})'.format(rf)])

    # change system_auth keyspace to 2 (default is 1) to avoid
    # "Unable to find sufficient sources for streaming" warning
    if cluster.cassandra_version() >= '2.2.0':
        session = self.patient_cql_connection(node1)
        session.execute("""
            ALTER KEYSPACE system_auth
                WITH replication = {'class':'SimpleStrategy', 'replication_factor':2};
        """)

    # Stop node2, so node3 will not be able to perform consistent range movement
    node2.stop(wait_other_notice=True)

    successful_bootstrap_expected = not consistent_range_movement

    node3 = new_node(cluster, token=node3_token)
    node3.start(wait_for_binary_proto=successful_bootstrap_expected,
                wait_other_notice=successful_bootstrap_expected,
                jvm_args=["-Dcassandra.consistent.rangemovement={}".format(consistent_range_movement)])

    if successful_bootstrap_expected:
        # with rf=1 and cassandra.consistent.rangemovement=false, missing sources are ignored
        if not consistent_range_movement and rf == 1:
            node3.watch_log_for("Unable to find sufficient sources for streaming range")
        assert node3.is_running()
        assert_bootstrap_state(self, node3, 'COMPLETED')
    else:
        if consistent_range_movement:
            if cluster.version() < '4.0':
                node3.watch_log_for("A node required to move the data consistently is down")
            else:
                node3.watch_log_for("Necessary replicas for strict consistency were removed by source filters")
        else:
            node3.watch_log_for("Unable to find sufficient sources for streaming range")
        assert_not_running(node3)
def test_resumable_bootstrap(self):
    """
    Test resuming bootstrap after data streaming failure
    """
    cluster = self.cluster
    cluster.populate(2)

    node1 = cluster.nodes['node1']
    # set up byteman
    node1.byteman_port = '8100'
    node1.import_config_files()

    cluster.start(wait_other_notice=True)
    # kill stream to node3 in the middle of streaming to let it fail
    if cluster.version() < '4.0':
        node1.byteman_submit(['./byteman/pre4.0/stream_failure.btm'])
    else:
        node1.byteman_submit(['./byteman/4.0/stream_failure.btm'])
    node1.stress(['write', 'n=1K', 'no-warmup', 'cl=TWO', '-schema', 'replication(factor=2)', '-rate', 'threads=50'])
    cluster.flush()

    # start bootstrapping node3 and wait for streaming
    node3 = new_node(cluster)
    node3.start(wait_other_notice=False, wait_for_binary_proto=True)

    # wait for node3 ready to query
    node3.watch_log_for("Starting listening for CQL clients")
    mark = node3.mark_log()
    # check if node3 is still in bootstrap mode
    retry_till_success(assert_bootstrap_state, tester=self, node=node3,
                       expected_bootstrap_state='IN_PROGRESS', timeout=120)

    # bring back node1 and invoke nodetool bootstrap to resume bootstrapping
    node3.nodetool('bootstrap resume')
    node3.watch_log_for("Resume complete", from_mark=mark)
    assert_bootstrap_state(self, node3, 'COMPLETED')

    # cleanup to guarantee each node will only have sstables of its ranges
    cluster.cleanup()

    logger.debug("Check data is present")
    # Let's check stream bootstrap completely transferred data
    stdout, stderr, _ = node3.stress(['read', 'n=1k', 'no-warmup', '-schema', 'replication(factor=2)', '-rate', 'threads=8'])

    if stdout is not None:
        assert "FAILURE" not in stdout
def bootstrap_on_write_survey_and_join(cluster, token):
    node2 = new_node(cluster)
    node2.set_configuration_options(values={'initial_token': token})
    node2.start(jvm_args=["-Dcassandra.write_survey=true"], wait_for_binary_proto=True)

    assert len(node2.grep_log('Startup complete, but write survey mode is active, not becoming an active ring member.'))
    assert_bootstrap_state(self, node2, 'IN_PROGRESS')

    node2.nodetool("join")
    assert len(node2.grep_log('Leaving write survey mode and joining ring at operator request'))
    return node2
def local_quorum_bootstrap_test(self):
    """
    Test that CL local_quorum works while a node is bootstrapping.
    @jira_ticket CASSANDRA-8058
    """
    cluster = self.cluster
    cluster.populate([1, 1])
    cluster.start()

    node1 = cluster.nodes['node1']
    yaml_config = """
    # Create the keyspace and table
    keyspace: keyspace1
    keyspace_definition: |
      CREATE KEYSPACE keyspace1 WITH replication = {'class': 'NetworkTopologyStrategy', 'dc1': 1, 'dc2': 1};
    table: users
    table_definition:
      CREATE TABLE users (
        username text,
        first_name text,
        last_name text,
        email text,
        PRIMARY KEY(username)
      ) WITH compaction = {'class':'SizeTieredCompactionStrategy'};
    insert:
      partitions: fixed(1)
      batchtype: UNLOGGED
    queries:
      read:
        cql: select * from users where username = ?
        fields: samerow
    """
    with tempfile.NamedTemporaryFile(mode='w+') as stress_config:
        stress_config.write(yaml_config)
        stress_config.flush()
        node1.stress(['user', 'profile=' + stress_config.name, 'n=2M', 'no-warmup',
                      'ops(insert=1)', '-rate', 'threads=50'])

        node3 = new_node(cluster, data_center='dc2')
        node3.start(no_wait=True)
        time.sleep(3)

        out, err, _ = node1.stress(['user', 'profile=' + stress_config.name, 'ops(insert=1)',
                                    'n=500K', 'no-warmup', 'cl=LOCAL_QUORUM',
                                    '-rate', 'threads=5', '-errors', 'retries=2'])

    debug(out)
    assert_stderr_clean(err)
    regex = re.compile("Operation.+error inserting key.+Exception")
    failure = regex.search(out)
    self.assertIsNone(failure, "Error during stress while bootstrapping")
def move_test(self, move_token, expected_after_move, expected_after_repair):
    """Helper method to run a move test cycle"""
    node4 = new_node(self.cluster, bootstrap=True, token='00040')
    patch_start(node4)
    node4.start(wait_for_binary_proto=True)
    main_session = self.patient_cql_connection(self.node1)
    nodes = [self.node1, self.node2, self.node3, node4]

    for i in range(0, 40, 2):
        print("Inserting " + str(i))
        self.insert_row(i, i, i, main_session)

    # Make sure at least a little data is repaired
    repair_nodes(nodes)

    # Ensure that there is at least some transient data around; that way, if it's missing after bootstrap
    # we know we failed to get it from the transient replica that lost the range entirely
    nodes[1].stop(wait_other_notice=True)

    for i in range(1, 40, 2):
        print("Inserting " + str(i))
        self.insert_row(i, i, i, main_session)

    nodes[1].start(wait_for_binary_proto=True)
    sessions = [self.exclusive_cql_connection(node) for node in [self.node1, self.node2, self.node3, node4]]

    expected = [gen_expected(range(0, 11), range(31, 40)),
                gen_expected(range(0, 21, 2)),
                gen_expected(range(1, 11, 2), range(11, 31)),
                gen_expected(range(11, 20, 2), range(21, 40))]
    self.check_expected(sessions, expected)
    self.check_replication(sessions, exactly=2)

    nodes[0].nodetool('move %s' % move_token)

    cleanup_nodes(nodes)
    self.check_replication(sessions, gte=2, lte=3)
    self.check_expected(sessions, expected=expected_after_move)

    repair_nodes(nodes)
    self.check_expected(sessions, expected_after_repair, nodes, cleanup=True)
    self.check_replication(sessions, exactly=2)
def test_consistent_reads_after_bootstrap(self):
    logger.debug("Creating a ring")
    cluster = self.cluster
    cluster.set_configuration_options(values={'hinted_handoff_enabled': False,
                                              'write_request_timeout_in_ms': 60000,
                                              'read_request_timeout_in_ms': 60000,
                                              'dynamic_snitch_badness_threshold': 0.0})
    cluster.set_batch_commitlog(enabled=True)

    cluster.populate(2)
    node1, node2 = cluster.nodelist()
    cluster.start(wait_for_binary_proto=True, wait_other_notice=True)

    logger.debug("Set to talk to node 2")
    n2session = self.patient_cql_connection(node2)
    create_ks(n2session, 'ks', 2)
    create_c1c2_table(self, n2session)

    logger.debug("Generating some data for all nodes")
    insert_c1c2(n2session, keys=list(range(10, 20)), consistency=ConsistencyLevel.ALL)
    node1.flush()

    logger.debug("Taking down node1")
    node1.stop(wait_other_notice=True)

    logger.debug("Writing data to only node2")
    insert_c1c2(n2session, keys=list(range(30, 1000)), consistency=ConsistencyLevel.ONE)
    node2.flush()

    logger.debug("Restart node1")
    node1.start(wait_other_notice=True)

    logger.debug("Bootstrapping node3")
    node3 = new_node(cluster)
    node3.start(wait_for_binary_proto=True)

    n3session = self.patient_cql_connection(node3)
    n3session.execute("USE ks")
    logger.debug("Checking that no data was lost")
    for n in range(10, 20):
        query_c1c2(n3session, n, ConsistencyLevel.ALL)

    for n in range(30, 1000):
        query_c1c2(n3session, n, ConsistencyLevel.ALL)
def disk_balance_bootstrap_test(self):
    cluster = self.cluster
    if not DISABLE_VNODES:
        cluster.set_configuration_options(values={'num_tokens': 256})
    # apparently we have legitimate errors in the log when bootstrapping (see bootstrap_test.py)
    self.allow_log_errors = True
    cluster.populate(4).start(wait_for_binary_proto=True)
    node1 = cluster.nodes['node1']
    node1.stress(['write', 'n=50k', 'no-warmup', '-rate', 'threads=100',
                  '-schema', 'replication(factor=3)',
                  'compaction(strategy=SizeTieredCompactionStrategy,enabled=false)'])
    cluster.flush()

    node5 = new_node(cluster)
    node5.start(wait_for_binary_proto=True)
    self.assert_balanced(node5)
def test_resumable_bootstrap(self):
    """
    Test resuming bootstrap after data streaming failure
    """
    cluster = self.cluster
    cluster.set_environment_variable('CASSANDRA_TOKEN_PREGENERATION_DISABLED', 'True')
    cluster.populate(2)

    node1 = cluster.nodes['node1']
    # set up byteman
    node1.byteman_port = '8100'
    node1.import_config_files()

    cluster.start()
    # kill stream to node3 in the middle of streaming to let it fail
    if cluster.version() < '4.0':
        node1.byteman_submit([self.byteman_submit_path_pre_4_0])
    else:
        node1.byteman_submit([self.byteman_submit_path_4_0])
    node1.stress(['write', 'n=1K', 'no-warmup', 'cl=TWO', '-schema', 'replication(factor=2)', '-rate', 'threads=50'])
    cluster.flush()

    # start bootstrapping node3 and wait for streaming
    node3 = new_node(cluster)
    node3.start(wait_other_notice=False)

    # let streaming fail as we expect
    node3.watch_log_for('Some data streaming failed')

    # bring back node3 and invoke nodetool bootstrap to resume bootstrapping
    node3.nodetool('bootstrap resume')
    node3.wait_for_binary_interface()
    assert_bootstrap_state(self, node3, 'COMPLETED')

    # cleanup to guarantee each node will only have sstables of its ranges
    cluster.cleanup()

    logger.debug("Check data is present")
    # Let's check stream bootstrap completely transferred data
    stdout, stderr, _ = node3.stress(['read', 'n=1k', 'no-warmup', '-schema', 'replication(factor=2)', '-rate', 'threads=8'])

    if stdout is not None:
        assert "FAILURE" not in stdout
def test_simple_bootstrap_small_keepalive_period(self):
    """
    @jira_ticket CASSANDRA-11841
    Test that bootstrap completes if it takes longer than streaming_socket_timeout_in_ms or
    2*streaming_keep_alive_period_in_secs to receive a single sstable
    """
    cluster = self.cluster
    yaml_opts = {'streaming_keep_alive_period_in_secs': 2}
    if cluster.version() < '4.0':
        yaml_opts['streaming_socket_timeout_in_ms'] = 1000
    cluster.set_configuration_options(values=yaml_opts)

    # Create a single node cluster
    cluster.populate(1)
    node1 = cluster.nodelist()[0]

    logger.debug("Setting up byteman on {}".format(node1.name))
    # set up byteman
    node1.byteman_port = '8100'
    node1.import_config_files()

    cluster.start()

    # Create more than one sstable larger than 1MB
    node1.stress(['write', 'n=1K', '-rate', 'threads=8', '-schema',
                  'compaction(strategy=SizeTieredCompactionStrategy, enabled=false)'])
    cluster.flush()

    logger.debug("Submitting byteman script to {}".format(node1.name))
    # Sleep longer than streaming_socket_timeout_in_ms to make sure the node will not be killed
    node1.byteman_submit([mk_bman_path('stream_5s_sleep.btm')])

    # Bootstrapping a new node with very small streaming_socket_timeout_in_ms
    node2 = new_node(cluster)
    node2.start(wait_for_binary_proto=True)

    # Shouldn't fail due to streaming socket timeout
    assert_bootstrap_state(self, node2, 'COMPLETED')

    if cluster.version() < '4.0':
        for node in cluster.nodelist():
            assert node.grep_log('Scheduling keep-alive task with 2s period.', filename='debug.log')
            assert node.grep_log('Sending keep-alive', filename='debug.log')
            assert node.grep_log('Received keep-alive', filename='debug.log')
def test_decommissioned_wiped_node_can_gossip_to_single_seed(self):
    """
    @jira_ticket CASSANDRA-8072
    @jira_ticket CASSANDRA-8422

    Test that if we decommission a node, kill it and wipe its data, it can join a cluster with a single seed node.
    """
    cluster = self.cluster
    cluster.set_environment_variable('CASSANDRA_TOKEN_PREGENERATION_DISABLED', 'True')
    cluster.populate(1)
    cluster.start()

    node1 = cluster.nodelist()[0]
    # Add a new node, bootstrap=True ensures that it is not a seed
    node2 = new_node(cluster, bootstrap=True)
    node2.start(wait_for_binary_proto=True)

    session = self.patient_cql_connection(node1)

    if cluster.version() >= '2.2':
        # reduce system_distributed RF to 1 so we don't require forceful decommission
        session.execute("ALTER KEYSPACE system_distributed WITH REPLICATION = {'class':'SimpleStrategy', 'replication_factor':'1'};")
        session.execute("ALTER KEYSPACE system_traces WITH REPLICATION = {'class':'SimpleStrategy', 'replication_factor':'1'};")

    # Decommission the new node and kill it
    logger.debug("Decommissioning & stopping node2")
    node2.decommission()
    node2.stop(wait_other_notice=False)

    # Wipe its data
    for data_dir in node2.data_directories():
        logger.debug("Deleting {}".format(data_dir))
        shutil.rmtree(data_dir)

    commitlog_dir = os.path.join(node2.get_path(), 'commitlogs')
    logger.debug("Deleting {}".format(commitlog_dir))
    shutil.rmtree(commitlog_dir)

    # Now start it, it should be allowed to join
    mark = node2.mark_log()
    logger.debug("Restarting wiped node2")
    node2.start(wait_other_notice=False)
    node2.watch_log_for("JOINING:", from_mark=mark)
def test_wait_for_schema(self):
    # start a one node cluster
    cluster = self.cluster
    cluster.populate(1, install_byteman=True)
    node1 = cluster.nodes['node1']
    cluster.start()

    session = self.patient_cql_connection(node1)
    create_ks(session, 'ks', 2)
    create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})

    empty_size = data_size(node1, 'ks', 'cf')

    keys = 1000
    insert_statement = session.prepare("INSERT INTO ks.cf (key, c1, c2) VALUES (?, 'value1', 'value2')")
    execute_concurrent_with_args(session, insert_statement, [['k%d' % k] for k in range(keys)])

    node1.flush()
    node1.compact()
    initial_size = data_size(node1, 'ks', 'cf')
    # logger.debug("node1 size for ks.cf before bootstrapping node2: %s" % float(initial_size))

    node2 = new_node(cluster)
    node2.set_configuration_options(values={'request_timeout_in_ms': 10000})

    mark = node2.mark_log()

    node1.byteman_submit(['./byteman/migration_request_sleep.btm'])

    node2.start(jvm_args=["-Dcassandra.migration_task_wait_in_seconds=20"],
                set_migration_task=False,
                wait_for_binary_proto=True)

    node2.watch_log_for('Prepare completed. Receiving', from_mark=mark, timeout=6)

    node2.flush()
    node2.compact()
    # logger.debug("node2 joined with size for ks.cf : %s" % float(data_size(node2, 'ks','cf')))

    node1.stop()
    rows = session.execute('SELECT count(*) from ks.cf')
    assert rows[0][0] == 1000
    cluster.stop()
def test_disk_balance_bootstrap(self):
    cluster = self.cluster
    if self.dtest_config.use_vnodes:
        cluster.set_configuration_options(values={'num_tokens': 256})
    # apparently we have legitimate errors in the log when bootstrapping (see bootstrap_test.py)
    self.fixture_dtest_setup.allow_log_errors = True
    cluster.populate(4).start(wait_for_binary_proto=True)
    node1 = cluster.nodes['node1']
    node1.stress(['write', 'n=50k', 'no-warmup', '-rate', 'threads=100',
                  '-schema', 'replication(factor=3)',
                  'compaction(strategy=SizeTieredCompactionStrategy,enabled=false)'])
    cluster.flush()

    node5 = new_node(cluster)
    node5.start(wait_for_binary_proto=True)
    self.assert_balanced(node5)
def test_simple_bootstrap_nodata(self):
    """
    @jira_ticket CASSANDRA-11010
    Test that bootstrap completes if streaming from nodes with no data
    """
    cluster = self.cluster
    # Create a two-node cluster
    cluster.populate(2)
    cluster.start(wait_other_notice=True)

    # Bootstrapping a new node
    node3 = new_node(cluster)
    node3.start(wait_for_binary_proto=True, wait_other_notice=True)

    assert_bootstrap_state(self, node3, 'COMPLETED')
def test_simple_bootstrap_nodata(self):
    """
    @jira_ticket CASSANDRA-11010
    Test that bootstrap completes if streaming from nodes with no data
    """
    cluster = self.cluster
    cluster.set_environment_variable('CASSANDRA_TOKEN_PREGENERATION_DISABLED', 'True')
    # Create a two-node cluster
    cluster.populate(2)
    cluster.start()

    # Bootstrapping a new node
    node3 = new_node(cluster)
    node3.start(wait_for_binary_proto=True)

    assert_bootstrap_state(self, node3, 'COMPLETED')
def simple_bootstrap_test_small_keepalive_period(self):
    """
    @jira_ticket CASSANDRA-11841
    Test that bootstrap completes if it takes longer than streaming_socket_timeout_in_ms or
    2*streaming_keep_alive_period_in_secs to receive a single sstable
    """
    cluster = self.cluster
    cluster.set_configuration_options(values={'stream_throughput_outbound_megabits_per_sec': 1,
                                              'streaming_socket_timeout_in_ms': 1000,
                                              'streaming_keep_alive_period_in_secs': 1})

    # Create a single node cluster
    cluster.populate(1)
    node1 = cluster.nodelist()[0]
    cluster.start(wait_other_notice=True)

    # Create more than one sstable larger than 1MB
    node1.stress(['write', 'n=50K', '-rate', 'threads=8', '-schema',
                  'compaction(strategy=SizeTieredCompactionStrategy, enabled=false)'])
    cluster.flush()
    node1.stress(['write', 'n=50K', '-rate', 'threads=8', '-schema',
                  'compaction(strategy=SizeTieredCompactionStrategy, enabled=false)'])
    cluster.flush()
    self.assertGreater(node1.get_sstables("keyspace1", "standard1"), 1)

    # Bootstrapping a new node with very small streaming_socket_timeout_in_ms
    node2 = new_node(cluster)
    node2.start(wait_for_binary_proto=True)

    # Shouldn't fail due to streaming socket timeout
    assert_bootstrap_state(self, node2, 'COMPLETED')

    for node in cluster.nodelist():
        self.assertTrue(node.grep_log('Scheduling keep-alive task with 1s period.', filename='debug.log'))
        self.assertTrue(node.grep_log('Sending keep-alive', filename='debug.log'))
        self.assertTrue(node.grep_log('Received keep-alive', filename='debug.log'))
def resumable_bootstrap_test(self):
    """
    Test resuming bootstrap after data streaming failure
    """
    cluster = self.cluster
    cluster.populate(2)

    node1 = cluster.nodes['node1']
    # set up byteman
    node1.byteman_port = '8100'
    node1.import_config_files()

    cluster.start(wait_other_notice=True)
    # kill stream to node3 in the middle of streaming to let it fail
    if cluster.version() < '4.0':
        node1.byteman_submit(['./byteman/pre4.0/stream_failure.btm'])
    else:
        node1.byteman_submit(['./byteman/4.0/stream_failure.btm'])
    node1.stress(['write', 'n=1K', 'no-warmup', 'cl=TWO', '-schema', 'replication(factor=2)', '-rate', 'threads=50'])
    cluster.flush()

    # start bootstrapping node3 and wait for streaming
    node3 = new_node(cluster)
    node3.start(wait_other_notice=False, wait_for_binary_proto=True)

    # wait for node3 ready to query
    node3.watch_log_for("Starting listening for CQL clients")
    mark = node3.mark_log()
    # check if node3 is still in bootstrap mode
    assert_bootstrap_state(self, node3, 'IN_PROGRESS')

    # bring back node1 and invoke nodetool bootstrap to resume bootstrapping
    node3.nodetool('bootstrap resume')
    node3.watch_log_for("Resume complete", from_mark=mark)
    assert_bootstrap_state(self, node3, 'COMPLETED')

    # cleanup to guarantee each node will only have sstables of its ranges
    cluster.cleanup()

    debug("Check data is present")
    # Let's check stream bootstrap completely transferred data
    stdout, stderr, _ = node3.stress(['read', 'n=1k', 'no-warmup', '-schema', 'replication(factor=2)', '-rate', 'threads=8'])

    if stdout is not None:
        self.assertNotIn("FAILURE", stdout)
def test_decommissioned_wiped_node_can_gossip_to_single_seed(self):
    """
    @jira_ticket CASSANDRA-8072
    @jira_ticket CASSANDRA-8422

    Test that if we decommission a node, kill it and wipe its data, it can join a cluster with a single seed node.
    """
    cluster = self.cluster
    cluster.populate(1)
    cluster.start(wait_for_binary_proto=True)

    node1 = cluster.nodelist()[0]
    # Add a new node, bootstrap=True ensures that it is not a seed
    node2 = new_node(cluster, bootstrap=True)
    node2.start(wait_for_binary_proto=True, wait_other_notice=True)

    session = self.patient_cql_connection(node1)

    if cluster.version() >= '2.2':
        # reduce system_distributed RF to 1 so we don't require forceful decommission
        session.execute("ALTER KEYSPACE system_distributed WITH REPLICATION = {'class':'SimpleStrategy', 'replication_factor':'1'};")
        session.execute("ALTER KEYSPACE system_traces WITH REPLICATION = {'class':'SimpleStrategy', 'replication_factor':'1'};")

    # Decommission the new node and kill it
    logger.debug("Decommissioning & stopping node2")
    node2.decommission()
    node2.stop(wait_other_notice=False)

    # Wipe its data
    for data_dir in node2.data_directories():
        logger.debug("Deleting {}".format(data_dir))
        shutil.rmtree(data_dir)

    commitlog_dir = os.path.join(node2.get_path(), 'commitlogs')
    logger.debug("Deleting {}".format(commitlog_dir))
    shutil.rmtree(commitlog_dir)

    # Now start it, it should be allowed to join
    mark = node2.mark_log()
    logger.debug("Restarting wiped node2")
    node2.start(wait_other_notice=False)
    node2.watch_log_for("JOINING:", from_mark=mark)
def test_simple_bootstrap_small_keepalive_period(self):
    """
    @jira_ticket CASSANDRA-11841
    Test that bootstrap completes if it takes longer than streaming_socket_timeout_in_ms or
    2*streaming_keep_alive_period_in_secs to receive a single sstable
    """
    cluster = self.cluster
    yaml_opts = {'streaming_keep_alive_period_in_secs': 2}
    if cluster.version() < '4.0':
        yaml_opts['streaming_socket_timeout_in_ms'] = 1000
    cluster.set_configuration_options(values=yaml_opts)

    # Create a single node cluster
    cluster.populate(1)
    node1 = cluster.nodelist()[0]

    logger.debug("Setting up byteman on {}".format(node1.name))
    # set up byteman
    node1.byteman_port = '8100'
    node1.import_config_files()

    cluster.start(wait_other_notice=True)

    # Create more than one sstable larger than 1MB
    node1.stress(['write', 'n=1K', '-rate', 'threads=8', '-schema',
                  'compaction(strategy=SizeTieredCompactionStrategy, enabled=false)'])
    cluster.flush()

    logger.debug("Submitting byteman script to {}".format(node1.name))
    # Sleep longer than streaming_socket_timeout_in_ms to make sure the node will not be killed
    node1.byteman_submit(['./byteman/stream_5s_sleep.btm'])

    # Bootstrapping a new node with very small streaming_socket_timeout_in_ms
    node2 = new_node(cluster)
    node2.start(wait_for_binary_proto=True)

    # Shouldn't fail due to streaming socket timeout
    assert_bootstrap_state(self, node2, 'COMPLETED')

    for node in cluster.nodelist():
        assert node.grep_log('Scheduling keep-alive task with 2s period.', filename='debug.log')
        assert node.grep_log('Sending keep-alive', filename='debug.log')
        assert node.grep_log('Received keep-alive', filename='debug.log')
def move_test(self, move_token, expected_after_move, expected_after_repair):
    """Helper method to run a move test cycle"""
    node4 = new_node(self.cluster, bootstrap=True, token='00040')
    patch_start(node4)
    node4.start(wait_for_binary_proto=True, wait_other_notice=True)
    main_session = self.patient_cql_connection(self.node1)
    nodes = [self.node1, self.node2, self.node3, node4]

    for i in range(0, 40, 2):
        print("Inserting " + str(i))
        self.insert_row(i, i, i, main_session)

    # Make sure at least a little data is repaired
    repair_nodes(nodes)

    # Ensure that there is at least some transient data around; that way, if it's missing after bootstrap
    # we know we failed to get it from the transient replica that lost the range entirely
    nodes[1].stop(wait_other_notice=True)

    for i in range(1, 40, 2):
        print("Inserting " + str(i))
        self.insert_row(i, i, i, main_session)

    nodes[1].start(wait_for_binary_proto=True, wait_other_notice=True)
    sessions = [self.exclusive_cql_connection(node) for node in [self.node1, self.node2, self.node3, node4]]

    expected = [gen_expected(range(0, 11), range(31, 40)),
                gen_expected(range(0, 21, 2)),
                gen_expected(range(1, 11, 2), range(11, 31)),
                gen_expected(range(11, 20, 2), range(21, 40))]
    self.check_expected(sessions, expected)
    self.check_replication(sessions, exactly=2)

    nodes[0].nodetool('move %s' % move_token)

    cleanup_nodes(nodes)
    self.check_replication(sessions, gte=2, lte=3)
    self.check_expected(sessions, expected=expected_after_move)

    repair_nodes(nodes)
    self.check_expected(sessions, expected_after_repair, nodes, cleanup=True)
    self.check_replication(sessions, exactly=2)
def test_resumable_bootstrap(self):
    """
    Test resuming bootstrap after data streaming failure
    """
    cluster = self.cluster
    cluster.populate(2)

    node1 = cluster.nodes['node1']
    # set up byteman
    node1.byteman_port = '8100'
    node1.import_config_files()

    cluster.start(wait_other_notice=True)
    # kill stream to node3 in the middle of streaming to let it fail
    if cluster.version() < '4.0':
        node1.byteman_submit([self.byteman_submit_path_pre_4_0])
    else:
        node1.byteman_submit([self.byteman_submit_path_4_0])
    node1.stress(['write', 'n=1K', 'no-warmup', 'cl=TWO', '-schema', 'replication(factor=2)', '-rate', 'threads=50'])
    cluster.flush()

    # start bootstrapping node3 and wait for streaming
    node3 = new_node(cluster)
    node3.start(wait_other_notice=False)

    # let streaming fail as we expect
    node3.watch_log_for('Some data streaming failed')

    # bring back node3 and invoke nodetool bootstrap to resume bootstrapping
    node3.nodetool('bootstrap resume')
    node3.wait_for_binary_interface()
    assert_bootstrap_state(self, node3, 'COMPLETED')

    # cleanup to guarantee each node will only have sstables of its ranges
    cluster.cleanup()

    logger.debug("Check data is present")
    # Let's check stream bootstrap completely transferred data
    stdout, stderr, _ = node3.stress(['read', 'n=1k', 'no-warmup', '-schema', 'replication(factor=2)', '-rate', 'threads=8'])

    if stdout is not None:
        assert "FAILURE" not in stdout
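# The version-dependent byteman_submit call above also appears in other streaming-failure tests.
# A minimal sketch of a wrapper method, assuming the two script-path attributes are defined
# elsewhere in the class; the method name is hypothetical:
def submit_stream_failure_script(self, node):
    """Install the byteman rule that makes outgoing streams fail, picking the script for this version."""
    if self.cluster.version() < '4.0':
        node.byteman_submit([self.byteman_submit_path_pre_4_0])
    else:
        node.byteman_submit([self.byteman_submit_path_4_0])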
def test_read_from_bootstrapped_node(self):
    """
    Test bootstrapped node sees existing data
    @jira_ticket CASSANDRA-6648
    """
    cluster = self.cluster
    cluster.populate(3)
    cluster.start()

    node1 = cluster.nodes['node1']
    node1.stress(['write', 'n=10K', 'no-warmup', '-rate', 'threads=8', '-schema', 'replication(factor=2)'])

    session = self.patient_cql_connection(node1)
    stress_table = 'keyspace1.standard1'
    original_rows = list(session.execute("SELECT * FROM %s" % (stress_table,)))

    node4 = new_node(cluster)
    node4.start(wait_for_binary_proto=True)

    session = self.patient_exclusive_cql_connection(node4)
    new_rows = list(session.execute("SELECT * FROM %s" % (stress_table,)))
    assert original_rows == new_rows
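# The equality assert above relies on both full scans coming back in the same (token) order. If that
# ever proves flaky, a sketch of an order-insensitive comparison, assuming the stress table's
# partition key column is named 'key' (as cassandra-stress's standard1 schema uses):
def assert_same_rows(rows_a, rows_b):
    """Compare two result sets while ignoring row order."""
    assert sorted(rows_a, key=lambda r: r.key) == sorted(rows_b, key=lambda r: r.key)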
def test_failed_bootstrap_wiped_node_can_join(self):
    """
    @jira_ticket CASSANDRA-9765
    Test that if a node fails to bootstrap, it can join the cluster even if the data is wiped.
    """
    cluster = self.cluster
    cluster.populate(1)
    cluster.set_configuration_options(values={'stream_throughput_outbound_megabits_per_sec': 1})
    cluster.start(wait_for_binary_proto=True)
    stress_table = 'keyspace1.standard1'

    # write some data, enough for the bootstrap to fail later on
    node1 = cluster.nodelist()[0]
    node1.stress(['write', 'n=100K', 'no-warmup', '-rate', 'threads=8'])
    node1.flush()

    session = self.patient_cql_connection(node1)
    original_rows = list(session.execute("SELECT * FROM {}".format(stress_table)))

    # Add a new node, bootstrap=True ensures that it is not a seed
    node2 = new_node(cluster, bootstrap=True)

    # kill node2 in the middle of bootstrap
    t = KillOnBootstrap(node2)
    t.start()

    node2.start()
    t.join()
    assert not node2.is_running()

    # wipe any data for node2
    self._cleanup(node2)

    # Now start it again, it should be allowed to join
    mark = node2.mark_log()
    node2.start(wait_other_notice=True)
    node2.watch_log_for("JOINING:", from_mark=mark)
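# KillOnBootstrap (used above) stops the joining node from a background thread once bootstrap has
# started. A minimal sketch of the idea, assuming `from threading import Thread` at module level and
# that the trigger is the JOINING log line; the helper actually used by this suite may differ:
class KillOnBootstrap(Thread):
    def __init__(self, node):
        Thread.__init__(self)
        self.node = node

    def run(self):
        # wait for the node to report it is bootstrapping, then kill it mid-stream
        self.node.watch_log_for("JOINING:")
        self.node.stop(gently=False)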
def simple_bootstrap_test_small_keepalive_period(self):
    """
    @jira_ticket CASSANDRA-11841
    Test that bootstrap completes if it takes longer than
    streaming_socket_timeout_in_ms or 2*streaming_keep_alive_period_in_secs
    to receive a single sstable
    """
    cluster = self.cluster
    cluster.set_configuration_options(values={'stream_throughput_outbound_megabits_per_sec': 1,
                                              'streaming_socket_timeout_in_ms': 1000,
                                              'streaming_keep_alive_period_in_secs': 1})

    # Create a single node cluster
    cluster.populate(1)
    node1 = cluster.nodelist()[0]
    cluster.start(wait_other_notice=True)

    # Create more than one sstable larger than 1MB
    node1.stress(['write', 'n=50K', '-rate', 'threads=8', '-schema',
                  'compaction(strategy=SizeTieredCompactionStrategy, enabled=false)'])
    cluster.flush()
    node1.stress(['write', 'n=50K', '-rate', 'threads=8', '-schema',
                  'compaction(strategy=SizeTieredCompactionStrategy, enabled=false)'])
    cluster.flush()
    self.assertGreater(len(node1.get_sstables("keyspace1", "standard1")), 1)

    # Bootstrapping a new node with a very small streaming_socket_timeout_in_ms
    node2 = new_node(cluster)
    node2.start(wait_for_binary_proto=True)

    # Shouldn't fail due to streaming socket timeout
    assert_bootstrap_state(self, node2, 'COMPLETED')

    for node in cluster.nodelist():
        self.assertTrue(node.grep_log('Scheduling keep-alive task with 1s period.', filename='debug.log'))
        self.assertTrue(node.grep_log('Sending keep-alive', filename='debug.log'))
        self.assertTrue(node.grep_log('Received keep-alive', filename='debug.log'))
def test_bootstrap(self):
    """
    Test repaired data remains in sync after a bootstrap
    """
    self.fixture_dtest_setup.setup_overrides.cluster_options = ImmutableMapping({'hinted_handoff_enabled': 'false',
                                                                                 'commitlog_sync_period_in_ms': 500})
    self.init_default_config()
    self.cluster.populate(3).start()
    node1, node2, node3 = self.cluster.nodelist()

    session = self.patient_exclusive_cql_connection(node3)
    session.execute("CREATE KEYSPACE ks WITH REPLICATION={'class':'SimpleStrategy', 'replication_factor': 2}")
    session.execute("CREATE TABLE ks.tbl (k INT PRIMARY KEY, v INT)")

    # insert some data
    stmt = SimpleStatement("INSERT INTO ks.tbl (k,v) VALUES (%s, %s)")
    for i in range(1000):
        session.execute(stmt, (i, i))

    node1.repair(options=['ks'])

    for i in range(1000):
        v = i + 1000
        session.execute(stmt, (v, v))

    # everything should be in sync
    for node in [node1, node2, node3]:
        result = node.repair(options=['ks', '--validate'])
        assert "Repaired data is in sync" in result.stdout

    node4 = new_node(self.cluster)
    node4.start(wait_for_binary_proto=True)

    assert len(self.cluster.nodelist()) == 4

    # everything should still be in sync
    for node in self.cluster.nodelist():
        result = node.repair(options=['ks', '--validate'])
        assert "Repaired data is in sync" in result.stdout
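# The "repaired data is in sync" check runs twice above (before and after the bootstrap). A minimal
# sketch of a helper that wraps the validate repair and the stdout assertion; the helper name is
# hypothetical:
def assert_repaired_in_sync(nodes):
    """Run a validating repair of keyspace ks on each node and assert repaired data is consistent everywhere."""
    for node in nodes:
        result = node.repair(options=['ks', '--validate'])
        assert "Repaired data is in sync" in result.stdout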
def default_bootstrap(cluster, token):
    node2 = new_node(cluster)
    node2.set_configuration_options(values={'initial_token': token})
    node2.start(wait_for_binary_proto=True)
    return node2
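# A hypothetical caller of the helper above, for illustration only; the token is a placeholder value
# rather than one taken from the original tests:
def example_bootstrap_at_fixed_token(cluster):
    # place the new node at an explicit ring position, then hand it back fully started
    return default_bootstrap(cluster, '00010')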