def create_schema_40(self, session, ks, compression):
    create_ks(session, ks, rf=2)
    create_cf(session, "standard1", compression=compression, compact_storage=self.compact())
    create_cf(session, "counter1", key_type='text', compression=compression,
              columns={'column1': 'text', 'v': 'counter static', 'value': 'counter'},
              primary_key="key, column1", clustering='column1 ASC',
              compact_storage=self.compact())
def create_users(self, session):
    columns = {"password": "varchar", "gender": "varchar", "session_token": "varchar",
               "state": "varchar", "birth_year": "bigint"}
    create_cf(session, 'users', columns=columns)

    session.execute("CREATE INDEX gender_idx ON users (gender)")
    session.execute("CREATE INDEX state_idx ON users (state)")
    session.execute("CREATE INDEX birth_year_idx ON users (birth_year)")
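# A minimal usage sketch (an illustration, not part of the original suite): with the
# secondary indexes above in place, equality predicates on the indexed columns can be
# served without ALLOW FILTERING. The state value 'TX' is an arbitrary example.
def query_users_by_state_example(session):
    # Served via state_idx; only equality predicates are supported this way.
    return list(session.execute("SELECT * FROM users WHERE state = 'TX'"))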
def short_read_delete_test(self):
    """ Test short reads ultimately leaving no columns alive [#4000] """
    cluster = self.cluster

    # Disable hinted handoff and set batch commit log so this doesn't
    # interfere with the test
    cluster.set_configuration_options(values={'hinted_handoff_enabled': False})
    cluster.set_batch_commitlog(enabled=True)

    cluster.populate(2).start(wait_other_notice=True)
    node1, node2 = cluster.nodelist()

    session = self.patient_cql_connection(node1)
    create_ks(session, 'ks', 3)
    create_cf(session, 'cf', read_repair=0.0)

    # insert 2 columns in one row
    insert_columns(self, session, 0, 2)

    # Delete the row while first node is dead
    node1.flush()
    node1.stop(wait_other_notice=True)

    session = self.patient_cql_connection(node2, 'ks')
    query = SimpleStatement("DELETE FROM cf WHERE key='k0'", consistency_level=ConsistencyLevel.ONE)
    session.execute(query)

    node1.start(wait_other_notice=True)

    # Query first column
    session = self.patient_cql_connection(node1, 'ks')

    assert_none(session, "SELECT c, v FROM cf WHERE key='k0' LIMIT 1", cl=ConsistencyLevel.QUORUM)
def incompressible_data_in_compressed_table_test(self):
    """
    Tests for the bug behind CASSANDRA-3370:
    https://issues.apache.org/jira/browse/CASSANDRA-3370

    @jira_ticket CASSANDRA-3370

    Inserts random (incompressible) data into a compressed table; the
    compressed SSTable was observed to be larger than the uncompressed one.
    """
    cluster = self.cluster
    cluster.populate(1).start()
    node1 = cluster.nodelist()[0]
    time.sleep(.5)

    session = self.patient_cql_connection(node1)
    create_ks(session, 'ks', 1)
    create_cf(session, 'cf', compression="Deflate")

    # make unique column names, and values that are incompressible
    for col in xrange(10):
        col_name = str(col)
        col_val = os.urandom(5000)
        col_val = col_val.encode('hex')
        cql = "UPDATE cf SET v='%s' WHERE KEY='0' AND c='%s'" % (col_val, col_name)
        session.execute(cql)

    node1.flush()
    time.sleep(2)
    rows = list(session.execute("SELECT * FROM cf WHERE KEY = '0' AND c < '8'"))
    self.assertGreater(len(rows), 0)
def test_gc(self):
    """
    Test that tombstone purging doesn't bring back deleted data by writing
    2 rows to a table with gc_grace=0, deleting one of those rows, then
    asserting that it isn't present in the results of SELECT *, before and
    after a flush and compaction.
    """
    cluster = self.cluster
    cluster.populate(1).start()
    [node1] = cluster.nodelist()

    time.sleep(.5)
    session = self.patient_cql_connection(node1)
    create_ks(session, 'ks', 1)
    create_cf(session, 'cf', gc_grace=0, key_type='int', columns={'c1': 'int'})

    session.execute('insert into cf (key, c1) values (1,1)')
    session.execute('insert into cf (key, c1) values (2,1)')
    node1.flush()

    assert rows_to_list(session.execute('select * from cf;')) == [[1, 1], [2, 1]]

    session.execute('delete from cf where key=1')

    assert rows_to_list(session.execute('select * from cf;')) == [[2, 1]]

    node1.flush()
    time.sleep(.5)
    node1.compact()
    time.sleep(.5)

    assert rows_to_list(session.execute('select * from cf;')) == [[2, 1]]
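# Side note (a sketch assuming standard CQL semantics; not part of the original test):
# the gc_grace=0 argument to create_cf corresponds to the table-level
# gc_grace_seconds option, which could equally be set directly on an existing table:
def example_set_gc_grace(session):
    # Tombstones become purgeable immediately on the next compaction.
    session.execute("ALTER TABLE ks.cf WITH gc_grace_seconds = 0")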
def test_decommission(self):
    cluster = self.cluster

    tokens = cluster.balanced_tokens(4)
    cluster.populate(4, tokens=tokens).start()
    node1, node2, node3, node4 = cluster.nodelist()

    session = self.patient_cql_connection(node1)
    create_ks(session, 'ks', 2)
    create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})

    insert_c1c2(session, n=30000, consistency=ConsistencyLevel.QUORUM)

    cluster.flush()
    sizes = [node.data_size() for node in cluster.nodelist() if node.is_running()]
    init_size = sizes[0]
    assert_almost_equal(*sizes)

    time.sleep(.5)
    node4.decommission()
    node4.stop()
    cluster.cleanup()
    time.sleep(.5)

    # Check we can get all the keys
    for n in range(0, 30000):
        query_c1c2(session, n, ConsistencyLevel.QUORUM)

    sizes = [node.data_size() for node in cluster.nodelist() if node.is_running()]
    logger.debug(sizes)
    assert_almost_equal(sizes[0], sizes[1])
    assert_almost_equal((2.0 / 3.0) * sizes[0], sizes[2])
    assert_almost_equal(sizes[2], init_size)
def test_ghosts(self):
    """ Check that range ghosts are correctly removed by the system """
    cluster = self.cluster
    cluster.populate(1).start()
    [node1] = cluster.nodelist()

    time.sleep(.5)
    session = self.cql_connection(node1)
    create_ks(session, 'ks', 1)
    create_cf(session, 'cf', gc_grace=0, columns={'c': 'text'})

    rows = 1000

    for i in range(0, rows):
        session.execute("UPDATE cf SET c = 'value' WHERE key = 'k%i'" % i)

    res = list(session.execute("SELECT * FROM cf LIMIT 10000"))
    assert_length_equal(res, rows)

    node1.flush()

    for i in range(0, rows // 2):
        session.execute("DELETE FROM cf WHERE key = 'k%i'" % i)

    res = list(session.execute("SELECT * FROM cf LIMIT 10000"))
    # no ghosts in 1.2+
    assert_length_equal(res, rows // 2)

    node1.flush()
    time.sleep(1)  # make sure tombstones are collected
    node1.compact()

    res = list(session.execute("SELECT * FROM cf LIMIT 10000"))
    assert_length_equal(res, rows // 2)
def test_move_single_node(self):
    """ Test moving a node in a single-node cluster (#4200) """
    cluster = self.cluster

    # Create an unbalanced ring
    cluster.populate(1, tokens=[0]).start()
    node1 = cluster.nodelist()[0]
    time.sleep(0.2)

    session = self.patient_cql_connection(node1)
    create_ks(session, 'ks', 1)
    create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})

    insert_c1c2(session, n=10000, consistency=ConsistencyLevel.ONE)

    cluster.flush()

    node1.move(2**25)
    time.sleep(1)

    cluster.cleanup()

    # Check we can get all the keys
    for n in range(0, 10000):
        query_c1c2(session, n, ConsistencyLevel.ONE)
def test_cfid(self):
    """
    Test, by repeatedly adding and dropping a table, that the sstable
    directory for each incarnation is unique and formatted correctly.
    """
    cluster = self.cluster
    cluster.populate(1).start(wait_other_notice=True)
    [node1] = cluster.nodelist()

    session = self.patient_cql_connection(node1)
    create_ks(session, 'ks', 1)

    for x in range(0, 5):
        create_cf(session, 'cf', gc_grace=0, key_type='int', columns={'c1': 'int'})
        session.execute('insert into cf (key, c1) values (1,1)')
        session.execute('insert into cf (key, c1) values (2,1)')
        node1.flush()
        session.execute('drop table ks.cf;')

    # get a list of cf directories
    try:
        cfs = os.listdir(node1.get_path() + "/data0/ks")
    except OSError:
        self.fail("Path to sstables not valid.")

    # check that there are 5 unique directories
    assert len(cfs) == 5

    # check that these are in fact column family directories
    for dire in cfs:
        assert dire[0:2] == 'cf'
def simple_increment_test(self):
    """ Simple incrementation test (Created for #3465, that wasn't a bug) """
    cluster = self.cluster
    cluster.populate(3).start()
    nodes = cluster.nodelist()

    session = self.patient_cql_connection(nodes[0])
    create_ks(session, 'ks', 3)
    create_cf(session, 'cf', validation="CounterColumnType", columns={'c': 'counter'})

    sessions = [self.patient_cql_connection(node, 'ks') for node in nodes]
    nb_increment = 50
    nb_counter = 10

    for i in xrange(0, nb_increment):
        for c in xrange(0, nb_counter):
            session = sessions[(i + c) % len(nodes)]
            query = SimpleStatement("UPDATE cf SET c = c + 1 WHERE key = 'counter%i'" % c,
                                    consistency_level=ConsistencyLevel.QUORUM)
            session.execute(query)

        session = sessions[i % len(nodes)]
        keys = ",".join(["'counter%i'" % c for c in xrange(0, nb_counter)])
        query = SimpleStatement("SELECT key, c FROM cf WHERE key IN (%s)" % keys,
                                consistency_level=ConsistencyLevel.QUORUM)
        res = list(session.execute(query))

        assert_length_equal(res, nb_counter)
        for c in xrange(0, nb_counter):
            self.assertEqual(len(res[c]), 2,
                             "Expecting key and counter for counter {}, got {}".format(c, str(res[c])))
            self.assertEqual(res[c][1], i + 1,
                             "Expecting counter {} = {}, got {}".format(c, i + 1, res[c][1]))
def test_resumable_decommission(self):
    """
    @jira_ticket CASSANDRA-12008

    Test that the decommission operation is resumable.
    """
    self.fixture_dtest_setup.ignore_log_patterns = [r'Streaming error occurred',
                                                    r'Error while decommissioning node',
                                                    r'Remote peer 127.0.0.2 failed stream session',
                                                    r'Remote peer 127.0.0.2:7000 failed stream session']
    cluster = self.cluster
    cluster.set_configuration_options(values={'stream_throughput_outbound_megabits_per_sec': 1})
    cluster.populate(3, install_byteman=True).start(wait_other_notice=True)
    node1, node2, node3 = cluster.nodelist()

    session = self.patient_cql_connection(node2)
    # reduce system_distributed RF to 2 so we don't require forceful decommission
    session.execute("ALTER KEYSPACE system_distributed WITH REPLICATION = {'class':'SimpleStrategy', 'replication_factor':'2'};")
    create_ks(session, 'ks', 2)
    create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})
    insert_c1c2(session, n=10000, consistency=ConsistencyLevel.ALL)

    # Execute first decommission, should fail
    with pytest.raises(ToolError):
        if cluster.version() >= '4.0':
            script = ['./byteman/4.0/decommission_failure_inject.btm']
        else:
            script = ['./byteman/pre4.0/decommission_failure_inject.btm']
        node2.byteman_submit(script)
        node2.nodetool('decommission')

    # Make sure previous ToolError is due to decommission
    node2.watch_log_for('Error while decommissioning node')

    # Decommission again
    mark = node2.mark_log()
    node2.nodetool('decommission')

    # Check decommission is done and we skipped transferred ranges
    node2.watch_log_for('DECOMMISSIONED', from_mark=mark)
    node2.grep_log("Skipping transferred range .* of keyspace ks, endpoint {}".format(node2.address_for_current_version_slashy()), filename='debug.log')

    # Check data is correctly forwarded to node1 and node3
    cluster.remove(node2)
    node3.stop(gently=False)
    session = self.patient_exclusive_cql_connection(node1)
    session.execute('USE ks')
    for i in range(0, 10000):
        query_c1c2(session, i, ConsistencyLevel.ONE)
    node1.stop(gently=False)
    node3.start()
    session.shutdown()
    mark = node3.mark_log()
    node3.watch_log_for('Starting listening for CQL clients', from_mark=mark)
    session = self.patient_exclusive_cql_connection(node3)
    session.execute('USE ks')
    for i in range(0, 10000):
        query_c1c2(session, i, ConsistencyLevel.ONE)
def putget_2dc_rf2_test(self):
    """ Simple put-get test for 2 DCs with 2 nodes each (RF=2) -- tests cross-DC efficient writes """
    cluster = self.cluster
    cluster.populate([2, 2]).start()

    session = self.patient_cql_connection(cluster.nodelist()[0])
    create_ks(session, 'ks', {'dc1': 2, 'dc2': 2})
    create_cf(session, 'cf')

    putget(cluster, session)
def putget_2dc_rf1_test(self):
    """ Simple put-get test for 2 DCs with one node each (RF=1) [catches #3539] """
    cluster = self.cluster
    cluster.populate([1, 1]).start()

    session = self.patient_cql_connection(cluster.nodelist()[0])
    create_ks(session, 'ks', {'dc1': 1, 'dc2': 1})
    create_cf(session, 'cf')

    putget(cluster, session)
def _putget(self, compression=None):
    cluster = self.cluster

    cluster.populate(3).start()
    node1, node2, node3 = cluster.nodelist()

    session = self.patient_cql_connection(node1)
    create_ks(session, 'ks', 3)
    create_cf(session, 'cf', compression=compression)

    putget(cluster, session)
def test_rangeputget(self):
    """ Simple put/get on ranges of rows, hitting multiple sstables """
    cluster = self.cluster
    cluster.populate(3).start()
    node1, node2, node3 = cluster.nodelist()

    session = self.patient_cql_connection(node1)
    create_ks(session, 'ks', 2)
    create_cf(session, 'cf')

    range_putget(cluster, session)
def _deprecated_repair_jmx(self, method, arguments):
    """
    * Launch a two node, two DC cluster
    * Create a keyspace and table
    * Insert some data
    * Call the deprecated repair JMX API based on the arguments passed into this method
    * Check the node log to see if the correct repair was performed based on the jmx args
    """
    cluster = self.cluster

    logger.debug("Starting cluster..")
    cluster.populate([1, 1])
    node1, node2 = cluster.nodelist()
    remove_perf_disable_shared_mem(node1)
    cluster.start()
    supports_pull_repair = cluster.version() >= LooseVersion('3.10')

    session = self.patient_cql_connection(node1)
    create_ks(session, 'ks', 2)
    create_cf(session, 'cf', read_repair=0.0, columns={'c1': 'text', 'c2': 'text'})
    insert_c1c2(session, n=1000, consistency=ConsistencyLevel.ALL)

    # Run repair
    mbean = make_mbean('db', 'StorageService')
    with JolokiaAgent(node1) as jmx:
        # assert repair runs and returns valid cmd number
        assert jmx.execute_method(mbean, method, arguments) == 1
    # wait for log to start
    node1.watch_log_for("Starting repair command")
    # get repair parameters from the log
    line = node1.grep_log((r"Starting repair command #1" + (r" \([^\)]+\)" if cluster.version() >= LooseVersion("3.10") else "") +
                           r", repairing keyspace ks with repair options \(parallelism: (?P<parallelism>\w+), primary range: (?P<pr>\w+), "
                           r"incremental: (?P<incremental>\w+), job threads: (?P<jobs>\d+), ColumnFamilies: (?P<cfs>.+), dataCenters: (?P<dc>.+), "
                           r"hosts: (?P<hosts>.+), # of ranges: (?P<ranges>\d+)(, pull repair: (?P<pullrepair>true|false))?\)"))

    assert_length_equal(line, 1)
    line, m = line[0]

    if supports_pull_repair:
        assert m.group("pullrepair") == "false", "Pull repair cannot be enabled through the deprecated API so the pull repair option should always be false."

    return {"parallelism": m.group("parallelism"),
            "primary_range": m.group("pr"),
            "incremental": m.group("incremental"),
            "job_threads": m.group("jobs"),
            "column_families": m.group("cfs"),
            "data_centers": m.group("dc"),
            "hosts": m.group("hosts"),
            "ranges": m.group("ranges")}
def test_movement(self):
    cluster = self.cluster

    # Create an unbalanced ring
    cluster.populate(3, tokens=[0, 2**48, 2**62]).start()
    node1, node2, node3 = cluster.nodelist()

    session = self.patient_cql_connection(node1)
    create_ks(session, 'ks', 1)
    create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})

    insert_c1c2(session, n=30000, consistency=ConsistencyLevel.ONE)

    cluster.flush()

    # Move nodes to balance the cluster
    def move_node(node, token):
        mark = node.mark_log()
        node.move(token)  # can't assume 0 is balanced with m3p
        node.watch_log_for('{} state jump to NORMAL'.format(node.address_for_current_version()),
                           from_mark=mark, timeout=180)
        time.sleep(3)

    balancing_tokens = cluster.balanced_tokens(3)

    move_node(node1, balancing_tokens[0])
    move_node(node2, balancing_tokens[1])
    move_node(node3, balancing_tokens[2])

    time.sleep(1)
    cluster.cleanup()
    for node in cluster.nodelist():
        # after moving nodes we need to relocate any tokens in the wrong places, and after doing that
        # we might have overlapping tokens on the disks, so run a major compaction to get balance even
        if cluster.version() >= '3.2':
            node.nodetool("relocatesstables")
        node.nodetool("compact")

    # Check we can get all the keys
    for n in range(0, 30000):
        query_c1c2(session, n, ConsistencyLevel.ONE)

    # Now the load should be basically even
    sizes = [node.data_size() for node in [node1, node2, node3]]

    assert_almost_equal(sizes[0], sizes[1], error=0.05)
    assert_almost_equal(sizes[0], sizes[2], error=0.05)
    assert_almost_equal(sizes[1], sizes[2], error=0.05)
def __putget_with_internode_ssl_test(self, internode_compression):
    cluster = self.cluster

    debug("***using internode ssl***")
    generate_ssl_stores(self.test_path)
    cluster.set_configuration_options({'internode_compression': internode_compression})
    cluster.enable_internode_ssl(self.test_path)

    cluster.populate(3).start()

    session = self.patient_cql_connection(cluster.nodelist()[0])
    create_ks(session, 'ks', 3)
    create_cf(session, 'cf', compression=None)
    putget(cluster, session)
def __putget_with_internode_ssl_test(self, internode_compression):
    cluster = self.cluster

    logger.debug("***using internode ssl***")
    generate_ssl_stores(self.fixture_dtest_setup.test_path)
    cluster.set_configuration_options({'internode_compression': internode_compression})
    cluster.enable_internode_ssl(self.fixture_dtest_setup.test_path)

    cluster.populate(3).start()

    session = self.patient_cql_connection(cluster.nodelist()[0])
    create_ks(session, 'ks', 3)
    create_cf(session, 'cf', compression=None)
    putget(cluster, session)
def resumable_decommission_test(self):
    """
    @jira_ticket CASSANDRA-12008

    Test that the decommission operation is resumable.
    """
    self.ignore_log_patterns = [r'Streaming error occurred',
                                r'Error while decommissioning node',
                                r'Remote peer 127.0.0.2 failed stream session']
    cluster = self.cluster
    cluster.set_configuration_options(values={'stream_throughput_outbound_megabits_per_sec': 1})
    cluster.populate(3, install_byteman=True).start(wait_other_notice=True)
    node1, node2, node3 = cluster.nodelist()

    session = self.patient_cql_connection(node2)
    create_ks(session, 'ks', 2)
    create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})
    insert_c1c2(session, n=10000, consistency=ConsistencyLevel.ALL)

    # Execute first decommission, should fail
    with self.assertRaises(ToolError):
        script = ['./byteman/decommission_failure_inject.btm']
        node2.byteman_submit(script)
        node2.nodetool('decommission')

    # Make sure previous ToolError is due to decommission
    node2.watch_log_for('Error while decommissioning node')

    # Decommission again
    mark = node2.mark_log()
    node2.nodetool('decommission')

    # Check decommission is done and we skipped transferred ranges
    node2.watch_log_for('DECOMMISSIONED', from_mark=mark)
    node2.grep_log("Skipping transferred range .* of keyspace ks, endpoint /127.0.0.3", filename='debug.log')

    # Check data is correctly forwarded to node1 and node3
    cluster.remove(node2)
    node3.stop(gently=False)
    session = self.patient_exclusive_cql_connection(node1)
    session.execute('USE ks')
    for i in xrange(0, 10000):
        query_c1c2(session, i, ConsistencyLevel.ONE)
    node1.stop(gently=False)
    node3.start()
    session.shutdown()
    mark = node3.mark_log()
    node3.watch_log_for('Starting listening for CQL clients', from_mark=mark)
    session = self.patient_exclusive_cql_connection(node3)
    session.execute('USE ks')
    for i in xrange(0, 10000):
        query_c1c2(session, i, ConsistencyLevel.ONE)
def test_wait_for_schema(self):
    # start a one node cluster
    cluster = self.cluster
    cluster.populate(1, install_byteman=True)
    node1 = cluster.nodes['node1']
    cluster.start()

    session = self.patient_cql_connection(node1)
    create_ks(session, 'ks', 2)
    create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})

    empty_size = data_size(node1, 'ks', 'cf')

    keys = 1000
    insert_statement = session.prepare("INSERT INTO ks.cf (key, c1, c2) VALUES (?, 'value1', 'value2')")
    execute_concurrent_with_args(session, insert_statement, [['k%d' % k] for k in range(keys)])

    node1.flush()
    node1.compact()
    initial_size = data_size(node1, 'ks', 'cf')
    # logger.debug("node1 size for ks.cf before bootstrapping node2: %s" % float(initial_size))

    node2 = new_node(cluster)
    node2.set_configuration_options(values={'request_timeout_in_ms': 10000})

    mark = node2.mark_log()

    node1.byteman_submit(['./byteman/migration_request_sleep.btm'])

    node2.start(jvm_args=["-Dcassandra.migration_task_wait_in_seconds=20"],
                set_migration_task=False,
                wait_for_binary_proto=True)

    node2.watch_log_for('Prepare completed. Receiving', from_mark=mark, timeout=6)

    node2.flush()
    node2.compact()
    # logger.debug("node2 joined with size for ks.cf : %s" % float(data_size(node2, 'ks', 'cf')))

    node1.stop()
    rows = session.execute('SELECT count(*) from ks.cf')
    assert rows[0][0] == 1000
    cluster.stop()
def _deprecated_repair_jmx(self, method, arguments):
    """
    * Launch a two node, two DC cluster
    * Create a keyspace and table
    * Insert some data
    * Call the deprecated repair JMX API based on the arguments passed into this method
    * Check the node log to see if the correct repair was performed based on the jmx args
    """
    cluster = self.cluster

    logger.debug("Starting cluster..")
    cluster.populate([1, 1])
    node1, node2 = cluster.nodelist()
    cluster.start()
    supports_pull_repair = cluster.version() >= LooseVersion('3.10')

    session = self.patient_cql_connection(node1)
    create_ks(session, 'ks', 2)
    create_cf(session, 'cf', read_repair=0.0, columns={'c1': 'text', 'c2': 'text'})
    insert_c1c2(session, n=1000, consistency=ConsistencyLevel.ALL)

    # Run repair
    mbean = make_mbean('db', 'StorageService')
    with JolokiaAgent(node1) as jmx:
        # assert repair runs and returns valid cmd number
        assert jmx.execute_method(mbean, method, arguments) == 1
    # wait for log to start
    node1.watch_log_for("Starting repair command")
    # get repair parameters from the log
    line = node1.grep_log((r"Starting repair command #1" + (r" \([^\)]+\)" if cluster.version() >= LooseVersion("3.10") else "") +
                           r", repairing keyspace ks with repair options \(parallelism: (?P<parallelism>\w+), primary range: (?P<pr>\w+), "
                           r"incremental: (?P<incremental>\w+), job threads: (?P<jobs>\d+), ColumnFamilies: (?P<cfs>.+), dataCenters: (?P<dc>.+), "
                           r"hosts: (?P<hosts>.+), # of ranges: (?P<ranges>\d+)(, pull repair: (?P<pullrepair>true|false))?(, ignore unreplicated keyspaces: (?P<ignoreunrepl>true|false))?\)"))

    assert_length_equal(line, 1)
    line, m = line[0]

    if supports_pull_repair:
        assert m.group("pullrepair") == "false", "Pull repair cannot be enabled through the deprecated API so the pull repair option should always be false."

    return {"parallelism": m.group("parallelism"),
            "primary_range": m.group("pr"),
            "incremental": m.group("incremental"),
            "job_threads": m.group("jobs"),
            "column_families": m.group("cfs"),
            "data_centers": m.group("dc"),
            "hosts": m.group("hosts"),
            "ranges": m.group("ranges")}
def counter_leader_with_partial_view_test(self):
    """
    Test leader election with a starting node.

    Testing that nodes do not elect as mutation leader a node with a partial view on the cluster.
    Note that byteman rules can be syntax checked via the following command:
        sh ./bin/bytemancheck.sh -cp ~/path_to/apache-cassandra-3.0.14-SNAPSHOT.jar ~/path_to/rule.btm

    @jira_ticket CASSANDRA-13043
    """
    cluster = self.cluster

    cluster.populate(3, use_vnodes=True, install_byteman=True)
    nodes = cluster.nodelist()
    # Have node 1 and 3 cheat a bit during the leader election for a counter mutation; note that cheating
    # takes place iff there is an actual chance for node 2 to be picked.
    nodes[0].update_startup_byteman_script('./byteman/election_counter_leader_favor_node2.btm')
    nodes[2].update_startup_byteman_script('./byteman/election_counter_leader_favor_node2.btm')

    cluster.start(wait_for_binary_proto=True)
    session = self.patient_cql_connection(nodes[0])
    create_ks(session, 'ks', 3)
    create_cf(session, 'cf', validation="CounterColumnType", columns={'c': 'counter'})

    # Now stop the node and restart but first install a rule to slow down how fast node 2 will update
    # the list of nodes that are alive
    nodes[1].stop(wait=True, wait_other_notice=False)
    nodes[1].update_startup_byteman_script('./byteman/gossip_alive_callback_sleep.btm')
    nodes[1].start(no_wait=True, wait_other_notice=False)

    # Until node 2 is fully alive try to force other nodes to pick it as mutation leader.
    # If CASSANDRA-13043 is fixed, they will not. Otherwise they will do, but since we are slowing down how
    # fast node 2 updates the list of nodes that are alive, it will just have a partial view of the cluster
    # and thus will raise an 'UnavailableException' exception.
    nb_attempts = 50000
    for i in xrange(0, nb_attempts):
        # Change the name of the counter for the sake of randomization
        q = SimpleStatement(
            query_string="UPDATE ks.cf SET c = c + 1 WHERE key = 'counter_%d'" % i,
            consistency_level=ConsistencyLevel.QUORUM)
        session.execute(q)
def test_counter_leader_with_partial_view(self):
    """
    Test leader election with a starting node.

    Testing that nodes do not elect as mutation leader a node with a partial view on the cluster.
    Note that byteman rules can be syntax checked via the following command:
        sh ./bin/bytemancheck.sh -cp ~/path_to/apache-cassandra-3.0.14-SNAPSHOT.jar ~/path_to/rule.btm

    @jira_ticket CASSANDRA-13043
    """
    cluster = self.cluster

    cluster.populate(3, use_vnodes=True, install_byteman=True)
    nodes = cluster.nodelist()
    # Have node 1 and 3 cheat a bit during the leader election for a counter mutation; note that cheating
    # takes place iff there is an actual chance for node 2 to be picked.
    if cluster.version() < '4.0':
        nodes[0].update_startup_byteman_script('./byteman/pre4.0/election_counter_leader_favor_node2.btm')
        nodes[2].update_startup_byteman_script('./byteman/pre4.0/election_counter_leader_favor_node2.btm')
    else:
        nodes[0].update_startup_byteman_script('./byteman/4.0/election_counter_leader_favor_node2.btm')
        nodes[2].update_startup_byteman_script('./byteman/4.0/election_counter_leader_favor_node2.btm')

    cluster.start(wait_for_binary_proto=True)
    session = self.patient_cql_connection(nodes[0])
    create_ks(session, 'ks', 3)
    create_cf(session, 'cf', validation="CounterColumnType", columns={'c': 'counter'})

    # Now stop the node and restart but first install a rule to slow down how fast node 2 will update
    # the list of nodes that are alive
    nodes[1].stop(wait=True, wait_other_notice=False)
    nodes[1].update_startup_byteman_script('./byteman/gossip_alive_callback_sleep.btm')
    nodes[1].start(no_wait=True, wait_other_notice=False)

    # Until node 2 is fully alive try to force other nodes to pick it as mutation leader.
    # If CASSANDRA-13043 is fixed, they will not. Otherwise they will do, but since we are slowing down how
    # fast node 2 updates the list of nodes that are alive, it will just have a partial view of the cluster
    # and thus will raise an 'UnavailableException' exception.
    nb_attempts = 50000
    for i in range(0, nb_attempts):
        # Change the name of the counter for the sake of randomization
        q = SimpleStatement(
            query_string="UPDATE ks.cf SET c = c + 1 WHERE key = 'counter_%d'" % i,
            consistency_level=ConsistencyLevel.QUORUM
        )
        session.execute(q)
def test_movement(self):
    cluster = self.cluster

    # Create an unbalanced ring
    cluster.populate(3, tokens=[0, 2**48, 2**62]).start()
    node1, node2, node3 = cluster.nodelist()

    session = self.patient_cql_connection(node1)
    create_ks(session, 'ks', 1)
    create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})

    insert_c1c2(session, n=30000, consistency=ConsistencyLevel.ONE)

    cluster.flush()

    # Move nodes to balance the cluster
    def move_node(node, token):
        mark = node.mark_log()
        node.move(token)  # can't assume 0 is balanced with m3p
        node.watch_log_for('{} state jump to NORMAL'.format(node.address_for_current_version()),
                           from_mark=mark, timeout=180)
        time.sleep(3)

    balancing_tokens = cluster.balanced_tokens(3)

    move_node(node1, balancing_tokens[0])
    move_node(node2, balancing_tokens[1])
    move_node(node3, balancing_tokens[2])

    time.sleep(1)
    cluster.cleanup()

    # Check we can get all the keys
    for n in range(0, 30000):
        query_c1c2(session, n, ConsistencyLevel.ONE)

    # Now the load should be basically even
    sizes = [node.data_size() for node in [node1, node2, node3]]

    assert_almost_equal(sizes[0], sizes[1])
    assert_almost_equal(sizes[0], sizes[2])
    assert_almost_equal(sizes[1], sizes[2])
def test_wide_row(self):
    """ Test wide row slices """
    cluster = self.cluster
    cluster.populate(3).start()
    node1, node2, node3 = cluster.nodelist()

    session = self.patient_cql_connection(node1)
    create_ks(session, 'ks', 1)
    create_cf(session, 'cf')

    key = 'wide'

    for x in range(1, 5001):
        insert_columns(self, session, key, 100, offset=x - 1)

    for size in (10, 100, 1000):
        for x in range(1, (50001 - size) // size):
            query_columns(self, session, key, size, offset=x * size - 1)
def _test_streaming(self, op_zerocopy, op_partial, num_partial, num_zerocopy,
                    compaction_strategy='LeveledCompactionStrategy', num_keys=1000,
                    rf=3, num_nodes=3):
    keys = num_keys
    cluster = self.cluster
    tokens = cluster.balanced_tokens(num_nodes)
    cluster.set_configuration_options(values={'endpoint_snitch': 'org.apache.cassandra.locator.PropertyFileSnitch'})
    cluster.set_configuration_options(values={'num_tokens': 1})

    cluster.populate(num_nodes)
    nodes = cluster.nodelist()

    for i in range(0, len(nodes)):
        nodes[i].set_configuration_options(values={'initial_token': tokens[i]})

    cluster.start(wait_for_binary_proto=True)

    session = self.patient_cql_connection(nodes[0])

    create_ks(session, name='ks2', rf=rf)
    create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'},
              compaction_strategy=compaction_strategy)

    insert_c1c2(session, n=keys, consistency=ConsistencyLevel.ALL)

    session_n2 = self.patient_exclusive_cql_connection(nodes[1])
    session_n2.execute("TRUNCATE system.available_ranges;")

    mark = nodes[1].mark_log()
    nodes[1].nodetool('rebuild -ks ks2')

    nodes[1].watch_log_for('Completed submission of build tasks', filename='debug.log', timeout=120)
    zerocopy_streamed_sstable = len(
        nodes[1].grep_log('.*CassandraEntireSSTableStreamReader.*?Finished receiving Data.*',
                          filename='debug.log', from_mark=mark))
    partial_streamed_sstable = len(
        nodes[1].grep_log('.*CassandraStreamReader.*?Finished receiving file.*',
                          filename='debug.log', from_mark=mark))

    assert op_zerocopy(zerocopy_streamed_sstable, num_zerocopy), "%s %s %s" % (num_zerocopy,
                                                                               opmap.get(op_zerocopy),
                                                                               zerocopy_streamed_sstable)
    assert op_partial(partial_streamed_sstable, num_partial), "%s %s %s" % (num_partial,
                                                                            op_partial,
                                                                            partial_streamed_sstable)
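# Hedged sketch (an assumption, not taken from this section): opmap, referenced in the
# assertion messages above, plausibly maps the comparison callables passed as
# op_zerocopy/op_partial to printable names, with callers passing operator functions.
import operator

opmap = {
    operator.eq: "==",
    operator.gt: ">",
    operator.ge: ">=",
    operator.lt: "<",
    operator.le: "<=",
}

# Illustrative call (parameters are assumptions): expect at least one zero-copy
# streamed sstable and no partially streamed ones.
# self._test_streaming(op_zerocopy=operator.ge, op_partial=operator.eq,
#                      num_partial=0, num_zerocopy=1)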
def gc_test(self):
    """
    Test that tombstone purging doesn't bring back deleted data by writing
    2 rows to a table with gc_grace=0, deleting one of those rows, then
    asserting that it isn't present in the results of SELECT *, before and
    after a flush and compaction.
    """
    cluster = self.cluster
    cluster.populate(1).start()
    [node1] = cluster.nodelist()

    time.sleep(.5)
    session = self.patient_cql_connection(node1)
    create_ks(session, 'ks', 1)
    create_cf(session, 'cf', gc_grace=0, key_type='int', columns={'c1': 'int'})

    session.execute('insert into cf (key, c1) values (1,1)')
    session.execute('insert into cf (key, c1) values (2,1)')
    node1.flush()

    self.assertEqual(rows_to_list(session.execute('select * from cf;')),
                     [[1, 1], [2, 1]])

    session.execute('delete from cf where key=1')

    self.assertEqual(rows_to_list(session.execute('select * from cf;')),
                     [[2, 1]])

    node1.flush()
    time.sleep(.5)
    node1.compact()
    time.sleep(.5)

    self.assertEqual(rows_to_list(session.execute('select * from cf;')),
                     [[2, 1]])
def drop_table_reflected_in_size_estimates_test(self):
    """
    A dropped table should result in its entries being removed from size estimates,
    on both nodes that are up and down at the time of the drop.

    @jira_ticket CASSANDRA-14905
    """
    cluster = self.cluster
    cluster.populate(2).start()
    node1, node2 = cluster.nodelist()
    session = self.patient_exclusive_cql_connection(node1)
    create_ks(session, 'ks1', 2)
    create_ks(session, 'ks2', 2)
    create_cf(session, 'ks1.cf1', columns={'c1': 'text', 'c2': 'text'})
    create_cf(session, 'ks2.cf1', columns={'c1': 'text', 'c2': 'text'})
    create_cf(session, 'ks2.cf2', columns={'c1': 'text', 'c2': 'text'})

    node1.nodetool('refreshsizeestimates')
    node2.nodetool('refreshsizeestimates')

    node2.stop()

    session.execute('DROP TABLE ks2.cf1')
    session.execute('DROP KEYSPACE ks1')

    node2.start(wait_for_binary_proto=True)
    session2 = self.patient_exclusive_cql_connection(node2)

    session.cluster.control_connection.wait_for_schema_agreement()

    assert_none(session, "SELECT * FROM system.size_estimates WHERE keyspace_name='ks1'")
    assert_none(session, "SELECT * FROM system.size_estimates WHERE keyspace_name='ks2' AND table_name='cf1'")
    assert_some(session, "SELECT * FROM system.size_estimates WHERE keyspace_name='ks2' AND table_name='cf2'")
    assert_none(session2, "SELECT * FROM system.size_estimates WHERE keyspace_name='ks1'")
    assert_none(session2, "SELECT * FROM system.size_estimates WHERE keyspace_name='ks2' AND table_name='cf1'")
    assert_some(session2, "SELECT * FROM system.size_estimates WHERE keyspace_name='ks2' AND table_name='cf2'")
def decommission_test(self):
    cluster = self.cluster

    tokens = cluster.balanced_tokens(4)
    cluster.populate(4, tokens=tokens).start()
    node1, node2, node3, node4 = cluster.nodelist()

    session = self.patient_cql_connection(node1)
    create_ks(session, 'ks', 2)
    create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})

    insert_c1c2(session, n=30000, consistency=ConsistencyLevel.QUORUM)

    cluster.flush()
    sizes = [node.data_size() for node in cluster.nodelist() if node.is_running()]
    init_size = sizes[0]
    assert_almost_equal(*sizes)

    time.sleep(.5)
    node4.decommission()
    node4.stop()
    cluster.cleanup()
    time.sleep(.5)

    # Check we can get all the keys
    for n in xrange(0, 30000):
        query_c1c2(session, n, ConsistencyLevel.QUORUM)

    sizes = [node.data_size() for node in cluster.nodelist() if node.is_running()]
    debug(sizes)
    assert_almost_equal(sizes[0], sizes[1])
    assert_almost_equal((2.0 / 3.0) * sizes[0], sizes[2])
    assert_almost_equal(sizes[2], init_size)
def concurrent_decommission_not_allowed_test(self):
    """
    Test that concurrent decommission is not allowed
    """
    cluster = self.cluster
    cluster.set_configuration_options(values={'stream_throughput_outbound_megabits_per_sec': 1})
    cluster.populate(2).start(wait_other_notice=True)
    node1, node2 = cluster.nodelist()

    session = self.patient_cql_connection(node2)
    create_ks(session, 'ks', 1)
    create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})
    insert_c1c2(session, n=10000, consistency=ConsistencyLevel.ALL)

    mark = node2.mark_log()

    def decommission():
        node2.nodetool('decommission')

    # Launch first decommission in an external thread
    t = Thread(target=decommission)
    t.start()

    # Make sure first decommission is initialized before second decommission
    node2.watch_log_for('DECOMMISSIONING', filename='debug.log')

    # Launch a second decommission, should fail
    with self.assertRaises(ToolError):
        node2.nodetool('decommission')

    # Check data is correctly forwarded to node1 after node2 is decommissioned
    t.join()
    node2.watch_log_for('DECOMMISSIONED', from_mark=mark)
    session = self.patient_cql_connection(node1)
    session.execute('USE ks')
    for n in xrange(0, 10000):
        query_c1c2(session, n, ConsistencyLevel.ONE)
def test_concurrent_decommission_not_allowed(self):
    """
    Test that concurrent decommission is not allowed
    """
    cluster = self.cluster
    cluster.set_configuration_options(values={'stream_throughput_outbound_megabits_per_sec': 1})
    cluster.populate(2).start(wait_other_notice=True)
    node1, node2 = cluster.nodelist()

    session = self.patient_cql_connection(node2)
    create_ks(session, 'ks', 1)
    create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})
    insert_c1c2(session, n=10000, consistency=ConsistencyLevel.ALL)

    mark = node2.mark_log()

    def decommission():
        node2.nodetool('decommission')

    # Launch first decommission in an external thread
    t = Thread(target=decommission)
    t.start()

    # Make sure first decommission is initialized before second decommission
    node2.watch_log_for('DECOMMISSIONING', filename='debug.log')

    # Launch a second decommission, should fail
    with pytest.raises(ToolError):
        node2.nodetool('decommission')

    # Check data is correctly forwarded to node1 after node2 is decommissioned
    t.join()
    node2.watch_log_for('DECOMMISSIONED', from_mark=mark)
    session = self.patient_cql_connection(node1)
    session.execute('USE ks')
    for n in range(0, 10000):
        query_c1c2(session, n, ConsistencyLevel.ONE)
def short_read_test(self):
    """
    @jira_ticket CASSANDRA-9460
    """
    cluster = self.cluster

    # Disable hinted handoff and set batch commit log so this doesn't
    # interfere with the test
    cluster.set_configuration_options(values={'hinted_handoff_enabled': False})
    cluster.set_batch_commitlog(enabled=True)

    cluster.populate(3).start(wait_other_notice=True)
    node1, node2, node3 = cluster.nodelist()

    session = self.patient_cql_connection(node1)
    create_ks(session, 'ks', 3)
    create_cf(session, 'cf', read_repair=0.0)

    normal_key = 'normal'
    reversed_key = 'reversed'

    # Repeat this test 10 times to make it easier to spot a null pointer
    # exception caused by a race, see CASSANDRA-9460
    for k in xrange(10):
        # insert 9 columns in two rows
        insert_columns(self, session, normal_key, 9)
        insert_columns(self, session, reversed_key, 9)

        # Delete the first 3 columns (and the last 3 columns, for the reversed
        # version) with a different node dead each time
        for node, column_number_to_delete in zip(range(1, 4), range(3)):
            self.stop_node(node)
            self.delete(node, normal_key, column_number_to_delete)
            self.delete(node, reversed_key, 8 - column_number_to_delete)
            self.restart_node(node)

        # Query the first 3 columns in normal order
        session = self.patient_cql_connection(node1, 'ks')
        query = SimpleStatement(
            'SELECT c, v FROM cf WHERE key=\'k{}\' LIMIT 3'.format(normal_key),
            consistency_level=ConsistencyLevel.QUORUM)
        rows = list(session.execute(query))
        res = rows
        assert_length_equal(res, 3)

        # value 0, 1 and 2 have been deleted
        for i in xrange(1, 4):
            self.assertEqual('value{}'.format(i + 2), res[i - 1][1])

        # Query the first 3 columns in reverse order
        session = self.patient_cql_connection(node1, 'ks')
        query = SimpleStatement(
            'SELECT c, v FROM cf WHERE key=\'k{}\' ORDER BY c DESC LIMIT 3'.format(reversed_key),
            consistency_level=ConsistencyLevel.QUORUM)
        rows = list(session.execute(query))
        res = rows
        assert_length_equal(res, 3)

        # value 6, 7 and 8 have been deleted
        for i in xrange(0, 3):
            self.assertEqual('value{}'.format(5 - i), res[i][1])

        session.execute('TRUNCATE cf')
def _base_bootstrap_test(self, bootstrap=None, bootstrap_from_version=None, enable_ssl=None):
    def default_bootstrap(cluster, token):
        node2 = new_node(cluster)
        node2.set_configuration_options(values={'initial_token': token})
        node2.start(wait_for_binary_proto=True)
        return node2

    if bootstrap is None:
        bootstrap = default_bootstrap

    cluster = self.cluster

    if enable_ssl:
        logger.debug("***using internode ssl***")
        generate_ssl_stores(self.fixture_dtest_setup.test_path)
        cluster.enable_internode_ssl(self.fixture_dtest_setup.test_path)

    tokens = cluster.balanced_tokens(2)
    cluster.set_configuration_options(values={'num_tokens': 1})

    logger.debug("[node1, node2] tokens: %r" % (tokens,))

    keys = 10000

    # Create a single node cluster
    cluster.populate(1)
    node1 = cluster.nodelist()[0]
    if bootstrap_from_version:
        logger.debug("starting source node on version {}".format(bootstrap_from_version))
        node1.set_install_dir(version=bootstrap_from_version)
    node1.set_configuration_options(values={'initial_token': tokens[0]})
    cluster.start(wait_other_notice=True)

    session = self.patient_cql_connection(node1)
    create_ks(session, 'ks', 1)
    create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})

    # record the size before inserting any of our own data
    empty_size = data_size(node1, 'ks', 'cf')
    logger.debug("node1 empty size for ks.cf: %s" % float(empty_size))

    insert_statement = session.prepare("INSERT INTO ks.cf (key, c1, c2) VALUES (?, 'value1', 'value2')")
    execute_concurrent_with_args(session, insert_statement, [['k%d' % k] for k in range(keys)])

    node1.flush()
    node1.compact()
    initial_size = data_size(node1, 'ks', 'cf')
    logger.debug("node1 size for ks.cf before bootstrapping node2: %s" % float(initial_size))

    # Reads inserted data all during the bootstrap process. We shouldn't
    # get any error
    query_c1c2(session, random.randint(0, keys - 1), ConsistencyLevel.ONE)
    session.shutdown()

    # Bootstrapping a new node in the current version
    node2 = bootstrap(cluster, tokens[1])
    node2.compact()

    node1.cleanup()
    logger.debug("node1 size for ks.cf after cleanup: %s" % float(data_size(node1, 'ks', 'cf')))
    node1.compact()
    logger.debug("node1 size for ks.cf after compacting: %s" % float(data_size(node1, 'ks', 'cf')))

    logger.debug("node2 size for ks.cf after compacting: %s" % float(data_size(node2, 'ks', 'cf')))

    size1 = float(data_size(node1, 'ks', 'cf'))
    size2 = float(data_size(node2, 'ks', 'cf'))

    assert_almost_equal(size1, size2, error=0.3)
    assert_almost_equal(float(initial_size - empty_size), 2 * (size1 - float(empty_size)))

    assert_bootstrap_state(self, node2, 'COMPLETED')
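# Hedged usage sketch (the test names and parameters are illustrative assumptions,
# not taken from this section): concrete bootstrap tests would drive
# _base_bootstrap_test either through the default bootstrap path or with
# internode SSL enabled.
def test_simple_bootstrap_example(self):
    self._base_bootstrap_test()

def test_bootstrap_with_ssl_example(self):
    self._base_bootstrap_test(enable_ssl=True)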
def create_users(self, session):
    columns = {"password": "varchar", "gender": "varchar", "session_token": "varchar",
               "state": "varchar", "birth_year": "bigint"}
    create_cf(session, 'users', columns=columns)
def create_c1c2_table(tester, session, read_repair=None):
    create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'}, read_repair=read_repair)
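# Hedged example (an illustration, not part of the original suite) of the shared
# helper pattern used throughout these tests: create the table, write keys with
# insert_c1c2, then read them back with query_c1c2 at a chosen consistency level.
def example_putget_roundtrip(tester, session):
    create_c1c2_table(tester, session)
    insert_c1c2(session, n=100, consistency=ConsistencyLevel.QUORUM)
    for n in range(100):
        query_c1c2(session, n, ConsistencyLevel.QUORUM)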
def test_size_estimates_multidc(self):
    """
    Test that primary ranges are correctly generated on
    system.size_estimates for multi-dc, multi-ks scenario

    @jira_ticket CASSANDRA-9639
    """
    logger.debug("Creating cluster")
    cluster = self.cluster
    cluster.set_configuration_options(values={'num_tokens': 2})
    cluster.populate([2, 1])
    node1_1, node1_2, node2_1 = cluster.nodelist()

    logger.debug("Setting tokens")
    node1_tokens, node2_tokens, node3_tokens = ['-6639341390736545756,-2688160409776496397',
                                                '-2506475074448728501,8473270337963525440',
                                                '-3736333188524231709,8673615181726552074']
    node1_1.set_configuration_options(values={'initial_token': node1_tokens})
    node1_2.set_configuration_options(values={'initial_token': node2_tokens})
    node2_1.set_configuration_options(values={'initial_token': node3_tokens})
    cluster.set_configuration_options(values={'num_tokens': 2})

    logger.debug("Starting cluster")
    cluster.start()

    out, _, _ = node1_1.nodetool('ring')
    logger.debug("Nodetool ring output {}".format(out))

    logger.debug("Creating keyspaces")
    session = self.patient_cql_connection(node1_1)
    create_ks(session, 'ks1', 3)
    create_ks(session, 'ks2', {'dc1': 2})
    create_cf(session, 'ks1.cf1', columns={'c1': 'text', 'c2': 'text'})
    create_cf(session, 'ks2.cf2', columns={'c1': 'text', 'c2': 'text'})

    logger.debug("Refreshing size estimates")
    node1_1.nodetool('refreshsizeestimates')
    node1_2.nodetool('refreshsizeestimates')
    node2_1.nodetool('refreshsizeestimates')

    """
    CREATE KEYSPACE ks1 WITH replication =
        {'class': 'SimpleStrategy', 'replication_factor': '3'}
    CREATE KEYSPACE ks2 WITH replication =
        {'class': 'NetworkTopologyStrategy', 'dc1': '2'} AND durable_writes = true;

    Datacenter: dc1
    ==========
    Address     Token
                8473270337963525440
    127.0.0.1   -6639341390736545756
    127.0.0.1   -2688160409776496397
    127.0.0.2   -2506475074448728501
    127.0.0.2   8473270337963525440

    Datacenter: dc2
    ==========
    Address     Token
                8673615181726552074
    127.0.0.3   -3736333188524231709
    127.0.0.3   8673615181726552074
    """

    logger.debug("Checking node1_1 size_estimates primary ranges")
    session = self.patient_exclusive_cql_connection(node1_1)
    assert_all(session, "SELECT range_start, range_end FROM system.size_estimates "
                        "WHERE keyspace_name = 'ks1'",
               [['-3736333188524231709', '-2688160409776496397'],
                ['-9223372036854775808', '-6639341390736545756'],
                ['8673615181726552074', '-9223372036854775808']])
    assert_all(session, "SELECT range_start, range_end FROM system.size_estimates "
                        "WHERE keyspace_name = 'ks2'",
               [['-3736333188524231709', '-2688160409776496397'],
                ['-6639341390736545756', '-3736333188524231709'],
                ['-9223372036854775808', '-6639341390736545756'],
                ['8473270337963525440', '8673615181726552074'],
                ['8673615181726552074', '-9223372036854775808']])

    logger.debug("Checking node1_2 size_estimates primary ranges")
    session = self.patient_exclusive_cql_connection(node1_2)
    assert_all(session, "SELECT range_start, range_end FROM system.size_estimates "
                        "WHERE keyspace_name = 'ks1'",
               [['-2506475074448728501', '8473270337963525440'],
                ['-2688160409776496397', '-2506475074448728501']])
    assert_all(session, "SELECT range_start, range_end FROM system.size_estimates "
                        "WHERE keyspace_name = 'ks2'",
               [['-2506475074448728501', '8473270337963525440'],
                ['-2688160409776496397', '-2506475074448728501']])

    logger.debug("Checking node2_1 size_estimates primary ranges")
    session = self.patient_exclusive_cql_connection(node2_1)
    assert_all(session, "SELECT range_start, range_end FROM system.size_estimates "
                        "WHERE keyspace_name = 'ks1'",
               [['-6639341390736545756', '-3736333188524231709'],
                ['8473270337963525440', '8673615181726552074']])
    assert_none(session, "SELECT range_start, range_end FROM system.size_estimates "
                         "WHERE keyspace_name = 'ks2'")
def multiple_repair_test(self):
    """
    * Launch a three node cluster
    * Create a keyspace with RF 3 and a table
    * Insert 49 rows
    * Stop node3
    * Insert 50 more rows
    * Restart node3
    * Issue an incremental repair on node3
    * Stop node2
    * Insert a final 50 rows
    * Restart node2
    * Issue an incremental repair on node2
    * Replace node3 with a new node
    * Verify data integrity
    # TODO: Several more verifications of data need to be interspersed throughout the test. The final assertion is insufficient.
    @jira_ticket CASSANDRA-10644
    """
    cluster = self.cluster
    cluster.populate(3).start()
    node1, node2, node3 = cluster.nodelist()

    session = self.patient_cql_connection(node1)
    create_ks(session, 'ks', 3)
    create_cf(session, 'cf', read_repair=0.0, columns={'c1': 'text', 'c2': 'text'})

    debug("insert data")

    insert_c1c2(session, keys=range(1, 50), consistency=ConsistencyLevel.ALL)
    node1.flush()

    debug("bringing down node 3")
    node3.flush()
    node3.stop(gently=False)

    debug("inserting additional data into node 1 and 2")
    insert_c1c2(session, keys=range(50, 100), consistency=ConsistencyLevel.TWO)
    node1.flush()
    node2.flush()

    debug("restarting and repairing node 3")
    node3.start(wait_for_binary_proto=True)

    if cluster.version() >= "2.2":
        node3.repair()
    else:
        node3.nodetool("repair -par -inc")

    # wait stream handlers to be closed on windows
    # after session is finished (See CASSANDRA-10644)
    if is_win:
        time.sleep(2)

    debug("stopping node 2")
    node2.stop(gently=False)

    debug("inserting data in nodes 1 and 3")
    insert_c1c2(session, keys=range(100, 150), consistency=ConsistencyLevel.TWO)
    node1.flush()
    node3.flush()

    debug("start and repair node 2")
    node2.start(wait_for_binary_proto=True)

    if cluster.version() >= "2.2":
        node2.repair()
    else:
        node2.nodetool("repair -par -inc")

    debug("replace node and check data integrity")
    node3.stop(gently=False)
    node5 = Node('node5', cluster, True, ('127.0.0.5', 9160), ('127.0.0.5', 7000),
                 '7500', '0', None, ('127.0.0.5', 9042))
    cluster.add(node5, False)
    node5.start(replace_address='127.0.0.3', wait_other_notice=True)

    assert_one(session, "SELECT COUNT(*) FROM ks.cf LIMIT 200", [149])
def _putget(self, cluster, session, ks='ks', cf='cf'):
    create_ks(session, ks, 1)
    create_cf(session, cf, compression=None)
    putget(cluster, session, cl=ConsistencyLevel.ONE)
def resumable_rebuild_test(self):
    """
    @jira_ticket CASSANDRA-10810

    Test that the rebuild operation is resumable.
    """
    self.ignore_log_patterns = list(self.ignore_log_patterns) + [
        r'Error while rebuilding node',
        r'Streaming error occurred on session with peer 127.0.0.3',
        r'Remote peer 127.0.0.3 failed stream session'
    ]
    cluster = self.cluster
    cluster.set_configuration_options(values={'endpoint_snitch': 'org.apache.cassandra.locator.PropertyFileSnitch'})

    # Create 2 nodes on dc1
    node1 = cluster.create_node('node1', False,
                                ('127.0.0.1', 9160),
                                ('127.0.0.1', 7000),
                                '7100', '2000', None,
                                binary_interface=('127.0.0.1', 9042))
    node2 = cluster.create_node('node2', False,
                                ('127.0.0.2', 9160),
                                ('127.0.0.2', 7000),
                                '7200', '2001', None,
                                binary_interface=('127.0.0.2', 9042))

    cluster.add(node1, True, data_center='dc1')
    cluster.add(node2, True, data_center='dc1')

    node1.start(wait_for_binary_proto=True)
    node2.start(wait_for_binary_proto=True)

    # Insert data into node1 and node2
    session = self.patient_exclusive_cql_connection(node1)
    create_ks(session, 'ks', {'dc1': 1})
    create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})
    insert_c1c2(session, n=10000, consistency=ConsistencyLevel.ALL)
    key = list(range(10000, 20000))
    session = self.patient_exclusive_cql_connection(node2)
    session.execute('USE ks')
    insert_c1c2(session, keys=key, consistency=ConsistencyLevel.ALL)
    session.shutdown()

    # Create a new node3 on dc2
    node3 = cluster.create_node('node3', False,
                                ('127.0.0.3', 9160),
                                ('127.0.0.3', 7000),
                                '7300', '2002', None,
                                binary_interface=('127.0.0.3', 9042),
                                byteman_port='8300')

    cluster.add(node3, False, data_center='dc2')

    node3.start(wait_other_notice=False, wait_for_binary_proto=True)

    # Wait for snitch to be refreshed
    time.sleep(5)

    # Alter necessary keyspaces for the rebuild operation
    session = self.patient_exclusive_cql_connection(node3)
    session.execute("ALTER KEYSPACE ks WITH REPLICATION = {'class':'NetworkTopologyStrategy', 'dc1':1, 'dc2':1};")
    session.execute("ALTER KEYSPACE system_auth WITH REPLICATION = {'class':'NetworkTopologyStrategy', 'dc1':1, 'dc2':1};")

    # Path to byteman script which makes the streaming to node2 throw an exception, making rebuild fail
    if cluster.version() < '4.0':
        script = ['./byteman/pre4.0/inject_failure_streaming_to_node2.btm']
    else:
        script = ['./byteman/4.0/inject_failure_streaming_to_node2.btm']
    node3.byteman_submit(script)

    # First rebuild must fail and data must be incomplete
    with self.assertRaises(ToolError, msg='Unexpected: SUCCEED'):
        debug('Executing first rebuild -> ')
        node3.nodetool('rebuild dc1')
    debug('Expected: FAILED')

    session.execute('USE ks')
    with self.assertRaises(AssertionError, msg='Unexpected: COMPLETE'):
        debug('Checking data is complete -> ')
        for i in xrange(0, 20000):
            query_c1c2(session, i, ConsistencyLevel.LOCAL_ONE)
    debug('Expected: INCOMPLETE')

    debug('Executing second rebuild -> ')
    node3.nodetool('rebuild dc1')
    debug('Expected: SUCCEED')

    # Check all streaming sessions completed, streamed ranges are skipped and verify streamed data
    node3.watch_log_for('All sessions completed')
    node3.watch_log_for('Skipping streaming those ranges.')
    debug('Checking data is complete -> ')
    for i in xrange(0, 20000):
        query_c1c2(session, i, ConsistencyLevel.LOCAL_ONE)
    debug('Expected: COMPLETE')
def test_multiple_repair(self):
    """
    * Launch a three node cluster
    * Create a keyspace with RF 3 and a table
    * Insert 49 rows
    * Stop node3
    * Insert 50 more rows
    * Restart node3
    * Issue an incremental repair on node3
    * Stop node2
    * Insert a final 50 rows
    * Restart node2
    * Issue an incremental repair on node2
    * Replace node3 with a new node
    * Verify data integrity
    # TODO: Several more verifications of data need to be interspersed throughout the test. The final assertion is insufficient.
    @jira_ticket CASSANDRA-10644
    """
    cluster = self.cluster
    cluster.populate(3).start()
    node1, node2, node3 = cluster.nodelist()

    session = self.patient_cql_connection(node1)
    create_ks(session, 'ks', 3)
    if cluster.version() < '4.0':
        create_cf(session, 'cf', read_repair=0.0, columns={'c1': 'text', 'c2': 'text'})
    else:
        create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})

    logger.debug("insert data")

    insert_c1c2(session, keys=list(range(1, 50)), consistency=ConsistencyLevel.ALL)
    node1.flush()

    logger.debug("bringing down node 3")
    node3.flush()
    node3.stop(gently=False)

    logger.debug("inserting additional data into node 1 and 2")
    insert_c1c2(session, keys=list(range(50, 100)), consistency=ConsistencyLevel.TWO)
    node1.flush()
    node2.flush()

    logger.debug("restarting and repairing node 3")
    node3.start(wait_for_binary_proto=True)

    if cluster.version() >= "2.2":
        node3.repair()
    else:
        node3.nodetool("repair -par -inc")

    # wait stream handlers to be closed on windows
    # after session is finished (See CASSANDRA-10644)
    if is_win:
        time.sleep(2)

    logger.debug("stopping node 2")
    node2.stop(gently=False)

    logger.debug("inserting data in nodes 1 and 3")
    insert_c1c2(session, keys=list(range(100, 150)), consistency=ConsistencyLevel.TWO)
    node1.flush()
    node3.flush()

    logger.debug("start and repair node 2")
    node2.start(wait_for_binary_proto=True)

    if cluster.version() >= "2.2":
        node2.repair()
    else:
        node2.nodetool("repair -par -inc")

    logger.debug("replace node and check data integrity")
    node3.stop(gently=False)
    node5 = Node('node5', cluster, True, ('127.0.0.5', 9160), ('127.0.0.5', 7000),
                 '7500', '0', None, ('127.0.0.5', 9042))
    cluster.add(node5, False, data_center="dc1")
    node5.start(replace_address='127.0.0.3')

    assert_one(session, "SELECT COUNT(*) FROM ks.cf LIMIT 200", [149])
def rebuild_ranges_test(self):
    """
    @jira_ticket CASSANDRA-10406
    """
    keys = 1000

    cluster = self.cluster
    tokens = cluster.balanced_tokens_across_dcs(['dc1', 'dc2'])
    cluster.set_configuration_options(values={'endpoint_snitch': 'org.apache.cassandra.locator.PropertyFileSnitch'})
    cluster.set_configuration_options(values={'num_tokens': 1})
    node1 = cluster.create_node('node1', False, ('127.0.0.1', 9160), ('127.0.0.1', 7000), '7100', '2000', tokens[0],
                                binary_interface=('127.0.0.1', 9042))
    node1.set_configuration_options(values={'initial_token': tokens[0]})
    cluster.add(node1, True, data_center='dc1')
    node1 = cluster.nodelist()[0]

    # start node in dc1
    node1.start(wait_for_binary_proto=True)

    # populate data in dc1
    session = self.patient_exclusive_cql_connection(node1)
    # ks1 will be rebuilt in node2
    create_ks(session, 'ks1', {'dc1': 1})
    create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})
    insert_c1c2(session, n=keys, consistency=ConsistencyLevel.ALL)
    # ks2 will not be rebuilt in node2
    create_ks(session, 'ks2', {'dc1': 1})
    create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})
    insert_c1c2(session, n=keys, consistency=ConsistencyLevel.ALL)
    session.shutdown()

    # Bootstrapping a new node in dc2 with auto_bootstrap: false
    node2 = cluster.create_node('node2', False, ('127.0.0.2', 9160), ('127.0.0.2', 7000), '7200', '2001', tokens[1],
                                binary_interface=('127.0.0.2', 9042))
    node2.set_configuration_options(values={'initial_token': tokens[1]})
    cluster.add(node2, False, data_center='dc2')
    node2.start(wait_other_notice=True, wait_for_binary_proto=True)

    # wait for snitch to reload
    time.sleep(60)
    # alter keyspaces to replicate to dc2
    session = self.patient_exclusive_cql_connection(node2)
    session.execute("ALTER KEYSPACE ks1 WITH REPLICATION = {'class':'NetworkTopologyStrategy', 'dc1':1, 'dc2':1};")
    session.execute("ALTER KEYSPACE ks2 WITH REPLICATION = {'class':'NetworkTopologyStrategy', 'dc1':1, 'dc2':1};")
    session.execute('USE ks1')

    # rebuild only ks1, restricted to the range that is node1's replica
    node2.nodetool('rebuild -ks ks1 -ts (%s,%s] dc1' % (tokens[1], str(pow(2, 63) - 1)))

    # check the data was sent by stopping node1
    node1.stop()
    for i in xrange(0, keys):
        query_c1c2(session, i, ConsistencyLevel.ONE)
    # ks2 should not be streamed
    session.execute('USE ks2')
    for i in xrange(0, keys):
        query_c1c2(session, i, ConsistencyLevel.ONE, tolerate_missing=True, must_be_missing=True)
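# The '-ts (%s,%s]' argument above covers the wrap-around half of the token
# ring. A minimal sketch of how that range string is assembled, equivalent to
# the nodetool call above and assuming the default Murmur3Partitioner whose
# tokens span [-2**63, 2**63 - 1]:
MURMUR3_MAX_TOKEN = pow(2, 63) - 1  # top of the Murmur3 token space
# half-open range: everything strictly above node2's token up to the maximum,
# i.e. the replica range owned by node1 in this two-token ring
token_range = '(%s,%s]' % (tokens[1], MURMUR3_MAX_TOKEN)
node2.nodetool('rebuild -ks ks1 -ts %s dc1' % token_range)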
def simple_rebuild_test(self):
    """
    @jira_ticket CASSANDRA-9119

    Test rebuild from other dc works as expected.
    """
    keys = 1000

    cluster = self.cluster
    cluster.set_configuration_options(values={'endpoint_snitch': 'org.apache.cassandra.locator.PropertyFileSnitch'})
    node1 = cluster.create_node('node1', False, None, ('127.0.0.1', 7000), '7100', '2000', None,
                                binary_interface=('127.0.0.1', 9042))
    cluster.add(node1, True, data_center='dc1')

    # start node in dc1
    node1.start(wait_for_binary_proto=True)

    # populate data in dc1
    session = self.patient_exclusive_cql_connection(node1)
    create_ks(session, 'ks', {'dc1': 1})
    create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})
    insert_c1c2(session, n=keys, consistency=ConsistencyLevel.LOCAL_ONE)

    # check data
    for i in xrange(0, keys):
        query_c1c2(session, i, ConsistencyLevel.LOCAL_ONE)
    session.shutdown()

    # Bootstrapping a new node in dc2 with auto_bootstrap: false
    node2 = cluster.create_node('node2', False, ('127.0.0.2', 9160), ('127.0.0.2', 7000), '7200', '2001', None,
                                binary_interface=('127.0.0.2', 9042))
    cluster.add(node2, False, data_center='dc2')
    node2.start(wait_other_notice=True, wait_for_binary_proto=True)

    # wait for snitch to reload
    time.sleep(60)
    # alter keyspace to replicate to dc2
    session = self.patient_exclusive_cql_connection(node2)
    session.execute("ALTER KEYSPACE ks WITH REPLICATION = {'class':'NetworkTopologyStrategy', 'dc1':1, 'dc2':1};")

    if self.cluster.version() >= '2.2':
        # alter system_auth -- rebuilding it is no longer possible after
        # CASSANDRA-11848 prevented the local node from being considered a source.
        # Only do this on 2.2+, because on 2.1 this keyspace only
        # exists if auth is enabled, which it isn't in this test
        session.execute("ALTER KEYSPACE system_auth WITH REPLICATION = {'class':'NetworkTopologyStrategy', 'dc1':1, 'dc2':1};")
    session.execute('USE ks')

    self.rebuild_errors = 0

    # rebuild dc2 from dc1
    def rebuild():
        try:
            node2.nodetool('rebuild dc1')
        except ToolError as e:
            if 'Node is still rebuilding' in e.stdout:
                self.rebuild_errors += 1
            else:
                raise e

    class Runner(Thread):
        def __init__(self, func):
            Thread.__init__(self)
            self.func = func
            self.thread_exc_info = None

        def run(self):
            """
            Closes over self to catch any exceptions raised by func and
            register them at self.thread_exc_info.
            Based on http://stackoverflow.com/a/1854263
            """
            try:
                self.func()
            except Exception:
                import sys
                self.thread_exc_info = sys.exc_info()

    cmd1 = Runner(rebuild)
    cmd1.start()

    # concurrent rebuild should not be allowed (CASSANDRA-9119)
    # (the following sleep is needed to avoid a conflict in the 'nodetool()' method setting up env.)
    time.sleep(.1)
    # we don't need to manually raise exceptions here -- already handled
    rebuild()

    cmd1.join()

    # manually raise the exception from the cmd1 thread
    # see http://stackoverflow.com/a/1854263
    if cmd1.thread_exc_info is not None:
        raise cmd1.thread_exc_info[1], None, cmd1.thread_exc_info[2]

    # exactly 1 of the two nodetool calls should fail;
    # usually it will be the one in the main thread,
    # but occasionally it wins the race with the one in the secondary thread,
    # so we check that one succeeded and the other failed
    self.assertEqual(self.rebuild_errors, 1,
                     msg='rebuild errors should be 1, but found {}. Concurrent rebuild should not be allowed, '
                         'but one rebuild command should have succeeded.'.format(self.rebuild_errors))

    # check data
    for i in xrange(0, keys):
        query_c1c2(session, i, ConsistencyLevel.LOCAL_ONE)
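# The three-argument `raise` in simple_rebuild_test is Python 2-only syntax
# (consistent with this file's use of xrange). A hedged sketch of the
# Python 3 equivalent, should the helper ever be ported: re-raising the
# stored exception with its original traceback keeps the worker thread's
# stack trace intact.
def reraise_thread_exc_py3(thread_exc_info):
    if thread_exc_info is not None:
        exc_type, exc_value, exc_tb = thread_exc_info  # as stored by sys.exc_info()
        raise exc_value.with_traceback(exc_tb)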
def create_schema(self, session, ks, compression):
    create_ks(session, ks, rf=2)
    create_cf(session, "standard1", compression=compression, compact_storage=self.compact())
    create_cf(session, "counter1", compression=compression, columns={'v': 'counter'},
              compact_storage=self.compact())
def rebuild_with_specific_sources_test(self):
    """
    @jira_ticket CASSANDRA-9875

    Verifies that an operator can specify specific sources to use when rebuilding.

    1. Set up a 2 node cluster across dc1 and dc2
    2. Create new keyspaces with replication factor 2 (one replica in each datacenter)
    3. Populate the nodes with data
    4. Create a new node in dc3 and update the keyspace replication
    5. Run rebuild on the new node with a specific source in dc2
    6. Assert that streaming only occurred between the new node and the specified source
    7. Assert that the rebuild was successful by checking the data
    """
    keys = 1000

    cluster = self.cluster
    tokens = cluster.balanced_tokens_across_dcs(['dc1', 'dc2', 'dc3'])
    cluster.set_configuration_options(values={'endpoint_snitch': 'org.apache.cassandra.locator.PropertyFileSnitch'})
    cluster.set_configuration_options(values={'num_tokens': 1})

    cluster.populate([1, 1], tokens=tokens[:2])
    node1, node2 = cluster.nodelist()

    cluster.start(wait_for_binary_proto=True)

    # populate data in dc1, dc2
    session = self.patient_exclusive_cql_connection(node1)
    # ks1 will be rebuilt in node3
    create_ks(session, 'ks1', {'dc1': 1, 'dc2': 1})
    create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})
    insert_c1c2(session, n=keys, consistency=ConsistencyLevel.ALL)
    # ks2 will not be rebuilt in node3
    create_ks(session, 'ks2', {'dc1': 1, 'dc2': 1})
    create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})
    insert_c1c2(session, n=keys, consistency=ConsistencyLevel.ALL)
    session.shutdown()

    # bootstrap a new node in dc3 with auto_bootstrap: false
    node3 = cluster.create_node('node3', False, ('127.0.0.3', 9160), ('127.0.0.3', 7000), '7300', '2002', tokens[2],
                                binary_interface=('127.0.0.3', 9042))
    cluster.add(node3, False, data_center='dc3')
    node3.start(wait_other_notice=True, wait_for_binary_proto=True)

    # wait for snitch to reload
    time.sleep(60)
    # alter keyspaces to replicate to dc3
    session = self.patient_exclusive_cql_connection(node3)
    session.execute("ALTER KEYSPACE ks1 WITH REPLICATION = {'class':'NetworkTopologyStrategy', 'dc1':1, 'dc2':1, 'dc3':1};")
    session.execute("ALTER KEYSPACE ks2 WITH REPLICATION = {'class':'NetworkTopologyStrategy', 'dc1':1, 'dc2':1, 'dc3':1};")
    session.execute('USE ks1')

    node2_address = node2.network_interfaces['binary'][0]
    node3_address = node3.network_interfaces['binary'][0]

    # rebuild only ks1, restricting the source to node2
    node3.nodetool('rebuild -ks ks1 -ts (%s,%s] -s %s' % (tokens[2], str(pow(2, 63) - 1), node2_address))

    # verify that node2 streamed to node3
    log_matches = node2.grep_log('Session with /%s is complete' % node3_address)
    self.assertTrue(len(log_matches) > 0)

    # verify that node1 did not participate
    log_matches = node1.grep_log('streaming plan for Rebuild')
    self.assertEqual(len(log_matches), 0)

    # check the data was sent by stopping node1 and node2
    node1.stop()
    node2.stop()
    for i in xrange(0, keys):
        query_c1c2(session, i, ConsistencyLevel.ONE)
    # ks2 should not be streamed
    session.execute('USE ks2')
    for i in xrange(0, keys):
        query_c1c2(session, i, ConsistencyLevel.ONE, tolerate_missing=True, must_be_missing=True)
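# The two grep_log checks above form a reusable pattern for asserting that
# exactly one peer served a rebuild. A hedged helper sketch -- the helper
# name is illustrative; the log messages are taken from the assertions above,
# not from any documented API:
def assert_sole_rebuild_source(source_node, other_nodes, dest_address):
    # the chosen source must have completed a streaming session with the destination...
    assert len(source_node.grep_log('Session with /%s is complete' % dest_address)) > 0
    # ...and no other node may have built a rebuild streaming plan at all
    for node in other_nodes:
        assert len(node.grep_log('streaming plan for Rebuild')) == 0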
def create_schema(self, session, ks, compression):
    create_ks(session, ks, rf=2)
    # note: self.compact() must be called; passing the bare method would always be truthy
    create_cf(session, "standard1", compression=compression, compact_storage=self.compact())
    create_cf(session, "counter1", compression=compression, columns={'v': 'counter'},
              compact_storage=self.compact())
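# Why the fix above matters: passing `self.compact` (the bound method) where
# `self.compact()` (its return value) is expected hands create_cf an
# always-truthy object, silently forcing COMPACT STORAGE in every
# parameterization. A minimal illustration of the trap:
class _CompactTrap(object):
    def compact(self):
        return False  # e.g. this run is not parameterized for COMPACT STORAGE

_t = _CompactTrap()
assert bool(_t.compact) is True    # the method object itself is truthy
assert bool(_t.compact()) is False  # the value the schema helper actually needs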
def _base_bootstrap_test(self, bootstrap=None, bootstrap_from_version=None, enable_ssl=None):
    def default_bootstrap(cluster, token):
        node2 = new_node(cluster)
        node2.set_configuration_options(values={'initial_token': token})
        node2.start(wait_for_binary_proto=True)
        return node2

    if bootstrap is None:
        bootstrap = default_bootstrap

    cluster = self.cluster

    if enable_ssl:
        logger.debug("***using internode ssl***")
        generate_ssl_stores(self.fixture_dtest_setup.test_path)
        cluster.enable_internode_ssl(self.fixture_dtest_setup.test_path)

    tokens = cluster.balanced_tokens(2)
    cluster.set_configuration_options(values={'num_tokens': 1})

    logger.debug("[node1, node2] tokens: %r" % (tokens,))

    keys = 10000

    # Create a single node cluster
    cluster.populate(1)
    node1 = cluster.nodelist()[0]
    if bootstrap_from_version:
        logger.debug("starting source node on version {}".format(bootstrap_from_version))
        node1.set_install_dir(version=bootstrap_from_version)
    node1.set_configuration_options(values={'initial_token': tokens[0]})
    cluster.start(wait_other_notice=True)

    session = self.patient_cql_connection(node1)
    create_ks(session, 'ks', 1)
    create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})

    # record the size before inserting any of our own data
    empty_size = node1.data_size()
    logger.debug("node1 empty size : %s" % float(empty_size))

    insert_statement = session.prepare("INSERT INTO ks.cf (key, c1, c2) VALUES (?, 'value1', 'value2')")
    execute_concurrent_with_args(session, insert_statement, [['k%d' % k] for k in range(keys)])

    node1.flush()
    node1.compact()
    initial_size = node1.data_size()
    logger.debug("node1 size before bootstrapping node2: %s" % float(initial_size))

    # Read the inserted data during the bootstrap process; we shouldn't get any errors
    query_c1c2(session, random.randint(0, keys - 1), ConsistencyLevel.ONE)
    session.shutdown()

    # Bootstrapping a new node in the current version
    node2 = bootstrap(cluster, tokens[1])
    node2.compact()

    node1.cleanup()
    logger.debug("node1 size after cleanup: %s" % float(node1.data_size()))
    node1.compact()
    logger.debug("node1 size after compacting: %s" % float(node1.data_size()))

    logger.debug("node2 size after compacting: %s" % float(node2.data_size()))

    size1 = float(node1.data_size())
    size2 = float(node2.data_size())

    assert_almost_equal(size1, size2, error=0.3)
    assert_almost_equal(float(initial_size - empty_size), 2 * (size1 - float(empty_size)))

    assert_bootstrap_state(self, node2, 'COMPLETED')
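# The last two assertions above encode the expected data distribution.
# Writing D = initial_size - empty_size for the payload node1 held before
# bootstrap, handing half the ring to node2 and cleaning up node1 should
# leave each node with about D/2 of payload above its empty baseline:
#
#     size1 - empty_size  ~  size2 - empty_size  ~  D / 2
#
# hence assert_almost_equal(D, 2 * (size1 - empty_size)), with the 30%
# tolerance on size1 vs size2 there to absorb per-node SSTable overhead.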
def resumable_decommission_test(self):
    """
    @jira_ticket CASSANDRA-12008

    Test decommission operation is resumable
    """
    self.ignore_log_patterns = [
        r'Streaming error occurred',
        r'Error while decommissioning node',
        r'Remote peer 127.0.0.2 failed stream session'
    ]
    cluster = self.cluster
    cluster.set_configuration_options(values={'stream_throughput_outbound_megabits_per_sec': 1})
    cluster.populate(3, install_byteman=True).start(wait_other_notice=True)
    node1, node2, node3 = cluster.nodelist()

    session = self.patient_cql_connection(node2)
    # reduce system_distributed RF to 2 so we don't require a forceful decommission
    session.execute("ALTER KEYSPACE system_distributed WITH REPLICATION = {'class':'SimpleStrategy', 'replication_factor':'2'};")
    create_ks(session, 'ks', 2)
    create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})
    insert_c1c2(session, n=10000, consistency=ConsistencyLevel.ALL)

    # Execute the first decommission; it should fail due to the injected streaming failure
    with self.assertRaises(ToolError):
        if cluster.version() >= '4.0':
            script = ['./byteman/4.0/decommission_failure_inject.btm']
        else:
            script = ['./byteman/pre4.0/decommission_failure_inject.btm']
        node2.byteman_submit(script)
        node2.nodetool('decommission')

    # Make sure the previous ToolError was caused by the decommission
    node2.watch_log_for('Error while decommissioning node')

    # Decommission again
    mark = node2.mark_log()
    node2.nodetool('decommission')

    # Check the decommission is done and that the already-transferred ranges were skipped
    node2.watch_log_for('DECOMMISSIONED', from_mark=mark)
    self.assertTrue(node2.grep_log("Skipping transferred range .* of keyspace ks, endpoint /127.0.0.3",
                                   filename='debug.log'))

    # Check data is correctly forwarded to node1 and node3
    cluster.remove(node2)
    node3.stop(gently=False)
    session = self.patient_exclusive_cql_connection(node1)
    session.execute('USE ks')
    for i in xrange(0, 10000):
        query_c1c2(session, i, ConsistencyLevel.ONE)
    node1.stop(gently=False)
    node3.start()
    session.shutdown()
    mark = node3.mark_log()
    node3.watch_log_for('Starting listening for CQL clients', from_mark=mark)
    session = self.patient_exclusive_cql_connection(node3)
    session.execute('USE ks')
    for i in xrange(0, 10000):
        query_c1c2(session, i, ConsistencyLevel.ONE)
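# Both this test and resumable_rebuild_test gate their Byteman script on the
# cluster version. If one were refactoring, a hedged helper sketch keeps the
# gate in one place -- the helper name is illustrative; the directory layout
# comes from the tests above:
def byteman_script_path(cluster, name):
    subdir = '4.0' if cluster.version() >= '4.0' else 'pre4.0'
    return ['./byteman/%s/%s.btm' % (subdir, name)]

# usage sketch:
#     node2.byteman_submit(byteman_script_path(cluster, 'decommission_failure_inject'))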