def prepare(self, create_keyspace=True, nodes=3, rf=3, protocol_version=3, jvm_args=None, random_partitioner=False, **kwargs):
    """
    Populate and start a cluster, then return a CQL session to its first node.

    @param create_keyspace if True, keyspace 'ks' is created with replication factor `rf`
    @param nodes value handed to cluster.populate()
    @param rf replication factor used when the keyspace is created
    @param protocol_version protocol version used for the returned session
    @param jvm_args optional extra JVM arguments for cluster start; the
                    sequence passed in is copied and never modified
    @param random_partitioner if True use RandomPartitioner, otherwise Murmur3Partitioner
    @param kwargs accepted for call compatibility; not used by this method
    @return the session connected to the first node
    """
    # Work on a copy: appending to the caller's list in place would leak the
    # tracing timeout flag into every later use of that list.
    jvm_args = list(jvm_args or [])
    jvm_args.append('-Dcassandra.wait_for_tracing_events_timeout_secs=15')

    cluster = self.cluster

    if random_partitioner:
        cluster.set_partitioner("org.apache.cassandra.dht.RandomPartitioner")
    else:
        cluster.set_partitioner("org.apache.cassandra.dht.Murmur3Partitioner")

    cluster.populate(nodes)
    node1 = cluster.nodelist()[0]
    remove_perf_disable_shared_mem(node1)  # necessary for jmx
    cluster.start(wait_for_binary_proto=True, jvm_args=jvm_args)

    session = self.patient_cql_connection(node1, protocol_version=protocol_version)
    if create_keyspace:
        create_ks(session, 'ks', rf)
    return session
def blacklisted_directory_test(self):
    """
    Check that rows stay readable after a data directory is marked unwritable.

    A single node with three data directories gets 10000 rows written and
    flushed. All rows are queried, the node is compacted, and `markUnwritable`
    is invoked on the BlacklistedDirectories MBean for the node's `data0`
    path. All rows are then queried again, both before and after
    `nodetool relocatesstables`.
    """
    total_rows = 10000

    cluster = self.cluster
    cluster.set_datadir_count(3)
    cluster.populate(1)
    [node] = cluster.nodelist()
    remove_perf_disable_shared_mem(node)
    cluster.start(wait_for_binary_proto=True)

    session = self.patient_cql_connection(node)
    create_ks(session, 'ks', 1)
    create_c1c2_table(self, session)
    insert_c1c2(session, n=total_rows)
    node.flush()

    def read_back_all_rows():
        # Query every inserted key, in insertion order.
        for key in xrange(0, total_rows):
            query_c1c2(session, key)

    read_back_all_rows()
    node.compact()

    mbean = make_mbean('db', type='BlacklistedDirectories')
    with JolokiaAgent(node) as jmx:
        jmx.execute_method(mbean, 'markUnwritable', [os.path.join(node.get_path(), 'data0')])

    read_back_all_rows()
    node.nodetool('relocatesstables')
    read_back_all_rows()
def overlapping_data_folders(self):
    """
    @jira_ticket CASSANDRA-10902

    Place the saved caches directory inside the node's first data directory,
    write and read one row, trigger `saveCaches` on the Caches MBean, and
    then stop and start the cluster again.
    """
    cluster = self.cluster
    cluster.populate(1)
    node1 = cluster.nodelist()[0]

    # Nest saved_caches under the first data directory.
    first_data_dir = node1.data_directories()[0]
    saved_caches_dir = os.path.join(first_data_dir, 'saved_caches')
    node1.set_configuration_options({'saved_caches_directory': saved_caches_dir})

    remove_perf_disable_shared_mem(node1)
    cluster.start(wait_for_binary_proto=True)

    session = self.patient_exclusive_cql_connection(node1)
    statements = (
        ("CREATE KEYSPACE ks WITH REPLICATION = {'class': 'SimpleStrategy', 'replication_factor': 1}", None),
        ("CREATE TABLE ks.tab (key int PRIMARY KEY, a int)", None),
        ("INSERT INTO ks.tab (key, a) VALUES (%s, %s)", [0, 0]),
        ("SELECT * FROM ks.tab WHERE key = %s", [0]),
    )
    for cql, params in statements:
        if params is None:
            session.execute(cql)
        else:
            session.execute(cql, params)

    cache_service = make_mbean('db', type="Caches")
    with JolokiaAgent(node1) as jmx:
        jmx.execute_method(cache_service, 'saveCaches')

    # Restart the cluster with the saved caches in place.
    cluster.stop()
    cluster.start(wait_for_binary_proto=True)
def blacklisted_directory_test(self):
    """
    Query all rows before and after one data directory is marked unwritable.

    Runs on a single node with three data directories: 10000 rows are
    inserted and flushed, read back, and compacted. `data0` is then passed
    to `markUnwritable` on the BlacklistedDirectories MBean, and the rows are
    read back once more before and once more after
    `nodetool relocatesstables`.
    """
    cluster = self.cluster
    cluster.set_datadir_count(3)
    cluster.populate(1)
    [node] = cluster.nodelist()
    remove_perf_disable_shared_mem(node)
    cluster.start(wait_for_binary_proto=True)

    session = self.patient_cql_connection(node)
    self.create_ks(session, 'ks', 1)
    create_c1c2_table(self, session)
    insert_c1c2(session, n=10000)
    node.flush()

    # First read pass, against the freshly flushed data.
    for row_key in xrange(0, 10000):
        query_c1c2(session, row_key)

    node.compact()

    blacklist_mbean = make_mbean('db', type='BlacklistedDirectories')
    with JolokiaAgent(node) as agent:
        agent.execute_method(blacklist_mbean, 'markUnwritable',
                             [os.path.join(node.get_path(), 'data0')])

    # Second read pass, with data0 marked unwritable.
    for row_key in xrange(0, 10000):
        query_c1c2(session, row_key)

    node.nodetool('relocatesstables')

    # Third read pass, after sstables were relocated.
    for row_key in xrange(0, 10000):
        query_c1c2(session, row_key)
def table_metric_mbeans_test(self):
    """
    Test some basic table metric mbeans with simple writes.

    Writes 10K rows with stress on a three node cluster, then reads the
    AllMemtablesHeapSize, LiveDiskSpaceUsed and LiveSSTableCount metrics of
    keyspace1.standard1 from node1 over JMX, before and after a flush.
    """
    cluster = self.cluster
    cluster.populate(3)
    # Only node1 is used; unpacking three names still requires exactly three nodes.
    node1, _, _ = cluster.nodelist()
    remove_perf_disable_shared_mem(node1)
    cluster.start(wait_for_binary_proto=True)

    version = cluster.version()
    node1.stress(['write', 'n=10K', 'no-warmup', '-schema', 'replication(factor=3)'])

    # The metric type is named "ColumnFamily" up to 2.2.X and "Table" afterwards.
    typeName = "ColumnFamily" if version <= '2.2.X' else 'Table'
    debug('Version {} typeName {}'.format(version, typeName))

    # TODO the keyspace and table name are capitalized in 2.0
    memtable_size = make_mbean('metrics', type=typeName, keyspace='keyspace1', scope='standard1',
                               name='AllMemtablesHeapSize')
    disk_size = make_mbean('metrics', type=typeName, keyspace='keyspace1', scope='standard1',
                           name='LiveDiskSpaceUsed')
    sstable_count = make_mbean('metrics', type=typeName, keyspace='keyspace1', scope='standard1',
                               name='LiveSSTableCount')

    with JolokiaAgent(node1) as jmx:
        mem_size = jmx.read_attribute(memtable_size, "Value")
        self.assertGreater(int(mem_size), 10000)

        # Nothing has been flushed yet, so no live disk space is expected.
        on_disk_size = jmx.read_attribute(disk_size, "Count")
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(int(on_disk_size), 0)

        node1.flush()

        on_disk_size = jmx.read_attribute(disk_size, "Count")
        self.assertGreater(int(on_disk_size), 10000)

        sstables = jmx.read_attribute(sstable_count, "Value")
        self.assertGreaterEqual(int(sstables), 1)
def test_reloadlocalschema(self):
    """
    @jira_ticket CASSANDRA-13954

    Test that `nodetool reloadlocalschema` works as intended: a column
    written directly into system_schema.columns must stay invisible (and
    leave the schema version unchanged) until the schema is reloaded.
    """
    cluster = self.cluster
    cluster.populate(1)
    node = cluster.nodelist()[0]
    remove_perf_disable_shared_mem(node)  # for jmx
    cluster.start()

    session = self.patient_cql_connection(node)

    create_keyspace = "CREATE KEYSPACE IF NOT EXISTS test WITH replication = {'class': 'NetworkTopologyStrategy', 'datacenter1': 2};"
    session.execute(create_keyspace)

    create_table = 'CREATE TABLE test.test (pk int, ck int, PRIMARY KEY (pk, ck));'
    session.execute(create_table)

    storage_service = make_mbean('db', type='StorageService')
    insert_with_val = 'INSERT INTO test.test (pk, ck, val) VALUES (0, 1, 2);'

    # get initial schema version
    initial_version = ''
    with JolokiaAgent(node) as jmx:
        initial_version = jmx.read_attribute(storage_service, 'SchemaVersion')

    # manually add a regular column 'val' to test.test
    add_column = """ INSERT INTO system_schema.columns (keyspace_name, table_name, column_name, clustering_order, column_name_bytes, kind, position, type) VALUES ('test', 'test', 'val', 'none', 0x76616c, 'regular', -1, 'int');"""
    session.execute(add_column)

    # validate that schema version wasn't automatically updated
    with JolokiaAgent(node) as jmx:
        self.assertEqual(initial_version, jmx.read_attribute(storage_service, 'SchemaVersion'))

    # make sure the new column wasn't automagically picked up
    assert_invalid(session, insert_with_val)

    # force the node to reload schema from disk
    node.nodetool('reloadlocalschema')

    # validate that schema version changed
    with JolokiaAgent(node) as jmx:
        self.assertNotEqual(initial_version, jmx.read_attribute(storage_service, 'SchemaVersion'))

    # try an insert with the new column again and validate it succeeds this time
    session.execute(insert_with_val)
    assert_all(session, 'SELECT pk, ck, val FROM test.test;', [[0, 1, 2]])
def test_reloadlocalschema(self):
    """
    @jira_ticket CASSANDRA-13954

    Test that `nodetool reloadlocalschema` works as intended.

    Steps: record the schema version, insert a 'val' column row straight
    into system_schema.columns, confirm neither the schema version nor the
    table picked it up, run reloadlocalschema, then confirm the version
    changed and the new column is usable.
    """
    cluster = self.cluster
    cluster.populate(1)
    node = cluster.nodelist()[0]
    remove_perf_disable_shared_mem(node)  # for jmx
    cluster.start()

    session = self.patient_cql_connection(node)

    for ddl in (
        "CREATE KEYSPACE IF NOT EXISTS test WITH replication = {'class': 'NetworkTopologyStrategy', 'datacenter1': 2};",
        'CREATE TABLE test.test (pk int, ck int, PRIMARY KEY (pk, ck));',
    ):
        session.execute(ddl)

    ss = make_mbean('db', type='StorageService')

    # get initial schema version
    version_before = ''
    with JolokiaAgent(node) as agent:
        version_before = agent.read_attribute(ss, 'SchemaVersion')

    # manually add a regular column 'val' to test.test
    session.execute(
        " INSERT INTO system_schema.columns (keyspace_name, table_name, column_name, "
        "clustering_order, column_name_bytes, kind, position, type) "
        "VALUES ('test', 'test', 'val', 'none', 0x76616c, 'regular', -1, 'int');"
    )

    # validate that schema version wasn't automatically updated
    with JolokiaAgent(node) as agent:
        self.assertEqual(version_before, agent.read_attribute(ss, 'SchemaVersion'))

    # make sure the new column wasn't automagically picked up
    assert_invalid(session, 'INSERT INTO test.test (pk, ck, val) VALUES (0, 1, 2);')

    # force the node to reload schema from disk
    node.nodetool('reloadlocalschema')

    # validate that schema version changed
    with JolokiaAgent(node) as agent:
        self.assertNotEqual(version_before, agent.read_attribute(ss, 'SchemaVersion'))

    # try an insert with the new column again and validate it succeeds this time
    session.execute('INSERT INTO test.test (pk, ck, val) VALUES (0, 1, 2);')
    expected_rows = [[0, 1, 2]]
    assert_all(session, 'SELECT pk, ck, val FROM test.test;', expected_rows)
def new_commitlog_cluster_node():
    """
    Populate and start a one node cluster with batch commitlog enabled.

    Takes no arguments; `self` is read from the enclosing scope.

    @return the single node of the started cluster
    """
    cluster = self.fixture_dtest_setup.cluster

    cluster.populate(1)
    node = cluster.nodelist()[0]

    # writes should block on commitlog fsync
    cluster.set_batch_commitlog(enabled=True)

    # disable JVM option so we can use Jolokia
    # this has to happen after .set_configuration_options because of implementation details
    remove_perf_disable_shared_mem(node)

    cluster.start(wait_for_binary_proto=True)
    return node
def test_closing_connections(self):
    """
    @jira_ticket CASSANDRA-6546

    Test CASSANDRA-6546 - do connections get closed when disabling /
    renabling thrift service?

    Ten rounds are run. Each round opens three more thrift clients, toggles
    the thrift service off and on with nodetool, closes every client opened
    so far, and asserts that the connectedThriftClients metric reads zero.
    """
    cluster = self.cluster
    thrift_options = {
        'start_rpc': 'true',
        'rpc_server_type': 'hsha',
        'rpc_max_threads': 20
    }
    cluster.set_configuration_options(values=thrift_options)

    cluster.populate(1)
    (node1, ) = cluster.nodelist()
    remove_perf_disable_shared_mem(node1)
    cluster.start()

    session = self.patient_cql_connection(node1)
    create_ks(session, 'test', 1)
    session.execute('CREATE TABLE "CF" (key text PRIMARY KEY, val text) WITH COMPACT STORAGE;')

    def make_connection():
        # Open a thrift client against node1's thrift interface.
        host, port = node1.network_interfaces['thrift']
        thrift_client = get_thrift_client(host, port)
        thrift_client.transport.open()
        return thrift_client

    # Clients accumulate across rounds; the list is never emptied.
    pools = []
    connected_thrift_clients = make_mbean('metrics', type='Client', name='connectedThriftClients')

    for i in range(10):
        logger.debug("Creating connection pools..")
        for _ in range(3):
            pools.append(make_connection())

        logger.debug("Disabling/Enabling thrift iteration #{i}".format(i=i))
        node1.nodetool('disablethrift')
        node1.nodetool('enablethrift')

        logger.debug("Closing connections from the client side..")
        for client in pools:
            client.transport.close()

        with JolokiaAgent(node1) as jmx:
            num_clients = jmx.read_attribute(connected_thrift_clients, "Value")
            still_open = int(num_clients)
            assert still_open == 0, "There are still open Thrift connections after stopping service " + str(num_clients)
def test_closing_connections(self):
    """
    @jira_ticket CASSANDRA-6546

    Test CASSANDRA-6546 - do connections get closed when disabling /
    renabling thrift service?

    Ten rounds are run. Each round creates three more pycassa connection
    pools, toggles thrift off and on with nodetool, disposes every pool
    created so far, and asserts that connectedThriftClients reads zero.
    """
    cluster = self.cluster
    thrift_options = {
        'start_rpc': 'true',
        'rpc_server_type': 'hsha',
        'rpc_max_threads': 20
    }
    cluster.set_configuration_options(values=thrift_options)

    cluster.populate(1)
    (node1, ) = cluster.nodelist()
    remove_perf_disable_shared_mem(node1)
    cluster.start(wait_for_binary_proto=True)

    session = self.patient_cql_connection(node1)
    self.create_ks(session, 'test', 1)
    session.execute('CREATE TABLE "CF" (key text PRIMARY KEY, val text) WITH COMPACT STORAGE;')

    def make_connection():
        connection_pool = pycassa.ConnectionPool('test', timeout=None)
        # The ColumnFamily object is built on the pool and then discarded.
        pycassa.ColumnFamily(connection_pool, 'CF')
        return connection_pool

    # Pools accumulate across rounds; the list is never emptied.
    pools = []
    connected_thrift_clients = make_mbean('metrics', type='Client', name='connectedThriftClients')

    for i in xrange(10):
        debug("Creating connection pools..")
        for _ in xrange(3):
            pools.append(make_connection())

        debug("Disabling/Enabling thrift iteration #{i}".format(i=i))
        node1.nodetool('disablethrift')
        node1.nodetool('enablethrift')

        debug("Closing connections from the client side..")
        for open_pool in pools:
            open_pool.dispose()

        with JolokiaAgent(node1) as jmx:
            num_clients = jmx.read_attribute(connected_thrift_clients, "Value")
            self.assertEqual(int(num_clients), 0,
                             "There are still open Thrift connections after stopping service")
def upgrade_node(self, node):
    """
    Upgrade a node to the current version

    The node is stopped without waiting for other nodes to notice, switched
    via set_node_to_current_version(), prepared for JMX and started again,
    waiting for the binary protocol and for the other nodes to notice.

    @param node the node to upgrade
    """
    name = node.name
    debug('Upgrading {} to the current version'.format(name))

    debug('Shutting down {}'.format(name))
    node.stop(wait_other_notice=False)

    self.set_node_to_current_version(node)
    install_dir = node.get_install_dir()
    debug("Set cassandra dir for {} to {}".format(name, install_dir))

    # needed for jmx
    remove_perf_disable_shared_mem(node)

    # Restart nodes on new version
    new_version = node.get_cassandra_version()
    debug('Starting {} on new version ({})'.format(name, new_version))
    node.start(wait_other_notice=True, wait_for_binary_proto=True)
def table_metric_mbeans_test(self):
    """
    Test some basic table metric mbeans with simple writes.

    After a 10K-row stress write on a three node cluster, node1's JMX
    metrics for keyspace1.standard1 are checked: memtable heap size is
    non-trivial, live disk space is zero before a flush and non-trivial
    after it, and at least one live sstable exists.
    """
    cluster = self.cluster
    cluster.populate(3)
    # Only node1 is used; unpacking three names still requires exactly three nodes.
    node1, _, _ = cluster.nodelist()
    remove_perf_disable_shared_mem(node1)
    cluster.start(wait_for_binary_proto=True)

    version = cluster.version()
    node1.stress([
        'write', 'n=10K', 'no-warmup', '-schema', 'replication(factor=3)'
    ])

    # The metric type is named "ColumnFamily" up to 2.2.X and "Table" afterwards.
    typeName = "ColumnFamily" if version <= '2.2.X' else 'Table'
    debug('Version {} typeName {}'.format(version, typeName))

    def standard1_metric(metric_name):
        # Build the mbean name of one keyspace1.standard1 table metric.
        return make_mbean('metrics', type=typeName, keyspace='keyspace1',
                          scope='standard1', name=metric_name)

    # TODO the keyspace and table name are capitalized in 2.0
    memtable_size = standard1_metric('AllMemtablesHeapSize')
    disk_size = standard1_metric('LiveDiskSpaceUsed')
    sstable_count = standard1_metric('LiveSSTableCount')

    with JolokiaAgent(node1) as jmx:
        mem_size = jmx.read_attribute(memtable_size, "Value")
        self.assertGreater(int(mem_size), 10000)

        # Nothing has been flushed yet, so no live disk space is expected.
        on_disk_size = jmx.read_attribute(disk_size, "Count")
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(int(on_disk_size), 0)

        node1.flush()

        on_disk_size = jmx.read_attribute(disk_size, "Count")
        self.assertGreater(int(on_disk_size), 10000)

        sstables = jmx.read_attribute(sstable_count, "Value")
        self.assertGreaterEqual(int(sstables), 1)
def prepare(self, nodes=1, compression=True, version=None, protocol_version=None, install_byteman=False):
    """
    Start a cluster with JMX enabled on every node and return a session with the schema created.

    @param nodes number of nodes to populate; also passed to create_schema()
    @param compression not used by this method
    @param version if set, the cluster install dir is switched to this version first
    @param protocol_version protocol version for the returned session
    @param install_byteman forwarded to cluster.populate()
    @return a session connected to the first node
    """
    cluster = self.cluster

    if version:
        cluster.set_install_dir(version=version)
        debug("Set cassandra dir to {}".format(cluster.get_install_dir()))

    cluster.populate(nodes, install_byteman=install_byteman)

    for cluster_node in cluster.nodelist():
        remove_perf_disable_shared_mem(cluster_node)

    cluster.start(wait_other_notice=True)

    first_node = cluster.nodelist()[0]
    session = self.patient_cql_connection(first_node, protocol_version=protocol_version)
    self.create_schema(session, nodes)
    return session
def tombstone_size_test(self):
    """
    Issue 100 deletes against an empty table and check the memtable helpers.

    memtable_count() must report exactly 100 for ks.test and memtable_size()
    must report a value greater than zero.
    """
    delete_count = 100

    cluster = self.cluster
    cluster.populate(1)
    node1 = cluster.nodelist()[0]
    remove_perf_disable_shared_mem(node1)
    cluster.start(wait_for_binary_proto=True)

    # Re-read the node list; the unpacking requires exactly one node.
    [node1] = cluster.nodelist()
    session = self.patient_cql_connection(node1)
    self.create_ks(session, 'ks', 1)
    session.execute('CREATE TABLE test (i int PRIMARY KEY)')

    delete_stmt = session.prepare('DELETE FROM test where i = ?')
    for key in range(delete_count):
        session.execute(delete_stmt, [key])

    self.assertEqual(memtable_count(node1, 'ks', 'test'), delete_count)
    self.assertGreater(memtable_size(node1, 'ks', 'test'), 0)
def test_tombstone_size(self):
    """
    Delete 100 keys from an empty table, then assert that memtable_count()
    for ks.test equals 100 and memtable_size() is positive.
    """
    cluster = self.cluster
    cluster.populate(1)
    node1 = cluster.nodelist()[0]
    remove_perf_disable_shared_mem(node1)
    cluster.start()

    # Re-read the node list; the unpacking requires exactly one node.
    [node1] = cluster.nodelist()
    session = self.patient_cql_connection(node1)
    create_ks(session, 'ks', 1)
    session.execute('CREATE TABLE test (i int PRIMARY KEY)')

    tombstone_stmt = session.prepare('DELETE FROM test where i = ?')
    for deleted_key in range(100):
        session.execute(tombstone_stmt, [deleted_key])

    observed_count = memtable_count(node1, 'ks', 'test')
    assert observed_count == 100
    observed_size = memtable_size(node1, 'ks', 'test')
    assert observed_size > 0
def test_set_get_batchlog_replay_throttle(self):
    """
    @jira_ticket CASSANDRA-13614

    Test that batchlog replay throttle can be set and get through JMX
    """
    throttle_attribute = 'BatchlogReplayThrottleInKB'

    cluster = self.cluster
    cluster.populate(2)
    node = cluster.nodelist()[0]
    remove_perf_disable_shared_mem(node)
    cluster.start()

    # Set and get throttle with JMX, ensuring that the rate change is logged
    with JolokiaAgent(node) as jmx:
        storage_service = make_mbean('db', 'StorageService')
        jmx.write_attribute(storage_service, throttle_attribute, 4096)

        logged_updates = node.grep_log(
            'Updating batchlog replay throttle to 4096 KB/s, 2048 KB/s per endpoint',
            filename='debug.log')
        assert len(logged_updates) > 0

        assert 4096 == jmx.read_attribute(storage_service, throttle_attribute)
def mv_metric_mbeans_release_test(self):
    """
    Test that the right mbeans are created and released when creating mvs

    A table and a materialized view on it are created in keyspace mvtest.
    While they exist, the table must expose AllMemtablesHeapSize,
    ViewReadTime and ViewLockAcquireTime, and the view must expose
    AllMemtablesHeapSize only. After the keyspace is dropped, reading any of
    the six metrics must fail with an InstanceNotFoundException.
    """
    cluster = self.cluster
    cluster.populate(1)
    node = cluster.nodelist()[0]
    remove_perf_disable_shared_mem(node)
    cluster.start(wait_for_binary_proto=True)

    node.run_cqlsh(cmds=(
        " CREATE KEYSPACE mvtest WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor': 1 }; "
        "CREATE TABLE mvtest.testtable ( foo int, bar text, baz text, PRIMARY KEY (foo, bar) ); "
        "CREATE MATERIALIZED VIEW mvtest.testmv AS SELECT foo, bar, baz FROM mvtest.testtable "
        "WHERE foo IS NOT NULL AND bar IS NOT NULL AND baz IS NOT NULL PRIMARY KEY (foo, bar, baz);"))

    def mvtest_metric(scope, name):
        # Build the mbean name of one Table metric in keyspace mvtest.
        return make_mbean('metrics', type='Table', keyspace='mvtest', scope=scope, name=name)

    table_memtable_size = mvtest_metric('testtable', 'AllMemtablesHeapSize')
    table_view_read_time = mvtest_metric('testtable', 'ViewReadTime')
    table_view_lock_time = mvtest_metric('testtable', 'ViewLockAcquireTime')
    mv_memtable_size = mvtest_metric('testmv', 'AllMemtablesHeapSize')
    mv_view_read_time = mvtest_metric('testmv', 'ViewReadTime')
    mv_view_lock_time = mvtest_metric('testmv', 'ViewLockAcquireTime')

    # The template is rendered with str.format(), so it must use {} fields;
    # the previous %s placeholders were never substituted. A separating space
    # before "but" was also missing.
    missing_metric_message = "Table metric {} should have been registered after creating table {} " \
                             "but wasn't!"

    def assert_not_registered(jmx, mbean, attribute):
        # Reading an unregistered mbean must raise InstanceNotFoundException.
        self.assertRaisesRegexp(Exception, ".*InstanceNotFoundException.*", jmx.read_attribute,
                                mbean=mbean, attribute=attribute, verbose=False)

    with JolokiaAgent(node) as jmx:
        self.assertIsNotNone(jmx.read_attribute(table_memtable_size, "Value"),
                             missing_metric_message.format("AllMemtablesHeapSize", "testtable"))
        self.assertIsNotNone(jmx.read_attribute(table_view_read_time, "Count"),
                             missing_metric_message.format("ViewReadTime", "testtable"))
        self.assertIsNotNone(jmx.read_attribute(table_view_lock_time, "Count"),
                             missing_metric_message.format("ViewLockAcquireTime", "testtable"))
        self.assertIsNotNone(jmx.read_attribute(mv_memtable_size, "Value"),
                             missing_metric_message.format("AllMemtablesHeapSize", "testmv"))

        # The view-specific metrics are not registered for the view itself.
        assert_not_registered(jmx, mv_view_read_time, "Count")
        assert_not_registered(jmx, mv_view_lock_time, "Count")

    node.run_cqlsh(cmds="DROP KEYSPACE mvtest;")

    with JolokiaAgent(node) as jmx:
        # Dropping the keyspace must release every metric registered above.
        assert_not_registered(jmx, table_memtable_size, "Value")
        assert_not_registered(jmx, table_view_lock_time, "Count")
        assert_not_registered(jmx, table_view_read_time, "Count")
        assert_not_registered(jmx, mv_memtable_size, "Value")
        assert_not_registered(jmx, mv_view_lock_time, "Count")
        assert_not_registered(jmx, mv_view_read_time, "Count")
def prepare(self, ordered=False, create_keyspace=True, use_cache=False, use_thrift=False, nodes=None, rf=None, protocol_version=None, cl=None, extra_config_options=None, **kwargs):
    """
    Start a cluster on the upgrade path's starting version and return a session to node1.

    @param ordered if True, use ByteOrderedPartitioner
    @param create_keyspace if True, create keyspace 'ks' with replication factor `rf`
    @param use_cache if True, set row_cache_size_in_mb to 100
    @param use_thrift if True, set start_rpc to 'true'
    @param nodes node count; defaults to self.NODES and must be at least 2
    @param rf replication factor; defaults to self.RF
    @param protocol_version protocol version for the session; stored on self
    @param cl consistency level; defaults to self.CL and is stored back on self.CL
    @param extra_config_options optional dict of additional cluster configuration options
    @param kwargs 'start_rpc' and 'jolokia' are consumed here; everything else
                  is forwarded to patient_cql_connection()
    @return a session connected to the first node
    """
    nodes = self.NODES if nodes is None else nodes
    rf = self.RF if rf is None else rf

    cl = self.CL if cl is None else cl
    self.CL = cl  # store for later use in do_upgrade

    # The comparison belongs in the condition: previously only the truthiness
    # of `nodes` was checked, so a single-node request slipped through.
    assert nodes >= 2, "backwards compatibility tests require at least two nodes"

    self.protocol_version = protocol_version

    cluster = self.cluster

    cluster.set_install_dir(version=self.UPGRADE_PATH.starting_version)
    self.fixture_dtest_setup.reinitialize_cluster_for_different_version()

    if ordered:
        cluster.set_partitioner("org.apache.cassandra.dht.ByteOrderedPartitioner")

    if use_cache:
        cluster.set_configuration_options(values={'row_cache_size_in_mb': 100})

    if use_thrift:
        cluster.set_configuration_options(values={'start_rpc': 'true'})

    start_rpc = kwargs.pop('start_rpc', False)
    if start_rpc:
        cluster.set_configuration_options(values={'start_rpc': True})

    cluster.set_configuration_options(values={'internode_compression': 'none'})

    if extra_config_options:
        cluster.set_configuration_options(values=extra_config_options)

    cluster.populate(nodes)
    node1 = cluster.nodelist()[0]

    # Remember the jolokia choice on the fixture; do_upgrade reads it again.
    self.fixture_dtest_setup.enable_for_jolokia = kwargs.pop('jolokia', False)
    if self.fixture_dtest_setup.enable_for_jolokia:
        remove_perf_disable_shared_mem(node1)

    cluster.start(wait_for_binary_proto=True)

    node1 = cluster.nodelist()[0]
    time.sleep(0.2)

    if cl:
        session = self.patient_cql_connection(node1, protocol_version=protocol_version,
                                              consistency_level=cl, **kwargs)
    else:
        session = self.patient_cql_connection(node1, protocol_version=protocol_version, **kwargs)

    if create_keyspace:
        create_ks(session, 'ks', rf)

    return session
def do_upgrade(self, session, use_thrift=False, return_nodes=False, **kwargs):
    """
    Upgrade the first node in the cluster and yield a session for each of the first two nodes.

    This is a generator. It yields `(is_upgraded, Session)` tuples, first
    for the upgraded node1 (`is_upgraded` True) and then for node2, which is
    not upgraded here (`is_upgraded` False). If `return_nodes` is True,
    `(is_upgraded, Session, Node)` tuples are yielded instead. The test is
    skipped through pytest.skip() when the new version conflicts with
    `max_version` or with the configured protocol version.
    """
    session.cluster.shutdown()

    cluster_nodes = self.cluster.nodelist()
    node1 = cluster_nodes[0]
    node2 = cluster_nodes[1]

    # stop the nodes, this can fail due to https://issues.apache.org/jira/browse/CASSANDRA-8220 on MacOS
    # for the tests that run against 2.0. You will need to run those in Linux.
    node1.drain()
    node1.stop(gently=True)

    # Ignore errors before upgrade on Windows
    # We ignore errors from 2.1, because windows 2.1
    # support is only beta. There are frequent log errors,
    # related to filesystem interactions that are a direct result
    # of the lack of full functionality on 2.1 Windows, and we dont
    # want these to pollute our results.
    if is_win() and self.cluster.version() <= '2.2':
        node1.mark_log_for_errors()

    upgrade_path = self.UPGRADE_PATH
    logger.debug('upgrading node1 to {}'.format(upgrade_path.upgrade_version))
    switch_jdks(upgrade_path.upgrade_meta.java_version)
    node1.set_install_dir(version=upgrade_path.upgrade_version)

    # this is a bandaid; after refactoring, upgrades should account for protocol version
    new_version_from_build = get_version_from_build(node1.get_install_dir())

    # Check if a since annotation with a max_version was set on this test.
    # The since decorator can only check the starting version of the upgrade,
    # so here we check to new version of the upgrade as well.
    max_version = getattr(self, 'max_version', None)
    if max_version is not None and new_version_from_build >= max_version:
        pytest.skip("Skipping test, new version {} is equal to or higher than "
                    "max version {}".format(new_version_from_build, max_version))

    protocol_too_old = self.protocol_version is not None and self.protocol_version < 3
    if new_version_from_build >= '3' and protocol_too_old:
        pytest.skip('Protocol version {} incompatible '
                    'with Cassandra version {}'.format(self.protocol_version, new_version_from_build))

    node1.set_log_level(logging.getLevelName(logging.root.level))
    node1.set_configuration_options(values={'internode_compression': 'none'})

    if use_thrift and node1.get_cassandra_version() < '4':
        node1.set_configuration_options(values={'start_rpc': 'true'})

    if self.fixture_dtest_setup.enable_for_jolokia:
        remove_perf_disable_shared_mem(node1)

    node1.start(wait_for_binary_proto=True, wait_other_notice=True)

    def connect_exclusively(target_node):
        # Pass a consistency level only when one is configured on the test.
        if self.CL:
            return self.patient_exclusive_cql_connection(
                target_node, protocol_version=self.protocol_version,
                consistency_level=self.CL, **kwargs)
        return self.patient_exclusive_cql_connection(
            target_node, protocol_version=self.protocol_version, **kwargs)

    sessions_and_meta = []
    # node1 is on the new version; the second session goes to the node on the old version.
    for is_upgraded, target_node in ((True, node1), (False, node2)):
        session = connect_exclusively(target_node)
        session.set_keyspace('ks')
        if return_nodes:
            entry = (is_upgraded, session, target_node)
        else:
            entry = (is_upgraded, session)
        sessions_and_meta.append(entry)

    # Let the nodes settle briefly before yielding connections in turn (on the upgraded and non-upgraded alike)
    # CASSANDRA-11396 was the impetus for this change, wherein some apparent perf noise was preventing
    # CL.ALL from being reached. The newly upgraded node needs to settle because it has just barely started, and each
    # non-upgraded node needs a chance to settle as well, because the entire cluster (or isolated nodes) may have been
    # doing resource intensive activities immediately before.
    for entry in sessions_and_meta:
        time.sleep(5)
        yield entry
def test_compactionstats(self):
    """
    @jira_ticket CASSANDRA-10504
    @jira_ticket CASSANDRA-10427

    Test that jmx MBean used by nodetool compactionstats properly updates
    the progress of a compaction
    """
    cluster = self.cluster
    cluster.populate(1)
    node = cluster.nodelist()[0]
    remove_perf_disable_shared_mem(node)
    cluster.start(wait_for_binary_proto=True)

    # Run a quick stress command to create the keyspace and table
    node.stress(['write', 'n=1', 'no-warmup'])
    # Disable compaction on the table
    node.nodetool('disableautocompaction keyspace1 standard1')
    node.nodetool('setcompactionthroughput 1')
    node.stress(['write', 'n=150K', 'no-warmup'])
    node.flush()

    # Run a major compaction. This will be the compaction whose
    # progress we track.
    node.nodetool_process('compact')

    # We need to sleep here to give compaction time to start
    # Why not do something smarter? Because if the bug regresses,
    # we can't rely on jmx to tell us that compaction started.
    time.sleep(5)

    compaction_manager = make_mbean('db', type='CompactionManager')
    summary_attribute = 'CompactionSummary'

    with JolokiaAgent(node) as jmx:
        first_summary = jmx.read_attribute(compaction_manager, summary_attribute)[0]

        # Pause in between reads
        # to allow compaction to move forward
        time.sleep(2)

        second_summary = jmx.read_attribute(compaction_manager, summary_attribute)[0]

        summary_pattern = 'Compaction@{uuid}(keyspace1, standard1, {progress}/{total})bytes'
        progress = int(parse.search(summary_pattern, first_summary).named['progress'])
        updated_progress = int(parse.search(summary_pattern, second_summary).named['progress'])

        debug(first_summary)
        debug(second_summary)

        # We want to make sure that the progress is increasing,
        # and that values other than zero are displayed.
        self.assertGreater(updated_progress, progress)
        self.assertGreaterEqual(progress, 0)
        self.assertGreater(updated_progress, 0)

        # Block until the major compaction is complete
        # Otherwise nodetool will throw an exception
        # Give a timeout, in case compaction is broken
        # and never ends.
        started_at = time.time()
        max_query_timeout = 600
        debug("Waiting for compaction to finish:")
        while True:
            if not len(jmx.read_attribute(compaction_manager, summary_attribute)) > 0:
                break
            if not time.time() - started_at < max_query_timeout:
                break
            debug(jmx.read_attribute(compaction_manager, summary_attribute))
            time.sleep(2)
def test_multidatacenter_local_quorum(self):
    '''
    @jira_ticket CASSANDRA-13074

    If we do only local datacenters reads in a multidatacenter DES setup,
    DES should take effect and route around a degraded node

    The cluster has two datacenters of three nodes each. The third node of
    the first datacenter is started with a 50 ms read iteration delay. 300
    LOCAL_QUORUM reads are issued through the first node while the
    DynamicEndpointSnitch scores are sampled over JMX before and after
    every read; at least 250 of those reads must have been "snitchable".
    '''
    def no_cross_dc(scores, cross_dc_nodes):
        # True when none of the given nodes has an entry in the scores.
        return all('/' + k.address() not in scores for k in cross_dc_nodes)

    def snitchable(scores_before, scores_after, needed_nodes):
        # True when every needed node has a score both before and after the read.
        return all('/' + k.address() in scores_before and '/' + k.address() in scores_after for k in needed_nodes)

    cluster = self.cluster
    cluster.populate([3, 3])
    # The first three nodes are used as the local DC; node4..node6 are the
    # nodes whose scores must never show up on the coordinator.
    coordinator_node, healthy_node, degraded_node, node4, node5, node6 = cluster.nodelist()
    # increase DES reset/update interval so we clear any cross-DC startup reads faster
    cluster.set_configuration_options(
        values={
            'dynamic_snitch_reset_interval_in_ms': 10000,
            'dynamic_snitch_update_interval_in_ms': 50,
            'phi_convict_threshold': 12
        })

    # JMX is read from both the coordinator and the degraded node below.
    remove_perf_disable_shared_mem(coordinator_node)
    remove_perf_disable_shared_mem(degraded_node)
    # Delay reads on the degraded node by 50 milliseconds
    degraded_node.start(jvm_args=[
        '-Dcassandra.test.read_iteration_delay_ms=50',
        '-Dcassandra.allow_unsafe_join=true'
    ])
    # The degraded node was started separately above with its own JVM args.
    cluster.start(wait_for_binary_proto=30, wait_other_notice=True)

    des = make_mbean('db', type='DynamicEndpointSnitch')
    read_stage = make_mbean('metrics', type='ThreadPools', path='request', scope='ReadStage', name='CompletedTasks')
    session = self.patient_exclusive_cql_connection(coordinator_node)
    session.execute(
        "CREATE KEYSPACE snitchtestks WITH replication = {'class': 'NetworkTopologyStrategy', 'dc1': 3, 'dc2': 3}"
    )
    session.execute(
        "CREATE TABLE snitchtestks.tbl1 (key int PRIMARY KEY) WITH speculative_retry = 'NONE' AND dclocal_read_repair_chance = 0.0"
    )
    read_stmt = session.prepare(
        "SELECT * FROM snitchtestks.tbl1 where key = ?")
    read_stmt.consistency_level = ConsistencyLevel.LOCAL_QUORUM
    insert_stmt = session.prepare(
        "INSERT INTO snitchtestks.tbl1 (key) VALUES (?)")
    insert_stmt.consistency_level = ConsistencyLevel.ALL

    # jmx reads the coordinator's snitch scores; bad_jmx reads the degraded
    # node's completed ReadStage task counter.
    with JolokiaAgent(coordinator_node) as jmx:
        with JolokiaAgent(degraded_node) as bad_jmx:
            for x in range(0, 300):
                session.execute(insert_stmt, [x])

            cleared = False
            # Wait for a snitch reset in case any earlier
            # startup process populated cross-DC read timings
            # NOTE(review): this loop polls without a sleep or a timeout.
            while not cleared:
                scores = jmx.read_attribute(des, 'Scores')
                # Cleared means: no scores at all, or only one for /127.0.0.1.
                cleared = ('/127.0.0.1' in scores and (len(scores) == 1)) or not scores

            snitchable_count = 0

            for x in range(0, 300):
                degraded_reads_before = bad_jmx.read_attribute(
                    read_stage, 'Value')
                scores_before = jmx.read_attribute(des, 'Scores')
                assert_true(
                    no_cross_dc(scores_before, [node4, node5, node6]),
                    "Cross DC scores were present: " + str(scores_before))
                # Issue the read and block until it completes.
                future = session.execute_async(read_stmt, [x])
                future.result()
                scores_after = jmx.read_attribute(des, 'Scores')
                assert_true(
                    no_cross_dc(scores_after, [node4, node5, node6]),
                    "Cross DC scores were present: " + str(scores_after))

                if snitchable(
                        scores_before, scores_after,
                        [coordinator_node, healthy_node, degraded_node]):
                    snitchable_count = snitchable_count + 1
                    # If the DES correctly routed the read around the degraded node,
                    # it shouldn't have another completed read request in metrics
                    assert_equal(
                        degraded_reads_before,
                        bad_jmx.read_attribute(read_stage, 'Value'))
                else:
                    # sleep to give dynamic snitch time to recalculate scores
                    time.sleep(.1)

            # check that most reads were snitchable, with some
            # room allowed in case score recalculation is slow
            assert_greater_equal(snitchable_count, 250)
def _deprecated_repair_jmx(self, method, arguments):
    """
    * Launch a two node, two DC cluster
    * Create a keyspace and table
    * Insert some data
    * Call the deprecated repair JMX API based on the arguments passed into this method
    * Check the node log to see if the correct repair was performed based on the jmx args

    @param method name of the StorageService MBean method to execute
    @param arguments arguments passed to that method
    @return dict of the repair options parsed from the log line, with keys
            parallelism, primary_range, incremental, job_threads,
            column_families, data_centers, hosts and ranges (all strings)
    """
    cluster = self.cluster

    debug("Starting cluster..")
    cluster.populate([1, 1])
    node1, node2 = cluster.nodelist()
    remove_perf_disable_shared_mem(node1)
    cluster.start()
    # From 3.10 on the log line has an extra parenthesised section after the
    # command number, and the pull repair option is asserted below.
    supports_pull_repair = cluster.version() >= LooseVersion('3.10')

    session = self.patient_cql_connection(node1)
    create_ks(session, 'ks', 2)
    create_cf(session, 'cf', read_repair=0.0, columns={'c1': 'text', 'c2': 'text'})

    insert_c1c2(session, n=1000, consistency=ConsistencyLevel.ALL)

    # Run repair
    mbean = make_mbean('db', 'StorageService')
    with JolokiaAgent(node1) as jmx:
        # assert repair runs and returns valid cmd number
        self.assertEqual(jmx.execute_method(mbean, method, arguments), 1)
    # wait for log to start
    node1.watch_log_for("Starting repair command")

    # get repair parameters from the log
    # Raw strings keep the regex escapes (\(, \w, \d) intact without relying
    # on Python passing unknown escape sequences through unchanged.
    repair_pattern = (
        "Starting repair command #1" +
        (r" \([^\)]+\)" if supports_pull_repair else "") +
        r", repairing keyspace ks with repair options \(parallelism: (?P<parallelism>\w+), primary range: (?P<pr>\w+), "
        r"incremental: (?P<incremental>\w+), job threads: (?P<jobs>\d+), ColumnFamilies: (?P<cfs>.+), dataCenters: (?P<dc>.+), "
        r"hosts: (?P<hosts>.+), # of ranges: (?P<ranges>\d+)(, pull repair: (?P<pullrepair>true|false))?\)"
    )
    matches = node1.grep_log(repair_pattern)

    assert_length_equal(matches, 1)
    # Each grep_log entry unpacks into (line, match); only the match is needed.
    _, m = matches[0]

    if supports_pull_repair:
        self.assertEqual(m.group("pullrepair"), "false",
                         "Pull repair cannot be enabled through the deprecated API so the pull repair option should always be false.")

    return {
        "parallelism": m.group("parallelism"),
        "primary_range": m.group("pr"),
        "incremental": m.group("incremental"),
        "job_threads": m.group("jobs"),
        "column_families": m.group("cfs"),
        "data_centers": m.group("dc"),
        "hosts": m.group("hosts"),
        "ranges": m.group("ranges")
    }