def query_users(self, session):
    ret = list(session.execute("SELECT * FROM users"))
    ret.extend(list(session.execute("SELECT * FROM users WHERE state='TX'")))
    ret.extend(list(session.execute("SELECT * FROM users WHERE gender='f'")))
    ret.extend(list(session.execute("SELECT * FROM users WHERE birth_year=1978")))
    assert_length_equal(ret, 8)
    return ret
def simple_increment_test(self):
    """ Simple incrementation test (Created for #3465, that wasn't a bug) """
    cluster = self.cluster
    cluster.populate(3).start()
    nodes = cluster.nodelist()

    session = self.patient_cql_connection(nodes[0])
    self.create_ks(session, 'ks', 3)
    self.create_cf(session, 'cf', validation="CounterColumnType", columns={'c': 'counter'})

    sessions = [self.patient_cql_connection(node, 'ks') for node in nodes]
    nb_increment = 50
    nb_counter = 10

    for i in xrange(0, nb_increment):
        for c in xrange(0, nb_counter):
            session = sessions[(i + c) % len(nodes)]
            query = SimpleStatement("UPDATE cf SET c = c + 1 WHERE key = 'counter%i'" % c,
                                    consistency_level=ConsistencyLevel.QUORUM)
            session.execute(query)

        session = sessions[i % len(nodes)]
        keys = ",".join(["'counter%i'" % c for c in xrange(0, nb_counter)])
        query = SimpleStatement("SELECT key, c FROM cf WHERE key IN (%s)" % keys,
                                consistency_level=ConsistencyLevel.QUORUM)
        res = list(session.execute(query))

        assert_length_equal(res, nb_counter)
        for c in xrange(0, nb_counter):
            self.assertEqual(len(res[c]), 2, "Expecting key and counter for counter {}, got {}".format(c, str(res[c])))
            self.assertEqual(res[c][1], i + 1, "Expecting counter {} = {}, got {}".format(c, i + 1, res[c][1]))
def test_upgrade_with_range_tombstone_eoc_0(self):
    """
    Check sstable upgrading when the sstable contains a range tombstone with EOC=0.

    @jira_ticket CASSANDRA-12423
    """
    session = self._setup_cluster(cluster_options={'start_rpc': 'true'})

    session.execute("CREATE TABLE rt (id INT, c1 TEXT, c2 TEXT, v INT, PRIMARY KEY (id, c1, c2)) "
                    "with compact storage and compression = {'sstable_compression': ''};")

    range_delete = {
        i32(1): {
            'rt': [Mutation(deletion=Deletion(2470761440040513,
                                              predicate=SlicePredicate(slice_range=SliceRange(
                                                  start=composite('a', eoc='\x00'),
                                                  finish=composite('asd', eoc='\x00')))))]
        }
    }

    client = get_thrift_client()
    client.transport.open()
    client.set_keyspace('ks')
    client.batch_mutate(range_delete, ConsistencyLevel.ONE)
    client.transport.close()

    session.execute("INSERT INTO rt (id, c1, c2, v) VALUES (1, 'asd', '', 0) USING TIMESTAMP 1470761451368658")
    session.execute("INSERT INTO rt (id, c1, c2, v) VALUES (1, 'asd', 'asd', 0) USING TIMESTAMP 1470761449416613")

    session = self._do_upgrade()

    ret = list(session.execute('SELECT * FROM rt'))
    assert_length_equal(ret, 2)
def test_upgrade_with_range_tombstone_eoc_0(self):
    """
    Check sstable upgrading when the sstable contains a range tombstone with EOC=0.

    @jira_ticket CASSANDRA-12423
    """
    session = self._setup_cluster(cluster_options={'start_rpc': 'true'})

    session.execute("CREATE TABLE rt (id INT, c1 TEXT, c2 TEXT, v INT, PRIMARY KEY (id, c1, c2)) "
                    "with compact storage and compression = {'sstable_compression': ''};")

    range_delete = {
        i32(1): {
            'rt': [Mutation(deletion=Deletion(2470761440040513,
                                              predicate=SlicePredicate(slice_range=SliceRange(
                                                  start=composite('a', eoc=b'\x00'),
                                                  finish=composite('asd', eoc=b'\x00')))))]
        }
    }

    client = get_thrift_client()
    client.transport.open()
    client.set_keyspace('ks')
    client.batch_mutate(range_delete, ConsistencyLevel.ONE)
    client.transport.close()

    session.execute("INSERT INTO rt (id, c1, c2, v) VALUES (1, 'asd', '', 0) USING TIMESTAMP 1470761451368658")
    session.execute("INSERT INTO rt (id, c1, c2, v) VALUES (1, 'asd', 'asd', 0) USING TIMESTAMP 1470761449416613")

    session = self._do_upgrade()

    ret = list(session.execute('SELECT * FROM rt'))
    assert_length_equal(ret, 2)
def large_compaction_warning_test(self):
    """
    @jira_ticket CASSANDRA-9643
    Check that we log a warning when the partition size is bigger than
    compaction_large_partition_warning_threshold_mb
    """
    cluster = self.cluster
    cluster.set_configuration_options({'compaction_large_partition_warning_threshold_mb': 1})
    cluster.populate(1).start(wait_for_binary_proto=True)
    [node] = cluster.nodelist()

    session = self.patient_cql_connection(node)
    self.create_ks(session, 'ks', 1)

    mark = node.mark_log()
    strlen = (1024 * 1024) / 100
    session.execute("CREATE TABLE large(userid text PRIMARY KEY, properties map<int, text>) with compression = {}")
    for i in range(200):  # ensures partition size larger than compaction_large_partition_warning_threshold_mb
        session.execute("UPDATE ks.large SET properties[%i] = '%s' WHERE userid = 'user'"
                        % (i, get_random_word(strlen)))

    ret = list(session.execute("SELECT properties from ks.large where userid = 'user'"))
    assert_length_equal(ret, 1)
    self.assertEqual(200, len(ret[0][0].keys()))

    node.flush()
    node.nodetool('compact ks large')

    verb = 'Writing' if self.cluster.version() > '2.2' else 'Compacting'
    sizematcher = r'\d+ bytes' if LooseVersion(self.cluster.version()) < LooseVersion('3.6') else r'\d+\.\d{3}(K|M|G)iB'
    node.watch_log_for('{} large partition ks/large:user \({}\)'.format(verb, sizematcher), from_mark=mark, timeout=180)

    ret = list(session.execute("SELECT properties from ks.large where userid = 'user'"))
    assert_length_equal(ret, 1)
    self.assertEqual(200, len(ret[0][0].keys()))
def simple_increment_test(self):
    """ Simple incrementation test (Created for #3465, that wasn't a bug) """
    cluster = self.cluster
    cluster.populate(3).start()
    nodes = cluster.nodelist()

    session = self.patient_cql_connection(nodes[0])
    create_ks(session, 'ks', 3)
    create_cf(session, 'cf', validation="CounterColumnType", columns={'c': 'counter'})

    sessions = [self.patient_cql_connection(node, 'ks') for node in nodes]
    nb_increment = 50
    nb_counter = 10

    for i in xrange(0, nb_increment):
        for c in xrange(0, nb_counter):
            session = sessions[(i + c) % len(nodes)]
            query = SimpleStatement("UPDATE cf SET c = c + 1 WHERE key = 'counter%i'" % c,
                                    consistency_level=ConsistencyLevel.QUORUM)
            session.execute(query)

        session = sessions[i % len(nodes)]
        keys = ",".join(["'counter%i'" % c for c in xrange(0, nb_counter)])
        query = SimpleStatement("SELECT key, c FROM cf WHERE key IN (%s)" % keys,
                                consistency_level=ConsistencyLevel.QUORUM)
        res = list(session.execute(query))

        assert_length_equal(res, nb_counter)
        for c in xrange(0, nb_counter):
            self.assertEqual(len(res[c]), 2, "Expecting key and counter for counter {}, got {}".format(c, str(res[c])))
            self.assertEqual(res[c][1], i + 1, "Expecting counter {} = {}, got {}".format(c, i + 1, res[c][1]))
def large_compaction_warning_test(self):
    """
    @jira_ticket CASSANDRA-9643
    Check that we log a warning when the partition size is bigger than
    compaction_large_partition_warning_threshold_mb
    """
    cluster = self.cluster
    cluster.set_configuration_options({'compaction_large_partition_warning_threshold_mb': 1})
    cluster.populate(1).start(wait_for_binary_proto=True)
    [node] = cluster.nodelist()

    session = self.patient_cql_connection(node)
    create_ks(session, 'ks', 1)

    mark = node.mark_log()
    strlen = (1024 * 1024) // 100
    session.execute("CREATE TABLE large(userid text PRIMARY KEY, properties map<int, text>) with compression = {}")
    for i in range(200):  # ensures partition size larger than compaction_large_partition_warning_threshold_mb
        session.execute("UPDATE ks.large SET properties[%i] = '%s' WHERE userid = 'user'"
                        % (i, get_random_word(strlen)))

    ret = list(session.execute("SELECT properties from ks.large where userid = 'user'"))
    assert_length_equal(ret, 1)
    self.assertEqual(200, len(ret[0][0].keys()))

    node.flush()
    node.nodetool('compact ks large')

    verb = 'Writing' if self.cluster.version() > '2.2' else 'Compacting'
    sizematcher = r'\d+ bytes' if self.cluster.version() < LooseVersion('3.6') else r'\d+\.\d{3}(K|M|G)iB'
    node.watch_log_for('{} large partition ks/large:user \({}'.format(verb, sizematcher), from_mark=mark, timeout=180)

    ret = list(session.execute("SELECT properties from ks.large where userid = 'user'"))
    assert_length_equal(ret, 1)
    self.assertEqual(200, len(ret[0][0].keys()))
def launch_nodetool_cmd(self, cmd):
    """
    Launch a nodetool command and check the result is empty (no error)
    """
    node1 = self.cluster.nodelist()[0]
    response = node1.nodetool(cmd).stdout
    if not common.is_win():  # nodetool always prints out on windows
        assert_length_equal(response, 0)  # nodetool does not print anything unless there is an error
def test_assertions(self):
    # assert_exception_test
    mock_session = Mock(**{'execute.side_effect': AlreadyExists("Dummy exception message.")})
    assert_exception(mock_session, "DUMMY QUERY", expected=AlreadyExists)

    # assert_unavailable_test
    mock_session = Mock(**{'execute.side_effect': Unavailable("Dummy Unavailable message.")})
    assert_unavailable(mock_session.execute)

    # assert_invalid_test
    mock_session = Mock(**{'execute.side_effect': InvalidRequest("Dummy InvalidRequest message.")})
    assert_invalid(mock_session, "DUMMY QUERY")

    # assert_unauthorized_test
    mock_session = Mock(**{'execute.side_effect': Unauthorized("Dummy Unauthorized message.")})
    assert_unauthorized(mock_session, "DUMMY QUERY", None)

    # assert_one_test
    mock_session = Mock()
    mock_session.execute = Mock(return_value=[[1, 1]])
    assert_one(mock_session, "SELECT * FROM test", [1, 1])

    # assert_none_test
    mock_session = Mock()
    mock_session.execute = Mock(return_value=[])
    assert_none(mock_session, "SELECT * FROM test")

    # assert_all_test
    mock_session = Mock()
    mock_session.execute = Mock(return_value=[[i, i] for i in range(0, 10)])
    assert_all(mock_session, "SELECT k, v FROM test", [[i, i] for i in range(0, 10)], ignore_order=True)

    # assert_almost_equal_test
    assert_almost_equal(1, 1.1, 1.2, 1.9, error=1.0)

    # assert_row_count_test
    mock_session = Mock()
    mock_session.execute = Mock(return_value=[[1]])
    assert_row_count(mock_session, 'test', 1)

    # assert_length_equal_test
    check = [1, 2, 3, 4]
    assert_length_equal(check, 4)
def test_13747(self):
    """
    @jira_ticket CASSANDRA-13747
    """
    cluster = self.cluster

    # disable hinted handoff and set batch commit log so this doesn't interfere with the test
    cluster.set_configuration_options(values={'hinted_handoff_enabled': False})
    cluster.set_batch_commitlog(enabled=True)

    cluster.populate(2).start(wait_other_notice=True)
    node1, node2 = cluster.nodelist()

    session = self.patient_cql_connection(node1)

    query = "CREATE KEYSPACE IF NOT EXISTS test WITH replication = {'class': 'NetworkTopologyStrategy', 'datacenter1': 2};"
    session.execute(query)

    query = "CREATE TABLE IF NOT EXISTS test.test (id int PRIMARY KEY);"
    session.execute(query)

    #
    # populate the table with 10 rows:
    #
    # -7509452495886106294 | 5
    # -4069959284402364209 | 1 x
    # -3799847372828181882 | 8
    # -3485513579396041028 | 0 x
    # -3248873570005575792 | 2
    # -2729420104000364805 | 4 x
    #  1634052884888577606 | 7
    #  2705480034054113608 | 6 x
    #  3728482343045213994 | 9
    #  9010454139840013625 | 3 x
    stmt = session.prepare("INSERT INTO test.test (id) VALUES (?);")
    for id in range(0, 10):
        session.execute(stmt, [id], ConsistencyLevel.ALL)

    # with node2 down and hints disabled, delete every other row on node1
    node2.stop(wait_other_notice=True)
    session.execute("DELETE FROM test.test WHERE id IN (1, 0, 4, 6, 3);")

    # with both nodes up, do a DISTINCT range query with CL.ALL;
    # prior to CASSANDRA-13747 this would cause an assertion in short read protection code
    node2.start(wait_other_notice=True)
    stmt = SimpleStatement("SELECT DISTINCT token(id), id FROM test.test;",
                           consistency_level=ConsistencyLevel.ALL)
    result = list(session.execute(stmt))
    assert_length_equal(result, 5)
def _validate_dense_thrift(client, cf='dense_super_1'):
    client.transport.open()
    client.set_keyspace('ks')
    result = client.get_slice('k1', ColumnParent(cf),
                              SlicePredicate(slice_range=SliceRange('', '', False, 5)),
                              ConsistencyLevel.ONE)
    assert_length_equal(result, 2)
    assert result[0].super_column.name == 'key1'
    assert result[1].super_column.name == 'key2'

    print(result[0])
    print(result[1])

    for cosc in result:
        assert cosc.super_column.columns[0].name == _i64(100)
        assert cosc.super_column.columns[0].value == 'value1'
def test_13747(self):
    """
    @jira_ticket CASSANDRA-13747
    """
    cluster = self.cluster

    # disable hinted handoff and set batch commit log so this doesn't interfere with the test
    cluster.set_configuration_options(values={'hinted_handoff_enabled': False})
    cluster.set_batch_commitlog(enabled=True)

    cluster.populate(2).start(wait_other_notice=True)
    node1, node2 = cluster.nodelist()

    session = self.patient_cql_connection(node1)

    query = "CREATE KEYSPACE IF NOT EXISTS test WITH replication = {'class': 'NetworkTopologyStrategy', 'datacenter1': 2};"
    session.execute(query)

    query = "CREATE TABLE IF NOT EXISTS test.test (id int PRIMARY KEY);"
    session.execute(query)

    #
    # populate the table with 10 rows:
    #
    # -7509452495886106294 | 5
    # -4069959284402364209 | 1 x
    # -3799847372828181882 | 8
    # -3485513579396041028 | 0 x
    # -3248873570005575792 | 2
    # -2729420104000364805 | 4 x
    #  1634052884888577606 | 7
    #  2705480034054113608 | 6 x
    #  3728482343045213994 | 9
    #  9010454139840013625 | 3 x
    stmt = session.prepare("INSERT INTO test.test (id) VALUES (?);")
    for id in range(0, 10):
        session.execute(stmt, [id], ConsistencyLevel.ALL)

    # with node2 down and hints disabled, delete every other row on node1
    node2.stop(wait_other_notice=True)
    session.execute("DELETE FROM test.test WHERE id IN (1, 0, 4, 6, 3);")

    # with both nodes up, do a DISTINCT range query with CL.ALL;
    # prior to CASSANDRA-13747 this would cause an assertion in short read protection code
    node2.start(wait_other_notice=True)
    stmt = SimpleStatement("SELECT DISTINCT token(id), id FROM test.test;",
                           consistency_level=ConsistencyLevel.ALL)
    result = list(session.execute(stmt))
    assert_length_equal(result, 5)
def _deprecated_repair_jmx(self, method, arguments):
    """
    * Launch a two node, two DC cluster
    * Create a keyspace and table
    * Insert some data
    * Call the deprecated repair JMX API based on the arguments passed into this method
    * Check the node log to see if the correct repair was performed based on the jmx args
    """
    cluster = self.cluster

    logger.debug("Starting cluster..")
    cluster.populate([1, 1])
    node1, node2 = cluster.nodelist()
    remove_perf_disable_shared_mem(node1)
    cluster.start()
    supports_pull_repair = cluster.version() >= LooseVersion('3.10')

    session = self.patient_cql_connection(node1)
    create_ks(session, 'ks', 2)
    create_cf(session, 'cf', read_repair=0.0, columns={'c1': 'text', 'c2': 'text'})
    insert_c1c2(session, n=1000, consistency=ConsistencyLevel.ALL)

    # Run repair
    mbean = make_mbean('db', 'StorageService')
    with JolokiaAgent(node1) as jmx:
        # assert repair runs and returns valid cmd number
        assert jmx.execute_method(mbean, method, arguments) == 1
    # wait for log to start
    node1.watch_log_for("Starting repair command")
    # get repair parameters from the log
    line = node1.grep_log((r"Starting repair command #1" + (r" \([^\)]+\)" if cluster.version() >= LooseVersion("3.10") else "") +
                           r", repairing keyspace ks with repair options \(parallelism: (?P<parallelism>\w+), primary range: (?P<pr>\w+), "
                           r"incremental: (?P<incremental>\w+), job threads: (?P<jobs>\d+), ColumnFamilies: (?P<cfs>.+), dataCenters: (?P<dc>.+), "
                           r"hosts: (?P<hosts>.+), # of ranges: (?P<ranges>\d+)(, pull repair: (?P<pullrepair>true|false))?\)"))

    assert_length_equal(line, 1)
    line, m = line[0]

    if supports_pull_repair:
        assert m.group("pullrepair") == "false", \
            "Pull repair cannot be enabled through the deprecated API so the pull repair option should always be false."

    return {"parallelism": m.group("parallelism"),
            "primary_range": m.group("pr"),
            "incremental": m.group("incremental"),
            "job_threads": m.group("jobs"),
            "column_families": m.group("cfs"),
            "data_centers": m.group("dc"),
            "hosts": m.group("hosts"),
            "ranges": m.group("ranges")}
def _deprecated_repair_jmx(self, method, arguments):
    """
    * Launch a two node, two DC cluster
    * Create a keyspace and table
    * Insert some data
    * Call the deprecated repair JMX API based on the arguments passed into this method
    * Check the node log to see if the correct repair was performed based on the jmx args
    """
    cluster = self.cluster

    logger.debug("Starting cluster..")
    cluster.populate([1, 1])
    node1, node2 = cluster.nodelist()
    cluster.start()
    supports_pull_repair = cluster.version() >= LooseVersion('3.10')

    session = self.patient_cql_connection(node1)
    create_ks(session, 'ks', 2)
    create_cf(session, 'cf', read_repair=0.0, columns={'c1': 'text', 'c2': 'text'})
    insert_c1c2(session, n=1000, consistency=ConsistencyLevel.ALL)

    # Run repair
    mbean = make_mbean('db', 'StorageService')
    with JolokiaAgent(node1) as jmx:
        # assert repair runs and returns valid cmd number
        assert jmx.execute_method(mbean, method, arguments) == 1
    # wait for log to start
    node1.watch_log_for("Starting repair command")
    # get repair parameters from the log
    line = node1.grep_log((r"Starting repair command #1" + (r" \([^\)]+\)" if cluster.version() >= LooseVersion("3.10") else "") +
                           r", repairing keyspace ks with repair options \(parallelism: (?P<parallelism>\w+), primary range: (?P<pr>\w+), "
                           r"incremental: (?P<incremental>\w+), job threads: (?P<jobs>\d+), ColumnFamilies: (?P<cfs>.+), dataCenters: (?P<dc>.+), "
                           r"hosts: (?P<hosts>.+), # of ranges: (?P<ranges>\d+)(, pull repair: (?P<pullrepair>true|false))?(, ignore unreplicated keyspaces: (?P<ignoreunrepl>true|false))?\)"))

    assert_length_equal(line, 1)
    line, m = line[0]

    if supports_pull_repair:
        assert m.group("pullrepair") == "false", \
            "Pull repair cannot be enabled through the deprecated API so the pull repair option should always be false."

    return {"parallelism": m.group("parallelism"),
            "primary_range": m.group("pr"),
            "incremental": m.group("incremental"),
            "job_threads": m.group("jobs"),
            "column_families": m.group("cfs"),
            "data_centers": m.group("dc"),
            "hosts": m.group("hosts"),
            "ranges": m.group("ranges")}
def _validate_sparse_thrift(client, cf='sparse_super_1'):
    client.transport.open()
    client.set_keyspace('ks')
    result = client.get_slice('k1', ColumnParent(cf),
                              SlicePredicate(slice_range=SliceRange('', '', False, 5)),
                              ConsistencyLevel.ONE)
    assert_length_equal(result, 2)
    assert_equal(result[0].super_column.name, 'key1')
    assert_equal(result[1].super_column.name, 'key2')

    for cosc in result:
        assert_equal(cosc.super_column.columns[0].name, 'col1')
        assert_equal(cosc.super_column.columns[0].value, _i64(200))
        assert_equal(cosc.super_column.columns[1].name, 'col2')
        assert_equal(cosc.super_column.columns[1].value, _i64(300))
        assert_equal(cosc.super_column.columns[2].name, 'value1')
        assert_equal(cosc.super_column.columns[2].value, _i64(100))
def _validate_sparse_thrift(client, cf='sparse_super_1'):
    try:
        client.transport.open()
    except:
        # ignore errors opening the transport (it may already be open)
        pass
    client.set_keyspace('ks')
    result = client.get_slice('k1'.encode(), ColumnParent(cf),
                              SlicePredicate(slice_range=SliceRange(''.encode(), ''.encode(), False, 5)),
                              ConsistencyLevel.ONE)
    assert_length_equal(result, 2)
    assert result[0].super_column.name == 'key1'.encode()
    assert result[1].super_column.name == 'key2'.encode()

    for cosc in result:
        assert cosc.super_column.columns[0].name == 'col1'.encode()
        assert cosc.super_column.columns[0].value == _i64(200)
        assert cosc.super_column.columns[1].name == 'col2'.encode()
        assert cosc.super_column.columns[1].value == _i64(300)
        assert cosc.super_column.columns[2].name == 'value1'.encode()
        assert cosc.super_column.columns[2].value == _i64(100)
def assertions_test(self):
    # assert_exception_test
    mock_session = Mock(**{'execute.side_effect': AlreadyExists("Dummy exception message.")})
    assert_exception(mock_session, "DUMMY QUERY", expected=AlreadyExists)

    # assert_unavailable_test
    mock_session = Mock(**{'execute.side_effect': Unavailable("Dummy Unavailable message.")})
    assert_unavailable(mock_session.execute)

    # assert_invalid_test
    mock_session = Mock(**{'execute.side_effect': InvalidRequest("Dummy InvalidRequest message.")})
    assert_invalid(mock_session, "DUMMY QUERY")

    # assert_unauthorized_test
    mock_session = Mock(**{'execute.side_effect': Unauthorized("Dummy Unauthorized message.")})
    assert_unauthorized(mock_session, "DUMMY QUERY", None)

    # assert_one_test
    mock_session = Mock()
    mock_session.execute = Mock(return_value=[[1, 1]])
    assert_one(mock_session, "SELECT * FROM test", [1, 1])

    # assert_none_test
    mock_session = Mock()
    mock_session.execute = Mock(return_value=[])
    assert_none(mock_session, "SELECT * FROM test")

    # assert_all_test
    mock_session = Mock()
    mock_session.execute = Mock(return_value=[[i, i] for i in range(0, 10)])
    assert_all(mock_session, "SELECT k, v FROM test", [[i, i] for i in range(0, 10)], ignore_order=True)

    # assert_almost_equal_test
    assert_almost_equal(1, 1.1, 1.2, 1.9, error=1.0)

    # assert_row_count_test
    mock_session = Mock()
    mock_session.execute = Mock(return_value=[[1]])
    assert_row_count(mock_session, 'test', 1)

    # assert_length_equal_test
    check = [1, 2, 3, 4]
    assert_length_equal(check, 4)
def test_column_index_stress(self):
    """Write a large number of columns to a single row and set
    'column_index_size_in_kb' to a sufficiently low value to force the
    creation of a column index. The test will then randomly read columns
    from that row and ensure that all data is returned. See CASSANDRA-5225.
    """
    cluster = self.cluster
    cluster.populate(1).start()
    (node1,) = cluster.nodelist()
    cluster.set_configuration_options(values={'column_index_size_in_kb': 1})  # reduce this value to force column index creation
    session = self.patient_cql_connection(node1)
    create_ks(session, 'wide_rows', 1)

    create_table_query = 'CREATE TABLE test_table (row varchar, name varchar, value int, PRIMARY KEY (row, name));'
    session.execute(create_table_query)

    # Now insert 100,000 columns to row 'row0'
    insert_column_query = "UPDATE test_table SET value = {value} WHERE row = '{row}' AND name = '{name}';"
    for i in range(100000):
        row = 'row0'
        name = 'val' + str(i)
        session.execute(insert_column_query.format(value=i, row=row, name=name))

    # now randomly fetch columns: 1 to 3 at a time
    for i in range(10000):
        select_column_query = "SELECT value FROM test_table WHERE row='row0' AND name in ('{name1}', '{name2}', '{name3}');"
        values2fetch = [str(random.randint(0, 99999)) for i in range(3)]
        # values2fetch is a list of random values. Because they are random, they will not necessarily be unique.
        # To simplify the template logic in the select_column_query I will not expect the query to
        # necessarily return 3 values. Hence I am computing the number of unique values in values2fetch
        # and using that in the assert at the end.
        expected_rows = len(set(values2fetch))
        rows = list(session.execute(select_column_query.format(name1="val" + values2fetch[0],
                                                               name2="val" + values2fetch[1],
                                                               name3="val" + values2fetch[2])))
        assert_length_equal(rows, expected_rows)
def ghosts_test(self):
    """ Check that range ghosts are correctly removed by the system """
    cluster = self.cluster
    cluster.populate(1).start()
    [node1] = cluster.nodelist()

    time.sleep(.5)
    session = self.cql_connection(node1)
    self.create_ks(session, 'ks', 1)
    self.create_cf(session, 'cf', gc_grace=0, columns={'c': 'text'})

    rows = 1000
    for i in xrange(0, rows):
        session.execute("UPDATE cf SET c = 'value' WHERE key = 'k%i'" % i)

    res = list(session.execute("SELECT * FROM cf LIMIT 10000"))
    assert_length_equal(res, rows)

    node1.flush()

    for i in xrange(0, rows / 2):
        session.execute("DELETE FROM cf WHERE key = 'k%i'" % i)

    res = list(session.execute("SELECT * FROM cf LIMIT 10000"))
    # no ghosts in 1.2+
    assert_length_equal(res, rows / 2)

    node1.flush()
    time.sleep(1)  # make sure tombstones are collected
    node1.compact()

    res = list(session.execute("SELECT * FROM cf LIMIT 10000"))
    assert_length_equal(res, rows / 2)
def test_ghosts(self):
    """ Check that range ghosts are correctly removed by the system """
    cluster = self.cluster
    cluster.populate(1).start()
    [node1] = cluster.nodelist()

    time.sleep(.5)
    session = self.cql_connection(node1)
    create_ks(session, 'ks', 1)
    create_cf(session, 'cf', gc_grace=0, columns={'c': 'text'})

    rows = 1000
    for i in range(0, rows):
        session.execute("UPDATE cf SET c = 'value' WHERE key = 'k%i'" % i)

    res = list(session.execute("SELECT * FROM cf LIMIT 10000"))
    assert_length_equal(res, rows)

    node1.flush()

    for i in range(0, rows // 2):
        session.execute("DELETE FROM cf WHERE key = 'k%i'" % i)

    res = list(session.execute("SELECT * FROM cf LIMIT 10000"))
    # no ghosts in 1.2+
    assert_length_equal(res, rows // 2)

    node1.flush()
    time.sleep(1)  # make sure tombstones are collected
    node1.compact()

    res = list(session.execute("SELECT * FROM cf LIMIT 10000"))
    assert_length_equal(res, rows // 2)
def test_column_index_stress(self):
    """Write a large number of columns to a single row and set
    'column_index_size_in_kb' to a sufficiently low value to force the
    creation of a column index. The test will then randomly read columns
    from that row and ensure that all data is returned. See CASSANDRA-5225.
    """
    cluster = self.cluster
    cluster.populate(1).start()
    (node1,) = cluster.nodelist()
    cluster.set_configuration_options(values={'column_index_size_in_kb': 1})  # reduce this value to force column index creation
    session = self.patient_cql_connection(node1)
    create_ks(session, 'wide_rows', 1)

    create_table_query = 'CREATE TABLE test_table (row varchar, name varchar, value int, PRIMARY KEY (row, name));'
    session.execute(create_table_query)

    # Now insert 100,000 columns to row 'row0'
    insert_column_query = "UPDATE test_table SET value = {value} WHERE row = '{row}' AND name = '{name}';"
    for i in range(100000):
        row = 'row0'
        name = 'val' + str(i)
        session.execute(insert_column_query.format(value=i, row=row, name=name))

    # now randomly fetch columns: 1 to 3 at a time
    for i in range(10000):
        select_column_query = "SELECT value FROM test_table WHERE row='row0' AND name in ('{name1}', '{name2}', '{name3}');"
        values2fetch = [str(random.randint(0, 99999)) for i in range(3)]
        # values2fetch is a list of random values. Because they are random, they will not necessarily be unique.
        # To simplify the template logic in the select_column_query I will not expect the query to
        # necessarily return 3 values. Hence I am computing the number of unique values in values2fetch
        # and using that in the assert at the end.
        expected_rows = len(set(values2fetch))
        rows = list(session.execute(select_column_query.format(name1="val" + values2fetch[0],
                                                               name2="val" + values2fetch[1],
                                                               name3="val" + values2fetch[2])))
        assert_length_equal(rows, expected_rows)
def test_cdc_data_available_in_cdc_raw(self):
    ks_name = 'ks'
    # First, create a new node just for data generation.
    generation_node, generation_session = self.prepare(ks_name=ks_name)

    cdc_table_info = TableInfo(
        ks_name=ks_name, table_name='cdc_tab',
        column_spec=_16_uuid_column_spec,
        insert_stmt=_get_16_uuid_insert_stmt(ks_name, 'cdc_tab'),
        options={
            'cdc': 'true',
            # give table an explicit id so when we create it again it's the
            # same table and we can replay into it
            'id': uuid.uuid4()
        })

    # Write until we get a new CL segment to avoid replaying initialization
    # mutations from this node's startup into system tables in the other
    # node. See CASSANDRA-11811.
    advance_to_next_cl_segment(
        session=generation_session,
        commitlog_dir=os.path.join(generation_node.get_path(), 'commitlogs'))

    generation_session.execute(cdc_table_info.create_stmt)

    # insert 10000 rows
    inserted_rows = _insert_rows(generation_session, cdc_table_info.name, cdc_table_info.insert_stmt,
                                 repeat((), 10000))

    # drain the node to guarantee all cl segments will be recycled
    logger.debug('draining')
    generation_node.drain()
    logger.debug('stopping')
    # stop the node and clean up all sessions attached to it
    generation_session.cluster.shutdown()
    generation_node.stop()

    # We can rely on the existing _cdc.idx files to determine which .log files contain cdc data.
    source_path = os.path.join(generation_node.get_path(), 'cdc_raw')
    source_cdc_indexes = {ReplayData.load(source_path, name)
                          for name in os.listdir(source_path) if name.endswith('_cdc.idx')}
    assert source_cdc_indexes != set()

    # create a new node to use for cdc_raw cl segment replay
    loading_node = self._init_new_loading_node(ks_name, cdc_table_info.create_stmt, self.cluster.version() < '4')

    # move cdc_raw contents to commitlog directories, then start the
    # node again to trigger commitlog replay, which should replay the
    # cdc_raw files we moved to commitlogs into memtables.
    logger.debug('moving cdc_raw and restarting node')
    _move_commitlog_segments(
        os.path.join(generation_node.get_path(), 'cdc_raw'),
        os.path.join(loading_node.get_path(), 'commitlogs'))
    loading_node.start(wait_for_binary_proto=True)
    logger.debug('node successfully started; waiting on log replay')
    loading_node.grep_log('Log replay complete')
    logger.debug('log replay complete')

    # final assertions
    validation_session = self.patient_exclusive_cql_connection(loading_node)
    data_in_cdc_table_after_restart = rows_to_list(
        validation_session.execute('SELECT * FROM ' + cdc_table_info.name))
    logger.debug('found {cdc} values in CDC table'.format(
        cdc=len(data_in_cdc_table_after_restart)))

    # Then we assert that the CDC data that we expect to be there is there.
    # All data that was in CDC tables should have been copied to cdc_raw,
    # then used in commitlog replay, so it should be back in the cluster.
    assert inserted_rows == data_in_cdc_table_after_restart, 'not all expected data selected'

    if self.cluster.version() >= '4.0':
        # Create ReplayData objects for each index file found in loading cluster
        loading_path = os.path.join(loading_node.get_path(), 'cdc_raw')
        dest_cdc_indexes = [ReplayData.load(loading_path, name)
                            for name in os.listdir(loading_path)
                            if name.endswith('_cdc.idx')]

        # Compare source replay data to dest to ensure replay process created both hard links and index files.
        for srd in source_cdc_indexes:
            # Confirm both log and index are in dest
            assert os.path.isfile(os.path.join(loading_path, srd.idx_name))
            assert os.path.isfile(os.path.join(loading_path, srd.log_name))

            # Find dest ReplayData that corresponds to the source (should be exactly 1)
            corresponding_dest_replay_datae = [x for x in dest_cdc_indexes
                                               if srd.idx_name == x.idx_name]
            assert_length_equal(corresponding_dest_replay_datae, 1)
            drd = corresponding_dest_replay_datae[0]

            # We can't compare equality on offsets since replay uses the raw file length as the written
            # cdc offset. We *can*, however, confirm that the offset in the replayed file is >=
            # the source file, ensuring clients are signaled to replay at least all the data in the
            # log.
            assert drd.offset >= srd.offset

            # Confirm completed flag is the same in both
            assert srd.completed == drd.completed

        # Confirm that the relationship between index files on the source
        # and destination looks like we expect.
        # First, grab the mapping between the two, make sure it's a 1-1
        # mapping, and transform the dict to reflect that:
        src_to_dest_idx_map = {
            src_rd: [dest_rd for dest_rd in dest_cdc_indexes
                     if dest_rd.idx_name == src_rd.idx_name]
            for src_rd in source_cdc_indexes
        }
        for src_rd, dest_rds in src_to_dest_idx_map.items():
            assert_length_equal(dest_rds, 1)
            src_to_dest_idx_map[src_rd] = dest_rds[0]

        # All offsets in idx files that were copied should be >0 on the
        # destination node.
        assert 0 not in {i.offset for i in src_to_dest_idx_map.values()}, \
            ('Found index offsets == 0 in an index file on the '
             'destination node that corresponds to an index file on the '
             'source node:\n'
             '{}').format(pformat(src_to_dest_idx_map))

        # Offsets of all shared indexes should be >= on the destination
        # than on the source.
        for src_rd, dest_rd in src_to_dest_idx_map.items():
            assert dest_rd.offset >= src_rd.offset

        # Re-derive the raw source-to-destination mapping and confirm it is
        # still 1-1, with destination offsets at least as large as the source.
        src_to_dest_idx_map = {
            src_rd: [dest_rd for dest_rd in dest_cdc_indexes
                     if dest_rd.idx_name == src_rd.idx_name]
            for src_rd in source_cdc_indexes
        }
        for k, v in src_to_dest_idx_map.items():
            assert_length_equal(v, 1)
            assert v[0].offset >= k.offset
def test_cdc_data_available_in_cdc_raw(self):
    ks_name = 'ks'
    # First, create a new node just for data generation.
    generation_node, generation_session = self.prepare(ks_name=ks_name)

    cdc_table_info = TableInfo(
        ks_name=ks_name, table_name='cdc_tab',
        column_spec=_16_uuid_column_spec,
        insert_stmt=_get_16_uuid_insert_stmt(ks_name, 'cdc_tab'),
        options={
            'cdc': 'true',
            # give table an explicit id so when we create it again it's the
            # same table and we can replay into it
            'id': uuid.uuid4()
        })

    # Write until we get a new CL segment to avoid replaying initialization
    # mutations from this node's startup into system tables in the other
    # node. See CASSANDRA-11811.
    advance_to_next_cl_segment(
        session=generation_session,
        commitlog_dir=os.path.join(generation_node.get_path(), 'commitlogs'))

    generation_session.execute(cdc_table_info.create_stmt)

    # insert 10000 rows
    inserted_rows = _insert_rows(generation_session, cdc_table_info.name, cdc_table_info.insert_stmt,
                                 repeat((), 10000))

    # drain the node to guarantee all cl segments will be recycled
    logger.debug('draining')
    generation_node.drain()
    logger.debug('stopping')
    # stop the node and clean up all sessions attached to it
    generation_session.cluster.shutdown()
    generation_node.stop()

    # We can rely on the existing _cdc.idx files to determine which .log files contain cdc data.
    source_path = os.path.join(generation_node.get_path(), 'cdc_raw')
    source_cdc_indexes = {ReplayData.load(source_path, name)
                          for name in os.listdir(source_path) if name.endswith('_cdc.idx')}
    assert source_cdc_indexes != set()

    # create a new node to use for cdc_raw cl segment replay
    loading_node = self._init_new_loading_node(ks_name, cdc_table_info.create_stmt, self.cluster.version() < '4')

    # move cdc_raw contents to commitlog directories, then start the
    # node again to trigger commitlog replay, which should replay the
    # cdc_raw files we moved to commitlogs into memtables.
    logger.debug('moving cdc_raw and restarting node')
    _move_commitlog_segments(
        os.path.join(generation_node.get_path(), 'cdc_raw'),
        os.path.join(loading_node.get_path(), 'commitlogs'))
    loading_node.start(wait_for_binary_proto=True)
    logger.debug('node successfully started; waiting on log replay')
    loading_node.grep_log('Log replay complete')
    logger.debug('log replay complete')

    # final assertions
    validation_session = self.patient_exclusive_cql_connection(loading_node)
    data_in_cdc_table_after_restart = rows_to_list(
        validation_session.execute('SELECT * FROM ' + cdc_table_info.name))
    logger.debug('found {cdc} values in CDC table'.format(
        cdc=len(data_in_cdc_table_after_restart)))

    # Then we assert that the CDC data that we expect to be there is there.
    # All data that was in CDC tables should have been copied to cdc_raw,
    # then used in commitlog replay, so it should be back in the cluster.
    assert inserted_rows == data_in_cdc_table_after_restart, 'not all expected data selected'

    if self.cluster.version() >= '4.0':
        # Create ReplayData objects for each index file found in loading cluster
        loading_path = os.path.join(loading_node.get_path(), 'cdc_raw')
        dest_cdc_indexes = [ReplayData.load(loading_path, name)
                            for name in os.listdir(loading_path)
                            if name.endswith('_cdc.idx')]

        # Compare source replay data to dest to ensure replay process created both hard links and index files.
        for srd in source_cdc_indexes:
            # Confirm both log and index are in dest
            assert os.path.isfile(os.path.join(loading_path, srd.idx_name))
            assert os.path.isfile(os.path.join(loading_path, srd.log_name))

            # Find dest ReplayData that corresponds to the source (should be exactly 1)
            corresponding_dest_replay_datae = [x for x in dest_cdc_indexes
                                               if srd.idx_name == x.idx_name]
            assert_length_equal(corresponding_dest_replay_datae, 1)
            drd = corresponding_dest_replay_datae[0]

            # We can't compare equality on offsets since replay uses the raw file length as the written
            # cdc offset. We *can*, however, confirm that the offset in the replayed file is >=
            # the source file, ensuring clients are signaled to replay at least all the data in the
            # log.
            assert drd.offset >= srd.offset

            # Confirm completed flag is the same in both
            assert srd.completed == drd.completed

        # Confirm that the relationship between index files on the source
        # and destination looks like we expect.
        # First, grab the mapping between the two, make sure it's a 1-1
        # mapping, and transform the dict to reflect that:
        src_to_dest_idx_map = {
            src_rd: [dest_rd for dest_rd in dest_cdc_indexes
                     if dest_rd.idx_name == src_rd.idx_name]
            for src_rd in source_cdc_indexes
        }
        for src_rd, dest_rds in src_to_dest_idx_map.items():
            assert_length_equal(dest_rds, 1)
            src_to_dest_idx_map[src_rd] = dest_rds[0]

        # All offsets in idx files that were copied should be >0 on the
        # destination node.
        assert 0 not in {i.offset for i in src_to_dest_idx_map.values()}, \
            ('Found index offsets == 0 in an index file on the '
             'destination node that corresponds to an index file on the '
             'source node:\n'
             '{}').format(pformat(src_to_dest_idx_map))

        # Offsets of all shared indexes should be >= on the destination
        # than on the source.
        for src_rd, dest_rd in src_to_dest_idx_map.items():
            assert dest_rd.offset >= src_rd.offset

        # Re-derive the raw source-to-destination mapping and confirm it is
        # still 1-1, with destination offsets at least as large as the source.
        src_to_dest_idx_map = {
            src_rd: [dest_rd for dest_rd in dest_cdc_indexes
                     if dest_rd.idx_name == src_rd.idx_name]
            for src_rd in source_cdc_indexes
        }
        for k, v in src_to_dest_idx_map.items():
            assert_length_equal(v, 1)
            assert v[0].offset >= k.offset
def short_read_test(self):
    """
    @jira_ticket CASSANDRA-9460
    """
    cluster = self.cluster

    # Disable hinted handoff and set batch commit log so this doesn't
    # interfere with the test
    cluster.set_configuration_options(values={'hinted_handoff_enabled': False})
    cluster.set_batch_commitlog(enabled=True)

    cluster.populate(3).start(wait_other_notice=True)
    node1, node2, node3 = cluster.nodelist()

    session = self.patient_cql_connection(node1)
    create_ks(session, 'ks', 3)
    create_cf(session, 'cf', read_repair=0.0)

    normal_key = 'normal'
    reversed_key = 'reversed'

    # Repeat this test 10 times to make it easier to spot a null pointer exception caused by a race, see CASSANDRA-9460
    for k in xrange(10):
        # insert 9 columns in two rows
        insert_columns(self, session, normal_key, 9)
        insert_columns(self, session, reversed_key, 9)

        # Delete the first 3 columns (and the last 3 columns, for the reversed version) with a different node dead each time
        for node, column_number_to_delete in zip(range(1, 4), range(3)):
            self.stop_node(node)
            self.delete(node, normal_key, column_number_to_delete)
            self.delete(node, reversed_key, 8 - column_number_to_delete)
            self.restart_node(node)

        # Query the first 3 columns in normal order
        session = self.patient_cql_connection(node1, 'ks')
        query = SimpleStatement(
            'SELECT c, v FROM cf WHERE key=\'k{}\' LIMIT 3'.format(normal_key),
            consistency_level=ConsistencyLevel.QUORUM)
        rows = list(session.execute(query))
        res = rows
        assert_length_equal(res, 3)

        # value 0, 1 and 2 have been deleted
        for i in xrange(1, 4):
            self.assertEqual('value{}'.format(i + 2), res[i - 1][1])

        # Query the first 3 columns in reverse order
        session = self.patient_cql_connection(node1, 'ks')
        query = SimpleStatement(
            'SELECT c, v FROM cf WHERE key=\'k{}\' ORDER BY c DESC LIMIT 3'.format(reversed_key),
            consistency_level=ConsistencyLevel.QUORUM)
        rows = list(session.execute(query))
        res = rows
        assert_length_equal(res, 3)

        # value 6, 7 and 8 have been deleted
        for i in xrange(0, 3):
            self.assertEqual('value{}'.format(5 - i), res[i][1])

        session.execute('TRUNCATE cf')
def query_users(self, session):
    ret = list(session.execute("SELECT * FROM users"))
    assert_length_equal(ret, 5)
    return ret
def short_read_test(self):
    """
    @jira_ticket CASSANDRA-9460
    """
    cluster = self.cluster

    # Disable hinted handoff and set batch commit log so this doesn't
    # interfere with the test
    cluster.set_configuration_options(values={'hinted_handoff_enabled': False})
    cluster.set_batch_commitlog(enabled=True)

    cluster.populate(3).start(wait_other_notice=True)
    node1, node2, node3 = cluster.nodelist()

    session = self.patient_cql_connection(node1)
    self.create_ks(session, 'ks', 3)
    self.create_cf(session, 'cf', read_repair=0.0)

    normal_key = 'normal'
    reversed_key = 'reversed'

    # Repeat this test 10 times to make it easier to spot a null pointer exception caused by a race, see CASSANDRA-9460
    for k in xrange(10):
        # insert 9 columns in two rows
        insert_columns(self, session, normal_key, 9)
        insert_columns(self, session, reversed_key, 9)

        # Delete the first 3 columns (and the last 3 columns, for the reversed version) with a different node dead each time
        for node, column_number_to_delete in zip(range(1, 4), range(3)):
            self.stop_node(node)
            self.delete(node, normal_key, column_number_to_delete)
            self.delete(node, reversed_key, 8 - column_number_to_delete)
            self.restart_node(node)

        # Query the first 3 columns in normal order
        session = self.patient_cql_connection(node1, 'ks')
        query = SimpleStatement(
            'SELECT c, v FROM cf WHERE key=\'k{}\' LIMIT 3'.format(normal_key),
            consistency_level=ConsistencyLevel.QUORUM)
        rows = list(session.execute(query))
        res = rows
        assert_length_equal(res, 3)

        # value 0, 1 and 2 have been deleted
        for i in xrange(1, 4):
            self.assertEqual('value{}'.format(i + 2), res[i - 1][1])

        # Query the first 3 columns in reverse order
        session = self.patient_cql_connection(node1, 'ks')
        query = SimpleStatement(
            'SELECT c, v FROM cf WHERE key=\'k{}\' ORDER BY c DESC LIMIT 3'.format(reversed_key),
            consistency_level=ConsistencyLevel.QUORUM)
        rows = list(session.execute(query))
        res = rows
        assert_length_equal(res, 3)

        # value 6, 7 and 8 have been deleted
        for i in xrange(0, 3):
            self.assertEqual('value{}'.format(5 - i), res[i][1])

        session.execute('TRUNCATE cf')