def readrepair_test(self):
    cluster = self.cluster
    cluster.set_configuration_options(values={'hinted_handoff_enabled': False})

    if DISABLE_VNODES:
        cluster.populate(2).start()
    else:
        tokens = cluster.balanced_tokens(2)
        cluster.populate(2, tokens=tokens).start()
    [node1, node2] = cluster.nodelist()

    cursor = self.patient_cql_connection(node1)
    self.create_ks(cursor, 'ks', 2)
    create_c1c2_table(self, cursor, read_repair=1.0)

    node2.stop(wait_other_notice=True)

    for n in xrange(0, 10000):
        insert_c1c2(cursor, n, ConsistencyLevel.ONE)

    node2.start(wait_other_notice=True)

    # Query everything to cause read repair
    for n in xrange(0, 10000):
        query_c1c2(cursor, n, ConsistencyLevel.QUORUM)

    node1.stop(wait_other_notice=True)

    # Check node2 for all the keys that should have been repaired
    cursor = self.patient_cql_connection(node2, keyspace='ks')
    for n in xrange(0, 10000):
        query_c1c2(cursor, n, ConsistencyLevel.ONE)

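# readrepair_test above and the tests that follow rely on shared helpers
# (create_c1c2_table, insert_c1c2, query_c1c2) imported from the suite's tools module
# rather than defined here. As a rough guide to what they are assumed to do, here is a
# minimal, hypothetical sketch: a 'cf' table with text columns c1/c2 keyed by 'k<n>',
# writes and reads at a caller-supplied consistency level, and an assertion that the
# read returns the written values. The real helpers' signatures and statements may differ.
from cassandra import ConsistencyLevel
from cassandra.query import SimpleStatement


def create_c1c2_table(tester, session, read_repair=None):
    # Delegate to the test harness helper that creates table 'cf' in the current keyspace.
    tester.create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'}, read_repair=read_repair)


def insert_c1c2(session, key, consistency=ConsistencyLevel.QUORUM):
    # Write one row, keyed 'k<key>', at the requested consistency level.
    query = SimpleStatement("UPDATE cf SET c1='value1', c2='value2' WHERE key='k%d'" % key,
                            consistency_level=consistency)
    session.execute(query)


def query_c1c2(session, key, consistency=ConsistencyLevel.QUORUM):
    # Read the row back at the requested consistency level and check both columns.
    query = SimpleStatement("SELECT c1, c2 FROM cf WHERE key='k%d'" % key,
                            consistency_level=consistency)
    rows = list(session.execute(query))
    assert len(rows) == 1, rows
    res = rows[0]
    assert res[0] == 'value1' and res[1] == 'value2', res
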
def hintedhandoff_test(self):
    cluster = self.cluster

    if DISABLE_VNODES:
        cluster.populate(2).start()
    else:
        tokens = cluster.balanced_tokens(2)
        cluster.populate(2, tokens=tokens).start()
    [node1, node2] = cluster.nodelist()

    cursor = self.patient_cql_connection(node1)
    self.create_ks(cursor, 'ks', 2)
    create_c1c2_table(self, cursor)

    node2.stop(wait_other_notice=True)

    for n in xrange(0, 100):
        insert_c1c2(cursor, n, ConsistencyLevel.ONE)

    log_mark = node1.mark_log()
    node2.start()
    node1.watch_log_for(["Finished hinted"], from_mark=log_mark, timeout=120)

    node1.stop(wait_other_notice=True)

    # Check node2 for all the keys that should have been delivered via HH
    cursor = self.patient_cql_connection(node2, keyspace='ks')
    for n in xrange(0, 100):
        query_c1c2(cursor, n, ConsistencyLevel.ONE)

def move_single_node_test(self):
    """ Test moving a node in a single-node cluster (#4200) """
    cluster = self.cluster

    # Create an unbalanced ring
    cluster.populate(1, tokens=[0]).start()
    node1 = cluster.nodelist()[0]
    time.sleep(0.2)

    cursor = self.patient_cql_connection(node1)
    self.create_ks(cursor, 'ks', 1)
    self.create_cf(cursor, 'cf', columns={'c1': 'text', 'c2': 'text'})

    for n in xrange(0, 10000):
        insert_c1c2(cursor, n, ConsistencyLevel.ONE)

    cluster.flush()

    node1.move(2**25)
    time.sleep(1)

    cluster.cleanup()

    # Check we can get all the keys
    for n in xrange(0, 10000):
        query_c1c2(cursor, n, ConsistencyLevel.ONE)

def quorum_available_during_failure_test(self):
    CL = ConsistencyLevel.QUORUM
    RF = 3

    debug("Creating a ring")
    cluster = self.cluster
    if DISABLE_VNODES:
        cluster.populate(3).start()
    else:
        tokens = cluster.balanced_tokens(3)
        cluster.populate(3, tokens=tokens).start()
    [node1, node2, node3] = cluster.nodelist()
    cluster.start()

    debug("Set to talk to node 2")
    cursor = self.patient_cql_connection(node2)
    self.create_ks(cursor, 'ks', RF)
    create_c1c2_table(self, cursor)

    debug("Generating some data")
    for n in xrange(100):
        insert_c1c2(cursor, n, CL)

    debug("Taking down node1")
    node1.stop(wait_other_notice=True)

    debug("Reading back data.")
    for n in xrange(100):
        query_c1c2(cursor, n, CL)

def quorum_quorum_test(self):
    session, session2 = self.cl_cl_prepare(ConsistencyLevel.QUORUM, ConsistencyLevel.QUORUM)

    # Stop a node and retest
    self.cluster.nodelist()[2].stop()
    for n in xrange(0, 100):
        insert_c1c2(session, n, ConsistencyLevel.QUORUM)
        query_c1c2(session2, n, ConsistencyLevel.QUORUM)

    self.cluster.nodelist()[1].stop()
    assert_unavailable(insert_c1c2, session, 100, ConsistencyLevel.QUORUM)

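# assert_unavailable() is another shared helper used in quorum_quorum_test above and in
# one_all_test below: it invokes the given callable and passes only if the coordinator
# rejects the request. A minimal sketch of the assumed behavior; the real helper may
# catch a slightly different set of driver exceptions:
from cassandra import Unavailable, ReadTimeout, WriteTimeout


def assert_unavailable(fun, *args):
    # Expect the operation to fail because not enough replicas are alive.
    try:
        fun(*args)
    except (Unavailable, ReadTimeout, WriteTimeout):
        pass
    else:
        assert False, "Expected an Unavailable (or timeout) exception"
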
def one_all_test(self):
    session, session2 = self.cl_cl_prepare(ConsistencyLevel.ONE, ConsistencyLevel.ALL)

    # Stop a node and retest
    self.cluster.nodelist()[2].stop()
    for n in xrange(0, 100):
        insert_c1c2(session, n, ConsistencyLevel.ONE)
    assert_unavailable(query_c1c2, session2, 100, ConsistencyLevel.ALL)

    # Stop a node and retest
    self.cluster.nodelist()[1].stop()
    for n in xrange(0, 100):
        insert_c1c2(session, n, ConsistencyLevel.ONE)
    assert_unavailable(query_c1c2, session2, 100, ConsistencyLevel.ALL)

def non_local_read_test(self):
    """ This test reads from a coordinator we know has no copy of the data """
    cluster = self.cluster

    cluster.populate(3).start()
    [node1, node2, node3] = cluster.nodelist()

    cursor = self.patient_cql_connection(node1)
    self.create_ks(cursor, 'ks', 2)
    create_c1c2_table(self, cursor)

    # Insert and get at CL.QUORUM (since RF=2, node1 won't have all keys locally)
    for n in xrange(0, 1000):
        tools.insert_c1c2(cursor, n, ConsistencyLevel.QUORUM)
        tools.query_c1c2(cursor, n, ConsistencyLevel.QUORUM)

def one_one_test(self):
    session, session2 = self.cl_cl_prepare(ConsistencyLevel.ONE, ConsistencyLevel.ONE)

    # Stop a node and retest
    self.cluster.nodelist()[2].stop()
    for n in xrange(0, 100):
        insert_c1c2(session, n, ConsistencyLevel.ONE)
        query_c1c2(session2, n, ConsistencyLevel.ONE)

    # Stop a node and retest
    self.cluster.nodelist()[1].stop()
    for n in xrange(0, 100):
        insert_c1c2(session, n, ConsistencyLevel.ONE)
        query_c1c2(session2, n, ConsistencyLevel.ONE)

def cl_cl_prepare(self, write_cl, read_cl):
    cluster = self.cluster
    cluster.populate(3).start()
    node1, node2, node3 = cluster.nodelist()

    session = self.patient_cql_connection(node1)
    self.create_ks(session, 'ks', 3)
    create_c1c2_table(self, session)

    session2 = self.patient_cql_connection(node2, 'ks')

    # Insert and get at the requested write/read consistency levels
    for n in xrange(0, 100):
        insert_c1c2(session, n, write_cl)
        query_c1c2(session2, n, read_cl)

    return session, session2

def consistent_reads_after_bootstrap_test(self):
    debug("Creating a ring")
    cluster = self.cluster
    cluster.set_configuration_options(values={'hinted_handoff_enabled': False,
                                              'write_request_timeout_in_ms': 60000,
                                              'read_request_timeout_in_ms': 60000,
                                              'dynamic_snitch_badness_threshold': 0.0},
                                      batch_commitlog=True)
    cluster.populate(2).start()
    [node1, node2] = cluster.nodelist()
    cluster.start()

    debug("Set to talk to node 2")
    n2cursor = self.patient_cql_connection(node2)
    self.create_ks(n2cursor, 'ks', 2)
    create_c1c2_table(self, n2cursor)

    debug("Generating some data for all nodes")
    for n in xrange(10, 20):
        insert_c1c2(n2cursor, n, ConsistencyLevel.ALL)

    node1.flush()
    debug("Taking down node1")
    node1.stop(wait_other_notice=True)

    debug("Writing data to only node2")
    for n in xrange(30, 1000):
        insert_c1c2(n2cursor, n, ConsistencyLevel.ONE)

    node2.flush()
    debug("Restart node1")
    node1.start(wait_other_notice=True)

    debug("Bootstrapping node3")
    node3 = new_node(cluster)
    node3.start()

    n3cursor = self.patient_cql_connection(node3)
    n3cursor.execute("USE ks")

    debug("Checking that no data was lost")
    for n in xrange(10, 20):
        query_c1c2(n3cursor, n, ConsistencyLevel.ALL)

    for n in xrange(30, 1000):
        query_c1c2(n3cursor, n, ConsistencyLevel.ALL)

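# new_node() (used in consistent_reads_after_bootstrap_test above and in the bootstrap
# and decommission tests below) also comes from the shared test harness. A hypothetical
# sketch, modeled on the ccm Node constructor arguments used verbatim in
# repair_compaction_fine_test further down; the real helper's signature may differ:
from ccmlib.node import Node


def new_node(cluster, bootstrap=True, token=None, remote_debug_port='0'):
    i = len(cluster.nodes) + 1
    node = Node('node%d' % i, cluster, bootstrap,
                ('127.0.0.%d' % i, 9160),   # thrift interface
                ('127.0.0.%d' % i, 7000),   # internode (storage) interface
                str(7000 + i * 100),        # JMX port
                remote_debug_port,
                token,
                ('127.0.0.%d' % i, 9042))   # native protocol interface
    cluster.add(node, not bootstrap)
    return node
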
def movement_test(self):
    cluster = self.cluster

    # Create an unbalanced ring
    cluster.populate(3, tokens=[0, 2**48, 2**62]).start()
    [node1, node2, node3] = cluster.nodelist()

    cursor = self.patient_cql_connection(node1)
    self.create_ks(cursor, 'ks', 1)
    self.create_cf(cursor, 'cf', columns={'c1': 'text', 'c2': 'text'})

    for n in xrange(0, 10000):
        insert_c1c2(cursor, n, ConsistencyLevel.ONE)

    cluster.flush()

    # Move nodes to balance the cluster
    balancing_tokens = cluster.balanced_tokens(3)

    escformat = '\\%s'
    if cluster.version() >= '2.1':
        escformat = '%s'

    node1.move(escformat % balancing_tokens[0])  # can't assume 0 is balanced with m3p
    node2.move(escformat % balancing_tokens[1])
    node3.move(escformat % balancing_tokens[2])
    time.sleep(1)

    cluster.cleanup()

    # Check we can get all the keys
    for n in xrange(0, 10000):
        query_c1c2(cursor, n, ConsistencyLevel.ONE)

    # Now the load should be basically even
    sizes = [node.data_size() for node in [node1, node2, node3]]

    assert_almost_equal(sizes[0], sizes[1])
    assert_almost_equal(sizes[0], sizes[2])
    assert_almost_equal(sizes[1], sizes[2])

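# assert_almost_equal() is used by movement_test above (and by the bootstrap and
# decommission tests below) to compare on-disk data sizes that are only expected to be
# roughly equal. A minimal sketch of the assumed semantics, with a hypothetical default
# tolerance; the real helper may use a different default:
def assert_almost_equal(*args, **kwargs):
    error = kwargs.get('error', 0.16)  # allowed relative spread between the values
    vmax, vmin = max(args), min(args)
    assert vmin > vmax * (1.0 - error), \
        "values not within %.2f%% of the max: %s" % (error * 100, str(args))
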
def tracing_from_system_traces_test(self):
    self.cluster.populate(1).start()
    node1, = self.cluster.nodelist()
    node1.watch_log_for('thrift clients...')

    session = self.patient_cql_connection(node1)
    self.create_ks(session, 'ks', 1)
    create_c1c2_table(self, session)

    for n in xrange(100):
        insert_c1c2(session, n)

    out, err = self.run_cqlsh(node1, 'TRACING ON; SELECT * FROM ks.cf')
    self.assertIn('Tracing session: ', out)

    out, err = self.run_cqlsh(node1, 'TRACING ON; SELECT * FROM system_traces.events')
    self.assertNotIn('Tracing session: ', out)

    out, err = self.run_cqlsh(node1, 'TRACING ON; SELECT * FROM system_traces.sessions')
    self.assertNotIn('Tracing session: ', out)

def simple_bootstrap_test(self):
    cluster = self.cluster
    tokens = cluster.balanced_tokens(2)

    keys = 10000

    # Create a single node cluster
    cluster.populate(1, tokens=[tokens[0]]).start(wait_other_notice=True)
    node1 = cluster.nodes["node1"]

    session = self.patient_cql_connection(node1)
    self.create_ks(session, 'ks', 1)
    self.create_cf(session, 'cf', columns={'c1': 'text', 'c2': 'text'})

    for n in xrange(0, keys):
        insert_c1c2(session, n, ConsistencyLevel.ONE)

    node1.flush()
    initial_size = node1.data_size()

    # Read the inserted data throughout the bootstrap process. We shouldn't
    # get any errors.
    reader = self.go(lambda _: query_c1c2(session, random.randint(0, keys - 1), ConsistencyLevel.ONE))

    # Bootstrap a new node
    node2 = new_node(cluster, token=tokens[1])
    node2.start()
    node2.watch_log_for("Listening for thrift clients...")

    reader.check()
    node1.cleanup()
    time.sleep(.5)
    reader.check()

    size1 = node1.data_size()
    size2 = node2.data_size()

    assert_almost_equal(size1, size2, error=0.3)
    assert_almost_equal(initial_size, 2 * size1)

def decomission_test(self):
    cluster = self.cluster

    tokens = cluster.balanced_tokens(4)
    cluster.populate(4, tokens=tokens).start()
    [node1, node2, node3, node4] = cluster.nodelist()

    cursor = self.patient_cql_connection(node1)
    self.create_ks(cursor, 'ks', 2)
    self.create_cf(cursor, 'cf', columns={'c1': 'text', 'c2': 'text'})

    for n in xrange(0, 10000):
        insert_c1c2(cursor, n, ConsistencyLevel.QUORUM)

    cluster.flush()
    sizes = [node.data_size() for node in cluster.nodelist() if node.is_running()]
    init_size = sizes[0]
    assert_almost_equal(*sizes)

    time.sleep(.5)
    node4.decommission()
    node4.stop()
    cluster.cleanup()
    time.sleep(.5)

    # Check we can get all the keys
    for n in xrange(0, 10000):
        query_c1c2(cursor, n, ConsistencyLevel.QUORUM)

    sizes = [node.data_size() for node in cluster.nodelist() if node.is_running()]
    three_node_sizes = sizes
    assert_almost_equal(sizes[0], sizes[1])
    assert_almost_equal((2.0 / 3.0) * sizes[0], sizes[2])
    assert_almost_equal(sizes[2], init_size)

    if cluster.version() <= '1.2':
        node3.stop(wait_other_notice=True)
        node1.removeToken(tokens[2])
        time.sleep(.5)
        cluster.cleanup()
        time.sleep(.5)

        # Check we can get all the keys
        for n in xrange(0, 10000):
            query_c1c2(cursor, n, ConsistencyLevel.QUORUM)

        sizes = [node.data_size() for node in cluster.nodelist() if node.is_running()]
        assert_almost_equal(*sizes)
        assert_almost_equal(sizes[0], 2 * init_size)

        node5 = new_node(cluster, token=(tokens[2] + 1)).start()
        time.sleep(.5)
        cluster.cleanup()
        time.sleep(.5)
        cluster.compact()
        time.sleep(.5)

        # Check we can get all the keys
        for n in xrange(0, 10000):
            query_c1c2(cursor, n, ConsistencyLevel.QUORUM)

        sizes = [node.data_size() for node in cluster.nodelist() if node.is_running()]
        # We should be back to the earlier 3-node situation
        for i in xrange(0, len(sizes)):
            assert_almost_equal(sizes[i], three_node_sizes[i])

def _simple_repair(self, order_preserving_partitioner=False):
    cluster = self.cluster

    if order_preserving_partitioner:
        cluster.set_partitioner('org.apache.cassandra.dht.ByteOrderedPartitioner')

    # Disable hinted handoff and set batch commit log so this doesn't
    # interfere with the test (this must be after the populate)
    cluster.set_configuration_options(values={'hinted_handoff_enabled': False}, batch_commitlog=True)
    debug("Starting cluster..")
    cluster.populate(3).start()
    [node1, node2, node3] = cluster.nodelist()

    cursor = self.patient_cql_connection(node1)
    self.create_ks(cursor, 'ks', 3)
    self.create_cf(cursor, 'cf', read_repair=0.0, columns={'c1': 'text', 'c2': 'text'})

    # Insert 1000 keys, kill node 3, insert 1 key, restart node 3, insert 1000 more keys
    debug("Inserting data...")
    for i in xrange(0, 1000):
        insert_c1c2(cursor, i, ConsistencyLevel.ALL)
    node3.flush()
    node3.stop()
    insert_c1c2(cursor, 1000, ConsistencyLevel.TWO)
    node3.start(wait_other_notice=True)
    for i in xrange(1001, 2001):
        insert_c1c2(cursor, i, ConsistencyLevel.ALL)

    cluster.flush()

    # Verify that node3 has only 2000 keys
    debug("Checking data on node3...")
    self.check_rows_on_node(node3, 2000, missings=[1000])

    # Verify that node1 has 2001 keys
    debug("Checking data on node1...")
    self.check_rows_on_node(node1, 2001, found=[1000])

    # Verify that node2 has 2001 keys
    debug("Checking data on node2...")
    self.check_rows_on_node(node2, 2001, found=[1000])

    time.sleep(10)  # see CASSANDRA-4373

    # Run repair
    start = time.time()
    debug("starting repair...")
    node1.repair()
    debug("Repair time: {end}".format(end=time.time() - start))

    # Validate that only one range was transferred
    l = node1.grep_log("/([0-9.]+) and /([0-9.]+) have ([0-9]+) range\(s\) out of sync")
    if cluster.version() > "1":
        assert len(l) == 2, "Lines matching: " + str([elt[0] for elt in l])
    else:
        # In pre-1.0, we should have only one line
        assert len(l) == 1, "Lines matching: " + str([elt[0] for elt in l])

    valid = [(node1.address(), node3.address()), (node3.address(), node1.address()),
             (node2.address(), node3.address()), (node3.address(), node2.address())]

    for line, m in l:
        assert int(m.group(3)) == 1, "Expecting 1 range out of sync, got " + m.group(3)
        assert (m.group(1), m.group(2)) in valid, str((m.group(1), m.group(2)))
        valid.remove((m.group(1), m.group(2)))
        valid.remove((m.group(2), m.group(1)))

    # Check node3 now has the key
    self.check_rows_on_node(node3, 2001, found=[1000], restart=False)

def repair_compaction_fine_test(self):
    """Check that we do not stream data for repairs past the last repair.
    Check cases:
    i) a node goes down, data is inserted, the node comes up, run repair
    ii) after case i, do the same with a different node down (since compaction has separated repaired data)
    iii) check that repair works appropriately where a new node is replacing
    """
    cluster = self.cluster
    cluster.set_configuration_options(values={'hinted_handoff_enabled': False, 'auto_snapshot': False},
                                      batch_commitlog=False)
    cluster.populate(3).start()
    [node1, node2, node3] = cluster.nodelist()

    cursor = self.patient_cql_connection(node1)
    self.create_ks(cursor, 'ks', 3)
    self.create_cf(cursor, 'cf', read_repair=0.0, columns={'c1': 'text', 'c2': 'text'})

    debug("insert data into all")
    for x in range(1, 5):
        insert_c1c2(cursor, x, ConsistencyLevel.ALL)
    node1.flush()

    debug("bringing down node 3")
    node3.flush()
    node3.stop(gently=False)

    debug("inserting additional data into node 1 and 2")
    for y in range(5, 10):
        insert_c1c2(cursor, y, ConsistencyLevel.TWO)
    node1.flush()
    node2.flush()

    debug("restarting and repairing node 3")
    node3.start()
    node3.repair()

    sstableNode1 = node3.grep_log("reading file from /127.0.0.1, repairedAt = 0")
    sstableNode2 = node3.grep_log("reading file from /127.0.0.2, repairedAt = 0")
    catchBadSSReads = node3.grep_log("reading file from .* repairedAt = ([1-9])")
    self.assertGreaterEqual(len(sstableNode1), 1)
    self.assertGreaterEqual(len(sstableNode2), 1)
    self.assertLess(len(catchBadSSReads), 1)

    debug("stopping node 2")
    node2.stop(gently=False)

    debug("inserting data in nodes 1 and 3")
    for z in range(10, 15):
        insert_c1c2(cursor, z, ConsistencyLevel.TWO)
    node1.flush()

    debug("start and repair node 2")
    node2.flush()
    node2.start()
    node2.repair()

    fileFromNode1 = node2.grep_log("reading file from /127.0.0.1, repairedAt = 0")
    fileFromNode3 = node2.grep_log("reading file from /127.0.0.3, repairedAt = 0")
    catchBadReads = node2.grep_log("reading file from .* repairedAt = ([1-9])")
    self.assertGreaterEqual(len(fileFromNode1), 1)
    self.assertGreaterEqual(len(fileFromNode3), 1)
    self.assertLess(len(catchBadReads), 1)

    node4 = Node('node4', cluster, True, ('127.0.0.4', 9160), ('127.0.0.4', 7000),
                 '7400', '0', None, ('127.0.0.4', 9042))
    node4.start()

    debug("replace node and check repair-like process")
    node3.stop(gently=False)
    node5 = Node('node5', cluster, True, ('127.0.0.5', 9160), ('127.0.0.5', 7000),
                 '7500', '0', None, ('127.0.0.5', 9042))
    cluster.add(node5, False)
    node5.start(replace_address='127.0.0.3', wait_other_notice=True)

    fileRead = node5.grep_log("reading file from .*, repairedAt = 0")
    self.assertGreaterEqual(len(fileRead), 1)

    # Additionally we should see 14 distinct keys in the data (this prints to the command line)
    debug((node2.run_sstable2json()))
    rows = cursor.execute("SELECT COUNT(*) FROM ks.cf LIMIT 100")
    results = rows[0]
    debug(results)
    self.assertEqual(results[0], 14)
