def create_ks(session, name, rf):
    """
    Create keyspace `name` through `session`, then switch the session to it.

    :param session: driver session used to execute the CQL statements
    :param name: keyspace name, interpolated verbatim into the statements
    :param rf: an int selects SimpleStrategy with that replication factor;
        any other value is treated as a non-empty mapping of datacenter name
        to replication factor and selects NetworkTopologyStrategy
    :raises AssertionError: if `rf` is an empty mapping
    """
    query = 'CREATE KEYSPACE %s WITH replication={%s}'
    if isinstance(rf, int):
        # we assume simpleStrategy
        query = query % (name, "'class':'SimpleStrategy', 'replication_factor':%d" % rf)
    else:
        # An empty mapping would render "'class':'NetworkTopologyStrategy', " with a
        # dangling comma, so reject it before anything is sent to the cluster.
        assert len(rf) > 0, "At least one datacenter/rf pair is needed"
        # we assume networkTopologyStrategy
        options = ', '.join("'%s':%d" % (d, r) for d, r in rf.items())
        query = query % (name, "'class':'NetworkTopologyStrategy', %s" % options)

    try:
        retry_till_success(session.execute, query=query, timeout=120,
                           bypassed_exception=cassandra.OperationTimedOut)
    except cassandra.AlreadyExists:
        # warning() replaces the deprecated warn() alias; the emitted text is unchanged
        logger.warning("AlreadyExists executing create ks query '%s'", query)

    session.cluster.control_connection.wait_for_schema_agreement(wait_time=120)
    # Also validates it was indeed created even though we ignored OperationTimedOut
    # Might happen some of the time because CircleCI disk IO is unreliable and hangs randomly
    session.execute('USE {}'.format(name))
def create_cf(self):
    """
    Add the column family `self.cf_name` to keyspace `self.ks_name` and wait
    for agreement.

    Returns self.
    """
    definition = self.Cassandra.CfDef(name=self.cf_name, keyspace=self.ks_name)
    add_column_family = self.client.system_add_column_family
    retry_till_success(add_column_family, definition, timeout=30)

    # half-second pause on either side of the agreement check
    time.sleep(0.5)
    retry_till_success(self.wait_for_agreement, timeout=10)
    time.sleep(0.5)

    return self
def create_cf_simple(session, name, query):
    """
    Run the caller-supplied create statement `query`, then check that table
    `name` can be read.

    :param session: driver session used to execute the statements
    :param name: table name, interpolated verbatim into the verification SELECT
    :param query: complete CQL statement that creates the table
    """
    try:
        retry_till_success(session.execute, query=query, timeout=120,
                           bypassed_exception=cassandra.OperationTimedOut)
    except cassandra.AlreadyExists:
        # warning() replaces the deprecated warn() alias; the emitted text is unchanged
        logger.warning("AlreadyExists executing create cf query '%s'", query)

    session.cluster.control_connection.wait_for_schema_agreement(wait_time=120)
    # Going to ignore OperationTimedOut from create CF, so need to validate it was indeed created
    session.execute('SELECT * FROM %s LIMIT 1' % name)
def test_resumable_bootstrap(self):
    """
    Test resuming bootstrap after data streaming failure
    """
    cluster = self.cluster
    cluster.populate(2)
    first_node = cluster.nodes['node1']

    # set up byteman
    first_node.byteman_port = '8100'
    first_node.import_config_files()

    cluster.start(wait_other_notice=True)

    # kill stream to node3 in the middle of streaming to let it fail;
    # the byteman script to submit depends on the cluster version
    if cluster.version() < '4.0':
        failure_script = './byteman/pre4.0/stream_failure.btm'
    else:
        failure_script = './byteman/4.0/stream_failure.btm'
    first_node.byteman_submit([failure_script])

    write_args = ['write', 'n=1K', 'no-warmup', 'cl=TWO',
                  '-schema', 'replication(factor=2)',
                  '-rate', 'threads=50']
    first_node.stress(write_args)
    cluster.flush()

    # start bootstrapping node3 and wait for streaming
    joining_node = new_node(cluster)
    joining_node.start(wait_other_notice=False, wait_for_binary_proto=True)

    # wait for node3 ready to query
    joining_node.watch_log_for("Starting listening for CQL clients")
    log_mark = joining_node.mark_log()

    # check if node3 is still in bootstrap mode
    retry_till_success(assert_bootstrap_state,
                       tester=self,
                       node=joining_node,
                       expected_bootstrap_state='IN_PROGRESS',
                       timeout=120)

    # invoke nodetool bootstrap to resume bootstrapping
    joining_node.nodetool('bootstrap resume')
    joining_node.watch_log_for("Resume complete", from_mark=log_mark)
    assert_bootstrap_state(self, joining_node, 'COMPLETED')

    # cleanup to guarantee each node will only have sstables of its ranges
    cluster.cleanup()

    logger.debug("Check data is present")
    # Let's check stream bootstrap completely transferred data
    read_args = ['read', 'n=1k', 'no-warmup',
                 '-schema', 'replication(factor=2)',
                 '-rate', 'threads=8']
    stress_out, _stress_err, _ = joining_node.stress(read_args)

    if stress_out is not None:
        assert "FAILURE" not in stress_out
def test_read_repair_chance(self):
    """
    @jira_ticket CASSANDRA-12368
    """
    chance_on_stmt = "ALTER TABLE alter_rf_test.t1 WITH read_repair_chance = 1;"
    chance_off_stmt = "ALTER TABLE alter_rf_test.t1 WITH read_repair_chance = 0;"

    # schema_session is only used for setup and schema modification. The actual data queries
    # are done directly on each node, using an exclusive connection and CL.ONE
    schema_session = self.patient_cql_connection(self.cluster.nodelist()[0])
    initial_replica, non_replicas = self.do_initial_setup(schema_session)

    # To ensure read repairs are triggered, set the table property to 100%
    logger.debug("Setting table read repair chance to 1")
    schema_session.execute(chance_on_stmt)

    # Query at CL.ONE on a node which was *not* the initial replica. With read_repair_chance == 1
    # this should trigger a read repair and propagate the data to all 3 nodes.
    # Note: the read repair result contains only the selected column (a), not all columns, so
    # 'b' is not expected to be fully repaired afterwards.
    trigger_node = non_replicas[0]
    logger.debug("Executing 'SELECT a...' on non-initial replica to trigger read repair " + trigger_node.name)
    exclusive_session = self.patient_exclusive_cql_connection(trigger_node)
    select_a = SimpleStatement("SELECT a FROM alter_rf_test.t1 WHERE k=1",
                               consistency_level=ConsistencyLevel.ONE)
    exclusive_session.execute(select_a)

    # Query each replica individually to ensure that read repair was triggered. Only the initial
    # replica should have data for both 'a' and 'b'. If the cluster is on > 3.4, the read repair
    # should only have affected the selected column (CASSANDRA-10655), so the other two replicas
    # should only have that data.
    # Note: read_repair_chance is temporarily set to 0 while this check is performed.
    logger.debug("Setting table read repair chance to 0 while we verify each replica's data")
    schema_session.execute(chance_off_stmt)

    # The read repair is run in the background, so we spin while checking that the repair has completed
    value_skipping_disabled = bool(self.cluster.version() < '3.4')
    retry_till_success(self.check_data_on_each_replica,
                       expect_fully_repaired=value_skipping_disabled,
                       initial_replica=initial_replica,
                       timeout=30,
                       bypassed_exception=NotRepairedException)

    # Re-enable global read repair and perform another query on a non-replica. This time the
    # query selects all columns so the value for 'b' is expected to be repaired as well.
    logger.debug("Setting table read repair chance to 1")
    schema_session.execute(chance_on_stmt)

    trigger_node = non_replicas[0]
    logger.debug("Executing 'SELECT *...' on non-initial replica to trigger read repair " + trigger_node.name)
    exclusive_session = self.patient_exclusive_cql_connection(trigger_node)
    select_all = SimpleStatement("SELECT * FROM alter_rf_test.t1 WHERE k=1",
                                 consistency_level=ConsistencyLevel.ONE)
    exclusive_session.execute(select_all)

    # Query each replica again to ensure that the second read repair was triggered. This time the
    # data is expected to be fully repaired (both 'a' and 'b') because the query was 'SELECT *...'
    # As before, read repair is turned off before doing this check.
    logger.debug("Setting table read repair chance to 0 while we verify each replica's data")
    schema_session.execute(chance_off_stmt)
    retry_till_success(self.check_data_on_each_replica,
                       expect_fully_repaired=True,
                       initial_replica=initial_replica,
                       timeout=30,
                       bypassed_exception=NotRepairedException)
def insert_columns(self, num_rows=10, consistency_level='QUORUM'):
    """
    Insert column 'col_0' with value 'val_0' into rows 'row_0' .. 'row_<num_rows - 1>'.

    Returns self.
    """
    parent = self.Cassandra.ColumnParent(column_family=self.cf_name)

    for index in range(num_rows):
        row_key = 'row_%d' % index
        # the column timestamp is the current time in milliseconds
        now_ms = int(time.time() * 1000)
        column = self.Cassandra.Column(name='col_0', value='val_0', timestamp=now_ms)
        retry_till_success(self.client.insert,
                           key=row_key,
                           column_parent=parent,
                           column=column,
                           consistency_level=self._translate_cl(consistency_level),
                           timeout=30)

    return self
def create_cf(session, name, key_type="varchar", speculative_retry=None, read_repair=None, compression=None,
              gc_grace=None, columns=None, validation="UTF8Type", compact_storage=False,
              compaction_strategy='SizeTieredCompactionStrategy', primary_key=None, clustering=None):
    """
    Build and execute a CREATE COLUMNFAMILY statement, then check that the table can be read.

    :param session: driver session used to execute the statements
    :param name: table name, interpolated verbatim into the statements
    :param key_type: CQL type of the `key` column
    :param speculative_retry: value for the speculative_retry option; omitted when None
    :param read_repair: value used for both read_repair_chance and dclocal_read_repair_chance;
        omitted when None
    :param compression: compressor name prefix ('<compression>Compressor'); when None an
        empty compression map is written explicitly
    :param gc_grace: value for gc_grace_seconds; omitted when None
    :param columns: mapping of extra column name to CQL type. When None or empty the default
        schema (key, c varchar, v varchar, PRIMARY KEY(key, c)) is used
    :param validation: not used by this function; kept so existing callers keep working
    :param compact_storage: append COMPACT STORAGE when true
    :param compaction_strategy: compaction class name; '' falls back to
        SizeTieredCompactionStrategy
    :param primary_key: PRIMARY KEY column list; only consulted when extra columns are given
    :param clustering: CLUSTERING ORDER BY expression; omitted when falsy
    """
    if compaction_strategy == '':
        strategy_class = 'SizeTieredCompactionStrategy'
    else:
        strategy_class = compaction_strategy

    # Each extra column is rendered as ", <name> <type>" so the result can be appended
    # directly after the key column definition.
    additional_columns = ""
    if columns is not None:
        additional_columns = "".join(", {} {}".format(k, v) for k, v in columns.items())

    if not additional_columns:
        query = 'CREATE COLUMNFAMILY %s (key %s, c varchar, v varchar, PRIMARY KEY(key, c)) WITH comment=\'test cf\'' % (name, key_type)
    elif primary_key:
        query = 'CREATE COLUMNFAMILY %s (key %s%s, PRIMARY KEY(%s)) WITH comment=\'test cf\'' % (name, key_type, additional_columns, primary_key)
    else:
        query = 'CREATE COLUMNFAMILY %s (key %s PRIMARY KEY%s) WITH comment=\'test cf\'' % (name, key_type, additional_columns)

    # Table options, in the order they appear in the statement; each is joined with ' AND '.
    options = ["compaction = {'class': '%s', 'enabled': 'true'}" % strategy_class]

    if clustering:
        options.append('CLUSTERING ORDER BY (%s)' % (clustering,))

    if compression is not None:
        options.append('compression = { \'sstable_compression\': \'%sCompressor\' }' % (compression,))
    else:
        # if a compression option is omitted, C* will default to lz4 compression
        options.append('compression = {}')

    if read_repair is not None:
        options.append('read_repair_chance=%f AND dclocal_read_repair_chance=%f' % (read_repair, read_repair))
    if gc_grace is not None:
        options.append('gc_grace_seconds=%d' % (gc_grace,))
    if speculative_retry is not None:
        options.append('speculative_retry=\'%s\'' % (speculative_retry,))

    if compact_storage:
        options.append('COMPACT STORAGE')

    query = ' AND '.join([query] + options)

    try:
        retry_till_success(session.execute, query=query, timeout=120,
                           bypassed_exception=cassandra.OperationTimedOut)
    except cassandra.AlreadyExists:
        # warning() replaces the deprecated warn() alias; the emitted text is unchanged
        logger.warning("AlreadyExists executing create cf query '%s'", query)

    session.cluster.control_connection.wait_for_schema_agreement(wait_time=120)
    # Going to ignore OperationTimedOut from create CF, so need to validate it was indeed created
    session.execute('SELECT * FROM %s LIMIT 1' % name)
def test_resumable_bootstrap(self):
    """
    Test resuming bootstrap after data streaming failure
    """
    cluster = self.cluster
    cluster.populate(2)
    first_node = cluster.nodes['node1']

    # set up byteman
    first_node.byteman_port = '8100'
    first_node.import_config_files()

    cluster.start(wait_other_notice=True)

    # kill stream to node3 in the middle of streaming to let it fail;
    # the byteman script to submit depends on the cluster version
    if cluster.version() < '4.0':
        failure_script = './byteman/pre4.0/stream_failure.btm'
    else:
        failure_script = './byteman/4.0/stream_failure.btm'
    first_node.byteman_submit([failure_script])

    write_args = ['write', 'n=1K', 'no-warmup', 'cl=TWO',
                  '-schema', 'replication(factor=2)',
                  '-rate', 'threads=50']
    first_node.stress(write_args)
    cluster.flush()

    # start bootstrapping node3 and wait for streaming
    joining_node = new_node(cluster)
    joining_node.start(wait_other_notice=False, wait_for_binary_proto=True)

    # wait for node3 ready to query
    joining_node.watch_log_for("Starting listening for CQL clients")
    log_mark = joining_node.mark_log()

    # check if node3 is still in bootstrap mode
    retry_till_success(assert_bootstrap_state,
                       tester=self,
                       node=joining_node,
                       expected_bootstrap_state='IN_PROGRESS',
                       timeout=120)

    # invoke nodetool bootstrap to resume bootstrapping
    joining_node.nodetool('bootstrap resume')
    joining_node.watch_log_for("Resume complete", from_mark=log_mark)
    assert_bootstrap_state(self, joining_node, 'COMPLETED')

    # cleanup to guarantee each node will only have sstables of its ranges
    cluster.cleanup()

    logger.debug("Check data is present")
    # Let's check stream bootstrap completely transferred data
    read_args = ['read', 'n=1k', 'no-warmup',
                 '-schema', 'replication(factor=2)',
                 '-rate', 'threads=8']
    stress_out, _stress_err, _ = joining_node.stress(read_args)

    if stress_out is not None:
        assert "FAILURE" not in stress_out
def create_ks(self, replication_factor=1):
    """
    Add keyspace `self.ks_name` with SimpleStrategy, wait for agreement and
    select it via use_ks().

    With cassandra_interface '07' the replication factor is passed as an int
    keyword; otherwise it is passed as a string inside strategy_options.

    Returns self.
    """
    if self.cassandra_interface == '07':
        replication_kwargs = {'replication_factor': int(replication_factor)}
    else:
        replication_kwargs = {'strategy_options': {'replication_factor': str(replication_factor)}}

    keyspace_def = self.Cassandra.KsDef(
        name=self.ks_name,
        strategy_class='org.apache.cassandra.locator.SimpleStrategy',
        cf_defs=[],
        **replication_kwargs
    )

    retry_till_success(self.client.system_add_keyspace, keyspace_def, timeout=30)

    # half-second pause on either side of the agreement check
    time.sleep(0.5)
    retry_till_success(self.wait_for_agreement, timeout=10)
    time.sleep(0.5)

    self.use_ks()
    return self
def create_ks(session, name, rf):
    """
    Create keyspace `name` through `session`, then switch the session to it.

    :param session: driver session used to execute the CQL statements
    :param name: keyspace name, interpolated verbatim into the statements
    :param rf: an int selects SimpleStrategy with that replication factor;
        any other value is treated as a non-empty mapping of datacenter name
        to replication factor and selects NetworkTopologyStrategy
    :raises AssertionError: if `rf` is an empty mapping
    """
    query = 'CREATE KEYSPACE %s WITH replication={%s}'
    if isinstance(rf, int):
        # we assume simpleStrategy
        query = query % (name, "'class':'SimpleStrategy', 'replication_factor':%d" % rf)
    else:
        # An empty mapping would render "'class':'NetworkTopologyStrategy', " with a
        # dangling comma, so reject it before anything is sent to the cluster.
        assert len(rf) > 0, "At least one datacenter/rf pair is needed"
        # we assume networkTopologyStrategy
        options = ', '.join("'%s':%d" % (d, r) for d, r in rf.items())
        query = query % (name, "'class':'NetworkTopologyStrategy', %s" % options)

    try:
        retry_till_success(session.execute, query=query, timeout=120,
                           bypassed_exception=cassandra.OperationTimedOut)
    except cassandra.AlreadyExists:
        # warning() replaces the deprecated warn() alias; the emitted text is unchanged
        logger.warning("AlreadyExists executing create ks query '%s'", query)

    session.cluster.control_connection.wait_for_schema_agreement(wait_time=120)
    # Also validates it was indeed created even though we ignored OperationTimedOut
    # Might happen some of the time because CircleCI disk IO is unreliable and hangs randomly
    session.execute('USE {}'.format(name))
def query_columns(self, num_rows=10, consistency_level='QUORUM'):
    """
    Check that the values inserted in insert_columns() are present

    Reads column 'col_0' of rows 'row_0' .. 'row_<num_rows - 1>' and asserts
    each value equals 'val_0'. Returns self.
    """
    for index in range(num_rows):
        row_key = 'row_%d' % index
        column_path = self.Cassandra.ColumnPath(column_family=self.cf_name, column='col_0')
        fetched = retry_till_success(self.client.get,
                                     key=row_key,
                                     column_path=column_path,
                                     consistency_level=self._translate_cl(consistency_level),
                                     timeout=30)
        stored_value = fetched.column.value
        assert stored_value == 'val_0'

    return self
def use_ks(self):
    """
    Set the client's keyspace to `self.ks_name` via retry_till_success.

    Returns self.
    """
    set_keyspace = self.client.set_keyspace
    keyspace = self.ks_name
    retry_till_success(set_keyspace, keyspace, timeout=30)
    return self