def clean(self): """ Caution! Empties database directories and commit logs for all nodes in db. :return: """ report( 'Cleaning data and commitlog directories for cluster {%s}' % (self.name), 'warning') cmd = 'sudo service cassandra stop' for ip in self.ips: rpc(ip, cmd, self.username, self.password, self.key) time.sleep(10) cmd_list = [ 'rm -f ~/.__jmxcmd*', 'sudo rm -rf %s/*' % self.data_dir, 'sudo rm -rf %s/*' % self.commitlog_dir, 'sudo service cassandra start', ] for ip in self.ips[:1]: for cmd in cmd_list: rpc(ip, cmd, self.username, self.password, self.key) time.sleep(30) for ip in self.ips[1:]: for cmd in cmd_list: rpc(ip, cmd, self.username, self.password, self.key) time.sleep(30) report('Status cluster {%s} \n %s' % (self.name, self.status()))
def clean(self): """ Caution! Empties database directories and commit logs for all nodes in db. :return: """ report('Cleaning data and commitlog directories for cluster {%s}' % (self.name), 'warning') cmd = 'sudo service cassandra stop' for ip in self.ips: rpc(ip, cmd, self.username, self.password, self.key) time.sleep(10) cmd_list = [ 'rm -f ~/.__jmxcmd*', 'sudo rm -rf %s/*' % self.data_dir, 'sudo rm -rf %s/*' % self.commitlog_dir, 'sudo service cassandra start', ] for ip in self.ips[:1]: for cmd in cmd_list: rpc(ip, cmd, self.username, self.password, self.key) time.sleep(30) for ip in self.ips[1:]: for cmd in cmd_list: rpc(ip, cmd, self.username, self.password, self.key) time.sleep(30) report('Status cluster {%s} \n %s' % (self.name, self.status()))
def mass_worker():
    record_count_per_node = int(record_count / len(population_ips))
    node_start_record = start_record
    auth_string = ''
    if self.db_user:
        auth_string = '--db_user %s --db_pass %s' % (self.db_user, self.db_pass)
    for ip in population_ips:
        report('Setting mass population on cluster {%s} node {%s}.' % (self.name, ip),
               'warning')
        # Clean log first.
        cmd = 'sudo rm /tmp/mass_population.log'
        rpc(ip, cmd, self.username, self.password, self.key)
        cmd = ('(python ~/.geppetto/data_population.py '
               '%s %s %s '
               'insert '
               '-r %s '
               '-s %s '
               '-n %s '
               '-t %s '
               '--replication %s '
               ') > /tmp/mass_population.log &' %
               (ip, schema_file, auth_string, record_size, node_start_record,
                record_count_per_node, mgmt_object, replication))
        node_start_record += record_count_per_node
        rpc(ip, cmd, self.username, self.password, self.key,
            no_tty=True)  # No tty so we can run as bg & disconnect.
    if not async:
        cmd = 'ps -ef | grep geppetto | grep -v grep | wc -l'
        cmd2 = 'tail -1 /tmp/mass_population.log'
        while True:
            try:
                report('Populating ...')
                processes_running = 0
                for ip in population_ips:
                    out, err = rpc(ip, cmd, self.username, self.password,
                                   self.key, suppress_output=True)
                    out2, err2 = rpc(ip, cmd2, self.username, self.password,
                                     self.key, suppress_output=True)
                    report('<%s> %s' % (ip, out2))
                    try:
                        processes_running += int(out)
                    except Exception as e:
                        report(e, 'critical')
                        raise
                if processes_running == 0:
                    break
            except Exception as e:
                report(e, 'critical')
                break
            time.sleep(15)
def query(self, query, no_pause=False, suppress_reporting=False, retries=5):
    """ Performs a cql query on the database. """
    assert (retries >= 0)
    # Format the query and make sure we have a trailing ';'.
    query = query.strip(' ')
    if not query:
        return '', False
    if query[-1] != ';':
        query += ' ;'
    cluster = CassandraTestingCluster(self.ips, self.db_user, self.db_pass)
    if not cluster.connect():
        report('Error cannot connect to Cassandra cluster', 'critical')
        if not no_pause:
            response = pause_execution_for_input(
                'Error cannot connect to Cassandra cluster.')
            if response == 'r':
                result, success = self.query(query)
            else:
                return '', False
        else:
            return '', False
    else:
        # Persistent retry, then prompt the user for action if still failing.
        i = 0
        wait_times = [0, 5, 15, 60, 60]
        result, success = '', False
        while i <= retries:
            if not suppress_reporting:
                report(query)
            result, success = cluster.runQuery(query)
            if success or i >= retries:
                break
            if not suppress_reporting:
                report(result, 'warning')
                report(success, 'warning')
            retry_time = wait_times[min(i, len(wait_times) - 1)]
            if not suppress_reporting:
                report('Retrying in %s seconds' % retry_time)
            time.sleep(retry_time)
            i += 1
        # If retries did not produce a successful query, prompt the user for input
        # if we allow pausing.
        if not success and not no_pause:
            response = pause_execution_for_input('Error')
            if response == 'r':  # 'retry'.
                result, success = self.query(query, retries=0)  # Only try once on manual retries.
    cluster.disconnect()
    return result, success
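# The retry loop above clamps its index into wait_times, so the last entry (60 s)
# simply repeats once the schedule is exhausted. A minimal, self-contained sketch of
# that back-off pattern (the function and parameter names here are illustrative, not
# part of this module):
import time

def retry_with_backoff(flaky_op, retries=5, wait_times=(0, 5, 15, 60, 60)):
    """Call flaky_op() until it reports success or `retries` attempts are used up."""
    result, success = '', False
    for i in range(retries + 1):
        result, success = flaky_op()
        if success or i == retries:
            break
        time.sleep(wait_times[min(i, len(wait_times) - 1)])
    return result, success

# e.g. retry_with_backoff(lambda: ('ok', True)) -> ('ok', True) on the first attempt.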
def do_update(cluster, target_db, schema_file, record_size, start_record,
              batch_size, insert_percentage, delay, batch_count,
              replication_factor=3, suppress_output=False):
    record_size = int(record_size)
    start_record = int(start_record)
    batch_size = int(batch_size)
    insert_percentage = int(insert_percentage)
    delay = float(delay) / 1000
    batch_count = int(batch_count)
    nr_batch = 0
    random.seed(1)
    ks_name, cf_name = getKSCFNames(target_db)
    if ks_name is None or cf_name is None:
        return
    createKeyspace(cluster, ks_name, replication_factor=replication_factor)
    createTable(cluster, ks_name, cf_name, schema_file)
    ts = TestSchema(cluster, ks_name, cf_name)
    ts.getSchema()
    while True:
        if ts.counter_table:
            batch = BatchStatement(batch_type=BatchType.COUNTER)
        else:
            batch = BatchStatement()
        stat_str = ''
        for i in range(batch_size):
            if start_record <= 0 or random.randrange(100) <= insert_percentage:
                # Insert case.
                record_num = start_record
                query = ts.getInsertQuerywithRandomData(record_num, record_size)
                stat_str += 'I(%d) ' % record_num
            else:
                record_num = random.randrange(0, start_record)
                if random.randrange(100) <= 70:
                    # 70% update.
                    if not ts.counter_table:
                        query = ts.getUpdateQuery(record_num)
                    else:
                        query = ts.getInsertQuerywithRandomData(record_num, 0)
                    stat_str += 'U(%d) ' % record_num
                else:
                    # 30% deletion.
                    query = ts.getDeleteQuery(record_num)
                    stat_str += 'D(%d) ' % record_num
            if not suppress_output:
                report(stat_str)
            batch.add(query)
            start_record += 1
        cluster.session.execute(batch)
        nr_batch += 1
        if batch_count >= 0 and nr_batch == batch_count:
            break
        time.sleep(delay)
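# The branching above makes roughly insert_percentage% of operations inserts; the
# remainder splits about 70/30 between updates and deletes of existing records
# (note the inclusive <= comparisons against randrange(100)). A quick,
# self-contained check of that mix; the numbers are illustrative, not measured from
# the real workload:
import collections
import random

def simulate_operation_mix(trials=100000, insert_percentage=20):
    counts = collections.Counter()
    for _ in range(trials):
        if random.randrange(100) <= insert_percentage:
            counts['insert'] += 1
        elif random.randrange(100) <= 70:
            counts['update'] += 1
        else:
            counts['delete'] += 1
    return {op: round(100.0 * n / trials, 1) for op, n in counts.items()}

# e.g. simulate_operation_mix() -> roughly {'insert': 21.0, 'update': 56.1, 'delete': 22.9}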
def single_random_db_failure(self, wait_time_min=0, run_time_min=10,
                             time_length_of_failure=5, max_failure_repeats=1,
                             randomness_time_injection=90):
    """
    Shuts down a random cassandra node for a given time with some randomness
    thrown in for timing.
    """
    assert (wait_time_min >= 0)
    assert (run_time_min >= 0.1)
    assert (time_length_of_failure >= 0.2)
    assert (max_failure_repeats > 0)
    self.cassandra.status()
    start_time = time.time()
    time.sleep(60 * wait_time_min)
    pick = pick_x_different_num(1, 0, len(self.cassandra.ips) - 1)[0]
    for _ in xrange(max_failure_repeats):
        time.sleep(random.randint(0, randomness_time_injection))  # Randomize time that db fails.
        currently_down = collections.deque()
        try:
            # Bring down the db node for some time.
            currently_down.append(pick)
            add_test_note('%s' % self.cassandra.ips[pick])
            self.cassandra.db_stop(self.cassandra.ips[pick])
            self.cassandra.status()  # Let's see the db state.
            time.sleep(60 * random.randint(time_length_of_failure * 3 / 4,
                                           time_length_of_failure))
            # Bring db node back up.
            self.cassandra.db_start(self.cassandra.ips[pick])
            currently_down.popleft()
            time.sleep(20)
            self.cassandra.status()
        except (KeyboardInterrupt, SystemExit) as e:
            # Do some clean up (restore db nodes) and some reporting, then re-raise exception.
            report('Exit detected ... restoring db state', 'critical')
            for i in currently_down:
                self.cassandra.db_start(self.cassandra.ips[i])
            time.sleep(20)
            self.cassandra.status()  # Logs will capture output.
            global global_vars
            global_vars['test_status'] = 'Aborted'
            add_test_note(e)
            raise e
        # Exit failure loop if we've reached max time.
        if (time.time() + wait_time_min * 60 - start_time >= run_time_min * 60):
            break
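# pick_x_different_num() is an external helper (defined elsewhere) that the failure
# routines use to choose distinct node indices from an inclusive range. It is
# presumably equivalent to the sketch below; this is an assumption for illustration,
# not the real implementation:
import random

def pick_x_different_num_sketch(x, low, high):
    """Return x distinct random integers drawn from the inclusive range [low, high]."""
    return random.sample(range(low, high + 1), x)

# e.g. pick_x_different_num_sketch(1, 0, 4) might return [3]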
def delta_worker():
    # Loop every LOOP_MIN minutes and reinitialize the delta population.
    while self.do_delta_population:
        # Stop previous populations, in case they are still going.
        rpc(workload_ip, cmd1, self.username, self.password, self.key)
        time.sleep(2)
        # Start a new batch of populations.
        rpc(workload_ip, cmd2, self.username, self.password, self.key,
            no_tty=True)  # No tty so we can run as bg & disconnect.
        report('{%s} delta population set on node %s.' % (mgmt_object, workload_ip))
        # Sleep LOOP_MIN minutes to let the delta complete and settle, then cycle
        # again. (A more dependable way.)
        time.sleep(60 * LOOP_MIN)
def _deliver_payload(self): """ Delivers population scripts and other goodies to the cassandra source cluster. Most stored in ~/.geppetto/ """ common_script_path = '%s/common/common.py' % ( common.common.global_vars['geppetto_install_dir']) population_script_path = '%s/db_utils/cassandra_utils/data_population.py' % ( common.common.global_vars['geppetto_install_dir']) schema_folder_path = '%s/db_utils/cassandra_utils/schema' % ( common.common.global_vars['geppetto_install_dir']) for ip in self.ips: report('Updating Geppetto payload on {%s}.' % ip) to_path = '%s@%s:~/.geppetto/' % (self.username, ip) # rpc(ip, 'rm -rf ~/.geppetto', self.username, self.password, self.key, suppress_output=True) rpc(ip, 'mkdir -p ~/.geppetto/common', self.username, self.password, self.key, suppress_output=True) rpc(ip, 'touch ~/.geppetto/common/__init__.py', self.username, self.password, self.key, suppress_output=True) scp(common_script_path, '%s/common/' % to_path, self.password, self.key, suppress_output=True) scp(population_script_path, to_path, self.password, self.key, suppress_output=True) scp(schema_folder_path, to_path, self.password, self.key, is_dir=True, suppress_output=True) self.payload = True return True
def _deliver_payload(self): """ Delivers population scripts and other goodies to the cassandra source cluster. Most stored in ~/.geppetto/ """ common_script_path = '%s/common/common.py' % (common.common.global_vars['geppetto_install_dir']) population_script_path = '%s/db_utils/cassandra_utils/data_population.py' % (common.common.global_vars['geppetto_install_dir']) schema_folder_path = '%s/db_utils/cassandra_utils/schema' % (common.common.global_vars['geppetto_install_dir']) for ip in self.ips: report('Updating Geppetto payload on {%s}.' % ip) to_path = '%s@%s:~/.geppetto/' % (self.username, ip) # rpc(ip, 'rm -rf ~/.geppetto', self.username, self.password, self.key, suppress_output=True) rpc(ip, 'mkdir -p ~/.geppetto/common', self.username, self.password, self.key, suppress_output=True) rpc(ip, 'touch ~/.geppetto/common/__init__.py', self.username, self.password, self.key, suppress_output=True) scp(common_script_path, '%s/common/' % to_path, self.password, self.key, suppress_output=True) scp(population_script_path, to_path, self.password, self.key, suppress_output=True) scp(schema_folder_path, to_path, self.password, self.key, is_dir=True, suppress_output=True) self.payload = True return True
def main():
    args = parse_args()

    # Import the test file.
    try:
        test_file_name = args.test_file
        test_file = test_file_name[:-3].replace('/', '.')
        mod = __import__(test_file, fromlist=['TestRun'])
        TestRun = getattr(mod, 'TestRun')
    except:
        report('Unable to load TestRun() from file: %s' % args.test_file,
               'critical', no_date=True)
        traceback.print_exc()
        sys.exit(1)

    # Import the config file.
    try:
        config_file_name = args.config
        config_file = config_file_name[:-3].replace('/', '.')
        mod = __import__(config_file, fromlist=['CONFIG_DICT'])
        config_dict = getattr(mod, 'CONFIG_DICT')
    except:
        report("Unable to import the config file: %s" % args.config,
               'critical', no_date=True)
        traceback.print_exc()
        sys.exit(1)

    do_welcome()

    class GeppettoExecutableTest(TestRun):
        def __init__(self):
            Geppetto.__init__(self)
            TestRun.set_init_params(self, config_dict, args, test_file_name,
                                    config_file_name)

        @capture_exception_and_abort
        def run(self):
            TestRun.run(self)

    g = GeppettoExecutableTest()
    g.run()
def main():
    args = parse_args()
    if not os.path.exists(args.schema_file):
        sys.exit(-1)
    if args.db_user:
        cluster = CassandraTestingCluster(args.ip_list, db_user=args.db_user,
                                          db_pass=args.db_pass)
    else:
        cluster = CassandraTestingCluster(args.ip_list)
    if not cluster.connect():
        report('Cannot connect to cassandra cluster.', 'error')
        sys.exit(-1)
    try:
        if args.command == 'insert':
            do_insert(cluster, args.target_db, args.schema_file, args.record_size,
                      args.start_record, args.record_count, args.uuid4,
                      replication_factor=args.replication)
        elif args.command == 'update':
            do_update(cluster, args.target_db, args.schema_file, args.record_size,
                      args.start_record, args.batch_size, args.insert_percentage,
                      args.delay, args.batch_count,
                      replication_factor=args.replication)
        else:
            report('Unrecognized command.\n')
    except Exception as e:
        report('%s\n' % e)
        sys.exit()
    cluster.disconnect()
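# parse_args() is defined elsewhere and not shown in this section. Based on the
# attributes used above and on the command line assembled in mass_worker(), it
# presumably resembles the argparse sketch below; the flag names, defaults, and the
# options omitted here (e.g. the update-mode and uuid4 flags) are assumptions for
# illustration only:
import argparse

def parse_args_sketch():
    parser = argparse.ArgumentParser(description='Cassandra data population.')
    parser.add_argument('ip_list', help='Node IP(s) to connect to.')
    parser.add_argument('schema_file', help='Path to the table schema file.')
    parser.add_argument('command', choices=['insert', 'update'])
    parser.add_argument('--db_user', default=None)
    parser.add_argument('--db_pass', default=None)
    parser.add_argument('-r', '--record_size', default=1024)
    parser.add_argument('-s', '--start_record', default=0)
    parser.add_argument('-n', '--record_count', default=1000)
    parser.add_argument('-t', '--target_db', help='keyspace.table to populate.')
    parser.add_argument('--replication', default=3)
    return parser.parse_args()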
def insert(self, mgmt_object, schema_file, record_size, start_record,
           record_count, uuid4=None, suppress_reporting=False, cluster=None):
    """ Does batch inserts into db from geppetto node. """
    # Only connect (and later disconnect) if the caller did not hand us a cluster.
    local_cluster = cluster is None
    if local_cluster:
        cluster = CassandraTestingCluster(self.ips, self.db_user, self.db_pass)
        if not cluster.connect():
            report('ERROR: cannot connect to Cassandra cluster', 'critical')
            sys.exit(-1)
    if uuid4:
        if not suppress_reporting:
            report('%s do_insert(%s, %s, %s, %s, %s, %s, %s)' %
                   (self.name, 'cluster', mgmt_object, schema_file, record_size,
                    start_record, record_count, uuid4))
        do_insert(cluster, mgmt_object, schema_file, record_size, start_record,
                  record_count, uuid4, suppress_output=suppress_reporting)
    else:
        if not suppress_reporting:
            report('%s do_insert(%s, %s, %s, %s, %s, %s)' %
                   (self.name, 'cluster', mgmt_object, schema_file, record_size,
                    start_record, record_count))
        do_insert(cluster, mgmt_object, schema_file, record_size, start_record,
                  record_count, suppress_output=suppress_reporting)
    if local_cluster:
        cluster.disconnect()
def run(self): report("Hello World!")
def random_node_failures(self, wait_time_min=0, run_time_min=10, max_num_failed=1,
                         max_failure_repeats=1):
    """
    Simulates a node failure via rebooting a node.
    # TODO: (Aaron) currently assumes that node reboots to working condition.
    (need mounts in fstab and firewalls down and cass start as service.)
    """
    assert (max_num_failed > 0)
    report(self.cassandra.status())
    start_time = time.time()
    time.sleep(60 * wait_time_min)
    for _ in xrange(max_failure_repeats):
        picks = pick_x_different_num(max_num_failed, 0,
                                     len(self.cassandra.ips) - 1)
        currently_down = collections.deque()
        try:
            # First bring down those db's with some randomness thrown in.
            for i in xrange(len(picks)):
                currently_down.append(i)
                self.cassandra.node_reboot(
                    self.cassandra.ips[picks[i]]
                )  # TODO: (Aaron) Let's do this with a ifdown etc like above.
                time.sleep(random.randint(0, 30))  # TODO: (Aaron) Can make this more sophisticated.
            # Let's stay advised with what's down.
            report(self.cassandra.status())
            # This is for future node failure implementation when we have a way to
            # reboot like wake on lan.
            # Let them be down for a random period ... but long enough to reboot.
            time.sleep(random.randint(60, 60 * 2))  # TODO: (Aaron) Can make this more sophisticated.
            # Now bring back up, with some randomness thrown in.
            for i in xrange(len(picks)):
                self.cassandra.node_restore(self.cassandra.ips[picks[i]])
                currently_down.popleft()
                time.sleep(random.randint(0, 30))  # TODO: (Aaron) Can make this more sophisticated.
            time.sleep(60)  # Need to let Nodes rejoin cluster properly.
            # Let's stay advised with what's up again.
            report(self.cassandra.status())
        except (KeyboardInterrupt, SystemExit) as e:
            # Do some clean up (restore db nodes) and some reporting, then re-raise exception.
            report('Exit detected ... restoring db state', 'critical')
            for i in currently_down:
                self.cassandra.db_start(self.cassandra.ips[picks[i]])
            self.cassandra.status()  # Logs will capture output.
            global global_vars
            global_vars['test_status'] = 'Aborted'
            add_test_note(e)
            raise e
        # Exit failure loop if we've reached max time.
        if (time.time() + wait_time_min * 60 - start_time >= 60 * run_time_min):
            break
def random_db_failures(self, wait_time_min=0, run_time_min=10, max_num_failed=1,
                       max_failure_repeats=1):
    """
    Shuts down cassandra nodes for a given time, in a random pattern within
    specifications.
    """
    assert (max_num_failed > 0)
    assert (run_time_min >= 0.1)
    assert (max_failure_repeats > 0)
    self.cassandra.status()  # Logs will capture output.
    start_time = time.time()
    time.sleep(60 * wait_time_min)
    for _ in xrange(max_failure_repeats):
        picks = pick_x_different_num(max_num_failed, 0,
                                     len(self.cassandra.ips) - 1)
        currently_down = collections.deque()
        try:
            # First bring down those db's with some randomness thrown in.
            for i in xrange(len(picks)):
                currently_down.append(i)
                self.cassandra.db_stop(self.cassandra.ips[picks[i]])
                time.sleep(random.randint(0, 60))  # TODO: (Aaron) Can make this more sophisticated.
            # Let them be down for a random period.
            time.sleep(random.randint(0, 60 * 2))  # TODO: (Aaron) Can make this more sophisticated.
            # Let's stay advised with what's down.
            self.cassandra.status()  # Logs will capture output.
            # Now bring back up, with some randomness thrown in.
            for i in xrange(len(picks)):
                self.cassandra.db_start(self.cassandra.ips[picks[i]])
                currently_down.popleft()
                time.sleep(random.randint(0, 30))  # TODO: (Aaron) Can make this more sophisticated.
            time.sleep(20)  # Need to let Nodes rejoin cluster properly.
            # Let's stay advised with what's up again.
            self.cassandra.status()  # Logs will capture output.
            # Sleep random time before next cycle.
            time.sleep(0)
        except (KeyboardInterrupt, SystemExit) as e:
            # Do some clean up (restore db nodes) and some reporting, then re-raise exception.
            report('Exit detected ... restoring db state', 'critical')
            for i in currently_down:
                self.cassandra.db_start(self.cassandra.ips[picks[i]])
            time.sleep(20)
            self.cassandra.status()  # Logs will capture output.
            global global_vars
            global_vars['test_status'] = 'Aborted'
            add_test_note(e)
            raise e
        # Exit failure loop if we've reached max time.
        if (time.time() + wait_time_min * 60 - start_time >= run_time_min * 60):
            break
def single_random_node_failure(self, wait_time_min=0, run_time_min=10,
                               time_length_of_failure=5, max_failure_repeats=1,
                               randomness_time_injection=90):
    """
    Shuts down a random cassandra node for a given time with some randomness
    thrown in for timing.
    """
    assert (wait_time_min >= 0)
    assert (run_time_min >= 0.1)
    assert (time_length_of_failure >= 0.2)
    assert (max_failure_repeats > 0)
    assert (randomness_time_injection >= 0)
    self.cassandra.status()
    start_time = time.time()
    time.sleep(60 * wait_time_min)
    pick = pick_x_different_num(1, 0, len(self.cassandra.ips) - 1)[0]
    for _ in xrange(max_failure_repeats):
        time.sleep(random.randint(0, randomness_time_injection))  # Randomize time that db fails.
        currently_down = collections.deque()
        try:
            # Bring down the db node for some time.
            currently_down.append(pick)
            try:
                note = ''
                add_test_note(note)
                rpc(self.cassandra.ips[pick],
                    '(nohup sudo ifdown eth0; sleep %s ; sudo ifup eth0 ; ) > /tmp/datos_failure.log &'
                    % (time_length_of_failure * 60),
                    self.cassandra.username, self.cassandra.password,
                    self.cassandra.key)
            except:
                report('Could not connect to node {%s}.' % self.cassandra.ips[pick],
                       'warning')
            self.cassandra.status()  # Let's see the db state.
            time.sleep(60 * time_length_of_failure + 60)
            # Bring db node back up.
            self.cassandra.node_restore(
                self.cassandra.ips[pick]
            )  # Currently we don't have a good way to restore, so this does nothing.
            currently_down.popleft()
            time.sleep(20)
            self.cassandra.status()
        except (KeyboardInterrupt, SystemExit) as e:
            # Do some clean up (restore db nodes) and some reporting, then re-raise exception.
            report('Exit detected ... restoring db state', 'critical')
            for i in currently_down:
                self.cassandra.node_restore(self.cassandra.ips[i])
            time.sleep(20)
            self.cassandra.status()  # Logs will capture output.
            global global_vars
            global_vars['test_status'] = 'Aborted'
            add_test_note(e)
            raise e
        # Exit failure loop if we've reached max time.
        if (time.time() + wait_time_min * 60 - start_time >= run_time_min * 60):
            break
def do_insert(cluster, target_db, schema_file, record_size, start_record,
              record_count, uuid4=None, replication_factor=3,
              suppress_output=False):
    record_size = int(record_size)
    record_num = int(start_record)
    record_count = int(record_count)
    end_record = record_num + record_count
    inserted_record = 0
    # random.seed(0)
    ks_name, cf_name = getKSCFNames(target_db)
    if ks_name is None or cf_name is None:
        return
    createKeyspace(cluster, ks_name, replication_factor=replication_factor)
    createTable(cluster, ks_name, cf_name, schema_file)
    ts = TestSchema(cluster, ks_name, cf_name)
    ts.getSchema()
    if ts.counter_table:
        batch = BatchStatement(batch_type=BatchType.COUNTER)
    else:
        batch = BatchStatement()
    i = 0
    while record_num < end_record:
        if uuid4:
            query = ts.getInsertQuerywithRandomData(record_num, record_size, uuid4)
        else:
            query = ts.getInsertQuerywithRandomData(record_num, record_size)
        if i == 0 and not suppress_output:
            report(query)
        batch.add(query)
        record_num += 1
        inserted_record += 1
        # Flush the batch every 100 records (and on the final record).
        if (inserted_record % 100) == 0 or record_num == end_record:
            msg = '\rInserting %s %8d / %8d (%3d %%)' % (
                target_db, inserted_record, record_count,
                inserted_record * 100 / record_count)
            if not suppress_output:
                report(msg)
            try:
                cluster.session.execute(batch)
            except Exception as e:
                print("\n**** Detected Exception ****")
                print(e)
                print('\n')
                # Check whether the local node or another node in the cluster is down.
                p = subprocess.Popen('nodetool status', stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE, shell=True)
                out, err = p.communicate()
                if "Failed to connect" in err:
                    # This node's cassandra is down.
                    print("**** Restarting Cassandra ****")
                    os.system('sudo service cassandra start')
                    print('**** This Node crashed, sleeping for 3 minutes to reduce load. ****\n')
                    time.sleep(60 * 3)
                    print("")
                elif 'DN' in out:
                    print('**** Another Node crashed, sleeping for 3 minutes to reduce load. ****\n')
                    time.sleep(60 * 3)
                else:
                    print("**** Sleeping for 3 minutes to reduce load. ****\n")
                    time.sleep(60 * 3)
            # Start a fresh batch after each flush attempt.
            if ts.counter_table:
                batch = BatchStatement(batch_type=BatchType.COUNTER)
            else:
                batch = BatchStatement()
        i += 1
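# The population helpers above (CassandraTestingCluster, TestSchema, the
# BatchStatement usage, etc.) are built on the DataStax cassandra-driver. Below is a
# minimal, self-contained sketch of the same batched-insert pattern; the keyspace and
# table names and the contact point are assumptions for illustration, not the schemas
# used by Geppetto:
from cassandra.cluster import Cluster
from cassandra.query import BatchStatement, SimpleStatement

def batched_insert_sketch(rows, contact_points=('127.0.0.1',)):
    session = Cluster(list(contact_points)).connect()
    session.execute("CREATE KEYSPACE IF NOT EXISTS demo_ks WITH replication = "
                    "{'class': 'SimpleStrategy', 'replication_factor': 1}")
    session.execute('CREATE TABLE IF NOT EXISTS demo_ks.demo_cf '
                    '(id int PRIMARY KEY, payload text)')
    batch = BatchStatement()
    for row_id, payload in rows:
        batch.add(SimpleStatement('INSERT INTO demo_ks.demo_cf (id, payload) '
                                  'VALUES (%s, %s)'), (row_id, payload))
    session.execute(batch)  # One round trip flushes the whole batch.

# e.g. batched_insert_sketch([(1, 'a'), (2, 'b')])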