def jam_attempt_impl(self):
    """Stop and respawn starter instance 2, then try to join without a JWT.

    Sequence:
      1. Terminate starter instance 2, refresh the frontend list, and run the
         first selenium jam step if a selenium runner is configured.
      2. Respawn instance 2 and wait for it to report being up again, then
         re-detect its instances and PIDs.
      3. Launch an additional starter ("nodeX") with no JWT that tries to
         join via 127.0.0.1:9528, and wait for that starter to exit.
      4. Run the second selenium jam step if a selenium runner is configured.

    Raises:
        Exception: if instance 2 is not up again after 300 one-second polls,
            or if the JWT-less starter is still running once the poll
            counter exceeds 40 (polls are 10 seconds apart).
    """
    logging.info("stopping instance 2")
    self.starter_instances[2].terminate_instance()
    self.set_frontend_instances()

    prompt_user(self.basecfg, "instance stopped")
    if self.selenium:
        self.selenium.jam_step_1(self.new_cfg or self.cfg)

    # Respawn the instance and get its state fixed.
    self.starter_instances[2].respawn_instance()
    self.set_frontend_instances()

    # Bounded wait: without a budget a node that never comes back would
    # hang the whole run instead of failing it.
    polls_left = 300
    while not self.starter_instances[2].is_instance_up():
        if polls_left <= 0:
            raise Exception("Instance did not respawn in 5 minutes!")
        progress(".")
        time.sleep(1)
        polls_left -= 1
    print()

    self.starter_instances[2].detect_instances()
    self.starter_instances[2].detect_instance_pids()
    self.starter_instances[2].detect_instance_pids_still_alive()
    self.set_frontend_instances()

    logging.info("jamming: Starting instance without jwt")
    dead_instance = StarterManager(
        self.basecfg,
        Path("CLUSTER"),
        "nodeX",
        mode="cluster",
        jwtStr=None,
        expect_instances=[
            InstanceType.agent,
            InstanceType.coordinator,
            InstanceType.dbserver,
        ],
        moreopts=["--starter.join", "127.0.0.1:9528"],
    )
    dead_instance.run_starter()

    # The JWT-less starter is expected to terminate on its own; poll every
    # 10 seconds and give up once the counter has passed 40.
    attempt = 0
    while True:
        logging.info(". %d", attempt)
        if not dead_instance.is_instance_running():
            break
        if attempt > 40:
            logging.info("Giving up wating for the starter to exit")
            raise Exception("non-jwt-ed starter won't exit")
        attempt += 1
        time.sleep(10)

    # NOTE(review): `instance` looks like a process handle whose wait()
    # result is only logged here - confirm against StarterManager.
    logging.info(str(dead_instance.instance.wait(timeout=320)))
    logging.info("dead instance is dead?")

    prompt_user(self.basecfg, "cluster should be up")
    if self.selenium:
        self.selenium.jam_step_2(self.new_cfg or self.cfg)
def jam_attempt_impl(self):
    # pylint: disable=too-many-statements
    """Take one cluster node down, verify data, respawn it, then try a JWT-less join.

    Sequence:
      1. Wait until `check_collections_in_sync` succeeds via the frontend of
         starter instance 0.
      2. Pick the instance to terminate: instance 2, unless it holds the
         agency leader, in which case instance 1 is terminated instead.
      3. Terminate that instance (keeping its instance records), relaunch
         only its agent, and run the first selenium jam step if configured.
      4. Run the test-data check from instance 0 and from the surviving
         instance, passing the UUID of the stopped instance's first DB server
         as the disabled one.
      5. Respawn the terminated instance, wait for it to be up, and
         re-detect its instances and PIDs.
      6. Launch an extra starter ("nodeX") without a JWT, wait for it to
         exit, and require its log to contain
         "Unauthorized. Wrong credentials.".
      7. Run the second selenium jam step if configured.

    Raises:
        Exception: if the cluster is not in sync, a data check fails, the
            instance is not up again after 300 one-second polls, or the
            JWT-less starter is still running once the poll counter exceeds
            40 (polls are 10 seconds apart).
    """
    # Fetching the full collection list first (self.get_collection_list())
    # was dropped because it is too slow to be worthwhile.
    lh.subsubsection("wait for all shards to be in sync")
    in_sync = self.starter_instances[0].execute_frontend(
        self.check_collections_in_sync, True)
    if not in_sync:
        raise Exception(
            "Failed to ensure the cluster is in sync: %s %s"
            % (in_sync, str(self.check_collections_in_sync)))
    print("all in sync.")

    # Default to stopping instance 2; swap roles when it hosts the agency
    # leader so that instance 1 is stopped instead.
    agency_leader = self.agency_get_leader()
    terminate_instance = 2
    survive_instance = 1
    if self.starter_instances[terminate_instance].have_this_instance(
            agency_leader):
        print("Cluster instance 2 has the agency leader; killing 1 instead")
        terminate_instance, survive_instance = 1, 2

    logging.info("stopping instance %d", terminate_instance)
    # Remember the DB server's UUID before stopping it; the data checks
    # below receive it via --disabledDbserverUUID.
    dbserver_uuid = (
        self.starter_instances[terminate_instance].get_dbservers()[0].get_uuid())
    self.starter_instances[terminate_instance].terminate_instance(
        keep_instances=True)
    logging.info("relaunching agent!")
    self.starter_instances[terminate_instance].manually_launch_instances(
        [InstanceType.AGENT], [], False, False)
    self.set_frontend_instances()

    prompt_user(self.basecfg, "instance stopped")
    if self.selenium:
        self.selenium.jam_step_1()

    # Same check from instance 0 first, then from the surviving instance.
    for checking_instance in (0, survive_instance):
        ret = self.starter_instances[checking_instance].arangosh.check_test_data(
            "Cluster one node missing",
            True,
            ["--disabledDbserverUUID", dbserver_uuid])
        if not ret[0]:
            raise Exception("check data failed " + ret[1])

    # Respawn the instance and get its state fixed.
    self.starter_instances[terminate_instance].respawn_instance()
    self.set_frontend_instances()
    polls_left = 300
    while not self.starter_instances[terminate_instance].is_instance_up():
        if polls_left <= 0:
            raise Exception("Instance did not respawn in 5 minutes!")
        progress(".")
        time.sleep(1)
        polls_left -= 1
    print()

    self.starter_instances[terminate_instance].detect_instances()
    self.starter_instances[terminate_instance].detect_instance_pids()
    self.starter_instances[terminate_instance].detect_instance_pids_still_alive()
    self.set_frontend_instances()

    logging.info("jamming: Starting instance without jwt")
    moreopts = ["--starter.join", "127.0.0.1:9528"]
    if self.cfg.ssl and not self.cfg.use_auto_certs:
        # With SSL and no auto-generated certs, nodeX gets its own keyfile.
        keyfile = self.cert_dir / Path("nodeX") / "tls.keyfile"
        self.generate_keyfile(keyfile)
        moreopts.append(f"--ssl.keyfile={keyfile}")
    dead_instance = StarterManager(
        self.basecfg,
        Path("CLUSTER"),
        "nodeX",
        mode="cluster",
        jwt_str=None,
        expect_instances=[
            InstanceType.AGENT,
            InstanceType.COORDINATOR,
            InstanceType.DBSERVER,
        ],
        moreopts=moreopts,
    )
    dead_instance.run_starter(expect_to_fail=True)

    # The JWT-less starter is expected to exit by itself; poll every
    # 10 seconds and give up once the counter has passed 40.
    attempt = 0
    while True:
        logging.info(". %d", attempt)
        if not dead_instance.is_instance_running():
            dead_instance.check_that_starter_log_contains(
                "Unauthorized. Wrong credentials.")
            break
        if attempt > 40:
            logging.info("Giving up wating for the starter to exit")
            raise Exception("non-jwt-ed starter won't exit")
        attempt += 1
        time.sleep(10)

    # NOTE(review): `instance` looks like a process handle whose wait()
    # result is only logged here - confirm against StarterManager.
    logging.info(str(dead_instance.instance.wait(timeout=320)))
    logging.info("dead instance is dead?")

    prompt_user(self.basecfg, "cluster should be up")
    if self.selenium:
        self.selenium.jam_step_2()