def starter_prepare_env_impl(self):
    """prepare the environment: optionally generate a TLS keyfile, then
    create the single-server StarterManager on port 1234."""
    more_options = []
    if self.cfg.ssl and not self.cfg.use_auto_certs:
        # manual certificates requested: derive a keyfile from our own CA
        self.create_tls_ca_cert()
        keyfile = self.cert_dir / Path("single") / "tls.keyfile"
        self.cert_op(
            [
                "tls",
                "keyfile",
                "--cacert=" + str(self.certificate_auth["cert"]),
                "--cakey=" + str(self.certificate_auth["key"]),
                "--keyfile=" + str(keyfile),
                "--host=" + self.cfg.publicip,
                "--host=localhost",
            ]
        )
        more_options.append(f"--ssl.keyfile={keyfile}")
    self.starter_instance = StarterManager(
        self.basecfg,
        self.basedir,
        "single",
        mode="single",
        port=1234,
        expect_instances=[InstanceType.SINGLE],
        jwt_str="single",
        moreopts=more_options,
    )
    self.starter_instance.is_leader = True
def starter_prepare_env_impl(self):
    """prepare the environment: spawn three active-failover starters.

    node1 opens the cluster on port 9528; node2 and node3 join it.
    Each starter is expected to bring up one agent and one resilient
    single server.
    """
    # (name, port, extra starter options) for the three nodes; previously
    # three copy-pasted StarterManager calls
    nodes = [
        ('node1', 9528, []),
        ('node2', 9628, ['--starter.join', '127.0.0.1:9528']),
        ('node3', 9728, ['--starter.join', '127.0.0.1:9528']),
    ]
    for name, port, opts in nodes:
        self.starter_instances.append(
            StarterManager(self.basecfg,
                           self.basedir,
                           name,
                           mode='activefailover',
                           port=port,
                           expect_instances=[
                               InstanceType.agent,
                               InstanceType.resilientsingle
                           ],
                           jwtStr="afo",
                           moreopts=opts))
def add_starter(val, port):
    """create the StarterManager for one sync-enabled local cluster node.

    val -- per-node dict carrying dir/secret/keyfile entries; the created
           starter is stored in val["instance"]
    port -- starter port; the node started without an explicit port
            (port is None) is marked as the leading starter
    """
    val["instance"] = StarterManager(
        self.cfg,
        val["dir"],
        val["instance_dir"],
        port=port,
        mode='cluster',
        # --starter.local spawns the whole cluster on one machine:
        # three of each instance type (was 15 hand-written entries)
        expect_instances=(
            [InstanceType.agent] * 3 +
            [InstanceType.coordinator] * 3 +
            [InstanceType.dbserver] * 3 +
            [InstanceType.syncmaster] * 3 +
            [InstanceType.syncworker] * 3
        ),
        moreopts=[
            '--starter.sync',
            '--starter.local',
            '--auth.jwt-secret=' + str(val["JWTSecret"]),
            '--sync.server.keyfile=' + str(val["tlsKeyfile"]),
            '--sync.server.client-cafile=' + str(client_cert),
            '--sync.master.jwt-secret=' + str(val["SyncSecret"]),
            '--starter.address=' + self.cfg.publicip
        ])
    if port is None:
        # the port-less starter picks its own port and leads the cluster
        val["instance"].is_leader = True
def jam_attempt_impl(self):
    """jam test: take cluster node 2 down and bring it back, then verify
    that a starter joining without the cluster's JWT cannot stay up."""
    logging.info("stopping instance 2")
    self.starter_instances[2].terminate_instance()
    self.set_frontend_instances()
    prompt_user(self.basecfg, "instance stopped")
    if self.selenium:
        self.selenium.jam_step_1(self.new_cfg if self.new_cfg else self.cfg)
    # respawn instance, and get its state fixed
    self.starter_instances[2].respawn_instance()
    self.set_frontend_instances()
    # poll once per second until the respawned node reports healthy
    while not self.starter_instances[2].is_instance_up():
        progress('.')
        time.sleep(1)
    print()
    self.starter_instances[2].detect_instances()
    self.starter_instances[2].detect_instance_pids()
    self.starter_instances[2].detect_instance_pids_still_alive()
    self.set_frontend_instances()
    logging.info('jamming: Starting instance without jwt')
    # jwtStr=None: this starter lacks the cluster secret and therefore
    # must not be able to remain part of the cluster
    dead_instance = StarterManager(
        self.basecfg,
        Path('CLUSTER'),
        'nodeX',
        mode='cluster',
        jwtStr=None,
        expect_instances=[
            InstanceType.agent,
            InstanceType.coordinator,
            InstanceType.dbserver,
        ],
        moreopts=['--starter.join', '127.0.0.1:9528'])
    dead_instance.run_starter()
    i = 0
    while True:
        logging.info(". %d", i)
        if not dead_instance.is_instance_running():
            break
        if i > 40:
            # grace period of 40 polls at 10s each (~400s) exhausted
            logging.info('Giving up wating for the starter to exit')
            raise Exception("non-jwt-ed starter won't exit")
        i += 1
        time.sleep(10)
    logging.info(str(dead_instance.instance.wait(timeout=320)))
    logging.info('dead instance is dead?')
    prompt_user(self.basecfg, "cluster should be up")
    if self.selenium:
        self.selenium.jam_step_2(self.new_cfg if self.new_cfg else self.cfg)
def start_single_server(self):
    """start a single server setup"""
    # pylint: disable=attribute-defined-outside-init
    starter = StarterManager(
        basecfg=self.installer.cfg,
        install_prefix=Path(self.short_name),
        instance_prefix="single",
        expect_instances=[InstanceType.SINGLE],
        mode="single",
        jwt_str="single",
    )
    self.starter = starter
    # boot the deployment and wire up credentials
    starter.run_starter()
    starter.detect_instances()
    starter.detect_instance_pids()
    starter.set_passvoid(self.passvoid)
    self.instance = starter.instance
def _add_starter(val, port, moreopts=None):
    """create one sync-enabled local-cluster starter and register it in val.

    val -- per-node dict with dir/secret/keyfile entries; the created
           starter is stored in val["instance"]
    port -- starter port; the node on 7528 is marked as the leader
    moreopts -- optional extra starter arguments (default: none)
    """
    # fix: "moreopts=[]" was a mutable default argument, shared across
    # calls; build a fresh list per invocation instead
    if moreopts is None:
        moreopts = []
    # fmt: off
    opts = [
        '--all.log.level=backup=trace',
        '--all.log.level=requests=debug',
        '--args.syncmasters.log.level=debug',
        '--args.syncworkers.log.level=debug',
        '--starter.sync',
        '--starter.local',
        '--auth.jwt-secret=' + str(val["JWTSecret"]),
        '--sync.server.keyfile=' + str(val["tlsKeyfile"]),
        '--sync.server.client-cafile=' + str(client_cert),
        '--sync.master.jwt-secret=' + str(val["SyncSecret"]),
        '--starter.address=' + self.cfg.publicip
    ] + moreopts
    # fmt: on
    if self.cfg.ssl and not self.cfg.use_auto_certs:
        opts.append("--ssl.keyfile=" + str(val["tlsKeyfile"]))
    val["instance"] = StarterManager(
        self.cfg,
        val["dir"],
        val["instance_dir"],
        port=port,
        mode="cluster",
        # --starter.local cluster: three of each instance type
        expect_instances=[
            InstanceType.AGENT,
            InstanceType.AGENT,
            InstanceType.AGENT,
            InstanceType.COORDINATOR,
            InstanceType.COORDINATOR,
            InstanceType.COORDINATOR,
            InstanceType.DBSERVER,
            InstanceType.DBSERVER,
            InstanceType.DBSERVER,
            InstanceType.SYNCMASTER,
            InstanceType.SYNCMASTER,
            InstanceType.SYNCMASTER,
            InstanceType.SYNCWORKER,
            InstanceType.SYNCWORKER,
            InstanceType.SYNCWORKER,
        ],
        moreopts=opts,
    )
    val["instance"].set_jwt_file(val["JWTSecret"])
    if port == 7528:
        val["instance"].is_leader = True
def starter_prepare_env_impl(self):
    """prepare the environment: three cluster starters; node1 opens the
    cluster on port 9528, node2 and node3 join it."""
    # arangosh snippet (script, description) used by the test phases
    self.create_test_collection = (
        """
        db._create("testCollection", { numberOfShards: 6, replicationFactor: 2});
        db.testCollection.save({test: "document"})
        """,
        "create test collection",
    )
    # (name, port, extra options) for the three nodes; previously three
    # copy-pasted StarterManager calls
    nodes = [
        ('node1', 9528, []),
        ('node2', 9628, ['--starter.join', '127.0.0.1:9528']),
        ('node3', 9728, ['--starter.join', '127.0.0.1:9528']),
    ]
    for name, port, opts in nodes:
        self.starter_instances.append(
            StarterManager(self.basecfg,
                           self.basedir,
                           name,
                           mode='cluster',
                           jwtStr=self.jwtdatastr,
                           port=port,
                           # fresh list per node: one agent/coordinator/dbserver each
                           expect_instances=[
                               InstanceType.agent,
                               InstanceType.coordinator,
                               InstanceType.dbserver,
                           ],
                           moreopts=opts))
    for instance in self.starter_instances:
        instance.is_leader = True
def starter_prepare_env_impl(self):
    """create the leader and follower single-server starters"""
    leader = StarterManager(self.cfg,
                            self.basedir,
                            'leader',
                            mode='single',
                            port=1234,
                            expect_instances=[InstanceType.single],
                            jwtStr="leader",
                            moreopts=[])
    # the leader drives the replication and the makedata run
    leader.is_leader = True
    self.leader_starter_instance = leader

    follower = StarterManager(self.cfg,
                              self.basedir,
                              'follower',
                              mode='single',
                              port=2345,
                              expect_instances=[InstanceType.single],
                              jwtStr="follower",
                              moreopts=[])
    self.follower_starter_instance = follower
def add_starter(name, port, opts):
    """append one active-failover starter (one agent + one resilient single)"""
    instance = StarterManager(
        self.basecfg,
        self.basedir,
        name,
        mode="activefailover",
        port=port,
        expect_instances=[
            InstanceType.AGENT,
            InstanceType.RESILIENT_SINGLE,
        ],
        jwt_str="afo",
        moreopts=opts,
    )
    self.starter_instances.append(instance)
def add_starter(name, port, opts):
    """append one cluster starter (one agent, coordinator and dbserver each)"""
    instance = StarterManager(
        self.basecfg,
        self.basedir,
        name,
        mode="cluster",
        jwt_str=self.jwtdatastr,
        port=port,
        expect_instances=[
            InstanceType.AGENT,
            InstanceType.COORDINATOR,
            InstanceType.DBSERVER,
        ],
        moreopts=opts,
    )
    self.starter_instances.append(instance)
class LeaderFollower(Runner):
    """this runs a leader / Follower setup with synchronisation"""

    # pylint: disable=too-many-arguments disable=too-many-instance-attributes
    def __init__(
        self,
        runner_type,
        abort_on_error,
        installer_set,
        selenium,
        selenium_driver_args,
        testrun_name: str,
        ssl: bool,
        use_auto_certs: bool,
    ):
        super().__init__(
            runner_type,
            abort_on_error,
            installer_set,
            RunnerProperties("LeaderFollower", 400, 500, False, ssl, use_auto_certs),
            selenium,
            selenium_driver_args,
            testrun_name,
        )
        # the two single-server starters making up the replication pair
        self.leader_starter_instance = None
        self.follower_starter_instance = None
        self.success = False
        # (description, arangosh script) pairs executed in the test phases
        self.checks = {
            "beforeReplJS": (
                "saving document before",
                """
db._create("testCollectionBefore");
db.testCollectionBefore.save({"hello": "world"})
""",
            ),
            "afterReplJS": (
                "saving document after the replication",
                """
db._create("testCollectionAfter");
db.testCollectionAfter.save({"hello": "world"})
""",
            ),
            "checkReplJS": (
                "checking documents",
                """
if (!db.testCollectionBefore) {
  throw new Error("before collection does not exist");
}
if (!db.testCollectionAfter) {
  throw new Error("after collection does not exist - replication failed");
}
if (!(db.testCollectionBefore.toArray()[0]["hello"] === "world")) {
  throw new Error("before not yet there?");
}
if (!(db.testCollectionAfter.toArray()[0]["hello"] === "world")) {
  throw new Error("after not yet there?");
}
""",
            ),
        }

    def starter_prepare_env_impl(self):
        """create the leader and follower starters; generate per-node TLS
        keyfiles when SSL without auto-certs is requested"""
        leader_opts = []
        follower_opts = []
        if self.cfg.ssl and not self.cfg.use_auto_certs:
            self.create_tls_ca_cert()
            leader_tls_keyfile = self.cert_dir / Path("leader") / "tls.keyfile"
            follower_tls_keyfile = self.cert_dir / Path("follower") / "tls.keyfile"
            self.cert_op([
                "tls",
                "keyfile",
                "--cacert=" + str(self.certificate_auth["cert"]),
                "--cakey=" + str(self.certificate_auth["key"]),
                "--keyfile=" + str(leader_tls_keyfile),
                "--host=" + self.cfg.publicip,
                "--host=localhost",
            ])
            self.cert_op([
                "tls",
                "keyfile",
                "--cacert=" + str(self.certificate_auth["cert"]),
                "--cakey=" + str(self.certificate_auth["key"]),
                "--keyfile=" + str(follower_tls_keyfile),
                "--host=" + self.cfg.publicip,
                "--host=localhost",
            ])
            leader_opts.append(f"--ssl.keyfile={leader_tls_keyfile}")
            follower_opts.append(f"--ssl.keyfile={follower_tls_keyfile}")
        self.leader_starter_instance = StarterManager(
            self.basecfg,
            self.basedir,
            "leader",
            mode="single",
            port=1234,
            expect_instances=[InstanceType.SINGLE],
            jwt_str="leader",
            moreopts=leader_opts,
        )
        self.leader_starter_instance.is_leader = True
        self.follower_starter_instance = StarterManager(
            self.basecfg,
            self.basedir,
            "follower",
            mode="single",
            port=2345,
            expect_instances=[InstanceType.SINGLE],
            jwt_str="follower",
            moreopts=follower_opts,
        )

    def starter_run_impl(self):
        """boot both starters, detect their instances and set credentials"""
        self.leader_starter_instance.run_starter()
        self.follower_starter_instance.run_starter()
        self.leader_starter_instance.detect_instances()
        self.follower_starter_instance.detect_instances()
        self.leader_starter_instance.detect_instance_pids()
        self.follower_starter_instance.detect_instance_pids()
        self.passvoid = "leader"
        self.leader_starter_instance.set_passvoid(self.passvoid)
        # the replication will overwrite this passvoid anyways:
        self.follower_starter_instance.set_passvoid(self.passvoid)
        self.starter_instances = [
            self.leader_starter_instance,
            self.follower_starter_instance,
        ]

    def finish_setup_impl(self):
        """write a document on the leader, start global replication from the
        follower, then write another document to be replicated"""
        # finish setup by starting the replications
        self.set_frontend_instances()
        # NOTE(review): the credential values below appear redacted
        # ("******"); the %-tuple supplies three values while only two "%s"
        # are visible — verify the format string against the original source.
        self.checks["startReplJS"] = (
            "launching replication",
            """
print(
require("@arangodb/replication").setupReplicationGlobal({
    endpoint: "%s://127.0.0.1:%s",
    username: "******",
    password: "******",
    verbose: false,
    includeSystem: true,
    incremental: true,
    autoResync: true
    }));
print("replication started")
process.exit(0);
"""
            % (
                self.get_protocol(),
                str(self.leader_starter_instance.get_frontend_port()),
                self.leader_starter_instance.get_passvoid(),
            ),
        )
        lh.subsubsection("prepare leader follower replication")
        arangosh_script = self.checks["beforeReplJS"]
        logging.info(
            str(self.leader_starter_instance.execute_frontend(arangosh_script)))
        lh.subsubsection("start leader follwer replication")
        arangosh_script = self.checks["startReplJS"]
        retval = self.follower_starter_instance.execute_frontend(arangosh_script)
        if not retval:
            raise Exception("Failed to start the replication using: %s %s"
                            % (retval, str(self.checks["startReplJS"])))
        logging.info("Replication started successfully")
        logging.info("save document")
        arangosh_script = self.checks["afterReplJS"]
        logging.info(
            str(self.leader_starter_instance.execute_frontend(arangosh_script)))
        self.makedata_instances.append(self.leader_starter_instance)

    @step
    def test_setup_impl(self):
        """poll (up to 30s) until the follower has replicated the data,
        then run makedata on the (read-only) follower"""
        logging.info("testing the leader/follower setup")
        tries = 30
        if not self.follower_starter_instance.execute_frontend(
                self.checks["checkReplJS"]):
            # retry once per second until the check passes or tries run out
            while tries:
                if self.follower_starter_instance.execute_frontend(
                        self.checks["checkReplJS"]):
                    break
                progress(".")
                time.sleep(1)
                tries -= 1
        if not tries:
            # one last attempt before declaring the replication failed
            if not self.follower_starter_instance.execute_frontend(
                    self.checks["checkReplJS"]):
                raise Exception("replication didn't make it in 30s!")
        lh.subsection("leader/follower - check test data", "-")
        if self.selenium:
            self.selenium.test_after_install()
        # assert that data has been replicated
        self.follower_starter_instance.arangosh.read_only = True
        self.follower_starter_instance.supports_foxx_tests = False
        logging.info("Leader follower testing makedata on follower")
        self.makedata_instances.append(self.follower_starter_instance)
        self.make_data()
        if self.selenium:
            self.selenium.test_setup()
        logging.info("Leader follower setup successfully finished!")

    @step
    def upgrade_arangod_version_impl(self):
        """rolling upgrade this installation"""
        # phase 1: swap binaries on both nodes
        for node in [
                self.leader_starter_instance, self.follower_starter_instance
        ]:
            node.replace_binary_for_upgrade(self.new_cfg)
        # phase 2: run the upgrade procedure on both nodes
        for node in [
                self.leader_starter_instance, self.follower_starter_instance
        ]:
            node.command_upgrade()
            node.wait_for_upgrade()
            node.wait_for_upgrade_done_in_log()
        # phase 3: re-detect instances and confirm the new version replies
        for node in [
                self.leader_starter_instance, self.follower_starter_instance
        ]:
            node.detect_instances()
            node.wait_for_version_reply()
        if self.selenium:
            self.selenium.test_after_install()

    @step
    def upgrade_arangod_version_manual_impl(self):
        """manual upgrade this installation"""
        self.progress(True, "step 1 - shut down instances")
        instances = [
            self.leader_starter_instance, self.follower_starter_instance
        ]
        for node in instances:
            node.replace_binary_setup_for_upgrade(self.new_cfg)
            node.terminate_instance(True)
        self.progress(True, "step 2 - launch instances with the upgrade options set")
        for node in instances:
            print("launch")
            node.manually_launch_instances(
                [InstanceType.SINGLE],
                [
                    "--database.auto-upgrade",
                    "true",
                    "--javascript.copy-installation",
                    "true",
                ],
            )
        self.progress(True, "step 3 - launch instances again")
        for node in instances:
            node.respawn_instance()
        self.progress(True, "step 4 - detect system state")
        for node in instances:
            node.detect_instances()
            node.wait_for_version_reply()
        if self.selenium:
            self.selenium.test_after_install()

    @step
    def jam_attempt_impl(self):
        """run the replication fuzzing test"""
        logging.info("running the replication fuzzing test")
        # add instace where makedata will be run on
        self.tcp_ping_all_nodes()
        ret = self.leader_starter_instance.arangosh.run_in_arangosh(
            (self.cfg.test_data_dir /
             Path("tests/js/server/replication/fuzz/replication-fuzz-global.js")),
            [],
            [
                self.follower_starter_instance.get_frontend().get_public_url(
                    "root:%s@" % self.passvoid)
            ],
        )
        if not ret[0]:
            if not self.cfg.verbose:
                print(ret[1])
            raise Exception("replication fuzzing test failed")
        prompt_user(self.basecfg, "please test the installation.")
        if self.selenium:
            self.selenium.test_jam_attempt()

    @step
    def shutdown_impl(self):
        """terminate both starters; fail if any process is left running"""
        self.leader_starter_instance.terminate_instance()
        self.follower_starter_instance.terminate_instance()
        pslist = get_all_processes(False)
        if len(pslist) > 0:
            raise Exception("Not all processes terminated! [%s]" % str(pslist))
        logging.info("test ended")

    def before_backup_impl(self):
        """nothing to see here"""

    def after_backup_impl(self):
        """nothing to see here"""

    def set_selenium_instances(self):
        """set instances in selenium runner"""
        self.selenium.set_instances(
            self.cfg,
            self.leader_starter_instance.arango_importer,
            self.leader_starter_instance.arango_restore,
            self.leader_starter_instance.all_instances[0],
        )
def starter_prepare_env_impl(self):
    """prepare the environment: cap the detected memory, then spawn a
    three-node cluster; node1 opens the cluster, node2/node3 join it."""
    mem = psutil.virtual_memory()
    # hand 80% of RAM split across the 9 arangod instances to each process
    os.environ['ARANGODB_OVERRIDE_DETECTED_TOTAL_MEMORY'] = str(
        int((mem.total * 0.8) / 9))
    self.basecfg.index = 0
    # pylint: disable=C0415
    if self.remote:
        from arangodb.starter.manager import (StarterNonManager as
                                              StarterManager)
    else:
        from arangodb.starter.manager import StarterManager
    # (name, port, extra options); previously three copy-pasted calls
    nodes = [
        ('node1', 9528, []),
        ('node2', 9628, ['--starter.join', '127.0.0.1:9528']),
        ('node3', 9728, ['--starter.join', '127.0.0.1:9528']),
    ]
    for name, port, opts in nodes:
        # opts += ['--agents.agency.election-timeout-min=5',
        #          '--agents.agency.election-timeout-max=10']
        self.starter_instances.append(
            StarterManager(
                self.basecfg,
                self.basedir,
                name,
                mode='cluster',
                jwtStr=self.jwtdatastr,
                port=port,
                expect_instances=[
                    InstanceType.agent,
                    InstanceType.coordinator,
                    InstanceType.dbserver,
                ],
                moreopts=opts))
    for instance in self.starter_instances:
        instance.is_leader = True
def test_debug_symbols_attach_to_process_windows(self):
    """Debug arangod executable by attaching debugger to a running process (Windows)"""
    starter = StarterManager(
        basecfg=self.installer.cfg,
        install_prefix=Path(DebuggerTestSuite.STARTER_DIR),
        instance_prefix="single",
        expect_instances=[InstanceType.SINGLE],
        mode="single",
        jwt_str="single",
    )
    try:
        with step("Start a single server deployment"):
            starter.run_starter()
            starter.detect_instances()
            starter.detect_instance_pids()
            starter.set_passvoid("")
            pid = starter.all_instances[0].pid
        # directory holding the PDB symbol files shipped by the installer
        pdb_dir = str(self.installer.cfg.debug_install_prefix)
        with step(
                "Check that stack trace with function names and line numbers can be acquired from cdb"
        ):
            # attach cdb to the running arangod (-p <pid>) with the PDB
            # search path (-y) and line number info (-lines) enabled
            cmd = " ".join([
                "cdb", "-pv", "-p",
                str(pid), "-y", pdb_dir, "-lines", "-n"
            ])
            attach(cmd, "CDB command", attachment_type=AttachmentType.TEXT)
            cdb = wexpect.spawn(cmd)
            cdb.expect(DebuggerTestSuite.CDB_PROMPT, timeout=300)
            # "k": request a stack trace, then capture everything printed
            # up to the next prompt
            cdb.sendline("k")
            cdb.expect(DebuggerTestSuite.CDB_PROMPT, timeout=300)
            stack = cdb.before
            cdb.sendline("q")
            attach(stack, "Stacktrace from cdb output",
                   attachment_type=AttachmentType.TEXT)
            # symbols resolved correctly iff real names/files show up
            assert "arangod!main" in stack, "Stack must contain real function names."
            assert "arangod.cpp" in stack, "Stack must contain real source file names."
    finally:
        starter.terminate_instance()
        kill_all_processes()
def starter_prepare_env_impl(self):
    """prepare the environment for a three node cluster.

    Caps the detected memory (80% of RAM split across the 9 arangod
    instances), then spawns node1 (opening the cluster on 9528) and
    node2/node3 which join it.  With SSL and manual certificates, every
    node gets its own TLS keyfile.
    """
    mem = psutil.virtual_memory()
    os.environ["ARANGODB_OVERRIDE_DETECTED_TOTAL_MEMORY"] = str(
        int((mem.total * 0.8) / 9))
    self.basecfg.index = 0
    # pylint: disable=import-outside-toplevel
    if self.remote:
        from arangodb.starter.manager import StarterNonManager as StarterManager
    else:
        from arangodb.starter.manager import StarterManager

    # per-node starter options; node2/node3 join node1's cluster
    node_opts = {
        "node1": [],
        "node2": ["--starter.join", "127.0.0.1:9528"],
        "node3": ["--starter.join", "127.0.0.1:9528"],
    }
    if self.cfg.ssl and not self.cfg.use_auto_certs:
        self.create_tls_ca_cert()
        for name, opts in node_opts.items():
            # BUG FIX: node3 was previously handed node2's keyfile
            # (node3_opts.append(f"--ssl.keyfile={node2_tls_keyfile}"));
            # generating per-node inside the loop makes that impossible.
            tls_keyfile = self.cert_dir / Path(name) / "tls.keyfile"
            self.cert_op([
                "tls",
                "keyfile",
                "--cacert=" + str(self.certificate_auth["cert"]),
                "--cakey=" + str(self.certificate_auth["key"]),
                "--keyfile=" + str(tls_keyfile),
                "--host=" + self.cfg.publicip,
                "--host=localhost",
            ])
            opts.append(f"--ssl.keyfile={tls_keyfile}")

    ports = {"node1": 9528, "node2": 9628, "node3": 9728}
    for name, opts in node_opts.items():
        self.starter_instances.append(
            StarterManager(
                self.basecfg,
                self.basedir,
                name,
                mode="cluster",
                jwt_str=self.jwtdatastr,
                port=ports[name],
                expect_instances=[
                    InstanceType.AGENT,
                    InstanceType.COORDINATOR,
                    InstanceType.DBSERVER,
                ],
                moreopts=opts
                # += ['--agents.agency.election-timeout-min=5',
                #     '--agents.agency.election-timeout-max=10',]
            ))
    for instance in self.starter_instances:
        instance.is_leader = True
class Single(Runner):
    """this runs a single server setup"""

    # pylint: disable=too-many-arguments disable=too-many-instance-attributes
    def __init__(
        self,
        runner_type,
        abort_on_error,
        installer_set,
        selenium,
        selenium_driver_args,
        testrun_name: str,
        ssl: bool,
        use_auto_certs: bool,
    ):
        super().__init__(
            runner_type,
            abort_on_error,
            installer_set,
            RunnerProperties("Single", 400, 500, True, ssl, use_auto_certs),
            selenium,
            selenium_driver_args,
            testrun_name,
        )
        # the one StarterManager driving this deployment
        self.starter_instance = None
        self.backup_instance_count = 1
        self.success = False

    def starter_prepare_env_impl(self):
        """prepare the environment: optionally create a TLS keyfile, then
        create the single-server starter on port 1234"""
        opts = []
        if self.cfg.ssl and not self.cfg.use_auto_certs:
            self.create_tls_ca_cert()
            tls_keyfile = self.cert_dir / Path("single") / "tls.keyfile"
            self.cert_op([
                "tls",
                "keyfile",
                "--cacert=" + str(self.certificate_auth["cert"]),
                "--cakey=" + str(self.certificate_auth["key"]),
                "--keyfile=" + str(tls_keyfile),
                "--host=" + self.cfg.publicip,
                "--host=localhost",
            ])
            opts.append(f"--ssl.keyfile={tls_keyfile}")
        self.starter_instance = StarterManager(
            self.basecfg,
            self.basedir,
            "single",
            mode="single",
            port=1234,
            expect_instances=[InstanceType.SINGLE],
            jwt_str="single",
            moreopts=opts,
        )
        self.starter_instance.is_leader = True

    def starter_run_impl(self):
        """boot the starter, detect its instances and set the password"""
        self.starter_instance.run_starter()
        self.starter_instance.detect_instances()
        self.starter_instance.detect_instance_pids()
        self.passvoid = "single"
        self.starter_instance.set_passvoid(self.passvoid)
        self.starter_instances = [
            self.starter_instance,
        ]

    def finish_setup_impl(self):
        # finish setup by starting the replications
        self.set_frontend_instances()
        self.makedata_instances.append(self.starter_instance)

    @step
    def test_setup_impl(self):
        """run the makedata suite against the single server"""
        logging.info("testing the single server setup")
        # (removed unused local "tries = 30")
        lh.subsection("single server - check test data", "-")
        if self.selenium:
            self.selenium.test_after_install()
        self.make_data()
        if self.selenium:
            self.selenium.test_setup()
        logging.info("Single setup successfully finished!")

    @step
    def upgrade_arangod_version_impl(self):
        """rolling upgrade this installation"""
        self.starter_instance.replace_binary_for_upgrade(self.new_cfg)
        self.starter_instance.command_upgrade()
        self.starter_instance.wait_for_upgrade()
        self.starter_instance.wait_for_upgrade_done_in_log()
        self.starter_instance.detect_instances()
        self.starter_instance.wait_for_version_reply()
        if self.selenium:
            self.selenium.test_after_install()

    @step
    def upgrade_arangod_version_manual_impl(self):
        """manual upgrade this installation"""
        self.progress(True, "step 1 - shut down instances")
        self.starter_instance.replace_binary_setup_for_upgrade(self.new_cfg)
        self.starter_instance.terminate_instance(True)
        self.progress(True, "step 2 - launch instances with the upgrade options set")
        print("launch")
        self.starter_instance.manually_launch_instances(
            [InstanceType.SINGLE],
            [
                "--database.auto-upgrade",
                "true",
                "--javascript.copy-installation",
                "true",
            ],
        )
        self.progress(True, "step 3 - launch instances again")
        self.starter_instance.respawn_instance()
        self.progress(True, "step 4 - detect system state")
        self.starter_instance.detect_instances()
        self.starter_instance.wait_for_version_reply()
        if self.selenium:
            self.selenium.test_after_install()

    @step
    def jam_attempt_impl(self):
        """run the replication fuzzing test"""
        prompt_user(self.basecfg, "please test the installation.")
        if self.selenium:
            self.selenium.test_jam_attempt()

    @step
    def shutdown_impl(self):
        """terminate the starter; fail if any process is left running"""
        self.starter_instance.terminate_instance()
        pslist = get_all_processes(False)
        if len(pslist) > 0:
            raise Exception("Not all processes terminated! [%s]" % str(pslist))
        logging.info("test ended")

    def before_backup_impl(self):
        """nothing to see here"""

    def after_backup_impl(self):
        """nothing to see here"""

    def set_selenium_instances(self):
        """set instances in selenium runner"""
        self.selenium.set_instances(
            self.cfg,
            # BUG FIX: was "sself.starter_instance.arango_importer",
            # which raised NameError at runtime
            self.starter_instance.arango_importer,
            self.starter_instance.arango_restore,
            self.starter_instance.all_instances[0],
        )
def create_arangod_dump(installer, starter_dir: str, dump_file_dir: str):
    """create arangod memory dump file

    Boots a single-server deployment, runs procdump against the arangod
    process and returns the path of the generated dump file.

    installer -- installer object providing the starter base configuration
    starter_dir -- directory the starter deployment is placed in
    dump_file_dir -- directory procdump writes the dump file into
    """
    starter = StarterManager(
        basecfg=installer.cfg,
        install_prefix=Path(starter_dir),
        instance_prefix="single",
        expect_instances=[InstanceType.SINGLE],
        mode="single",
        jwt_str="single",
    )
    dump_filename = None
    try:
        with step("Start a single server deployment"):
            starter.run_starter()
            starter.detect_instances()
            starter.detect_instance_pids()
            starter.set_passvoid("")
            pid = starter.all_instances[0].pid
        with step("Create a dump of arangod process"):
            # -ma: full memory dump of the target pid
            cmd = ["procdump", "-ma", str(pid), dump_file_dir]
            lh.log_cmd(cmd)
            with psutil.Popen(cmd,
                              bufsize=-1,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE) as proc:
                (procdump_out, procdump_err) = proc.communicate()
            procdump_str = str(procdump_out, "UTF-8")
            attach(procdump_str, "procdump sdtout")
            attach(str(procdump_err), "procdump stderr")
            # procdump reports success and the dump path on stdout, e.g.
            #   [12:34:56] Dump 1 initiated: C:\dumps\arangod.dmp
            #   ... Dump 1 complete
            success_string = "Dump 1 complete"
            filename_regex = re.compile(
                r"^(\[\d{2}:\d{2}:\d{2}\] Dump 1 initiated: )(?P<filename>.*)$",
                re.MULTILINE)
            match = re.search(filename_regex, procdump_str)
            if procdump_str.find(success_string) < 0 or not match:
                raise Exception(
                    "procdump wasn't able to create a dump file: " +
                    procdump_str)
            dump_filename = match.group("filename")
    finally:
        starter.terminate_instance()
        kill_all_processes()
    return dump_filename
class LeaderFollower(Runner):
    """ this runs a leader / Follower setup with synchronisation """

    # pylint: disable=R0913 disable=R0902
    def __init__(self, runner_type, cfg, old_inst, new_cfg, new_inst, selenium,
                 selenium_driver_args):
        super().__init__(runner_type, cfg, old_inst, new_cfg, new_inst, 'lf',
                         400, 500, selenium, selenium_driver_args)
        # the two single-server starters making up the replication pair
        self.leader_starter_instance = None
        self.follower_starter_instance = None
        self.success = False
        # (description, arangosh script) pairs executed in the test phases.
        # BUG FIX in checkReplJS: the value checks were written as
        #   if (!db.x.toArray()[0]["hello"] === "world")
        # In JavaScript "!" binds tighter than "===", so that compared a
        # boolean to a string and was always false — the replication
        # failure could never be detected.  Parenthesized as intended.
        self.checks = {
            "beforeReplJS": ("saving document before", """
db._create("testCollectionBefore");
db.testCollectionBefore.save({"hello": "world"})
"""),
            "afterReplJS": ("saving document after the replication", """
db._create("testCollectionAfter");
db.testCollectionAfter.save({"hello": "world"})
"""),
            "checkReplJS": ("checking documents", """
if (!db.testCollectionBefore) {
  throw new Error("before collection does not exist");
}
if (!db.testCollectionAfter) {
  throw new Error("after collection does not exist - replication failed");
}
if (!(db.testCollectionBefore.toArray()[0]["hello"] === "world")) {
  throw new Error("before not yet there?");
}
if (!(db.testCollectionAfter.toArray()[0]["hello"] === "world")) {
  throw new Error("after not yet there?");
}
""")
        }

    def starter_prepare_env_impl(self):
        """create the leader and follower single-server starters"""
        self.leader_starter_instance = StarterManager(
            self.cfg,
            self.basedir,
            'leader',
            mode='single',
            port=1234,
            expect_instances=[InstanceType.single],
            jwtStr="leader",
            moreopts=[])
        self.leader_starter_instance.is_leader = True

        self.follower_starter_instance = StarterManager(
            self.cfg,
            self.basedir,
            'follower',
            mode='single',
            port=2345,
            expect_instances=[InstanceType.single],
            jwtStr="follower",
            moreopts=[])

    def starter_run_impl(self):
        """boot both starters, detect their instances and set credentials"""
        self.leader_starter_instance.run_starter()
        self.follower_starter_instance.run_starter()
        self.leader_starter_instance.detect_instances()
        self.follower_starter_instance.detect_instances()
        self.leader_starter_instance.detect_instance_pids()
        self.follower_starter_instance.detect_instance_pids()
        self.passvoid = 'leader'
        self.leader_starter_instance.set_passvoid(self.passvoid)
        # the replication will overwrite this passvoid anyways:
        self.follower_starter_instance.set_passvoid(self.passvoid)
        self.starter_instances = [
            self.leader_starter_instance, self.follower_starter_instance
        ]

    def finish_setup_impl(self):
        """write a document on the leader, start global replication from
        the follower, then write another document to be replicated"""
        # finish setup by starting the replications
        self.set_frontend_instances()
        # NOTE(review): the credential values below appear redacted
        # ("******"); the %-tuple supplies two values while only one "%s"
        # is visible — verify the format string against the original source.
        self.checks['startReplJS'] = ("launching replication", """
print(
require("@arangodb/replication").setupReplicationGlobal({
    endpoint: "tcp://127.0.0.1:%s",
    username: "******",
    password: "******",
    verbose: false,
    includeSystem: true,
    incremental: true,
    autoResync: true
    }));
print("replication started")
process.exit(0);
""" % (str(self.leader_starter_instance.get_frontend_port()),
       self.leader_starter_instance.get_passvoid()))
        lh.subsubsection("prepare leader follower replication")
        arangosh_script = self.checks['beforeReplJS']
        logging.info(
            str(self.leader_starter_instance.execute_frontend(
                arangosh_script)))
        lh.subsubsection("start leader follwer replication")
        arangosh_script = self.checks['startReplJS']
        retval = self.follower_starter_instance.execute_frontend(
            arangosh_script)
        if not retval:
            raise Exception("Failed to start the replication using: %s %s" %
                            (retval, str(self.checks['startReplJS'])))
        logging.info("Replication started successfully")
        logging.info("save document")
        arangosh_script = self.checks['afterReplJS']
        logging.info(
            str(self.leader_starter_instance.execute_frontend(
                arangosh_script)))
        self.makedata_instances.append(self.leader_starter_instance)

    def test_setup_impl(self):
        """poll (up to 30s) until the follower has replicated the data,
        then run makedata on the (read-only) follower"""
        logging.info("testing the leader/follower setup")
        tries = 30
        if not self.follower_starter_instance.execute_frontend(
                self.checks['checkReplJS']):
            while tries:
                if self.follower_starter_instance.execute_frontend(
                        self.checks['checkReplJS'], False):
                    break
                progress(".")
                time.sleep(1)
                tries -= 1
        if not tries:
            # one last attempt before declaring the replication failed
            if not self.follower_starter_instance.execute_frontend(
                    self.checks['checkReplJS']):
                raise Exception("replication didn't make it in 30s!")
        lh.subsection("leader/follower - check test data", "-")
        if self.selenium:
            self.selenium.connect_server_new_tab(
                self.follower_starter_instance.get_frontends(), '_system',
                self.cfg)
            self.selenium.check_old(self.new_cfg if self.new_cfg else self.cfg,
                                    False)
            self.selenium.close_tab_again()
        # assert that data has been replicated
        self.follower_starter_instance.arangosh.read_only = True
        self.makedata_instances.append(self.follower_starter_instance)
        self.make_data()
        logging.info("Leader follower setup successfully finished!")

    def supports_backup_impl(self):
        """hot backup is not supported by this deployment"""
        return False

    def upgrade_arangod_version_impl(self):
        """ upgrade this installation """
        # phase 1: swap binaries, phase 2: upgrade, phase 3: re-detect
        for node in [
                self.leader_starter_instance, self.follower_starter_instance
        ]:
            node.replace_binary_for_upgrade(self.new_cfg)
        for node in [
                self.leader_starter_instance, self.follower_starter_instance
        ]:
            node.command_upgrade()
            node.wait_for_upgrade()
            node.wait_for_upgrade_done_in_log()
        for node in [
                self.leader_starter_instance, self.follower_starter_instance
        ]:
            node.detect_instances()
            node.wait_for_version_reply()
        if self.selenium:
            self.selenium.web.refresh()
            self.selenium.check_old(self.new_cfg, True)
            self.selenium.connect_server_new_tab(
                self.follower_starter_instance.get_frontends(), '_system',
                self.cfg)
            self.selenium.check_old(self.new_cfg, False)
            self.selenium.close_tab_again()

    def jam_attempt_impl(self):
        """ run the replication fuzzing test """
        logging.info("running the replication fuzzing test")
        # add instace where makedata will be run on
        self.tcp_ping_all_nodes()
        ret = self.leader_starter_instance.arangosh.run_in_arangosh(
            (self.cfg.test_data_dir /
             Path('tests/js/server/replication/fuzz/replication-fuzz-global.js')
             ), [], [
                 self.follower_starter_instance.get_frontend().get_public_url(
                     'root:%s@' % self.passvoid)
             ])
        if not ret[0]:
            if not self.cfg.verbose:
                print(ret[1])
            raise Exception("replication fuzzing test failed")
        prompt_user(self.basecfg, "please test the installation.")
        if self.selenium:
            # NOTE(review): "self.cfg if self.cfg else self.new_cfg" looks
            # inverted compared to the "new_cfg if new_cfg else cfg" pattern
            # used elsewhere — confirm which config should be preferred.
            self.selenium.jam_step_1(self.cfg if self.cfg else self.new_cfg)

    def shutdown_impl(self):
        """terminate both starters; fail if any process is left running"""
        self.leader_starter_instance.terminate_instance()
        self.follower_starter_instance.terminate_instance()
        pslist = get_all_processes(False)
        if len(pslist) > 0:
            raise Exception("Not all processes terminated! [%s]" %
                            str(pslist))
        logging.info('test ended')

    def before_backup_impl(self):
        """nothing to do before a backup"""

    def after_backup_impl(self):
        """nothing to do after a backup"""
class LicenseManagerSingleServerTestSuite(LicenseManagerBaseTestSuite):
    """License manager tests: single server"""

    # pylint: disable=dangerous-default-value
    def __init__(self, new_version, installer_base_config):
        super().__init__(
            new_version,
            installer_base_config,
        )
        # short_name doubles as the test data sub-directory name (see cleanup)
        self.short_name = "SingleServer"

    def get_default_instance_type(self):
        """get the instance type we should communicate with"""
        return InstanceType.SINGLE

    @collect_crash_data
    def save_data_dir(self):
        """save data dir and logs in case a test failed"""
        kill_all_processes()
        if self.starter.basedir.exists():
            archive = shutil.make_archive(
                f"LicenseManagerSingleServerTestSuite(v. {self.base_cfg.version})",
                "bztar", self.starter.basedir)
            attach.file(archive, "test dir archive", "application/x-bzip2",
                        "tar.bz2")
        else:
            print("test basedir doesn't exist, won't create report tar")

    @run_before_suite
    def start(self):
        """clean up the system before running license manager tests on a single server setup"""
        self.cleanup()
        self.start_single_server()

    @run_after_suite
    def teardown_suite(self):
        """Teardown suite environment: single server"""
        self.starter.terminate_instance()
        kill_all_processes()
        self.cleanup()

    def get_server_id(self):
        """read server ID from data directory"""
        datadir = self.starter.all_instances[0].basedir / "data"
        # use a context manager so the SERVER file handle is closed
        # deterministically (the original leaked it via json.load(open(...)))
        with open(datadir / "SERVER") as server_file:
            server_file_content = json.load(server_file)
        server_id = server_file_content["serverId"]
        return server_id

    # pylint: disable=redefined-builtin
    def set_license(self, license):
        """set new license and restart the server so it takes effect"""
        datadir = self.starter.all_instances[0].basedir / "data"
        with open(datadir / ".license", "w") as license_file:
            license_file.truncate()
            license_file.write(license)
        self.starter.terminate_instance()
        self.starter.respawn_instance()

    def cleanup(self):
        """remove all directories created by previous run of this test"""
        testdir = self.base_cfg.test_data_dir / self.short_name
        if testdir.exists():
            shutil.rmtree(testdir)

    @step
    def start_single_server(self):
        """start a single server setup"""
        # pylint: disable=attribute-defined-outside-init
        self.starter = StarterManager(
            basecfg=self.installer.cfg,
            install_prefix=Path(self.short_name),
            instance_prefix="single",
            expect_instances=[InstanceType.SINGLE],
            mode="single",
            jwt_str="single",
        )
        self.starter.run_starter()
        self.starter.detect_instances()
        self.starter.detect_instance_pids()
        self.starter.set_passvoid(self.passvoid)
        self.instance = self.starter.instance

    @testcase
    def clean_install_temp_license(self):
        """Check that server gets a 60-minute license after installation on a clean system"""
        # 50 minutes leaves slack for the time elapsed since installation
        self.check_that_license_is_not_expired(50 * 60)

    @testcase
    def goto_read_only_mode_when_license_expired(self):
        """Check that system goes to read-only mode when license is expired"""
        self.expire_license()
        self.check_readonly()
def jam_attempt_impl(self): # pylint: disable=too-many-statements # this is simply to slow to be worth wile: # collections = self.get_collection_list() lh.subsubsection("wait for all shards to be in sync") retval = self.starter_instances[0].execute_frontend( self.check_collections_in_sync, True) if not retval: raise Exception("Failed to ensure the cluster is in sync: %s %s" % (retval, str(self.check_collections_in_sync))) print("all in sync.") agency_leader = self.agency_get_leader() terminate_instance = 2 survive_instance = 1 if self.starter_instances[terminate_instance].have_this_instance( agency_leader): print( "Cluster instance 2 has the agency leader; killing 1 instead") terminate_instance = 1 survive_instance = 2 logging.info("stopping instance %d" % terminate_instance) uuid = self.starter_instances[terminate_instance].get_dbservers( )[0].get_uuid() self.starter_instances[terminate_instance].terminate_instance( keep_instances=True) logging.info("relaunching agent!") self.starter_instances[terminate_instance].manually_launch_instances( [InstanceType.AGENT], [], False, False) self.set_frontend_instances() prompt_user(self.basecfg, "instance stopped") if self.selenium: self.selenium.jam_step_1() ret = self.starter_instances[0].arangosh.check_test_data( "Cluster one node missing", True, ["--disabledDbserverUUID", uuid]) if not ret[0]: raise Exception("check data failed " + ret[1]) ret = self.starter_instances[ survive_instance].arangosh.check_test_data( "Cluster one node missing", True, ["--disabledDbserverUUID", uuid]) if not ret[0]: raise Exception("check data failed " + ret[1]) # respawn instance, and get its state fixed self.starter_instances[terminate_instance].respawn_instance() self.set_frontend_instances() counter = 300 while not self.starter_instances[terminate_instance].is_instance_up(): if counter <= 0: raise Exception("Instance did not respawn in 5 minutes!") progress(".") time.sleep(1) counter -= 1 print() 
self.starter_instances[terminate_instance].detect_instances() self.starter_instances[terminate_instance].detect_instance_pids() self.starter_instances[ terminate_instance].detect_instance_pids_still_alive() self.set_frontend_instances() logging.info("jamming: Starting instance without jwt") moreopts = ["--starter.join", "127.0.0.1:9528"] if self.cfg.ssl and not self.cfg.use_auto_certs: keyfile = self.cert_dir / Path("nodeX") / "tls.keyfile" self.generate_keyfile(keyfile) moreopts.append(f"--ssl.keyfile={keyfile}") dead_instance = StarterManager( self.basecfg, Path("CLUSTER"), "nodeX", mode="cluster", jwt_str=None, expect_instances=[ InstanceType.AGENT, InstanceType.COORDINATOR, InstanceType.DBSERVER, ], moreopts=moreopts, ) dead_instance.run_starter(expect_to_fail=True) i = 0 while True: logging.info(". %d", i) if not dead_instance.is_instance_running(): dead_instance.check_that_starter_log_contains( "Unauthorized. Wrong credentials.") break if i > 40: logging.info("Giving up wating for the starter to exit") raise Exception("non-jwt-ed starter won't exit") i += 1 time.sleep(10) logging.info(str(dead_instance.instance.wait(timeout=320))) logging.info("dead instance is dead?") prompt_user(self.basecfg, "cluster should be up") if self.selenium: self.selenium.jam_step_2()