Example no. 1
0
    def starter_prepare_env_impl(self):
        """Set up the single-server starter, wiring in TLS when requested.

        When SSL is enabled and auto-generated certificates are disabled,
        a CA plus a host keyfile are created first and handed to the
        starter via ``--ssl.keyfile``.
        """
        extra_opts = []
        if self.cfg.ssl and not self.cfg.use_auto_certs:
            # Manual certificate mode: build our own CA and server keyfile.
            self.create_tls_ca_cert()
            keyfile = self.cert_dir / Path("single") / "tls.keyfile"
            cert_args = [
                "tls",
                "keyfile",
                "--cacert=" + str(self.certificate_auth["cert"]),
                "--cakey=" + str(self.certificate_auth["key"]),
                "--keyfile=" + str(keyfile),
                "--host=" + self.cfg.publicip,
                "--host=localhost",
            ]
            self.cert_op(cert_args)
            extra_opts.append(f"--ssl.keyfile={keyfile}")

        self.starter_instance = StarterManager(
            self.basecfg,
            self.basedir,
            "single",
            mode="single",
            port=1234,
            expect_instances=[InstanceType.SINGLE],
            jwt_str="single",
            moreopts=extra_opts,
        )
        # A lone server is trivially the leader.
        self.starter_instance.is_leader = True
Example no. 2
0
 def starter_prepare_env_impl(self):
     """Create the three activefailover starters; node2/node3 join node1."""
     node_specs = [
         ('node1', 9528, []),
         ('node2', 9628, ['--starter.join', '127.0.0.1:9528']),
         ('node3', 9728, ['--starter.join', '127.0.0.1:9528']),
     ]
     for name, port, extra_opts in node_specs:
         self.starter_instances.append(
             StarterManager(self.basecfg,
                            self.basedir, name,
                            mode='activefailover',
                            port=port,
                            expect_instances=[
                                InstanceType.agent,
                                InstanceType.resilientsingle
                            ],
                            jwtStr="afo",
                            moreopts=extra_opts))
 def add_starter(val, port):
     """Create one sync-enabled cluster starter and store it in ``val["instance"]``.

     Closure: relies on ``self`` and ``client_cert`` from the enclosing
     scope.  ``val`` is a per-node dict carrying directories plus the
     JWT/TLS/sync secrets.  A ``port`` of None marks the leader node.
     """
     val["instance"] = StarterManager(
         self.cfg,
         val["dir"],
         val["instance_dir"],
         port=port,
         mode='cluster',
         # Full local sync topology: 3 agents, 3 coordinators, 3 dbservers,
         # 3 syncmasters, 3 syncworkers on this one starter.
         expect_instances=[
             InstanceType.agent, InstanceType.agent, InstanceType.agent,
             InstanceType.coordinator, InstanceType.coordinator,
             InstanceType.coordinator, InstanceType.dbserver,
             InstanceType.dbserver, InstanceType.dbserver,
             InstanceType.syncmaster, InstanceType.syncmaster,
             InstanceType.syncmaster, InstanceType.syncworker,
             InstanceType.syncworker, InstanceType.syncworker
         ],
         moreopts=[
             '--starter.sync', '--starter.local',
             '--auth.jwt-secret=' + str(val["JWTSecret"]),
             '--sync.server.keyfile=' + str(val["tlsKeyfile"]),
             '--sync.server.client-cafile=' + str(client_cert),
             '--sync.master.jwt-secret=' + str(val["SyncSecret"]),
             '--starter.address=' + self.cfg.publicip
         ])
     # The node launched without an explicit port is the leader.
     if port is None:
         val["instance"].is_leader = True
    def jam_attempt_impl(self):
        """Jam the cluster: bounce instance 2, then prove that a starter
        launched without a JWT secret cannot join and eventually exits.
        """
        logging.info("stopping instance 2")
        self.starter_instances[2].terminate_instance()
        self.set_frontend_instances()

        prompt_user(self.basecfg, "instance stopped")
        if self.selenium:
            self.selenium.jam_step_1(self.new_cfg if self.new_cfg else self.cfg)

        # respawn instance, and get its state fixed
        self.starter_instances[2].respawn_instance()
        self.set_frontend_instances()
        while not self.starter_instances[2].is_instance_up():
            progress('.')
            time.sleep(1)
        print()
        self.starter_instances[2].detect_instances()
        self.starter_instances[2].detect_instance_pids()
        self.starter_instances[2].detect_instance_pids_still_alive()
        self.set_frontend_instances()

        logging.info('jamming: Starting instance without jwt')
        dead_instance = StarterManager(
            self.basecfg,
            Path('CLUSTER'), 'nodeX',
            mode='cluster',
            jwtStr=None,
            expect_instances=[
                InstanceType.agent,
                InstanceType.coordinator,
                InstanceType.dbserver,
            ],
            moreopts=['--starter.join', '127.0.0.1:9528'])
        dead_instance.run_starter()

        # Poll for up to ~400s (40 * 10s); the un-authenticated starter is
        # expected to give up and exit on its own.
        i = 0
        while True:
            logging.info(". %d", i)
            if not dead_instance.is_instance_running():
                break
            if i > 40:
                # fixed typo: message used to read "wating"
                logging.info('Giving up waiting for the starter to exit')
                raise Exception("non-jwt-ed starter won't exit")
            i += 1
            time.sleep(10)
        logging.info(str(dead_instance.instance.wait(timeout=320)))
        logging.info('dead instance is dead?')

        prompt_user(self.basecfg, "cluster should be up")
        if self.selenium:
            self.selenium.jam_step_2(self.new_cfg if self.new_cfg else self.cfg)
 def start_single_server(self):
     """start a single server setup"""
     # pylint: disable=attribute-defined-outside-init
     self.starter = StarterManager(
         basecfg=self.installer.cfg,
         install_prefix=Path(self.short_name),
         instance_prefix="single",
         expect_instances=[InstanceType.SINGLE],
         mode="single",
         jwt_str="single",
     )
     # Boot the deployment, then discover the child arangod process(es).
     self.starter.run_starter()
     self.starter.detect_instances()
     self.starter.detect_instance_pids()
     # Set the root password for subsequent authenticated access.
     self.starter.set_passvoid(self.passvoid)
     self.instance = self.starter.instance
Example no. 6
0
 def _add_starter(val, port, moreopts=None):
     """Create one sync-enabled cluster starter and store it in ``val["instance"]``.

     Closure helper: relies on ``self`` and ``client_cert`` from the
     enclosing scope.  ``val`` carries the node's directories and secrets;
     the node listening on port 7528 is flagged as the leader.
     """
     # Fixed mutable-default-argument anti-pattern: default was `[]`,
     # which is shared across calls; use None + a fresh list instead.
     if moreopts is None:
         moreopts = []
     # fmt: off
     opts = [
         '--all.log.level=backup=trace',
         '--all.log.level=requests=debug',
         '--args.syncmasters.log.level=debug',
         '--args.syncworkers.log.level=debug', '--starter.sync',
         '--starter.local',
         '--auth.jwt-secret=' + str(val["JWTSecret"]),
         '--sync.server.keyfile=' + str(val["tlsKeyfile"]),
         '--sync.server.client-cafile=' + str(client_cert),
         '--sync.master.jwt-secret=' + str(val["SyncSecret"]),
         '--starter.address=' + self.cfg.publicip
     ] + moreopts
     # fmt: on
     if self.cfg.ssl and not self.cfg.use_auto_certs:
         # Manual TLS: reuse the node's sync keyfile for the SSL endpoint.
         opts.append("--ssl.keyfile=" + str(val["tlsKeyfile"]))
     val["instance"] = StarterManager(
         self.cfg,
         val["dir"],
         val["instance_dir"],
         port=port,
         mode="cluster",
         # 3 of each instance type expected on this local sync cluster.
         expect_instances=[
             InstanceType.AGENT,
             InstanceType.AGENT,
             InstanceType.AGENT,
             InstanceType.COORDINATOR,
             InstanceType.COORDINATOR,
             InstanceType.COORDINATOR,
             InstanceType.DBSERVER,
             InstanceType.DBSERVER,
             InstanceType.DBSERVER,
             InstanceType.SYNCMASTER,
             InstanceType.SYNCMASTER,
             InstanceType.SYNCMASTER,
             InstanceType.SYNCWORKER,
             InstanceType.SYNCWORKER,
             InstanceType.SYNCWORKER,
         ],
         moreopts=opts,
     )
     val["instance"].set_jwt_file(val["JWTSecret"])
     # Port 7528 identifies the leader node of this setup.
     if port == 7528:
         val["instance"].is_leader = True
    def starter_prepare_env_impl(self):
        self.create_test_collection = ("""
db._create("testCollection",  { numberOfShards: 6, replicationFactor: 2});
db.testCollection.save({test: "document"})
""", "create test collection")

        self.starter_instances.append(
            StarterManager(self.basecfg,
                           self.basedir, 'node1',
                           mode='cluster',
                           jwtStr=self.jwtdatastr,
                           port=9528,
                           expect_instances=[
                               InstanceType.agent,
                               InstanceType.coordinator,
                               InstanceType.dbserver,
                           ],
                           moreopts=[]))
        self.starter_instances.append(
            StarterManager(self.basecfg,
                           self.basedir, 'node2',
                           mode='cluster',
                           jwtStr=self.jwtdatastr,
                           port=9628,
                           expect_instances=[
                               InstanceType.agent,
                               InstanceType.coordinator,
                               InstanceType.dbserver,
                           ],
                           moreopts=['--starter.join', '127.0.0.1:9528']))
        self.starter_instances.append(
            StarterManager(self.basecfg,
                           self.basedir, 'node3',
                           mode='cluster',
                           jwtStr=self.jwtdatastr,
                           port=9728,
                           expect_instances=[
                               InstanceType.agent,
                               InstanceType.coordinator,
                               InstanceType.dbserver,
                           ],
                           moreopts=['--starter.join', '127.0.0.1:9528']))
        for instance in self.starter_instances:
            instance.is_leader = True
Example no. 8
0
    def starter_prepare_env_impl(self):
        """Create the leader and follower single-server starters."""
        def make_single(name, port, secret):
            # One single-server StarterManager per role.
            return StarterManager(
                self.cfg,
                self.basedir,
                name,
                mode='single',
                port=port,
                expect_instances=[InstanceType.single],
                jwtStr=secret,
                moreopts=[])

        self.leader_starter_instance = make_single('leader', 1234, "leader")
        self.leader_starter_instance.is_leader = True

        self.follower_starter_instance = make_single('follower', 2345,
                                                     "follower")
 def add_starter(name, port, opts):
     """Create one activefailover starter and register it."""
     manager = StarterManager(
         self.basecfg,
         self.basedir,
         name,
         mode="activefailover",
         port=port,
         expect_instances=[
             InstanceType.AGENT,
             InstanceType.RESILIENT_SINGLE,
         ],
         jwt_str="afo",
         moreopts=opts,
     )
     self.starter_instances.append(manager)
Example no. 10
0
 def add_starter(name, port, opts):
     """Build one cluster starter node and register it."""
     node = StarterManager(
         self.basecfg,
         self.basedir,
         name,
         mode="cluster",
         jwt_str=self.jwtdatastr,
         port=port,
         expect_instances=[
             InstanceType.AGENT,
             InstanceType.COORDINATOR,
             InstanceType.DBSERVER,
         ],
         moreopts=opts,
     )
     self.starter_instances.append(node)
Example no. 11
0
class LeaderFollower(Runner):
    """this runs a leader / Follower setup with synchronisation"""

    # pylint: disable=too-many-arguments disable=too-many-instance-attributes
    def __init__(
        self,
        runner_type,
        abort_on_error,
        installer_set,
        selenium,
        selenium_driver_args,
        testrun_name: str,
        ssl: bool,
        use_auto_certs: bool,
    ):
        """Initialize the leader/follower runner.

        Registers the RunnerProperties for this scenario and prepares the
        arangosh check scripts used before/after/while verifying the
        replication.
        """
        super().__init__(
            runner_type,
            abort_on_error,
            installer_set,
            RunnerProperties("LeaderFollower", 400, 500, False, ssl,
                             use_auto_certs),
            selenium,
            selenium_driver_args,
            testrun_name,
        )

        # StarterManager handles, created later in starter_prepare_env_impl.
        self.leader_starter_instance = None
        self.follower_starter_instance = None

        self.success = False
        # arangosh snippets keyed by phase: each value is a
        # (description, javascript) tuple executed on a frontend.
        self.checks = {
            "beforeReplJS": (
                "saving document before",
                """
db._create("testCollectionBefore");
db.testCollectionBefore.save({"hello": "world"})
""",
            ),
            "afterReplJS": (
                "saving document after the replication",
                """
db._create("testCollectionAfter");
db.testCollectionAfter.save({"hello": "world"})
""",
            ),
            "checkReplJS": (
                "checking documents",
                """
if (!db.testCollectionBefore) {
  throw new Error("before collection does not exist");
}
if (!db.testCollectionAfter) {
  throw new Error("after collection does not exist - replication failed");
}
if (!(db.testCollectionBefore.toArray()[0]["hello"] === "world")) {
  throw new Error("before not yet there?");
}
if (!(db.testCollectionAfter.toArray()[0]["hello"] === "world")) {
  throw new Error("after not yet there?");
}
""",
            ),
        }

    def starter_prepare_env_impl(self):
        """Create the leader and follower starters, generating per-role
        TLS keyfiles first when manual SSL is enabled."""
        leader_opts = []
        follower_opts = []
        if self.cfg.ssl and not self.cfg.use_auto_certs:
            self.create_tls_ca_cert()
            keyfiles = {}
            for role in ("leader", "follower"):
                keyfile = self.cert_dir / Path(role) / "tls.keyfile"
                keyfiles[role] = keyfile
                # Sign one host keyfile per role with our own CA.
                self.cert_op([
                    "tls",
                    "keyfile",
                    "--cacert=" + str(self.certificate_auth["cert"]),
                    "--cakey=" + str(self.certificate_auth["key"]),
                    "--keyfile=" + str(keyfile),
                    "--host=" + self.cfg.publicip,
                    "--host=localhost",
                ])
            leader_opts.append(f"--ssl.keyfile={keyfiles['leader']}")
            follower_opts.append(f"--ssl.keyfile={keyfiles['follower']}")

        self.leader_starter_instance = StarterManager(
            self.basecfg,
            self.basedir,
            "leader",
            mode="single",
            port=1234,
            expect_instances=[InstanceType.SINGLE],
            jwt_str="leader",
            moreopts=leader_opts,
        )
        self.leader_starter_instance.is_leader = True

        self.follower_starter_instance = StarterManager(
            self.basecfg,
            self.basedir,
            "follower",
            mode="single",
            port=2345,
            expect_instances=[InstanceType.SINGLE],
            jwt_str="follower",
            moreopts=follower_opts,
        )

    def starter_run_impl(self):
        """Boot both starters, detect their instances and set passwords."""
        pair = (self.leader_starter_instance, self.follower_starter_instance)

        for starter in pair:
            starter.run_starter()
        for starter in pair:
            starter.detect_instances()
        for starter in pair:
            starter.detect_instance_pids()

        self.passvoid = "leader"
        # the replication will overwrite the follower's passvoid anyways:
        for starter in pair:
            starter.set_passvoid(self.passvoid)

        self.starter_instances = list(pair)

    def finish_setup_impl(self):
        # finish setup by starting the replications
        self.set_frontend_instances()

        self.checks["startReplJS"] = (
            "launching replication",
            """
print(
require("@arangodb/replication").setupReplicationGlobal({
    endpoint: "%s://127.0.0.1:%s",
    username: "******",
    password: "******",
    verbose: false,
    includeSystem: true,
    incremental: true,
    autoResync: true
    }));
print("replication started")
process.exit(0);
""" % (
                self.get_protocol(),
                str(self.leader_starter_instance.get_frontend_port()),
                self.leader_starter_instance.get_passvoid(),
            ),
        )
        lh.subsubsection("prepare leader follower replication")
        arangosh_script = self.checks["beforeReplJS"]
        logging.info(
            str(self.leader_starter_instance.execute_frontend(
                arangosh_script)))

        lh.subsubsection("start leader follwer replication")
        arangosh_script = self.checks["startReplJS"]
        retval = self.follower_starter_instance.execute_frontend(
            arangosh_script)
        if not retval:
            raise Exception("Failed to start the replication using: %s %s" %
                            (retval, str(self.checks["startReplJS"])))

        logging.info("Replication started successfully")

        logging.info("save document")
        arangosh_script = self.checks["afterReplJS"]
        logging.info(
            str(self.leader_starter_instance.execute_frontend(
                arangosh_script)))
        self.makedata_instances.append(self.leader_starter_instance)

    @step
    def test_setup_impl(self):
        """Wait (up to ~30s) for the replication to deliver the test
        collections to the follower, then run makedata checks on it."""
        logging.info("testing the leader/follower setup")
        tries = 30
        # First probe; on failure poll once per second until success or
        # the retry budget is exhausted.
        if not self.follower_starter_instance.execute_frontend(
                self.checks["checkReplJS"]):
            while tries:
                if self.follower_starter_instance.execute_frontend(
                        self.checks["checkReplJS"]):
                    break
                progress(".")
                time.sleep(1)
                tries -= 1

        if not tries:
            # Budget exhausted: one final check decides pass/fail.
            if not self.follower_starter_instance.execute_frontend(
                    self.checks["checkReplJS"]):
                raise Exception("replication didn't make it in 30s!")

        lh.subsection("leader/follower - check test data", "-")

        if self.selenium:
            self.selenium.test_after_install()
        # assert that data has been replicated
        # The follower is read-only; foxx tests cannot run on it.
        self.follower_starter_instance.arangosh.read_only = True
        self.follower_starter_instance.supports_foxx_tests = False
        logging.info("Leader follower testing makedata on follower")
        self.makedata_instances.append(self.follower_starter_instance)
        self.make_data()
        if self.selenium:
            self.selenium.test_setup()

        logging.info("Leader follower setup successfully finished!")

    @step
    def upgrade_arangod_version_impl(self):
        """rolling upgrade this installation"""
        nodes = [self.leader_starter_instance, self.follower_starter_instance]

        # Phase 1: swap in the new binaries on every node.
        for node in nodes:
            node.replace_binary_for_upgrade(self.new_cfg)

        # Phase 2: run the upgrade procedure and wait for completion.
        for node in nodes:
            node.command_upgrade()
            node.wait_for_upgrade()
            node.wait_for_upgrade_done_in_log()

        # Phase 3: re-detect instances and confirm the new version replies.
        for node in nodes:
            node.detect_instances()
            node.wait_for_version_reply()

        if self.selenium:
            self.selenium.test_after_install()

    @step
    def upgrade_arangod_version_manual_impl(self):
        """manual upgrade this installation"""
        self.progress(True, "step 1 - shut down instances")
        instances = [
            self.leader_starter_instance, self.follower_starter_instance
        ]
        for node in instances:
            node.replace_binary_setup_for_upgrade(self.new_cfg)
            node.terminate_instance(True)
        self.progress(
            True, "step 2 - launch instances with the upgrade options set")
        for node in instances:
            print("launch")
            # Run arangod once with --database.auto-upgrade so it migrates
            # the data directory, then exits.
            node.manually_launch_instances(
                [InstanceType.SINGLE],
                [
                    "--database.auto-upgrade",
                    "true",
                    "--javascript.copy-installation",
                    "true",
                ],
            )
        self.progress(True, "step 3 - launch instances again")
        for node in instances:
            node.respawn_instance()
        self.progress(True, "step 4 - detect system state")
        for node in instances:
            node.detect_instances()
            node.wait_for_version_reply()
        if self.selenium:
            self.selenium.test_after_install()

    @step
    def jam_attempt_impl(self):
        """run the replication fuzzing test"""
        logging.info("running the replication fuzzing test")
        # add instance where makedata will be run on
        self.tcp_ping_all_nodes()
        # Run the fuzz script on the leader, pointing it at the follower's
        # public URL with root credentials.
        ret = self.leader_starter_instance.arangosh.run_in_arangosh(
            (self.cfg.test_data_dir /
             Path("tests/js/server/replication/fuzz/replication-fuzz-global.js"
                  )),
            [],
            [
                self.follower_starter_instance.get_frontend().get_public_url(
                    "root:%s@" % self.passvoid)
            ],
        )
        # ret[0] is the success flag; ret[1] holds the captured output.
        if not ret[0]:
            if not self.cfg.verbose:
                print(ret[1])
            raise Exception("replication fuzzing test failed")

        prompt_user(self.basecfg, "please test the installation.")
        if self.selenium:
            self.selenium.test_jam_attempt()

    @step
    def shutdown_impl(self):
        """Tear down both starters and verify nothing is left running."""
        for starter in (self.leader_starter_instance,
                        self.follower_starter_instance):
            starter.terminate_instance()
        leftovers = get_all_processes(False)
        if leftovers:
            raise Exception("Not all processes terminated! [%s]" %
                            str(leftovers))
        logging.info("test ended")

    def before_backup_impl(self):
        """nothing to see here"""
        # No pre-backup preparation is needed for this scenario.

    def after_backup_impl(self):
        """nothing to see here"""
        # No post-backup cleanup is needed for this scenario.

    def set_selenium_instances(self):
        """set instances in selenium runner"""
        # The selenium runner only inspects the leader's tooling and its
        # first (and only) arangod instance.
        self.selenium.set_instances(
            self.cfg,
            self.leader_starter_instance.arango_importer,
            self.leader_starter_instance.arango_restore,
            self.leader_starter_instance.all_instances[0],
        )
    def starter_prepare_env_impl(self):
        """Prepare a three-node cluster; node2 and node3 join node1 on 9528.

        Overrides the memory arangod detects so the nine processes of the
        three starters don't each assume they own the whole machine.
        """
        mem = psutil.virtual_memory()
        # 80% of RAM divided by 9 — presumably one share per arangod
        # process (3 nodes x agent/coordinator/dbserver); confirm.
        os.environ['ARANGODB_OVERRIDE_DETECTED_TOTAL_MEMORY'] = str(
            int((mem.total * 0.8) / 9))

        self.basecfg.index = 0

        # pylint: disable=C0415
        # Remote runs swap in StarterNonManager under the same name.
        if self.remote:
            from arangodb.starter.manager import (StarterNonManager as
                                                  StarterManager)
        else:
            from arangodb.starter.manager import StarterManager
        self.starter_instances.append(
            StarterManager(
                self.basecfg,
                self.basedir,
                'node1',
                mode='cluster',
                jwtStr=self.jwtdatastr,
                port=9528,
                expect_instances=[
                    InstanceType.agent,
                    InstanceType.coordinator,
                    InstanceType.dbserver,
                ],
                moreopts=[
                    # optional agency tuning knobs, kept for reference:
                    #    '--agents.agency.election-timeout-min=5',
                    #    '--agents.agency.election-timeout-max=10',
                ]))
        self.starter_instances.append(
            StarterManager(
                self.basecfg,
                self.basedir,
                'node2',
                mode='cluster',
                jwtStr=self.jwtdatastr,
                port=9628,
                expect_instances=[
                    InstanceType.agent,
                    InstanceType.coordinator,
                    InstanceType.dbserver,
                ],
                moreopts=[
                    '--starter.join',
                    '127.0.0.1:9528',
                    #    '--agents.agency.election-timeout-min=5',
                    #    '--agents.agency.election-timeout-max=10',
                ]))
        self.starter_instances.append(
            StarterManager(
                self.basecfg,
                self.basedir,
                'node3',
                mode='cluster',
                jwtStr=self.jwtdatastr,
                port=9728,
                expect_instances=[
                    InstanceType.agent,
                    InstanceType.coordinator,
                    InstanceType.dbserver,
                ],
                moreopts=[
                    '--starter.join',
                    '127.0.0.1:9528',
                    #    '--agents.agency.election-timeout-min=5',
                    #    '--agents.agency.election-timeout-max=10',
                ]))
        for instance in self.starter_instances:
            instance.is_leader = True
 def test_debug_symbols_attach_to_process_windows(self):
     """Debug arangod executable by attaching debugger to a running process (Windows)"""
     starter = StarterManager(
         basecfg=self.installer.cfg,
         install_prefix=Path(DebuggerTestSuite.STARTER_DIR),
         instance_prefix="single",
         expect_instances=[InstanceType.SINGLE],
         mode="single",
         jwt_str="single",
     )
     try:
         with step("Start a single server deployment"):
             starter.run_starter()
             starter.detect_instances()
             starter.detect_instance_pids()
             starter.set_passvoid("")
             pid = starter.all_instances[0].pid
         # Directory holding the PDB symbol files for cdb's -y option.
         pdb_dir = str(self.installer.cfg.debug_install_prefix)
         with step(
                 "Check that stack trace with function names and line numbers can be acquired from cdb"
         ):
             # -pv: attach non-invasively; -lines: include line numbers.
             cmd = " ".join([
                 "cdb", "-pv", "-p",
                 str(pid), "-y", pdb_dir, "-lines", "-n"
             ])
             attach(cmd, "CDB command", attachment_type=AttachmentType.TEXT)
             cdb = wexpect.spawn(cmd)
             cdb.expect(DebuggerTestSuite.CDB_PROMPT, timeout=300)
             # "k" dumps the call stack; the output accumulated before the
             # next prompt is the stack trace.
             cdb.sendline("k")
             cdb.expect(DebuggerTestSuite.CDB_PROMPT, timeout=300)
             stack = cdb.before
             cdb.sendline("q")
             attach(stack,
                    "Stacktrace from cdb output",
                    attachment_type=AttachmentType.TEXT)
             assert "arangod!main" in stack, "Stack must contain real function names."
             assert "arangod.cpp" in stack, "Stack must contain real source file names."
     finally:
         starter.terminate_instance()
         kill_all_processes()
Example no. 14
0
    def starter_prepare_env_impl(self):
        """Prepare a three-node cluster; node2 and node3 join node1 on 9528.

        Detected memory is overridden so each of the nine arangod
        processes only assumes its share; with manual SSL a dedicated TLS
        keyfile is generated and assigned per node.
        """
        mem = psutil.virtual_memory()
        # 80% of RAM divided by 9 — presumably one share per arangod
        # process (3 nodes x agent/coordinator/dbserver); confirm.
        os.environ["ARANGODB_OVERRIDE_DETECTED_TOTAL_MEMORY"] = str(
            int((mem.total * 0.8) / 9))

        self.basecfg.index = 0

        # pylint: disable=import-outside-toplevel
        # Remote runs swap in StarterNonManager under the same name.
        if self.remote:
            from arangodb.starter.manager import StarterNonManager as StarterManager
        else:
            from arangodb.starter.manager import StarterManager

        node1_opts = []
        node2_opts = ["--starter.join", "127.0.0.1:9528"]
        node3_opts = ["--starter.join", "127.0.0.1:9528"]
        if self.cfg.ssl and not self.cfg.use_auto_certs:
            self.create_tls_ca_cert()
            node1_tls_keyfile = self.cert_dir / Path("node1") / "tls.keyfile"
            node2_tls_keyfile = self.cert_dir / Path("node2") / "tls.keyfile"
            node3_tls_keyfile = self.cert_dir / Path("node3") / "tls.keyfile"

            # Sign one host keyfile per node with our own CA.
            for keyfile in (node1_tls_keyfile, node2_tls_keyfile,
                            node3_tls_keyfile):
                self.cert_op([
                    "tls",
                    "keyfile",
                    "--cacert=" + str(self.certificate_auth["cert"]),
                    "--cakey=" + str(self.certificate_auth["key"]),
                    "--keyfile=" + str(keyfile),
                    "--host=" + self.cfg.publicip,
                    "--host=localhost",
                ])
            node1_opts.append(f"--ssl.keyfile={node1_tls_keyfile}")
            node2_opts.append(f"--ssl.keyfile={node2_tls_keyfile}")
            # BUG FIX: node3 previously reused node2's keyfile.
            node3_opts.append(f"--ssl.keyfile={node3_tls_keyfile}")

        for name, port, opts in (("node1", 9528, node1_opts),
                                 ("node2", 9628, node2_opts),
                                 ("node3", 9728, node3_opts)):
            self.starter_instances.append(
                StarterManager(
                    self.basecfg,
                    self.basedir,
                    name,
                    mode="cluster",
                    jwt_str=self.jwtdatastr,
                    port=port,
                    expect_instances=[
                        InstanceType.AGENT,
                        InstanceType.COORDINATOR,
                        InstanceType.DBSERVER,
                    ],
                    moreopts=opts,
                ))
        for instance in self.starter_instances:
            instance.is_leader = True
Example no. 15
0
class Single(Runner):
    """this runs a single server setup"""

    # pylint: disable=too-many-arguments disable=too-many-instance-attributes
    def __init__(
        self,
        runner_type,
        abort_on_error,
        installer_set,
        selenium,
        selenium_driver_args,
        testrun_name: str,
        ssl: bool,
        use_auto_certs: bool,
    ):
        super().__init__(
            runner_type,
            abort_on_error,
            installer_set,
            RunnerProperties("Single", 400, 500, True, ssl, use_auto_certs),
            selenium,
            selenium_driver_args,
            testrun_name,
        )

        # StarterManager driving the one server; created in starter_prepare_env_impl()
        self.starter_instance = None
        self.backup_instance_count = 1
        self.success = False

    def starter_prepare_env_impl(self):
        """create the TLS keyfile (when manual certs are requested) and the starter"""
        opts = []
        if self.cfg.ssl and not self.cfg.use_auto_certs:
            # manual certificates: create a CA and sign a keyfile for this host
            self.create_tls_ca_cert()
            tls_keyfile = self.cert_dir / Path("single") / "tls.keyfile"
            self.cert_op([
                "tls",
                "keyfile",
                "--cacert=" + str(self.certificate_auth["cert"]),
                "--cakey=" + str(self.certificate_auth["key"]),
                "--keyfile=" + str(tls_keyfile),
                "--host=" + self.cfg.publicip,
                "--host=localhost",
            ])
            opts.append(f"--ssl.keyfile={tls_keyfile}")

        self.starter_instance = StarterManager(
            self.basecfg,
            self.basedir,
            "single",
            mode="single",
            port=1234,
            expect_instances=[InstanceType.SINGLE],
            jwt_str="single",
            moreopts=opts,
        )
        self.starter_instance.is_leader = True

    def starter_run_impl(self):
        """launch the starter, detect the server process and set its password"""
        self.starter_instance.run_starter()

        self.starter_instance.detect_instances()

        self.starter_instance.detect_instance_pids()

        self.passvoid = "single"
        self.starter_instance.set_passvoid(self.passvoid)

        self.starter_instances = [
            self.starter_instance,
        ]

    def finish_setup_impl(self):
        # finish setup by starting the replications
        self.set_frontend_instances()

        self.makedata_instances.append(self.starter_instance)

    @step
    def test_setup_impl(self):
        """check that the single server setup is functional"""
        logging.info("testing the single server setup")
        lh.subsection("single server - check test data", "-")

        if self.selenium:
            self.selenium.test_after_install()
        self.make_data()
        if self.selenium:
            self.selenium.test_setup()

        logging.info("Single setup successfully finished!")

    @step
    def upgrade_arangod_version_impl(self):
        """rolling upgrade this installation"""
        self.starter_instance.replace_binary_for_upgrade(self.new_cfg)
        self.starter_instance.command_upgrade()
        self.starter_instance.wait_for_upgrade()
        self.starter_instance.wait_for_upgrade_done_in_log()

        self.starter_instance.detect_instances()
        self.starter_instance.wait_for_version_reply()
        if self.selenium:
            self.selenium.test_after_install()

    @step
    def upgrade_arangod_version_manual_impl(self):
        """manual upgrade this installation"""
        self.progress(True, "step 1 - shut down instances")
        self.starter_instance.replace_binary_setup_for_upgrade(self.new_cfg)
        self.starter_instance.terminate_instance(True)
        self.progress(
            True, "step 2 - launch instances with the upgrade options set")
        print("launch")
        self.starter_instance.manually_launch_instances(
            [InstanceType.SINGLE],
            [
                "--database.auto-upgrade",
                "true",
                "--javascript.copy-installation",
                "true",
            ],
        )
        self.progress(True, "step 3 - launch instances again")
        self.starter_instance.respawn_instance()
        self.progress(True, "step 4 - detect system state")
        self.starter_instance.detect_instances()
        self.starter_instance.wait_for_version_reply()
        if self.selenium:
            self.selenium.test_after_install()

    @step
    def jam_attempt_impl(self):
        """run the replication fuzzing test"""
        prompt_user(self.basecfg, "please test the installation.")
        if self.selenium:
            self.selenium.test_jam_attempt()

    @step
    def shutdown_impl(self):
        """terminate the server and assert no stray processes remain"""
        self.starter_instance.terminate_instance()
        pslist = get_all_processes(False)
        if len(pslist) > 0:
            raise Exception("Not all processes terminated! [%s]" % str(pslist))
        logging.info("test ended")

    def before_backup_impl(self):
        """nothing to see here"""

    def after_backup_impl(self):
        """nothing to see here"""

    def set_selenium_instances(self):
        """set instances in selenium runner"""
        # bugfix: was `sself.starter_instance...` which raised NameError
        self.selenium.set_instances(
            self.cfg,
            self.starter_instance.arango_importer,
            self.starter_instance.arango_restore,
            self.starter_instance.all_instances[0],
        )
# Example 16
def create_arangod_dump(installer, starter_dir: str, dump_file_dir: str):
    """create arangod memory dump file

    Spins up a throw-away single-server deployment, runs `procdump` against
    the arangod process and returns the dump file path parsed from procdump's
    output. Raises if procdump did not report a completed dump. The helper
    deployment is always torn down, even on failure.
    """
    starter = StarterManager(
        basecfg=installer.cfg,
        install_prefix=Path(starter_dir),
        instance_prefix="single",
        expect_instances=[InstanceType.SINGLE],
        mode="single",
        jwt_str="single",
    )
    dump_filename = None
    try:
        with step("Start a single server deployment"):
            starter.run_starter()
            starter.detect_instances()
            starter.detect_instance_pids()
            starter.set_passvoid("")
            pid = starter.all_instances[0].pid
        with step("Create a dump of arangod process"):
            cmd = ["procdump", "-ma", str(pid), dump_file_dir]
            lh.log_cmd(cmd)
            with psutil.Popen(cmd,
                              bufsize=-1,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE) as proc:
                (procdump_out, procdump_err) = proc.communicate()
                procdump_str = str(procdump_out, "UTF-8")
                # bugfix: attachment was mislabeled "procdump sdtout"
                attach(procdump_str, "procdump stdout")
                attach(str(procdump_err), "procdump stderr")
                success_string = "Dump 1 complete"
                # procdump prints e.g. "[12:34:56] Dump 1 initiated: <path>"
                filename_regex = re.compile(
                    r"^(\[\d{2}:\d{2}:\d{2}\] Dump 1 initiated: )(?P<filename>.*)$",
                    re.MULTILINE)
                match = filename_regex.search(procdump_str)
                if success_string not in procdump_str or not match:
                    raise Exception(
                        "procdump wasn't able to create a dump file: " +
                        procdump_str)
                dump_filename = match.group("filename")
    finally:
        # always clean up the helper deployment and any leftover processes
        starter.terminate_instance()
        kill_all_processes()
    return dump_filename
# Example 17
class LeaderFollower(Runner):
    """ this runs a leader / Follower setup with synchronisation """

    # pylint: disable=R0913 disable=R0902
    def __init__(self, runner_type, cfg, old_inst, new_cfg, new_inst, selenium,
                 selenium_driver_args):
        super().__init__(runner_type, cfg, old_inst, new_cfg, new_inst, 'lf',
                         400, 500, selenium, selenium_driver_args)

        # the two StarterManagers; created in starter_prepare_env_impl()
        self.leader_starter_instance = None
        self.follower_starter_instance = None

        self.success = False
        # named arangosh snippets as (description, JS source) pairs
        self.checks = {
            "beforeReplJS": ("saving document before", """
db._create("testCollectionBefore");
db.testCollectionBefore.save({"hello": "world"})
"""),
            "afterReplJS": ("saving document after the replication", """
db._create("testCollectionAfter");
db.testCollectionAfter.save({"hello": "world"})
"""),
            "checkReplJS": ("checking documents", """
if (!db.testCollectionBefore) {
  throw new Error("before collection does not exist");
}
if (!db.testCollectionAfter) {
  throw new Error("after collection does not exist - replication failed");
}
if (!db.testCollectionBefore.toArray()[0]["hello"] === "world") {
  throw new Error("before not yet there?");
}
if (!db.testCollectionAfter.toArray()[0]["hello"] === "world") {
  throw new Error("after not yet there?");
}
""")
        }

    def starter_prepare_env_impl(self):
        """create the leader and follower single-server starter instances"""
        self.leader_starter_instance = StarterManager(
            self.cfg,
            self.basedir,
            'leader',
            mode='single',
            port=1234,
            expect_instances=[InstanceType.single],
            jwtStr="leader",
            moreopts=[])
        self.leader_starter_instance.is_leader = True

        self.follower_starter_instance = StarterManager(
            self.cfg,
            self.basedir,
            'follower',
            mode='single',
            port=2345,
            expect_instances=[InstanceType.single],
            jwtStr="follower",
            moreopts=[])

    def starter_run_impl(self):
        """launch both starters, detect their processes and set passwords"""
        self.leader_starter_instance.run_starter()
        self.follower_starter_instance.run_starter()

        self.leader_starter_instance.detect_instances()
        self.follower_starter_instance.detect_instances()

        self.leader_starter_instance.detect_instance_pids()
        self.follower_starter_instance.detect_instance_pids()

        self.passvoid = 'leader'
        self.leader_starter_instance.set_passvoid(self.passvoid)
        # the replication will overwrite this passvoid anyways:
        self.follower_starter_instance.set_passvoid(self.passvoid)

        self.starter_instances = [
            self.leader_starter_instance, self.follower_starter_instance
        ]

    def finish_setup_impl(self):
        """seed data on the leader and start global replication to the follower"""
        # finish setup by starting the replications
        self.set_frontend_instances()

        # NOTE(review): the username/password values below appear redacted
        # ("******") in this copy of the source — verify against the original.
        self.checks['startReplJS'] = ("launching replication", """
print(
require("@arangodb/replication").setupReplicationGlobal({
    endpoint: "tcp://127.0.0.1:%s",
    username: "******",
    password: "******",
    verbose: false,
    includeSystem: true,
    incremental: true,
    autoResync: true
    }));
print("replication started")
process.exit(0);
""" % (str(self.leader_starter_instance.get_frontend_port()),
        self.leader_starter_instance.get_passvoid()))
        lh.subsubsection("prepare leader follower replication")
        # create a collection on the leader BEFORE replication starts
        arangosh_script = self.checks['beforeReplJS']
        logging.info(
            str(self.leader_starter_instance.execute_frontend(
                arangosh_script)))

        lh.subsubsection("start leader follwer replication")
        # the follower pulls from the leader, so run the setup script there
        arangosh_script = self.checks['startReplJS']
        retval = self.follower_starter_instance.execute_frontend(
            arangosh_script)
        if not retval:
            raise Exception("Failed to start the replication using: %s %s" %
                            (retval, str(self.checks['startReplJS'])))

        logging.info("Replication started successfully")

        logging.info("save document")
        # create a second collection AFTER replication is running
        arangosh_script = self.checks['afterReplJS']
        logging.info(
            str(self.leader_starter_instance.execute_frontend(
                arangosh_script)))
        self.makedata_instances.append(self.leader_starter_instance)

    def test_setup_impl(self):
        """poll the follower until both collections have replicated (30s budget)"""
        logging.info("testing the leader/follower setup")
        tries = 30
        if not self.follower_starter_instance.execute_frontend(
                self.checks['checkReplJS']):
            while tries:
                if self.follower_starter_instance.execute_frontend(
                        self.checks['checkReplJS'], False):
                    break
                progress(".")
                time.sleep(1)
                tries -= 1

        # one final, verbose attempt before giving up
        if not tries:
            if not self.follower_starter_instance.execute_frontend(
                    self.checks['checkReplJS']):
                raise Exception("replication didn't make it in 30s!")

        lh.subsection("leader/follower - check test data", "-")

        if self.selenium:
            self.selenium.connect_server_new_tab(
                self.follower_starter_instance.get_frontends(), '_system',
                self.cfg)
            self.selenium.check_old(self.new_cfg if self.new_cfg else self.cfg,
                                    False)
            self.selenium.close_tab_again()

        #assert that data has been replicated
        self.follower_starter_instance.arangosh.read_only = True
        self.makedata_instances.append(self.follower_starter_instance)
        self.make_data()

        logging.info("Leader follower setup successfully finished!")

    def supports_backup_impl(self):
        """hot backup is not supported in this deployment mode"""
        return False

    def upgrade_arangod_version_impl(self):
        """ upgrade this installation """
        # phase 1: swap binaries on both nodes
        for node in [
                self.leader_starter_instance, self.follower_starter_instance
        ]:
            node.replace_binary_for_upgrade(self.new_cfg)
        # phase 2: run the upgrade procedure on both nodes
        for node in [
                self.leader_starter_instance, self.follower_starter_instance
        ]:
            node.command_upgrade()
            node.wait_for_upgrade()
            node.wait_for_upgrade_done_in_log()

        # phase 3: verify both nodes report the new version
        for node in [
                self.leader_starter_instance, self.follower_starter_instance
        ]:
            node.detect_instances()
            node.wait_for_version_reply()

        if self.selenium:
            self.selenium.web.refresh()
            self.selenium.check_old(self.new_cfg, True)

            self.selenium.connect_server_new_tab(
                self.follower_starter_instance.get_frontends(), '_system',
                self.cfg)
            self.selenium.check_old(self.new_cfg, False)
            self.selenium.close_tab_again()

    def jam_attempt_impl(self):
        """ run the replication fuzzing test """
        logging.info("running the replication fuzzing test")
        # add instance where makedata will be run on
        self.tcp_ping_all_nodes()
        ret = self.leader_starter_instance.arangosh.run_in_arangosh((
            self.cfg.test_data_dir /
            Path('tests/js/server/replication/fuzz/replication-fuzz-global.js')
        ), [], [
            self.follower_starter_instance.get_frontend().get_public_url(
                'root:%s@' % self.passvoid)
        ])
        if not ret[0]:
            if not self.cfg.verbose:
                print(ret[1])
            raise Exception("replication fuzzing test failed")

        prompt_user(self.basecfg, "please test the installation.")
        if self.selenium:
            self.selenium.jam_step_1(self.cfg if self.cfg else self.new_cfg)

    def shutdown_impl(self):
        """terminate both servers and assert no stray processes remain"""
        self.leader_starter_instance.terminate_instance()
        self.follower_starter_instance.terminate_instance()
        pslist = get_all_processes(False)
        if len(pslist) > 0:
            raise Exception("Not all processes terminated! [%s]" % str(pslist))
        logging.info('test ended')

    def before_backup_impl(self):
        """nothing to do before a backup in this mode"""
        pass

    def after_backup_impl(self):
        """nothing to do after a backup in this mode"""
        pass
class LicenseManagerSingleServerTestSuite(LicenseManagerBaseTestSuite):
    """License manager tests: single server"""

    # pylint: disable=dangerous-default-value
    def __init__(self, new_version, installer_base_config):
        super().__init__(
            new_version,
            installer_base_config,
        )
        self.short_name = "SingleServer"

    def get_default_instance_type(self):
        """get the instance type we should communicate with"""
        return InstanceType.SINGLE

    @collect_crash_data
    def save_data_dir(self):
        """save data dir and logs in case a test failed"""
        kill_all_processes()
        if self.starter.basedir.exists():
            archive = shutil.make_archive(
                f"LicenseManagerSingleServerTestSuite(v. {self.base_cfg.version})",
                "bztar", self.starter.basedir)
            attach.file(archive, "test dir archive", "application/x-bzip2",
                        "tar.bz2")
        else:
            print("test basedir doesn't exist, won't create report tar")

    @run_before_suite
    def start(self):
        """clean up the system before running license manager tests on a single server setup"""
        self.cleanup()
        self.start_single_server()

    @run_after_suite
    def teardown_suite(self):
        """Teardown suite environment: single server"""
        self.starter.terminate_instance()
        kill_all_processes()
        self.cleanup()

    def get_server_id(self):
        """read server ID from data directory"""
        datadir = self.starter.all_instances[0].basedir / "data"
        # use a context manager so the file handle is closed (was a leak)
        with open(datadir / "SERVER") as server_file:
            server_file_content = json.load(server_file)
        return server_file_content["serverId"]

    # pylint: disable=redefined-builtin
    def set_license(self, license):
        """set new license and restart the server so it is picked up"""
        datadir = self.starter.all_instances[0].basedir / "data"
        # mode "w" already truncates, so no explicit truncate() is needed
        with open(datadir / ".license", "w") as license_file:
            license_file.write(license)
        self.starter.terminate_instance()
        self.starter.respawn_instance()

    def cleanup(self):
        """remove all directories created by previous run of this test"""
        testdir = self.base_cfg.test_data_dir / self.short_name
        if testdir.exists():
            shutil.rmtree(testdir)

    @step
    def start_single_server(self):
        """start a single server setup"""
        # pylint: disable=attribute-defined-outside-init
        self.starter = StarterManager(
            basecfg=self.installer.cfg,
            install_prefix=Path(self.short_name),
            instance_prefix="single",
            expect_instances=[InstanceType.SINGLE],
            mode="single",
            jwt_str="single",
        )
        self.starter.run_starter()
        self.starter.detect_instances()
        self.starter.detect_instance_pids()
        self.starter.set_passvoid(self.passvoid)
        self.instance = self.starter.instance

    @testcase
    def clean_install_temp_license(self):
        """Check that server gets a 60-minute license after installation on a clean system"""
        self.check_that_license_is_not_expired(50 * 60)

    @testcase
    def goto_read_only_mode_when_license_expired(self):
        """Check that system goes to read-only mode when license is expired"""
        self.expire_license()
        self.check_readonly()
# Example 19
    def jam_attempt_impl(self):
        """kill one cluster node, verify the cluster survives, respawn it,
        then assert that a starter joining without the JWT is rejected."""
        # pylint: disable=too-many-statements
        # this is simply to slow to be worth wile:
        # collections = self.get_collection_list()
        lh.subsubsection("wait for all shards to be in sync")
        retval = self.starter_instances[0].execute_frontend(
            self.check_collections_in_sync, True)
        if not retval:
            raise Exception("Failed to ensure the cluster is in sync: %s %s" %
                            (retval, str(self.check_collections_in_sync)))
        print("all in sync.")
        # never kill the node hosting the agency leader; pick its sibling
        agency_leader = self.agency_get_leader()
        terminate_instance = 2
        survive_instance = 1
        if self.starter_instances[terminate_instance].have_this_instance(
                agency_leader):
            print(
                "Cluster instance 2 has the agency leader; killing 1 instead")
            terminate_instance = 1
            survive_instance = 2

        logging.info("stopping instance %d" % terminate_instance)
        # remember the dbserver UUID so checks can exclude the dead node
        uuid = self.starter_instances[terminate_instance].get_dbservers(
        )[0].get_uuid()
        self.starter_instances[terminate_instance].terminate_instance(
            keep_instances=True)
        logging.info("relaunching agent!")
        # keep the agency quorate while the rest of the node stays down
        self.starter_instances[terminate_instance].manually_launch_instances(
            [InstanceType.AGENT], [], False, False)

        self.set_frontend_instances()

        prompt_user(self.basecfg, "instance stopped")
        if self.selenium:
            self.selenium.jam_step_1()

        # data must still be readable from the remaining nodes
        ret = self.starter_instances[0].arangosh.check_test_data(
            "Cluster one node missing", True, ["--disabledDbserverUUID", uuid])
        if not ret[0]:
            raise Exception("check data failed " + ret[1])

        ret = self.starter_instances[
            survive_instance].arangosh.check_test_data(
                "Cluster one node missing", True,
                ["--disabledDbserverUUID", uuid])
        if not ret[0]:
            raise Exception("check data failed " + ret[1])

        # respawn instance, and get its state fixed
        self.starter_instances[terminate_instance].respawn_instance()
        self.set_frontend_instances()
        counter = 300
        # poll for up to 5 minutes (300 * 1s) for the node to come back
        while not self.starter_instances[terminate_instance].is_instance_up():
            if counter <= 0:
                raise Exception("Instance did not respawn in 5 minutes!")
            progress(".")
            time.sleep(1)
            counter -= 1
        print()
        self.starter_instances[terminate_instance].detect_instances()
        self.starter_instances[terminate_instance].detect_instance_pids()
        self.starter_instances[
            terminate_instance].detect_instance_pids_still_alive()
        self.set_frontend_instances()

        # negative test: a starter without the cluster JWT must be refused
        logging.info("jamming: Starting instance without jwt")
        moreopts = ["--starter.join", "127.0.0.1:9528"]
        if self.cfg.ssl and not self.cfg.use_auto_certs:
            keyfile = self.cert_dir / Path("nodeX") / "tls.keyfile"
            self.generate_keyfile(keyfile)
            moreopts.append(f"--ssl.keyfile={keyfile}")
        dead_instance = StarterManager(
            self.basecfg,
            Path("CLUSTER"),
            "nodeX",
            mode="cluster",
            jwt_str=None,
            expect_instances=[
                InstanceType.AGENT,
                InstanceType.COORDINATOR,
                InstanceType.DBSERVER,
            ],
            moreopts=moreopts,
        )
        dead_instance.run_starter(expect_to_fail=True)

        # wait (up to ~400s) for the unauthorized starter to give up and exit
        i = 0
        while True:
            logging.info(". %d", i)
            if not dead_instance.is_instance_running():
                dead_instance.check_that_starter_log_contains(
                    "Unauthorized. Wrong credentials.")
                break
            if i > 40:
                logging.info("Giving up wating for the starter to exit")
                raise Exception("non-jwt-ed starter won't exit")
            i += 1
            time.sleep(10)
        logging.info(str(dead_instance.instance.wait(timeout=320)))
        logging.info("dead instance is dead?")

        prompt_user(self.basecfg, "cluster should be up")
        if self.selenium:
            self.selenium.jam_step_2()