Example #1
    def jam_attempt_impl(self):
        self.first_leader.terminate_instance()
        logging.info("waiting for new leader...")
        self.new_leader = None

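        # poll the follower nodes until one of them reports itself as the new leader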
        while self.new_leader is None:
            for node in self.follower_nodes:
                node.detect_leader()
                if node.is_leader:
                    logging.info('have a new leader: %s', str(node.arguments))
                    self.new_leader = node
                    self.leader = node
                    break
                progress('.')
            time.sleep(1)
        if self.selenium:
            self.selenium.connect_server(self.leader.get_frontends(), '_system',
                                         self.new_cfg if self.new_cfg else self.cfg)
            self.selenium.check_old(self.new_cfg if self.new_cfg else self.cfg, 1)
        print()

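        # fetch the replication page of the new leader's web UI; anything but HTTP 200 marks the test as failed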
        logging.info(str(self.new_leader))
        url = '{host}/_db/_system/_admin/aardvark/index.html#replication'.format(
            host=self.new_leader.get_frontend().get_local_url(''))
        reply = requests.get(url, auth=HTTPBasicAuth('root', self.leader.passvoid))
        logging.info(str(reply))
        if reply.status_code != 200:
            logging.info(reply.text)
            self.success = False
        self.set_frontend_instances()

        prompt_user(self.basecfg,
                    '''The leader failover has happened.
please revalidate the UI states on the new leader; you should see *one* follower.''')
        self.first_leader.respawn_instance()
        self.first_leader.detect_instances()
        logging.info("waiting for old leader to show up as follower")
        while not self.first_leader.active_failover_detect_host_now_follower():
            progress('.')
            time.sleep(1)
        print()

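        # the old leader, now running as a follower, is expected to answer HTTP 503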
        url = self.first_leader.get_frontend().get_local_url('')

        reply = requests.get(url, auth=HTTPBasicAuth('root', self.leader.passvoid))
        logging.info(str(reply))
        logging.info(str(reply.text))

        if reply.status_code != 503:
            self.success = False

        prompt_user(self.basecfg,
                    'The old leader has been respawned as follower (%s),'
                    ' so there should be two followers again.'
                    % self.first_leader.get_frontend().get_public_url('root@') )

        logging.info("state of this test is: %s",
                     "Success" if self.success else "Failed")
        if self.selenium:
            self.selenium.check_old(self.new_cfg if self.new_cfg else self.cfg, 2, 20)
Example #2
    def jam_attempt_impl(self):
        logging.info("stopping instance 2")
        self.starter_instances[2].terminate_instance()
        self.set_frontend_instances()

        prompt_user(self.basecfg, "instance stopped")
        if self.selenium:
            self.selenium.jam_step_1(self.new_cfg if self.new_cfg else self.cfg)

        # respawn instance, and get its state fixed
        self.starter_instances[2].respawn_instance()
        self.set_frontend_instances()
        while not self.starter_instances[2].is_instance_up():
            progress('.')
            time.sleep(1)
        print()
        self.starter_instances[2].detect_instances()
        self.starter_instances[2].detect_instance_pids()
        self.starter_instances[2].detect_instance_pids_still_alive()
        self.set_frontend_instances()

        logging.info('jamming: Starting instance without jwt')
        dead_instance = StarterManager(
            self.basecfg,
            Path('CLUSTER'), 'nodeX',
            mode='cluster',
            jwtStr=None,
            expect_instances=[
                InstanceType.agent,
                InstanceType.coordinator,
                InstanceType.dbserver,
            ],
            moreopts=['--starter.join', '127.0.0.1:9528'])
        dead_instance.run_starter()

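        # the starter launched without a JWT is expected to give up and exit on its own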
        i = 0
        while True:
            logging.info(". %d", i)
            if not dead_instance.is_instance_running():
                break
            if i > 40:
                logging.info('Giving up waiting for the starter to exit')
                raise Exception("non-jwt-ed starter won't exit")
            i += 1
            time.sleep(10)
        logging.info(str(dead_instance.instance.wait(timeout=320)))
        logging.info('dead instance is dead?')

        prompt_user(self.basecfg, "cluster should be up")
        if self.selenium:
            self.selenium.jam_step_2(self.new_cfg if self.new_cfg else self.cfg)
Example #3
    def jam_attempt_impl(self):
        """ run the replication fuzzing test """
        logging.info("running the replication fuzzing test")
        # add the instance that makedata will be run on
        self.tcp_ping_all_nodes()
        ret = self.leader_starter_instance.arangosh.run_in_arangosh((
            self.cfg.test_data_dir /
            Path('tests/js/server/replication/fuzz/replication-fuzz-global.js')
        ), [], [
            self.follower_starter_instance.get_frontend().get_public_url(
                'root:%s@' % self.passvoid)
        ])
        if not ret[0]:
            if not self.cfg.verbose:
                print(ret[1])
            raise Exception("replication fuzzing test failed")

        prompt_user(self.basecfg, "please test the installation.")
        if self.selenium:
            self.selenium.jam_step_1(self.cfg if self.cfg else self.new_cfg)
Example #4
 def locator_finder_by_xpath(self,
                             locator_name,
                             timeout=10,
                             expec_fail=False):
     """This method will used for finding all the locators by their xpath"""
     try:
         self.locator = WebDriverWait(self.webdriver, timeout).until(
             EC.element_to_be_clickable((BY.XPATH, locator_name)),
             message="UI-Test: " + locator_name + " locator was not found.",
         )
     except Exception as ex:
         if expec_fail:
             raise ex
         ti.prompt_user(
             self.cfg, "ERROR " * 10 +
             "\nError while wating for web element:\n" + str(ex) + "\n" +
             "".join(traceback.format_stack(ex.__traceback__.tb_frame)))
         raise ex
     if self.locator is None:
         raise Exception("UI-Test: ", locator_name,
                         " locator was not found.")
     return self.locator
Example #5
    def jam_attempt_impl(self):
        self.makedata_instances = self.starter_instances[:]
        logging.info('jamming: starting data stress')
        assert self.makedata_instances
        logging.debug("makedata instances")
        for i in self.makedata_instances:
            logging.debug(str(i))

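        # enqueue one makedata job per database chunk; the worker threads below drain this queue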
        tcount = 0
        jobs = Queue()
        resultq = Queue()
        results = []
        workers = []
        no_dbs = self.scenario.db_count
        for i in range(self.scenario.db_count_chunks):
            jobs.put({
                'args': [
                    'TESTDB',
                    '--minReplicationFactor',
                    str(self.scenario.min_replication_factor),
                    '--maxReplicationFactor',
                    str(self.scenario.max_replication_factor),
                    '--dataMultiplier',
                    str(self.scenario.data_multiplier),
                    '--numberOfDBs',
                    str(no_dbs),
                    '--countOffset',
                    str((i + self.scenario.db_offset) * no_dbs + 1),
                    '--collectionMultiplier',
                    str(self.scenario.collection_multiplier),
                    '--singleShard',
                    'true' if self.scenario.single_shard else 'false',
                ]
            })

        while len(workers) < self.scenario.parallelity:
            starter = self.makedata_instances[len(workers) %
                                              len(self.makedata_instances)]
            assert starter.arangosh
            arangosh = starter.arangosh

            # must be writable, and the setup must not already contain data
            if not arangosh.read_only and not self.has_makedata_data:
                workers.append(
                    Thread(target=makedata_runner,
                           args=(jobs, resultq, arangosh,
                                 self.scenario.progressive_timeout)))

        thread_count = len(workers)
        for worker in workers:
            worker.start()
            time.sleep(self.scenario.launch_delay)

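        # collect result lines until every worker thread has reported completion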
        while tcount < thread_count:
            res_line = resultq.get()
            if isinstance(res_line, bytes):
                results.append(str(res_line).split(','))
            else:
                tcount += 1

        for worker in workers:
            worker.join()
        ti.prompt_user(self.basecfg,
                       "DONE! press any key to shut down the SUT.")
Example #6
 def jam_attempt_impl(self):
     """run the replication fuzzing test"""
     prompt_user(self.basecfg, "please test the installation.")
     if self.selenium:
         self.selenium.test_jam_attempt()
Example #7
    def run(self):
        """run the full lifecycle flow of this deployment"""
        # pylint: disable=too-many-statements disable=too-many-branches
        if self.do_starter_test and not self.remote:
            detect_file_ulimit()

        self.progress(False, "Runner of type {0}".format(str(self.name)), "<3")

        if self.do_install or self.do_system_test:
            self.progress(
                False,
                "INSTALLATION for {0}".format(str(self.name)),
            )
            self.install(self.old_installer)
        else:
            self.basecfg.set_directories(self.old_installer.cfg)

        if self.do_starter_test:
            self.progress(
                False,
                "PREPARING DEPLOYMENT of {0}".format(str(self.name)),
            )
            self.starter_prepare_env()
            self.starter_run()
            self.finish_setup()
            self.make_data()
            if self.selenium:
                self.set_selenium_instances()
                self.selenium.test_empty_ui()
            ti.prompt_user(
                self.basecfg,
                "{0}{1} Deployment started. Please test the UI!".format((self.versionstr), str(self.name)),
            )
            if self.hot_backup:
                self.progress(False, "TESTING HOTBACKUP")
                self.backup_name = self.create_backup("thy_name_is_" + self.name)
                self.validate_local_backup(self.backup_name)
                self.tcp_ping_all_nodes()
                self.create_non_backup_data()
                backups = self.list_backup()
                print(backups)
                self.upload_backup(backups[0])
                self.tcp_ping_all_nodes()
                self.delete_backup(backups[0])
                self.tcp_ping_all_nodes()
                backups = self.list_backup()
                if len(backups) != 0:
                    raise Exception("expected backup to be gone, " "but its still there: " + str(backups))
                self.download_backup(self.backup_name)
                self.validate_local_backup(self.backup_name)
                self.tcp_ping_all_nodes()
                backups = self.list_backup()
                if backups[0] != self.backup_name:
                    raise Exception("downloaded backup has different name? " + str(backups))
                self.before_backup()
                self.restore_backup(backups[0])
                self.tcp_ping_all_nodes()
                self.after_backup()
                self.check_data_impl()
                if not self.check_non_backup_data():
                    raise Exception("data created after backup" " is still there??")

        if self.new_installer:
            if self.hot_backup:
                self.create_non_backup_data()
            self.versionstr = "NEW[" + self.new_cfg.version + "] "

            self.progress(
                False,
                "UPGRADE OF DEPLOYMENT {0}".format(str(self.name)),
            )
            self.new_installer.calculate_package_names()
            self.new_installer.upgrade_server_package(self.old_installer)
            lh.subsection("outputting version")
            self.new_installer.output_arangod_version()
            self.new_installer.get_starter_version()
            self.new_installer.get_sync_version()
            self.new_installer.stop_service()
            self.cfg.set_directories(self.new_installer.cfg)
            self.new_cfg.set_directories(self.new_installer.cfg)

            self.upgrade_arangod_version()  # make sure to pass new version
            self.old_installer.un_install_server_package_for_upgrade()
            if self.is_minor_upgrade() and self.new_installer.supports_backup():
                self.new_installer.check_backup_is_created()
            if self.hot_backup:
                self.check_data_impl()
                self.progress(False, "TESTING HOTBACKUP AFTER UPGRADE")
                backups = self.list_backup()
                print(backups)
                self.upload_backup(backups[0])
                self.tcp_ping_all_nodes()
                self.delete_backup(backups[0])
                self.tcp_ping_all_nodes()
                backups = self.list_backup()
                if len(backups) != 0:
                    raise Exception("expected backup to be gone, " "but its still there: " + str(backups))
                self.download_backup(self.backup_name)
                self.validate_local_backup(self.backup_name)
                self.tcp_ping_all_nodes()
                backups = self.list_backup()
                if backups[0] != self.backup_name:
                    raise Exception("downloaded backup has different name? " + str(backups))
                time.sleep(20)  # TODO fix
                self.before_backup()
                self.restore_backup(backups[0])
                self.tcp_ping_all_nodes()
                self.after_backup()
                if not self.check_non_backup_data():
                    raise Exception("data created after " "backup is still there??")
            self.check_data_impl()
        else:
            logging.info("skipping upgrade step no new version given")

        try:
            if self.do_starter_test:
                self.progress(
                    False,
                    "{0} TESTS FOR {1}".format(self.testrun_name, str(self.name)),
                )
                self.test_setup()
                self.jam_attempt()
                self.starter_shutdown()
                for starter in self.starter_instances:
                    starter.detect_fatal_errors()
            if self.do_uninstall:
                self.uninstall(self.old_installer if not self.new_installer else self.new_installer)
        finally:
            if self.selenium:
                ui_test_results_table = BeautifulTable(maxwidth=160)
                for result in self.selenium.test_results:
                    ui_test_results_table.rows.append(
                        [result.name, "PASSED" if result.success else "FAILED", result.message, result.traceback]
                    )
                    if not result.success:
                        self.ui_tests_failed = True
                ui_test_results_table.columns.header = ["Name", "Result", "Message", "Traceback"]
                self.progress(False, "UI test results table:", supress_allure=True)
                self.progress(False, "\n" + str(ui_test_results_table), supress_allure=True)
                self.ui_test_results_table = ui_test_results_table

                self.quit_selenium()

        self.progress(False, "Runner of type {0} - Finished!".format(str(self.name)))
Example #8
    def jam_attempt_impl(self):
        # pylint: disable=too-many-statements
        agency_leader = self.agency_get_leader()
        if self.first_leader.have_this_instance(agency_leader):
            print(
                "AFO-Leader and agency leader are attached by the same starter!"
            )
            self.agency_trigger_leader_relection(agency_leader)

        self.first_leader.terminate_instance(keep_instances=True)
        logging.info("relaunching agent!")
        self.first_leader.manually_launch_instances([InstanceType.AGENT], [],
                                                    False, False)

        logging.info("waiting for new leader...")
        self.new_leader = None

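        # poll the followers for a new leader; give up after roughly six minutes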
        count = 0
        while self.new_leader is None:
            for node in self.follower_nodes:
                node.detect_instance_pids_still_alive()
                node.detect_leader()
                if node.is_leader:
                    logging.info("have a new leader: %s", str(node.arguments))
                    self.new_leader = node
                    self.leader = node
                    break
                progress(".")
            time.sleep(1)
            if count > 360:
                # self.progress(False, "Timeout waiting for new leader - crashing!")
                # for node in self.starter_instances:
                #    node.crash_instances()
                raise TimeoutError("Timeout waiting for new leader!")
            count += 1

        print()
        ret = self.new_leader.arangosh.check_test_data(
            "checking active failover new leader node", True)
        if not ret[0]:
            raise Exception("check data failed " + ret[1])

        logging.info("\n" + str(self.new_leader))
        url = "{host}/_db/_system/_admin/aardvark/index.html#replication".format(
            host=self.new_leader.get_frontend().get_local_url(""))
        reply = requests.get(url,
                             auth=HTTPBasicAuth("root", self.leader.passvoid))
        logging.info(str(reply))
        if reply.status_code != 200:
            logging.info(reply.text)
            self.success = False
        self.set_frontend_instances()

        if self.selenium:
            # cfg = self.new_cfg if self.new_cfg else self.cfg
            self.set_selenium_instances()
            self.selenium.test_jam_attempt()

        prompt_user(
            self.basecfg,
            """The leader failover has happened.
please revalidate the UI states on the new leader; you should see *one* follower.""",
        )
        self.first_leader.respawn_instance()
        self.first_leader.detect_instances()
        logging.info("waiting for old leader to show up as follower")
        while not self.first_leader.active_failover_detect_host_now_follower():
            progress(".")
            time.sleep(1)
        print()

        url = self.first_leader.get_frontend().get_local_url("")

        reply = requests.get(url,
                             auth=HTTPBasicAuth("root", self.leader.passvoid))
        logging.info(str(reply))
        logging.info(str(reply.text))

        if reply.status_code != 503:
            self.success = False

        prompt_user(
            self.basecfg,
            "The old leader has been respawned as follower (%s),"
            " so there should be two followers again." %
            self.first_leader.get_frontend().get_public_url("root@"),
        )

        logging.info("state of this test is: %s",
                     "Success" if self.success else "Failed")
        if self.selenium:
            # cfg = self.new_cfg if self.new_cfg else self.cfg
            self.set_selenium_instances()
            self.selenium.test_wait_for_upgrade()
Example #9
    def run(self):
        """ run the full lifecycle flow of this deployment """
        # pylint: disable=R0915 disable=R0912
        if self.do_starter_test and not self.remote:
            self.detect_file_ulimit()

        lh.section("Runner of type {0}".format(str(self.name)), "<3")

        if self.do_install or self.do_system_test:
            lh.section("INSTALLATION for {0}".format(str(self.name)), )
            self.install(self.old_installer)

        if self.do_starter_test:
            lh.section("PREPARING DEPLOYMENT of {0}".format(str(self.name)), )
            self.starter_prepare_env()
            self.starter_run()
            self.finish_setup()
            self.make_data()
            if self.selenium:
                self.selenium.connect_server(self.get_frontend_instances(),
                                             '_system', self.cfg)
                self.selenium.check_old(self.old_installer.cfg)
            ti.prompt_user(
                self.basecfg,
                "{0}{1} Deployment started. Please test the UI!".format(
                    (self.versionstr), str(self.name)))
            if self.hot_backup:
                lh.section("TESTING HOTBACKUP")
                self.before_backup()
                # TODO generate name?
                self.backup_name = self.create_backup("thy_name_is")
                self.tcp_ping_all_nodes()
                self.create_non_backup_data()
                backups = self.list_backup()
                print(backups)
                self.upload_backup(backups[0])
                self.tcp_ping_all_nodes()
                self.delete_backup(backups[0])
                self.tcp_ping_all_nodes()
                backups = self.list_backup()
                if len(backups) != 0:
                    raise Exception("expected backup to be gone, "
                                    "but its still there: " + str(backups))
                self.download_backup(self.backup_name)
                self.tcp_ping_all_nodes()
                backups = self.list_backup()
                if backups[0] != self.backup_name:
                    raise Exception("downloaded backup has different name? " +
                                    str(backups))
                time.sleep(20)  # TODO fix
                self.restore_backup(backups[0])
                self.tcp_ping_all_nodes()
                self.after_backup()
                self.check_data_impl()
                if not self.check_non_backup_data():
                    raise Exception("data created after backup"
                                    " is still there??")
                self.create_non_backup_data()

        if self.new_installer:
            self.versionstr = "NEW[" + self.new_cfg.version + "] "

            lh.section("UPGRADE OF DEPLOYMENT {0}".format(str(self.name)), )
            if self.cfg.have_debug_package:
                print('removing *old* debug package in advance')
                self.old_installer.un_install_debug_package()

            self.new_installer.upgrade_package(self.old_installer)
            # only install debug package for new package.
            lh.subsection('installing debug package:')
            self.cfg.have_debug_package = self.new_installer.install_debug_package()
            if self.cfg.have_debug_package:
                self.new_installer.gdb_test()
            self.new_installer.stop_service()
            self.cfg.set_directories(self.new_installer.cfg)
            self.new_cfg.set_directories(self.new_installer.cfg)
            self.old_installer.un_install_package_for_upgrade()

            self.upgrade_arangod_version()  # make sure to pass new version
            self.make_data_after_upgrade()
            if self.hot_backup:
                lh.section("TESTING HOTBACKUP AFTER UPGRADE")
                self.before_backup()
                backups = self.list_backup()
                print(backups)
                self.upload_backup(backups[0])
                self.tcp_ping_all_nodes()
                self.delete_backup(backups[0])
                self.tcp_ping_all_nodes()
                backups = self.list_backup()
                if len(backups) != 0:
                    raise Exception("expected backup to be gone, "
                                    "but its still there: " + str(backups))
                self.download_backup(self.backup_name)
                self.tcp_ping_all_nodes()
                backups = self.list_backup()
                if backups[0] != self.backup_name:
                    raise Exception("downloaded backup has different name? " +
                                    str(backups))
                time.sleep(20)  # TODO fix
                self.restore_backup(backups[0])
                self.tcp_ping_all_nodes()
                self.after_backup()
                if not self.check_non_backup_data():
                    raise Exception("data created after "
                                    "backup is still there??")
            self.check_data_impl()
        else:
            logging.info("skipping upgrade step no new version given")

        if self.do_starter_test:
            lh.section("TESTS FOR {0}".format(str(self.name)), )
            self.test_setup()
            self.jam_attempt()
            self.starter_shutdown()
        if self.do_uninstall:
            self.uninstall(self.old_installer
                           if not self.new_installer else self.new_installer)
        if self.selenium:
            self.selenium.disconnect()
        lh.section("Runner of type {0} - Finished!".format(str(self.name)))
Example #10
    def jam_attempt_impl(self):
        # pylint: disable=too-many-statements
        # this is simply too slow to be worthwhile:
        # collections = self.get_collection_list()
        lh.subsubsection("wait for all shards to be in sync")
        retval = self.starter_instances[0].execute_frontend(
            self.check_collections_in_sync, True)
        if not retval:
            raise Exception("Failed to ensure the cluster is in sync: %s %s" %
                            (retval, str(self.check_collections_in_sync)))
        print("all in sync.")
        agency_leader = self.agency_get_leader()
        terminate_instance = 2
        survive_instance = 1
        if self.starter_instances[terminate_instance].have_this_instance(
                agency_leader):
            print(
                "Cluster instance 2 has the agency leader; killing 1 instead")
            terminate_instance = 1
            survive_instance = 2

        logging.info("stopping instance %d" % terminate_instance)
        uuid = self.starter_instances[terminate_instance].get_dbservers(
        )[0].get_uuid()
        self.starter_instances[terminate_instance].terminate_instance(
            keep_instances=True)
        logging.info("relaunching agent!")
        self.starter_instances[terminate_instance].manually_launch_instances(
            [InstanceType.AGENT], [], False, False)

        self.set_frontend_instances()

        prompt_user(self.basecfg, "instance stopped")
        if self.selenium:
            self.selenium.jam_step_1()

        ret = self.starter_instances[0].arangosh.check_test_data(
            "Cluster one node missing", True, ["--disabledDbserverUUID", uuid])
        if not ret[0]:
            raise Exception("check data failed " + ret[1])

        ret = self.starter_instances[
            survive_instance].arangosh.check_test_data(
                "Cluster one node missing", True,
                ["--disabledDbserverUUID", uuid])
        if not ret[0]:
            raise Exception("check data failed " + ret[1])

        # respawn instance, and get its state fixed
        self.starter_instances[terminate_instance].respawn_instance()
        self.set_frontend_instances()
        counter = 300
        while not self.starter_instances[terminate_instance].is_instance_up():
            if counter <= 0:
                raise Exception("Instance did not respawn in 5 minutes!")
            progress(".")
            time.sleep(1)
            counter -= 1
        print()
        self.starter_instances[terminate_instance].detect_instances()
        self.starter_instances[terminate_instance].detect_instance_pids()
        self.starter_instances[
            terminate_instance].detect_instance_pids_still_alive()
        self.set_frontend_instances()

        logging.info("jamming: Starting instance without jwt")
        moreopts = ["--starter.join", "127.0.0.1:9528"]
        if self.cfg.ssl and not self.cfg.use_auto_certs:
            keyfile = self.cert_dir / Path("nodeX") / "tls.keyfile"
            self.generate_keyfile(keyfile)
            moreopts.append(f"--ssl.keyfile={keyfile}")
        dead_instance = StarterManager(
            self.basecfg,
            Path("CLUSTER"),
            "nodeX",
            mode="cluster",
            jwt_str=None,
            expect_instances=[
                InstanceType.AGENT,
                InstanceType.COORDINATOR,
                InstanceType.DBSERVER,
            ],
            moreopts=moreopts,
        )
        dead_instance.run_starter(expect_to_fail=True)

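        # the starter launched without a JWT must be rejected and exit on its own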
        i = 0
        while True:
            logging.info(". %d", i)
            if not dead_instance.is_instance_running():
                dead_instance.check_that_starter_log_contains(
                    "Unauthorized. Wrong credentials.")
                break
            if i > 40:
                logging.info("Giving up wating for the starter to exit")
                raise Exception("non-jwt-ed starter won't exit")
            i += 1
            time.sleep(10)
        logging.info(str(dead_instance.instance.wait(timeout=320)))
        logging.info("dead instance is dead?")

        prompt_user(self.basecfg, "cluster should be up")
        if self.selenium:
            self.selenium.jam_step_2()