Example #1
    def setUp(self):
        super(ApiTestCaseWithTestReset, self).setUp()

        if config.get('simulator', False):
            # When we're running with the simulator, parts of the simulated
            # Copytools use agent code, and the agent code expects to find
            # a populated agent-side configuration. The safe way to handle
            # this requirement is to use mock to patch in a fresh
            # ConfigStore instance for each test run.
            try:
                from chroma_agent.config_store import ConfigStore
            except ImportError:
                raise ImportError(
                    "Cannot import agent, do you need to do a 'setup.py develop' of it?"
                )

            import mock  # Mock is only available when running the simulator, hence the local import
            self.mock_config = ConfigStore(tempfile.mkdtemp())
            mock.patch('chroma_agent.config', self.mock_config).start()
            from chroma_agent.action_plugins.settings_management import reset_agent_config, set_agent_config
            reset_agent_config()
            # Allow the worker to create a fifo in /tmp rather than /var/spool
            set_agent_config('copytool_fifo_directory',
                             self.COPYTOOL_TESTING_FIFO_ROOT)

            try:
                from cluster_sim.simulator import ClusterSimulator
            except ImportError:
                raise ImportError(
                    "Cannot import simulator, do you need to do a 'setup.py develop' of it?"
                )

            # The simulator's state directory will be left behind when a test fails,
            # so make sure it has a unique-per-run name
            timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S")
            state_path = 'simulator_state_%s.%s_%s' % (
                self.__class__.__name__, self._testMethodName, timestamp)
            if os.path.exists(state_path):
                raise RuntimeError(
                    "Simulator state folder already exists at '%s'!" %
                    state_path)

            # Hook up the agent log to a file
            from chroma_agent.agent_daemon import daemon_log
            handler = logging.FileHandler(
                os.path.join(config.get('log_dir', '/var/log/'),
                             'chroma_test_agent.log'))
            handler.setFormatter(
                logging.Formatter('[%(asctime)s] %(message)s',
                                  '%d/%b/%Y:%H:%M:%S'))
            daemon_log.addHandler(handler)
            daemon_log.setLevel(logging.DEBUG)

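            # Stand up the simulated cluster, pointed at the first configured manager's HTTP URL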
            self.simulator = ClusterSimulator(
                state_path, config['chroma_managers'][0]['server_http_url'])
            volume_count = max(
                [len(s['device_paths']) for s in self.config_servers])
            self.simulator.setup(len(self.config_servers),
                                 len(self.config_workers),
                                 volume_count,
                                 self.SIMULATOR_NID_COUNT,
                                 self.SIMULATOR_CLUSTER_SIZE,
                                 len(config['power_distribution_units']),
                                 su_size=0)
            self.remote_operations = SimulatorRemoteOperations(
                self, self.simulator)
            if self.TESTS_NEED_POWER_CONTROL:
                self.simulator.power.start()
        else:
            self.remote_operations = RealRemoteOperations(self)

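        # Unless a quick setup was requested, check the servers and clear out state left over from previous runs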
        if self.quick_setup is False:
            # Ensure that all servers are up and available
            for server in self.TEST_SERVERS:
                logger.info(
                    "Checking that %s is running and restarting if necessary..."
                    % server['fqdn'])
                self.remote_operations.await_server_boot(server['fqdn'],
                                                         restart=True)
                logger.info("%s is running" % server['fqdn'])
                self.remote_operations.inject_log_message(
                    server['fqdn'],
                    "==== starting test %s =====" % self)

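            # Either perform a full cluster reset, or fall back to a soft reset of the manager via its API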
            if config.get('reset', True):
                self.reset_cluster()
            elif config.get('soft_reset', True):
                # Reset the manager via the API
                self.wait_until_true(self.api_contactable)
                self.remote_operations.unmount_clients()
                self.api_force_clear()
                self.remote_operations.clear_ha(self.TEST_SERVERS)
                self.remote_operations.clear_lnet_config(self.TEST_SERVERS)

            if config.get('managed'):
                # Ensure that config from previous runs doesn't linger into
                # this one.
                self.remote_operations.remove_config(self.TEST_SERVERS)

                # If there are no configuration options for a given server
                # (e.g. corosync_config), then this is a noop and no config file
                # is written.
                self.remote_operations.write_config(self.TEST_SERVERS)

                # cleanup linux devices
                self.cleanup_linux_devices(self.TEST_SERVERS)

                # cleanup zfs pools
                self.cleanup_zfs_pools(
                    self.config_servers, self.CZP_EXPORTPOOLS |
                    (self.CZP_RECREATEZPOOLS if config.get(
                        'new_zpools_each_test', False) else
                     self.CZP_REMOVEDATASETS), None, True)

            # Enable agent debugging
            self.remote_operations.enable_agent_debug(self.TEST_SERVERS)

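        # Wait for the manager's supervisor-controlled processes to come up and record their initial start times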
        self.wait_until_true(self.supervisor_controlled_processes_running)
        self.initial_supervisor_controlled_process_start_times = (
            self.get_supervisor_controlled_process_start_times())
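
The mock.patch() above is started for each test but never explicitly stopped. A minimal, self-contained sketch of the same ConfigStore-patching pattern with cleanup might look like the following (the ConfigPatchingTestCase name is hypothetical; it assumes chroma_agent is importable, as in the example above):

import tempfile
import unittest

import mock

from chroma_agent.config_store import ConfigStore


class ConfigPatchingTestCase(unittest.TestCase):
    def setUp(self):
        # Give each test a fresh, empty agent-side configuration in a throwaway directory
        self.mock_config = ConfigStore(tempfile.mkdtemp())
        patcher = mock.patch('chroma_agent.config', self.mock_config)
        patcher.start()
        # Undo the patch even if the test body fails
        self.addCleanup(patcher.stop)
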
Example #2
    def main(self):
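        # Send simulator and agent daemon logging to stderr, and also write the agent log to chroma-agent.log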
        log.addHandler(logging.StreamHandler())

        daemon_log.addHandler(logging.StreamHandler())
        daemon_log.setLevel(logging.DEBUG)
        handler = logging.FileHandler("chroma-agent.log")
        handler.setFormatter(
            logging.Formatter('[%(asctime)s] %(message)s',
                              '%d/%b/%Y:%H:%M:%S'))
        daemon_log.addHandler(handler)

        # Usually on our Intel laptops https_proxy is set, and needs to be unset for tests,
        # but let's not completely rule out the possibility that someone might want to run
        # the tests on a remote system using a proxy.
        if 'https_proxy' in os.environ:
            sys.stderr.write(
                "Warning: Using proxy %s from https_proxy environment "
                "variable, you probably don't want that\n" %
                os.environ['https_proxy'])

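        # Top-level options shared by every subcommand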
        parser = argparse.ArgumentParser(description="Cluster simulator")
        parser.add_argument('--config',
                            required=False,
                            help="Simulator configuration/state directory",
                            default="cluster_sim")
        parser.add_argument('--url',
                            required=False,
                            help="Manager URL",
                            default="https://localhost:8000/")
        subparsers = parser.add_subparsers()
        setup_parser = subparsers.add_parser("setup")
        setup_parser.add_argument('--su_size',
                                  required=False,
                                  help="Servers per SU",
                                  default='0')
        setup_parser.add_argument('--cluster_size',
                                  required=False,
                                  help="Number of simulated storage servers per HA cluster",
                                  default='4')
        setup_parser.add_argument('--server_count',
                                  required=False,
                                  help="Number of simulated storage servers",
                                  default='8')
        setup_parser.add_argument('--worker_count',
                                  required=False,
                                  help="Number of simulated HSM workers",
                                  default='1')
        setup_parser.add_argument(
            '--nid_count',
            required=False,
            help="Number of LNet NIDs per storage server, defaults to 1 per server",
            default='1')
        setup_parser.add_argument(
            '--volume_count',
            required=False,
            help="Number of simulated storage devices, defaults to twice the number of servers")
        setup_parser.add_argument(
            '--psu_count',
            required=False,
            help="Number of simulated server Power Supply Units, defaults to one per server",
            default='1')
        setup_parser.set_defaults(func=self.setup)

        register_parser = subparsers.add_parser(
            "register",
            help="Provide a secret for registration, or provide API credentials "
            "for the simulator to acquire a token itself")
        register_parser.add_argument('--secret',
                                     required=False,
                                     help="Registration token secret")
        register_parser.add_argument('--username',
                                     required=False,
                                     help="API username")
        register_parser.add_argument('--password',
                                     required=False,
                                     help="API password")
        register_parser.add_argument('--create_pdu_entries',
                                     action='store_true',
                                     help="Create PDU entries on the manager")
        register_parser.set_defaults(func=self.register)

        run_parser = subparsers.add_parser("run")
        run_parser.set_defaults(func=self.run)

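        # Dispatch to the selected subcommand; a subcommand that leaves a simulator running returns it here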
        args = parser.parse_args()
        simulator = args.func(args)
        if simulator:
            self.simulator = simulator

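            # Service RPCs for the simulator on a background thread until we are asked to stop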
            rpc_thread = RpcThread(self.simulator)
            rpc_thread.start()

            # Wake up periodically to handle signals, instead of going straight into join
            log.info("Running indefinitely.")
            while not self._stopping.is_set():
                self._stopping.wait(timeout=1)

            self.simulator.join()

            rpc_thread.stop()
            rpc_thread.join()
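
The loop in main() waits on a self._stopping event that is presumably set from a signal handler elsewhere in the class; that wiring is not part of the example. A rough sketch of what it could look like (the SimulatorCli class and method names are assumptions, not taken from the source):

import signal
import threading


class SimulatorCli(object):
    def __init__(self):
        self._stopping = threading.Event()

    def stop(self, *args):
        # Called from the signal handlers below; wakes the wait() loop in main()
        self._stopping.set()

    def install_signal_handlers(self):
        # Ask for a clean shutdown on SIGINT/SIGTERM instead of dying mid-operation
        signal.signal(signal.SIGINT, self.stop)
        signal.signal(signal.SIGTERM, self.stop)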