Beispiel #1
0
    def __init__(self,
                 vm_name,
                 output_path,
                 exp_config: DictConfig,
                 host_config: DictConfig,
                 application: application.Application,
                 benchmark: cloudexp.guest.application.benchmark.Benchmark,
                 guest_mom_config: Union[DictConfig, dict, None] = None,
                 shared_terminable: Terminable = None,
                 shared_deferred_start: DeferredStartThread = None,
                 **props):
        """Build an experiment machine bound to an application and benchmark.

        The libvirt URI is read from ``host_config`` and the guest credentials
        from ``exp_config``; together with the application's image name they
        are forwarded to ``GuestMachine.__init__``.

        Args:
            vm_name: Name of the virtual machine.
            output_path: Directory under which per-machine output is written.
            exp_config: Experiment configuration ('guest-credentials' section
                is read here).
            host_config: Host configuration ('main' and 'guest-client'
                sections are read here).
            application: Application to run on the guest.
            benchmark: Benchmark that exercises the application.
            guest_mom_config: Optional overrides layered on top of
                ``momguestd.DEFAULT_CONFIG``.
            shared_terminable: Passed on to ``GuestMachine`` and to the
                benchmark executor.
            shared_deferred_start: Passed on to the benchmark executor.
            **props: Forwarded to ``GuestMachine.__init__``; the optional keys
                'load_func', 'load_interval' and 'expected_load_rounds' are
                also read here.
        """
        GuestMachine.__init__(
            self,
            vm_name,
            host_config.get('main', 'libvirt-hypervisor-uri'),
            exp_config.get('guest-credentials', 'username'),
            exp_config.get('guest-credentials', 'password'),
            application.get_image_name(),
            shared_terminable=shared_terminable,
            **props)

        # Plain configuration / collaborator references.
        self.output_path = output_path
        self.exp_config = exp_config
        self.host_conf = host_config
        self.guest_config = DictConfig(momguestd.DEFAULT_CONFIG,
                                       guest_mom_config)
        self.application = application
        self.benchmark = benchmark

        # Optional load description; each one is None when not supplied.
        self.load_func = props.get('load_func')
        self.load_interval = props.get('load_interval')
        self.expected_load_rounds = props.get("expected_load_rounds")

        # Wire the benchmark and the application to this machine.
        self.benchmark.set_guest_machine(self)
        guest_server_port = self.host_conf.get("guest-client", "port")
        self.application.set_guest_server_port(guest_server_port)

        # NOTE(review): self.ip and self.vm_name are presumably set by
        # GuestMachine.__init__ - confirm against the base class.
        self.guest_client = load_guest_client(
            self.ip,
            self.vm_name,
            self.host_conf,
            base_name='exp-machine-client')
        self.benchmark_thread = BenchmarkExecutor(
            self.benchmark,
            self.load_func,
            self.load_interval,
            self.expected_load_rounds,
            name=self.vm_name,
            shared_terminable=shared_terminable,
            shared_deferred_start=shared_deferred_start)

        # Background threads and SSH connections, both keyed by name.
        self.threads = {}
        self.ssh_connections = {}
        self._is_ready_for_experiment = Event()
class MomHostDaemon(Terminable):
    """Host-side daemon that owns and supervises the host worker threads.

    It creates one shared ``LibvirtInterface`` and hands it to a
    ``GuestManager``, a ``HostMonitor`` and a ``HostPolicy``; ``run_mom``
    starts them and watches them until termination.
    """

    def __init__(self,
                 config: Union[DictConfig, dict, None] = None,
                 shared_terminable: Terminable = None):
        """Create the daemon and all of its worker objects.

        Args:
            config: Optional overrides layered on top of ``DEFAULT_CONFIG``.
            shared_terminable: Forwarded to ``Terminable.__init__`` and to
                every worker created here.
        """
        Terminable.__init__(self, shared_terminable=shared_terminable)
        self.logger = logging.getLogger(self.__class__.__name__)
        self.config = DictConfig(DEFAULT_CONFIG, config)

        # A single libvirt connection is shared by all workers.
        self.libvirt_iface = LibvirtInterface(
            self.config.get('main', 'libvirt-hypervisor-uri'))

        worker_options = dict(shared_terminable=shared_terminable)
        self.guest_manager = GuestManager(self.config,
                                          libvirt_iface=self.libvirt_iface,
                                          **worker_options)
        self.host_monitor = HostMonitor(self.config,
                                        libvirt_iface=self.libvirt_iface,
                                        **worker_options)
        self.host_policy = HostPolicy(self.config,
                                      self.libvirt_iface,
                                      self.host_monitor,
                                      self.guest_manager,
                                      **worker_options)

        # Kept as a set, exactly as the rest of the code base receives it.
        self.threads = {
            self.guest_manager, self.host_policy, self.host_monitor
        }

    def run_mom(self):
        """Start the workers, supervise them, then terminate and join them.

        The supervision loop wakes up every 'check-loop-interval' (section
        'main') and stops as soon as ``threads_ok`` reports a problem or
        ``should_run`` becomes false.
        """
        self.logger.info("Starting")

        # Start every worker that is not running yet.
        for worker in self.threads:
            if worker.is_alive() or not self.should_run:
                continue
            worker.start()

        check_interval = self.config.get('main', 'check-loop-interval')
        while self.should_run:
            self.terminable_sleep(check_interval)
            if threads_ok(self.threads):
                continue
            # Only warn when the stop was not requested by us.
            if self.should_run:
                self.logger.warning(
                    "One of the threads ended before it should. Terminating."
                )
            break

        self.terminate()

        # Wait for whatever is still running.
        for worker in self.threads:
            if not worker.is_alive():
                continue
            worker.join()

        self.logger.info("Daemon ending")
class MomGuestDaemon(LoggedObject):
    """Guest-side daemon: a monitor, a policy and the server exposing them."""

    def __init__(self,
                 config: Union[DictConfig, dict, None] = None,
                 guest_name=None):
        """Create the monitor, the configured policy and the guest server.

        Args:
            config: Optional overrides layered on top of ``DEFAULT_CONFIG``.
            guest_name: Passed to ``LoggedObject.__init__``, to the monitor
                (as ``monitor_name``) and to the server.
        """
        LoggedObject.__init__(self, guest_name)
        self.config = DictConfig(DEFAULT_CONFIG, config)

        # The monitor only collects data here; it is not run as a thread.
        self.monitor = Monitor(self.config, monitor_name=guest_name)

        # The policy class is looked up by the name stored under
        # ('policy', 'policy') and instantiated with the configuration.
        policy_importer = ClassImporter(guest_policy)
        policy_class = policy_importer.get_class(
            self.config.get('policy', 'policy'))
        self.policy = policy_class(self.config)

        self.server = GuestServer(self.config,
                                  self.monitor,
                                  self.policy,
                                  guest_name=guest_name)

    def start(self):
        """Serve requests until the server stops, then shut it down."""
        server = self.server
        self.logger.info("Starting guest server, listening on %s:%s",
                         server.host, server.port)
        server.serve_forever()
        server.shutdown()
        self.logger.info("Ended")

    def terminate(self):
        """Ask the guest server to shut down."""
        server = self.server
        server.shutdown()
    def __init__(self,
                 config: DictConfig,
                 libvirt_iface,
                 host_monitor: Monitor,
                 guest_manager: GuestManager,
                 shared_terminable: Terminable = None):
        """Initialise the policy thread from the 'policy' config section.

        Args:
            config: Configuration; the keys 'interval', 'grace-period',
                'inquiry-timeout', 'resources' and 'allocator' of section
                'policy' are read here.
            libvirt_iface: Libvirt interface, stored in ``self.properties``.
            host_monitor: Host monitor used by the policy.
            guest_manager: Guest manager used by the policy.
            shared_terminable: Forwarded to ``Terminable.__init__``.
        """
        LoggedThread.__init__(self, daemon=True)
        Terminable.__init__(self, shared_terminable=shared_terminable)

        self.policy_lock = threading.RLock()
        self.host_monitor = host_monitor
        self.guest_manager = guest_manager
        self.config = config

        # The interval is at least 1; the other two timings never exceed it.
        configured_interval = self.config.get('policy', 'interval')
        self.interval: float = max(configured_interval, 1.)
        configured_grace = self.config.get('policy', 'grace-period')
        self.grace_period: float = min(configured_grace, self.interval)
        configured_timeout = self.config.get('policy', 'inquiry-timeout')
        self.inquiry_timeout: float = min(configured_timeout, self.interval)

        # Resource names with surrounding whitespace removed.
        self.resources = [
            resource.strip()
            for resource in self.config.get('policy', 'resources')
        ]

        # The allocator class is looked up by name and built over the
        # resource list.
        allocator_importer = ClassImporter(allocators)
        allocator_class = allocator_importer.get_class(
            config.get('policy', 'allocator'))
        self.policy = allocator_class(self.resources)
        self.controllers = []

        self.properties = dict(
            libvirt_iface=libvirt_iface,
            host_monitor=host_monitor,
            guest_manager=guest_manager,
            config=config,
        )

        self.policy_data_loggers = {}
        self.client_executor = None

        self.get_controllers()
Beispiel #5
0
class ExpMachine(GuestMachine):
    def __init__(self,
                 vm_name,
                 output_path,
                 exp_config: DictConfig,
                 host_config: DictConfig,
                 application: application.Application,
                 benchmark: cloudexp.guest.application.benchmark.Benchmark,
                 guest_mom_config: Union[DictConfig, dict, None] = None,
                 shared_terminable: Terminable = None,
                 shared_deferred_start: DeferredStartThread = None,
                 **props):
        """Build an experiment machine bound to an application and benchmark.

        The libvirt URI is read from ``host_config`` and the guest credentials
        from ``exp_config``; together with the application's image name they
        are forwarded to ``GuestMachine.__init__``.

        Args:
            vm_name: Name of the virtual machine.
            output_path: Directory under which per-machine output is written.
            exp_config: Experiment configuration ('guest-credentials' section
                is read here).
            host_config: Host configuration ('main' and 'guest-client'
                sections are read here).
            application: Application to run on the guest.
            benchmark: Benchmark that exercises the application.
            guest_mom_config: Optional overrides layered on top of
                ``momguestd.DEFAULT_CONFIG``.
            shared_terminable: Passed on to ``GuestMachine`` and to the
                benchmark executor.
            shared_deferred_start: Passed on to the benchmark executor.
            **props: Forwarded to ``GuestMachine.__init__``; the optional keys
                'load_func', 'load_interval' and 'expected_load_rounds' are
                also read here.
        """
        GuestMachine.__init__(
            self,
            vm_name,
            host_config.get('main', 'libvirt-hypervisor-uri'),
            exp_config.get('guest-credentials', 'username'),
            exp_config.get('guest-credentials', 'password'),
            application.get_image_name(),
            shared_terminable=shared_terminable,
            **props)

        # Plain configuration / collaborator references.
        self.output_path = output_path
        self.exp_config = exp_config
        self.host_conf = host_config
        self.guest_config = DictConfig(momguestd.DEFAULT_CONFIG,
                                       guest_mom_config)
        self.application = application
        self.benchmark = benchmark

        # Optional load description; each one is None when not supplied.
        self.load_func = props.get('load_func')
        self.load_interval = props.get('load_interval')
        self.expected_load_rounds = props.get("expected_load_rounds")

        # Wire the benchmark and the application to this machine.
        self.benchmark.set_guest_machine(self)
        guest_server_port = self.host_conf.get("guest-client", "port")
        self.application.set_guest_server_port(guest_server_port)

        # NOTE(review): self.ip and self.vm_name are presumably set by
        # GuestMachine.__init__ - confirm against the base class.
        self.guest_client = load_guest_client(
            self.ip,
            self.vm_name,
            self.host_conf,
            base_name='exp-machine-client')
        self.benchmark_thread = BenchmarkExecutor(
            self.benchmark,
            self.load_func,
            self.load_interval,
            self.expected_load_rounds,
            name=self.vm_name,
            shared_terminable=shared_terminable,
            shared_deferred_start=shared_deferred_start)

        # Background threads and SSH connections, both keyed by name.
        self.threads = {}
        self.ssh_connections = {}
        self._is_ready_for_experiment = Event()

    def as_dict(self):
        """Return the base-class description plus the benchmark settings.

        The result of ``GuestMachine.as_dict`` is extended with the keys
        'load_interval', 'bm_cpus' and 'guest_config'.
        """
        description = dict(GuestMachine.as_dict(self))
        description.update(
            load_interval=self.load_interval,
            bm_cpus=self.get_benchmark_cpus(),
            guest_config=self.guest_config.get_dict(),
        )
        return description

    def get_benchmark_cpu_count(self):
        """Return what the benchmark reports via ``get_required_cpus()``."""
        required_cpus = self.benchmark.get_required_cpus()
        return required_cpus

    def set_benchmark_cpus(self, bm_cpus):
        """Forward ``bm_cpus`` to the benchmark's ``set_benchmark_cpus``."""
        benchmark = self.benchmark
        benchmark.set_benchmark_cpus(bm_cpus)

    def get_benchmark_cpus(self):
        """Return what the benchmark reports via ``get_benchmark_cpus()``."""
        benchmark_cpus = self.benchmark.get_benchmark_cpus()
        return benchmark_cpus

    def init_experiment(self):
        """Prepare the VM and start everything needed for the experiment.

        The steps run strictly in order: start the domain, wait for SSH,
        configure the guest, install the framework and the application,
        disable cron, drop caches, start the guest server and wait for it,
        start the application and wait for it, and finally start the
        benchmark thread. The ready flag is set only if every step succeeded.

        Raises:
            Exception: Any failure of a step is logged and re-raised
                unchanged.
        """
        self.logger.info("Initiating experiment on VM...")

        try:
            self.start_domain(restart_if_activated=False)
            self.wait_for_ssh_server()
            self.set_guest_host_name()
            self.set_vm_props()
            self.log_vm_sysctl_properties()
            self.install_framework()
            self.install_application()
            self.disable_cron()
            self.drop_caches()

            self.start_guest_server()
            self.guest_client.wait_for_server(shared_terminable=self)

            self.start_application()
            self.benchmark.wait_for_application()

            # Start benchmark thread
            self.benchmark_thread.start()
        except Exception as e:
            self.logger.exception("Failed to initiate experiment: %s", e)
            # Bare raise re-raises the active exception as-is, without
            # adding this line to its traceback.
            raise
        else:
            self._is_ready_for_experiment.set()

    @property
    def is_ready_for_experiment(self):
        """Whether the internal ready-for-experiment event is currently set."""
        ready_event = self._is_ready_for_experiment
        return ready_event.is_set()

    def end_experiment(self):
        """Stop the experiment and wait for the remote processes to end.

        Terminates this machine and the benchmark thread, closes the guest
        client, clears the ready flag, and then joins the guest server and
        the application, each with a timeout of 20.
        """
        self.terminate()
        self.benchmark_thread.terminate()
        self.close_client()
        self._is_ready_for_experiment.clear()

        join_timeout = 20

        self.logger.info("Waiting for remote guest server to end...")
        self.join_guest_server(join_timeout)

        self.logger.info("Waiting for remote program to end...")
        self.join_application(join_timeout)

    def close_client(self):
        """Close the guest client, if there is one."""
        client = self.guest_client
        if client is None:
            return
        client.close()

    def disable_cron(self):
        """Stop the cron service on the guest over SSH and log the outcome.

        Any error output is treated as a failure and logged as an error;
        nothing is raised.
        """
        self.logger.info("Disabling Cron...")
        cron_stop = self.ssh("service", "cron", "stop", name="disable-cron")

        # Short blocking call: wait for the remote command to finish.
        cron_stop.communicate()

        if cron_stop.err:
            self.logger.error("Fail to disable cron: out=%s, err=%s",
                              cron_stop.out, cron_stop.err)
            return

        single_line_output = cron_stop.out.replace("\n", " ").strip()
        self.logger.info("Cron disabled: %s", single_line_output)

    def drop_caches(self):
        """Run ``sync`` and then drop the guest's caches over SSH.

        Both commands are waited for before their output is inspected.
        Failures are logged as errors and are not raised; the summary line
        is logged in every case.
        """
        ssh_sync = self.ssh("sync", name="sync-cache")
        ssh_sync.communicate()  # short blocking

        if ssh_sync.err:
            self.logger.error("Fail to sync cache to secondary memory: %s",
                              ssh_sync.err)

        ssh_drop = self.ssh("echo 3 > /proc/sys/vm/drop_caches",
                            name="drop-cache")
        # Wait for the command like every other SSH call in this class does;
        # previously .err/.out were read without communicate() being called.
        ssh_drop.communicate()  # short blocking

        if ssh_drop.err:
            self.logger.error("Fail to drop caches: %s", ssh_drop.err)

        self.logger.info(
            "Dropped caches.%s%s",
            (" [sync: %s]" % ssh_sync.out) if ssh_sync.out else "",
            (" [drop: %s]" % ssh_drop.out) if ssh_drop.out else "")

    def start_background_thread(self,
                                target,
                                name_prefix=None,
                                args=(),
                                kwargs=None):
        """Start ``target`` in a daemon ``LoggedThread`` and register it.

        Does nothing when ``should_run`` is false. The thread is named
        ``<name_prefix>-<vm_name>`` and stored in ``self.threads`` under that
        name before it is started. A failure to create or start the thread
        is logged and not raised.

        Args:
            target: Callable to run in the thread.
            name_prefix: First part of the thread name.
            args: Positional arguments for ``target``.
            kwargs: Keyword arguments for ``target``.
        """
        if not self.should_run:
            return

        name = "{}-{}".format(name_prefix, self.vm_name)
        self.logger.info("Starting background thread: %s", name)

        try:
            worker = LoggedThread(target=target,
                                  name=name,
                                  args=args,
                                  kwargs=kwargs,
                                  daemon=True)
            self.threads[name] = worker
            worker.start()
        except Exception as exc:
            self.logger.exception("Failed to initiated thread '%s': %s", name,
                                  exc)

    def _remote_function_call_thread(self, name, remote_function: Callable,
                                     *args, **kwargs):
        """Run a remote function call and block until it ends.

        The connection is stored in ``self.ssh_connections`` under ``name``.
        Output goes to ``<name>-<vm_name>.log`` inside ``self.output_path``.
        If the call ends while ``should_run`` is still true, that is logged
        as a crash.
        """
        log_file_name = f'{name}-{self.vm_name}.log'
        output_file = os.path.join(self.output_path, log_file_name)

        connection = self.remote_function_call(remote_function,
                                               *args,
                                               output_file=output_file,
                                               **kwargs)
        self.ssh_connections[name] = connection

        stdout_text, stderr_text = connection.communicate()  # blocking

        if not self.should_run:
            # Ending was expected: the machine has been told to stop.
            return
        self.logger.error(
            "%s crashed before its time - [out]: %s, [err]: %s", name,
            stdout_text, stderr_text)

    def start_remote_function_call_thread(self, name,
                                          remote_function: Callable, *args,
                                          **kwargs):
        """Run ``remote_function`` remotely in a background thread.

        ``name`` serves both as the thread-name prefix and as the key of the
        resulting connection; ``args`` and ``kwargs`` are forwarded to the
        remote call.
        """
        thread_args = (name, remote_function) + args
        self.start_background_thread(self._remote_function_call_thread,
                                     name,
                                     args=thread_args,
                                     kwargs=kwargs)

    def start_application(self):
        """Launch the application remotely in a background thread.

        The application object and the guest's ('logging', 'verbosity')
        setting are passed to ``remote_application``.
        """
        verbosity = self.guest_config.get('logging', 'verbosity')
        self.start_remote_function_call_thread("application",
                                               remote_application,
                                               self.application,
                                               verbosity)

    def start_guest_server(self):
        """Launch the guest server remotely in a background thread.

        The guest's ('logging', 'verbosity') setting, the guest configuration
        and the VM name (as ``guest_name``) are passed to
        ``remote_guest_server``.
        """
        verbosity = self.guest_config.get('logging', 'verbosity')
        self.start_remote_function_call_thread(
            "guest-server",
            remote_guest_server,
            verbosity,
            self.guest_config,
            guest_name=self.vm_name)

    def terminate_ssh(self, ssh_name, timeout):
        """Terminate the named SSH connection and join it.

        Nothing happens when no connection is registered under ``ssh_name``
        or when it is no longer alive.

        Args:
            ssh_name: Key of the connection in ``self.ssh_connections``.
            timeout: Passed to the connection's ``join``.
        """
        connection = self.ssh_connections.get(ssh_name)
        if connection is None or not connection.is_alive():
            return
        connection.terminate()
        connection.join(timeout)

    def join_application(self, timeout=None):
        """Terminate and join the 'application' SSH connection."""
        connection_name = 'application'
        self.terminate_ssh(connection_name, timeout)

    def join_guest_server(self, timeout=None):
        """Terminate and join the 'guest-server' SSH connection."""
        connection_name = 'guest-server'
        self.terminate_ssh(connection_name, timeout)

    def install_framework(self):
        """Copy the repository to the guest and run its setup script there.

        The repository is synchronised to '~' (excluding byte-code, cache
        and dot entries), then ``guest_setup.py develop --user`` is run from
        the repository directory.

        Raises:
            GuestMachineException: If the setup command produced any error
                output.
        """
        self.logger.info("Copy and install Python's code...")

        self.rsync_retry(cloudexp.repository_path,
                         '~',
                         name='framework',
                         exclude=('*.pyc', '*.pyo', '__pycache__', '.*'),
                         delete=True)

        setup_command = self.ssh('guest_setup.py',
                                 'develop',
                                 '--user',
                                 cwd=cloudexp.repository_name,
                                 is_python=True,
                                 is_module=False,
                                 name="install-packages")
        out, err = setup_command.communicate()
        if not err:
            return
        raise GuestMachineException(
            f"Could not install framework:\nOUT: {out}\nERR: {err}")

    def install_application(self):
        """Copy the application's binaries to the guest's home directory.

        ``self.application.get_bin_path()`` may return None (nothing to
        install), a single path string, or a list/tuple of path strings.
        Every path is synchronised to '~'.

        Raises:
            TypeError: If the binaries path is neither a string nor a
                list/tuple. This used to be an ``assert``, which is skipped
                when Python runs with ``-O``.
        """
        bin_path = self.application.get_bin_path()
        if bin_path is None:
            self.logger.info(
                "Application does not require to install binaries...")
            return
        self.logger.info("Copy and install application's binaries...")

        if isinstance(bin_path, str):
            # A single path: treat it as a one-element collection.
            bin_path = (bin_path, )
        elif not isinstance(bin_path, (list, tuple)):
            raise TypeError("Binaries path must be a string.")

        for p in bin_path:
            self.rsync_retry(p, '~', name='app-binaries', delete=True)