def start(self, port: int = 8080, debug: bool = False) -> None:
    """
    Launch the experiment, run the strategy and wait for the status checker.

    The call registers ``self.stop`` with ``atexit``, generates a new experiment
    id, starts experiment logging, launches the experiment through
    ``launcher.start_experiment_retiarii`` and starts the dispatcher in its own
    thread. It then logs the web UI URLs, calls ``self._start_strategy()`` and
    finally joins the thread running ``self._check_exp_status``, so it does not
    return before that thread has finished.

    An exception is raised on failure.

    Parameters
    ----------
    port
        The port of web UI.
    debug
        Whether to start in debug mode.
    """
    atexit.register(self.stop)

    self.id = management.generate_experiment_id()

    # Logs go below the configured working directory when one is set,
    # otherwise below ``~/nni-experiments``.
    working_dir = self.config.experiment_working_directory
    if working_dir is None:
        log_dir = Path.home() / f'nni-experiments/{self.id}/log'
    else:
        log_dir = Path(working_dir, self.id, 'log')
    nni.runtime.log.start_experiment_log(self.id, log_dir, debug)

    self._proc, self._pipe = launcher.start_experiment_retiarii(
        self.id, self.config, port, debug
    )
    assert self._proc is not None
    assert self._pipe is not None

    # Only recorded once the launch succeeded; presumably stays None otherwise
    # (the attribute is initialised outside this method).
    self.port = port

    # The dispatcher needs the pipe, so it can only be started at this point.
    # Launching it in the background should eventually move into the dispatcher api.
    dispatcher = self._create_dispatcher()
    self._dispatcher = dispatcher
    dispatcher_thread = Thread(target=dispatcher.run)
    self._dispatcher_thread = dispatcher_thread
    dispatcher_thread.start()

    # Candidate hosts: the configured manager IP followed by every IPv4
    # address reported for the local network interfaces.
    hosts = [self.config.nni_manager_ip]
    hosts.extend(
        nic.address
        for nics in psutil.net_if_addrs().values()
        for nic in nics
        if nic.family == socket.AF_INET
    )
    urls = [f'http://{host}:{port}' for host in hosts if host]
    joined_urls = ' '.join(urls)
    _logger.info(f'Web UI URLs: {colorama.Fore.CYAN}{joined_urls}{colorama.Style.RESET_ALL}')

    status_thread = Thread(target=self._check_exp_status)
    status_thread.start()

    self._start_strategy()

    # TODO: the experiment should be completed, when strategy exits and there is no running job
    _logger.info(
        'Waiting for experiment to become DONE (you can ctrl+c if there is no running trial jobs)...'
    )
    status_thread.join()
def start(self, port: int = 8080, debug: bool = False) -> None:
    """
    Start the experiment and wait for its status checker to finish.

    The method replaces ``self.config`` with its canonical copy, installs the
    execution engine selected by ``config.execution_engine``, launches the
    experiment through ``launcher.start_experiment_retiarii`` and starts the
    dispatcher in a background thread. It then logs the web UI URLs, calls
    ``self._start_strategy()`` and joins the thread running
    ``self._check_exp_status``, so it blocks until that thread has finished.

    This method will raise exception on failure.

    Parameters
    ----------
    port
        The port of web UI.
    debug
        Whether to start in debug mode.

    Raises
    ------
    ValueError
        If ``config.execution_engine`` is not one of ``'base'``, ``'cgo'``,
        ``'py'`` or ``'benchmark'``.
    AssertionError
        If the CGO engine is requested with a non-remote training service or
        without ``batch_waiting_time``, or if the launcher returns no process
        or pipe.
    """
    atexit.register(self.stop)

    self.config = self.config.canonical_copy()

    # we will probably need a execution engine factory to make this clean and elegant
    # Engine modules are imported lazily so only the selected engine is loaded.
    engine_name = self.config.execution_engine
    if engine_name == 'base':
        from ..execution.base import BaseExecutionEngine
        engine = BaseExecutionEngine()
    elif engine_name == 'cgo':
        from ..execution.cgo_engine import CGOExecutionEngine

        # NOTE: these checks are asserts (kept for backward compatibility) and
        # are therefore skipped when Python runs with -O.
        assert self.config.training_service.platform == 'remote', \
            "CGO execution engine currently only supports remote training service"
        assert self.config.batch_waiting_time is not None
        devices = self._construct_devices()
        engine = CGOExecutionEngine(
            devices,
            max_concurrency=self.config.max_concurrency_cgo,
            batch_waiting_time=self.config.batch_waiting_time,
        )
    elif engine_name == 'py':
        from ..execution.python import PurePythonExecutionEngine
        engine = PurePythonExecutionEngine()
    elif engine_name == 'benchmark':
        from ..execution.benchmark import BenchmarkExecutionEngine
        engine = BenchmarkExecutionEngine(self.config.benchmark)
    else:
        # Without this branch ``engine`` would be unbound and the call below
        # would fail with an opaque UnboundLocalError.
        raise ValueError(f'Unsupported execution engine: {engine_name!r}')
    set_execution_engine(engine)

    self.id = management.generate_experiment_id()

    # Logs go below the configured working directory when one is set,
    # otherwise below ``~/nni-experiments``.
    if self.config.experiment_working_directory is not None:
        log_dir = Path(self.config.experiment_working_directory, self.id, 'log')
    else:
        log_dir = Path.home() / f'nni-experiments/{self.id}/log'
    nni.runtime.log.start_experiment_log(self.id, log_dir, debug)

    self._proc, self._pipe = launcher.start_experiment_retiarii(
        self.id, self.config, port, debug
    )
    assert self._proc is not None
    assert self._pipe is not None

    # Only recorded once the launch succeeded; presumably stays None otherwise
    # (the attribute is initialised outside this method).
    self.port = port

    # dispatcher must be launched after pipe initialized
    # the logic to launch dispatcher in background should be refactored into dispatcher api
    self._dispatcher = self._create_dispatcher()
    self._dispatcher_thread = Thread(target=self._dispatcher.run)
    self._dispatcher_thread.start()

    # Candidate hosts: the configured manager IP followed by every IPv4
    # address reported for the local network interfaces; empty entries are dropped.
    hosts = [self.config.nni_manager_ip]
    hosts.extend(
        interface.address
        for interfaces in psutil.net_if_addrs().values()
        for interface in interfaces
        if interface.family == socket.AF_INET
    )
    urls = [f'http://{host}:{port}' for host in hosts if host]
    msg = 'Web UI URLs: ' + colorama.Fore.CYAN + ' '.join(urls) + colorama.Style.RESET_ALL
    _logger.info(msg)

    exp_status_checker = Thread(target=self._check_exp_status)
    exp_status_checker.start()

    self._start_strategy()

    # TODO: the experiment should be completed, when strategy exits and there is no running job
    _logger.info(
        'Waiting for experiment to become DONE (you can ctrl+c if there is no running trial jobs)...'
    )
    exp_status_checker.join()