def test_load_remote_batch():
    """Load ``remote_batch.json`` and check the batch, jobs, backend, scheduler and server."""
    data_dir = "/tmp/test_remote_batch"
    config_path = os.path.join(HERE, "remote_batch.json")
    batch_app = BatchApplication.load(load_json(config_path), data_dir)
    print(batch_app)

    # Batch-level settings.
    batch = batch_app.batch
    assert batch.name == "remote-batch-example"
    assert batch.data_dir_path.as_posix() == data_dir
    assert len(batch.jobs) == 2

    # The first job; its working dir is expected to equal its data dir.
    first_job: ShellJob = batch.jobs[0]
    assert first_job.name == "job1"
    assert first_job.params == {"learning_rate": 0.1}
    assert first_job.command == "sleep 3;echo \"finished\""
    assert first_job.data_dir == "/tmp/test_remote_batch/job1"
    assert first_job.working_dir == first_job.data_dir

    # Backend: an SSH executor manager holding two machines.
    scheduler = batch_app.job_scheduler
    manager = scheduler.executor_manager
    assert isinstance(manager, RemoteSSHExecutorManager)
    assert len(manager.machines) == 2

    first_machine: SSHRemoteMachine = manager.machines[0]
    assert first_machine.connection == {
        "hostname": "host1",
        "username": "******",
        "password": "******",
    }
    assert first_machine.environments == {"JAVA_HOME": "/usr/local/jdk"}

    # Scheduler settings.
    assert scheduler.interval == 5000
    assert not scheduler.exit_on_finish

    # Server settings.
    assert batch_app.server_host == "localhost"
    assert batch_app.server_port == 8061
def setup_class(cls):
    """Run the parent setup, then store a local-batch application on ``cls.app``."""
    super(TestLocaBatch, cls).setup_class()
    cls.app = BatchApplication(
        create_local_batch(),
        server_port=8082,
        scheduler_exit_on_finish=True,
        scheduler_interval=1000,
    )
def create_app(batches_data_dir, port):
    """Return a ``BatchApplication`` for a local batch, with its server on *port*.

    The batch comes from ``batch_factory.create_local_batch``, which receives
    *batches_data_dir*; the backend configuration is ``{'type': 'local'}``.
    """
    local_batch = batch_factory.create_local_batch(batches_data_dir=batches_data_dir)
    return BatchApplication(
        local_batch,
        server_port=port,
        scheduler_exit_on_finish=True,
        backend_conf={'type': 'local'},
        scheduler_interval=1000,
    )
def setup_class(cls):
    """Run the parent setup, then store a minimum-batch application on ``cls.app``.

    The scheduler is given a single ``ConsoleCallback``.
    """
    super(TestMinimumLocalBatch, cls).setup_class()
    cls.app = BatchApplication(
        create_minimum_batch(),
        server_port=8061,
        scheduler_exit_on_finish=True,
        scheduler_interval=1000,
        scheduler_callbacks=[ConsoleCallback()],
    )
def _load_batch_data_dir(batches_data_dir: str, batch_name) -> BatchApplication:
    """Load the application of batch *batch_name* from ``batches_data_dir/batch_name``.

    The batch directory must contain the config file named by
    ``Batch.FILE_CONFIG``; its JSON content is handed to
    ``BatchApplication.load`` together with the batch directory.

    Raises:
        RuntimeError: if that config file does not exist.
    """
    batch_dir = Path(batches_data_dir) / batch_name
    config_file = batch_dir / Batch.FILE_CONFIG

    if config_file.exists():
        return BatchApplication.load(load_json(config_file), batch_dir)

    raise RuntimeError(f"batch {batch_name} not exists")
def create_batch_app(batches_data_dir):
    """Return a ``BatchApplication`` (server port 8086) for a minimum batch.

    *batches_data_dir* is forwarded to ``create_minimum_batch``.
    """
    minimum_batch = create_minimum_batch(batches_data_dir=batches_data_dir)
    return BatchApplication(
        minimum_batch,
        server_port=8086,
        scheduler_exit_on_finish=True,
        scheduler_interval=1000,
    )
def test_batch_to_config():
    """Check that ``to_config()`` exports the jobs, server, scheduler and version sections."""
    server_port = 8061
    scheduler_interval = 1

    # Build an application around the minimum batch.
    app = BatchApplication(
        create_minimum_batch(),
        server_port=server_port,
        scheduler_exit_on_finish=True,
        scheduler_interval=scheduler_interval,
    )

    # Export the configuration that the assertions below inspect.
    config = app.to_config()

    # Jobs: exactly one job with the expected attributes.
    jobs = config['jobs']
    assert len(jobs) == 1
    only_job = jobs[0]
    assert only_job['name'] == 'job1'
    assert only_job['params']["learning_rate"] == 0.1
    assert only_job['command'] == 'pwd'
    assert only_job['data_dir']
    assert only_job['working_dir']

    # TODO: check the backend section, i.e. config['backend']['type'] == 'local'.

    # Server: host and port.
    server = config['server']
    assert server['host'] == 'localhost'
    assert server['port'] == server_port

    # Scheduler: the values passed to the constructor.
    scheduler = config['scheduler']
    assert scheduler['exit_on_finish'] is True
    assert scheduler['interval'] == scheduler_interval

    # Version: must be present and truthy.
    assert config['version']
def setup_class(cls):
    """Run the parent setup, start a ``BatchRunner`` and keep it on ``cls.runner``.

    The scheduler is created with ``scheduler_exit_on_finish=False``.
    """
    super(TestStopScheduler, cls).setup_class()
    app = BatchApplication(
        create_minimum_batch(),
        server_port=8086,
        scheduler_exit_on_finish=False,
        scheduler_interval=1000,
    )
    batch_runner = BatchRunner(app)
    batch_runner.start()
    cls.runner = batch_runner

    # Give the runner two seconds to start before any test uses it.
    time.sleep(2)
def setup_class(cls):
    """Run the parent setup, then store a remote-batch application on ``cls.app``."""
    super(TestRemoteBatch, cls).setup_class()
    cls.app = BatchApplication(
        create_remote_batch(),
        server_port=8088,
        backend_conf=create_remote_backend_conf(),
        scheduler_exit_on_finish=True,
        scheduler_interval=1000,
    )
def test_run_batch(self):
    """Start a second application built from ``self.app1`` and expect every job to be skipped."""
    previous_app = self.app1

    # Rebuild an application from the earlier one's exported config and data directory.
    rerun_app = BatchApplication.load(
        previous_app.to_config(),
        batch_data_dir=previous_app.batch.data_dir_path,
    )
    rerun_app.start()
    rerun_app._http_server.stop()

    # Jobs that already ran must not run again: none allocated, all skipped.
    rerun_scheduler = rerun_app.job_scheduler
    assert rerun_scheduler.n_allocated == 0
    assert rerun_scheduler.n_skipped == len(previous_app.batch.jobs)
def setup_class(cls):
    """Run the parent setup, then store a local application with a custom environment.

    The backend configuration carries one environment variable,
    ``hyn_test_conda_home``.
    """
    super(TestLocalHostEnv, cls).setup_class()
    host_environments = {"hyn_test_conda_home": "/home/hyperctl/miniconda3"}
    cls.app = BatchApplication(
        batch_factory.create_assert_env_batch(),
        server_port=8088,
        scheduler_exit_on_finish=True,
        backend_type='local',
        backend_conf={"environments": host_environments},
        scheduler_interval=1000,
    )
def setup_class(cls):
    """Run the parent setup and start a runner whose only job sleeps for six seconds.

    The port, application and runner are stored on ``cls.server_port``,
    ``cls.app`` and ``cls.runner``.
    """
    super(TestKillLocalJob, cls).setup_class()
    sleeping_batch = create_minimum_batch(command='sleep 6; echo "finished"')

    cls.server_port = 8063
    cls.app = BatchApplication(
        sleeping_batch,
        server_port=cls.server_port,
        scheduler_exit_on_finish=True,
        scheduler_interval=1000,
    )
    cls.runner = BatchRunner(cls.app)
    cls.runner.start()
def setup_class(cls):
    """Run the parent setup, then store a remote application for an assets batch on ``cls.app``."""
    super(TestRunRemoteWithAssets, cls).setup_class()

    # Install a fresh event loop as the current one ("clear ioloop").
    fresh_loop = asyncio.new_event_loop()
    asyncio.set_event_loop(fresh_loop)

    # The batch with assets is created from cls.data_dir.
    cls.app = BatchApplication(
        batch_factory.create_assets_batch(cls.data_dir),
        server_port=8089,
        backend_type='remote',
        backend_conf=create_remote_backend_conf(),
        scheduler_exit_on_finish=True,
        scheduler_interval=1000,
    )
def run_batch_config(config_dict, batches_data_dir):
    """Fill in missing names in a batch config, then load and start the batch.

    ``config_dict`` is modified in place: a batch without a ``'name'`` and
    every job without a ``'name'`` receive an id generated by
    ``common_util.generate_short_id()``.

    Args:
        config_dict: batch configuration; must contain a ``'jobs'`` entry that
            iterates over job dicts.
        batches_data_dir: passed to ``BatchApplication.load`` as its second
            argument.

    Raises:
        KeyError: if ``config_dict`` has no ``'jobs'`` entry.
    """
    # add batch name
    if config_dict.get('name') is None:
        batch_name = common_util.generate_short_id()
        logger.debug(f"generated batch name {batch_name}")
        # Write the generated name back so BatchApplication.load receives it,
        # exactly as is done for generated job names below.
        config_dict['name'] = batch_name

    # add job name
    for job_dict in config_dict['jobs']:
        if job_dict.get('name') is None:
            job_name = common_util.generate_short_id()
            logger.debug(f"generated job name {job_name}")
            job_dict['name'] = job_name

    app = BatchApplication.load(config_dict, batches_data_dir)
    app.start()
def setup_class(cls):
    """Run the parent setup, then store a remote application with a custom environment.

    The backend has one machine: its connection comes from
    ``load_ssh_psw_config()`` and its environment defines
    ``hyn_test_conda_home``.
    """
    super(TestRemoteHostEnv, cls).setup_class()

    remote_machine = {
        'connection': load_ssh_psw_config(),
        'environments': {"hyn_test_conda_home": "/home/hyperctl/miniconda3"},
    }
    remote_backend = {"type": 'remote', "machines": [remote_machine]}

    cls.app = BatchApplication(
        batch_factory.create_assert_env_batch(),
        server_port=8089,
        scheduler_exit_on_finish=True,
        backend_conf=remote_backend,
        scheduler_interval=1000,
    )
def test_load_local_batch_config():
    """Load a local batch from an in-memory config and check every section.

    ``job1`` gives no directories, so both are expected at ``<working dir>/job1``.
    ``job2`` sets ``data_dir`` and ``working_dir`` explicitly and must keep them.
    The temporary working directory is removed when the test ends, pass or fail.
    """
    import shutil  # local import: only needed to clean up the temporary directory

    # 1. load batch from config
    job2_data_dir = "/tmp/hyperctl-batch-data/job2"
    local_batch = {
        "name": "local_batch_test",
        "jobs": [
            {
                "name": "job1",
                "params": {
                    "learning_rate": 0.1
                },
                "command": "pwd"
            },
            {
                "name": "job2",
                "params": {
                    "learning_rate": 0.2
                },
                "command": "sleep 3",
                "working_dir": job2_data_dir,
                "data_dir": job2_data_dir,
            }
        ],
        "backend": {
            "type": "local",
            "conf": {}
        },
        "scheduler": {
            "exit_on_finish": False,
            "interval": 1
        },
        "server": {
            "host": "local_machine",
            "port": 18060
        }
    }

    # mkdtemp leaves deletion to the caller, hence the try/finally below.
    batch_working_dir = tempfile.mkdtemp(prefix="hyperctl-test-batches")
    try:
        batch_app = BatchApplication.load(local_batch, batch_working_dir)

        # 2. assert batch
        assert batch_app.batch.name == "local_batch_test"
        jobs = batch_app.batch.jobs
        assert len(jobs) == 2

        job1: ShellJob = jobs[0]
        assert isinstance(job1, ShellJob)
        assert job1.name == "job1"
        assert job1.params['learning_rate'] == 0.1
        assert job1.command == "pwd"
        # job1 has no explicit directories in the config.
        expected_job1_dir = (Path(batch_working_dir) / "job1").absolute().as_posix()
        assert job1.data_dir == expected_job1_dir
        assert job1.working_dir == expected_job1_dir

        job2: ShellJob = jobs[1]
        assert isinstance(job2, ShellJob)
        assert job2.name == "job2"
        assert job2.params['learning_rate'] == 0.2
        assert job2.command == "sleep 3"
        assert job2.data_dir == job2_data_dir
        assert job2.working_dir == job2_data_dir

        # check backend
        assert isinstance(batch_app.job_scheduler.executor_manager, LocalExecutorManager)

        # check server
        assert batch_app.server_host == 'local_machine'
        assert batch_app.server_port == 18060

        # check scheduler
        assert batch_app.job_scheduler.exit_on_finish is False
        assert batch_app.job_scheduler.interval == 1
    finally:
        # Best-effort cleanup so repeated runs do not pile up temp directories.
        shutil.rmtree(batch_working_dir, ignore_errors=True)