def test_on_policy_batch_memory(request):
    '''Fixture: an on-policy batch memory instance bundled with the parametrized values.'''
    memory_spec = spec_util.get('experimental/misc/base.json', 'base_on_policy_batch_memory')
    spec_util.tick(memory_spec, 'trial')
    agent, _env = make_agent_env(memory_spec)
    # prepend the built memory to the request's param tuple
    return (agent.body.memory,) + request.param
def test_prioritized_replay_memory(request):
    '''Fixture: a prioritized replay memory instance bundled with the parametrized values.'''
    memory_spec = spec_util.get('experimental/misc/base.json', 'base_prioritized_replay_memory')
    spec_util.tick(memory_spec, 'trial')
    agent, _env = make_agent_env(memory_spec)
    # prepend the built memory to the request's param tuple
    return (agent.body.memory,) + request.param
def test_session(test_spec):
    '''Run a single Session from the test spec and check it returns a metrics dict.'''
    for unit in ('trial', 'session'):
        spec_util.tick(test_spec, unit)
    spec_util.save(test_spec, unit='trial')
    metrics = Session(test_spec).run()
    assert isinstance(metrics, dict)
def run_trial_test(spec_file, spec_name=False):
    '''Load a spec, apply the test override, run one Trial, and check it returns a metrics dict.'''
    loaded_spec = spec_util.get(spec_file, spec_name)
    loaded_spec = spec_util.override_test_spec(loaded_spec)
    spec_util.tick(loaded_spec, 'trial')
    metrics = Trial(loaded_spec).run()
    assert isinstance(metrics, dict)
def test_trial_demo():
    '''Run the demo dqn_cartpole Trial under the test override and check it returns metrics.'''
    demo_spec = spec_util.get('demo.json', 'dqn_cartpole')
    spec_util.save(demo_spec, unit='experiment')
    demo_spec = spec_util.override_spec(demo_spec, 'test')
    spec_util.tick(demo_spec, 'trial')
    metrics = Trial(demo_spec).run()
    assert isinstance(metrics, dict)
def test_experiment():
    '''Run the demo dqn_cartpole Experiment under the test override and check it returns a DataFrame.'''
    demo_spec = spec_util.get('demo.json', 'dqn_cartpole')
    spec_util.save(demo_spec, unit='experiment')
    demo_spec = spec_util.override_spec(demo_spec, 'test')
    spec_util.tick(demo_spec, 'experiment')
    result_df = Experiment(demo_spec).run()
    assert isinstance(result_df, pd.DataFrame)
def ray_trainable(config, reporter):
    '''
    Trainable function for ray tune:
    https://ray.readthedocs.io/en/latest/tune-usage.html#training-api
    Lab needs a spec and a trial_index carried through config; pass them via ray.run() like so:
    config = {
        'spec': spec,
        'trial_index': tune.sample_from(lambda spec: gen_trial_index()),
        ...  # the usual ray config (sample, grid search, etc.)
    }
    '''
    import os
    # ray restricts CUDA devices per worker; lift that restriction for the lab trial
    os.environ.pop('CUDA_VISIBLE_DEVICES', None)
    from slm_lab.experiment.control import Trial
    # recover the data carried through the ray.run() config
    lab_spec = config.pop('spec')
    lab_spec = inject_config(lab_spec, config)
    trial_index = config.pop('trial_index')
    # offset by one so the subsequent tick lands on the intended trial index
    lab_spec['meta']['trial'] = trial_index - 1
    spec_util.tick(lab_spec, 'trial')
    # run the SLM Lab trial and keep the config alongside its metrics for analysis
    result_metrics = Trial(lab_spec).run()
    result_metrics.update(config)
    # report through ray so the data lands in ray trial.last_result
    reporter(trial_data={trial_index: result_metrics})
def run_sessions(self):
    '''Run the spec's sessions: inline when max_session == 1, otherwise in parallel processes.'''
    logger.info('Running sessions')
    if self.spec['meta']['max_session'] == 1:
        # single session: tick a copied spec so self.spec is left untouched
        session_spec = deepcopy(self.spec)
        spec_util.tick(session_spec, 'session')
        return [Session(session_spec).run()]
    return self.parallelize_sessions()
def train(self, specfile, specname):
    '''Load the named spec, save it, and run a full training Trial.'''
    logger.info(f'Running lab mode:train with specfile:{specfile} specname:{specname}')
    spec = spec_util.get(specfile, specname)
    # FIXME Why does this need to be in env?
    os.environ['lab_mode'] = 'train'
    spec_util.save(spec)  # first save the new spec
    spec_util.tick(spec, 'trial')
    Trial(spec).run()
def test_demo_performance():
    '''Run a shortened dqn_cartpole session and check the final reward clears 50.'''
    spec = spec_util.get('demo.json', 'dqn_cartpole')
    spec_util.save(spec, unit='experiment')
    # shorten every env so the test stays fast
    for env_spec in spec['env']:
        env_spec['max_frame'] = 2000
    spec_util.tick(spec, 'trial')
    # NOTE(review): trial is unused afterwards; Trial(spec) may mutate spec, so kept as in original
    trial = Trial(spec)
    spec_util.tick(spec, 'session')
    session = Session(spec)
    session.run()
    last_reward = session.agent.body.train_df.iloc[-1]['total_reward']
    assert last_reward > 50, f'last_reward is too low: {last_reward}'
def parallelize_sessions(self, global_nets=None):
    '''Spawn one worker process per session and return their metrics sorted by session index.'''
    shared_metrics = mp.Manager().dict()  # shared store the workers write their metrics into
    procs = []
    for _ in range(self.spec['meta']['max_session']):
        # tick before forking so each worker gets its own session index
        spec_util.tick(self.spec, 'session')
        proc = mp.Process(target=mp_run_session, args=(deepcopy(self.spec), global_nets, shared_metrics))
        proc.start()
        procs.append(proc)
    for proc in procs:
        proc.join()
    return [shared_metrics[idx] for idx in sorted(shared_metrics.keys())]
def dev(self, specfile, specname):
    '''train with session/trial counts clamped — useful for iterative development.'''
    logger.info(f'Running lab mode:dev with specfile:{specfile} specname:{specname}')
    spec = spec_util.get(specfile, specname)
    # FIXME Why does this need to be in env?
    os.environ['lab_mode'] = 'dev'
    spec_util.save(spec)  # first save the new spec
    # clamp inline (in lieu of spec_util.override_dev_spec) to keep dev runs short
    spec['meta']['max_session'] = 1
    spec['meta']['max_trial'] = 2
    spec_util.tick(spec, 'trial')
    Trial(spec).run()
def run_spec(spec, lab_mode):
    '''Run a spec in lab_mode: Experiment/Trial for train modes, Session for eval modes.'''
    os.environ['lab_mode'] = lab_mode  # set lab_mode
    spec = spec_util.override_spec(spec, lab_mode)  # conditionally override spec
    if lab_mode in TRAIN_MODES:
        spec_util.save(spec)  # first save the new spec
        # search runs a whole Experiment; every other train mode runs a single Trial
        runner_cls, unit = (Experiment, 'experiment') if lab_mode == 'search' else (Trial, 'trial')
        spec_util.tick(spec, unit)
        runner_cls(spec).run()
    elif lab_mode in EVAL_MODES:
        Session(spec).run()
    else:
        raise ValueError(f'Unrecognizable lab_mode not of {TRAIN_MODES} or {EVAL_MODES}')
def run_trial_test_dist(spec_file, spec_name=False):
    '''Run a 2-session synced-distributed Trial and check it produces a metrics dict.'''
    spec = spec_util.get(spec_file, spec_name)
    spec = spec_util.override_spec(spec, 'test')
    spec_util.tick(spec, 'trial')
    spec['meta']['distributed'] = 'synced'
    spec['meta']['max_session'] = 2
    trial = Trial(spec)
    # run the global-net init manually so the test ensures the global net gets updated
    global_nets = trial.init_global_nets()
    # only inspect the first network; multiagent returns a list of net dicts
    first_group = global_nets[0] if ps.is_list(global_nets) else global_nets
    net = next(iter(first_group.values()))
    session_metrics_list = trial.parallelize_sessions(global_nets)
    trial_metrics = analysis.analyze_trial(spec, session_metrics_list)
    trial.close()
    assert isinstance(trial_metrics, dict)
def test_spec():
    '''Fixture: a ticked, test-overridden base_case_openai spec.'''
    base_spec = spec_util.get('experimental/misc/base.json', 'base_case_openai')
    spec_util.tick(base_spec, 'trial')
    return spec_util.override_spec(base_spec, 'test')
def test_trial(test_spec):
    '''Run a Trial from the test spec and check it returns a metrics dict.'''
    spec_util.tick(test_spec, 'trial')
    spec_util.save(test_spec, unit='trial')
    metrics = Trial(test_spec).run()
    assert isinstance(metrics, dict)