def get_spec(spec_file, spec_name, lab_mode, pre_):
    '''Get spec using args processed from inputs'''
    if lab_mode in TRAIN_MODES:
        if pre_ is None:  # new train trial
            spec = spec_util.get(spec_file, spec_name)
        else:  # for resuming with train@{predir}
            # e.g. train@latest (will find the latest predir)
            # e.g. train@data/reinforce_cartpole_2020_04_13_232521
            predir = pre_
            if predir == 'latest':
                predir = sorted(glob(f'data/{spec_name}*/'))[-1]  # get the latest predir with spec_name
            _, _, _, _, experiment_ts = util.prepath_split(predir)  # get experiment_ts to resume train spec
            logger.info(f'Resolved to train@{predir}')
            spec = spec_util.get(spec_file, spec_name, experiment_ts)
    elif lab_mode in ('enjoy', 'record'):
        # for enjoy@{session_spec_file}
        # e.g. enjoy@data/reinforce_cartpole_2020_04_13_232521/reinforce_cartpole_t0_s0_spec.json
        session_spec_file = pre_
        assert session_spec_file is not None, 'enjoy/record mode must specify a `enjoy/record@{session_spec_file}`'
        spec = util.read(f'{session_spec_file}')
    else:
        raise ValueError(f'Unrecognizable lab_mode not of {TRAIN_MODES} or {EVAL_MODES}')
    return spec
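# Usage sketch for get_spec above (illustrative only: the spec file and spec name below
# are assumptions, and the enjoy path is the example from the docstring comments):
#
#   spec = get_spec('reinforce.json', 'reinforce_cartpole', 'train', None)      # fresh training run
#   spec = get_spec('reinforce.json', 'reinforce_cartpole', 'train', 'latest')  # resume the latest matching run under data/
#   spec = get_spec('reinforce.json', 'reinforce_cartpole', 'enjoy',
#                   'data/reinforce_cartpole_2020_04_13_232521/reinforce_cartpole_t0_s0_spec.json')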
def test_trial_demo():
    spec = spec_util.get('demo.json', 'dqn_cartpole')
    spec_util.save(spec, unit='experiment')
    spec = spec_util.override_spec(spec, 'test')
    spec_util.tick(spec, 'trial')
    trial_metrics = Trial(spec).run()
    assert isinstance(trial_metrics, dict)

def test_experiment():
    spec = spec_util.get('demo.json', 'dqn_cartpole')
    spec_util.save(spec, unit='experiment')
    spec = spec_util.override_spec(spec, 'test')
    spec_util.tick(spec, 'experiment')
    experiment_df = Experiment(spec).run()
    assert isinstance(experiment_df, pd.DataFrame)

def run_by_mode(spec_file, spec_name, run_mode):
    spec = spec_util.get(spec_file, spec_name)
    # TODO remove when analysis can save all plotly plots
    os.environ['run_mode'] = run_mode
    if run_mode == 'search':
        Experiment(spec).run()
    elif run_mode == 'train':
        Trial(spec).run()
    elif run_mode == 'enjoy':
        # TODO turn on save/load model mode
        # Session(spec).run()
        pass
    elif run_mode == 'generate_benchmark':
        benchmarker.generate_specs(spec, const='agent')
    elif run_mode == 'benchmark':
        # TODO allow changing const to env
        run_benchmark(spec, const='agent')
    elif run_mode == 'dev':
        os.environ['PY_ENV'] = 'test'  # to not save in viz
        spec = util.override_dev_spec(spec)
        Trial(spec).run()
    else:
        logger.warn('run_mode not recognized; must be one of `search, train, enjoy, benchmark, dev`.')

def run_trial_test(spec_file, spec_name=False):
    spec = spec_util.get(spec_file, spec_name)
    spec = spec_util.override_test_spec(spec)
    spec_util.tick(spec, 'trial')
    trial = Trial(spec)
    trial_metrics = trial.run()
    assert isinstance(trial_metrics, dict)
def run_by_mode(spec_file, spec_name, lab_mode):
    logger.info(f'Running lab in mode: {lab_mode}')
    spec = spec_util.get(spec_file, spec_name)
    info_space = InfoSpace()
    analysis.save_spec(spec, info_space, unit='experiment')
    # '@' is reserved for 'enjoy@{prepath}'
    os.environ['lab_mode'] = lab_mode.split('@')[0]
    os.environ['PREPATH'] = util.get_prepath(spec, info_space)
    reload(logger)  # to set PREPATH properly
    if lab_mode == 'search':
        info_space.tick('experiment')
        Experiment(spec, info_space).run()
    elif lab_mode.startswith('train'):
        if '@' in lab_mode:
            prepath = lab_mode.split('@')[1]
            spec, info_space = util.prepath_to_spec_info_space(prepath)
        else:
            info_space.tick('trial')
        Trial(spec, info_space).run()
    elif lab_mode.startswith('enjoy'):
        prepath = lab_mode.split('@')[1]
        spec, info_space = util.prepath_to_spec_info_space(prepath)
        Session(spec, info_space).run()
    elif lab_mode == 'dev':
        spec = util.override_dev_spec(spec)
        info_space.tick('trial')
        Trial(spec, info_space).run()
    else:
        logger.warn('lab_mode not recognized; must be one of `search, train, enjoy, benchmark, dev`.')
def test_experiment(test_info_space):
    spec = spec_util.get('demo.json', 'dqn_cartpole')
    analysis.save_spec(spec, test_info_space, unit='experiment')
    spec = spec_util.override_test_spec(spec)
    test_info_space.tick('experiment')
    experiment_data = Experiment(spec, test_info_space).run()
    assert isinstance(experiment_data, pd.DataFrame)

def test_on_policy_batch_memory(request):
    spec = spec_util.get('experimental/misc/base.json', 'base_on_policy_batch_memory')
    spec_util.tick(spec, 'trial')
    agent, env = make_agent_env(spec)
    res = (agent.body.memory,) + request.param
    return res

def test_prioritized_replay_memory(request):
    spec = spec_util.get('experimental/misc/base.json', 'base_prioritized_replay_memory')
    spec_util.tick(spec, 'trial')
    agent, env = make_agent_env(spec)
    res = (agent.body.memory,) + request.param
    return res

def run_by_mode(spec_file, spec_name, lab_mode):
    logger.info(f'Running lab in mode: {lab_mode}')
    spec = spec_util.get(spec_file, spec_name)
    info_space = InfoSpace()
    os.environ['PREPATH'] = util.get_prepath(spec, info_space)
    reload(logger)  # to set PREPATH properly
    # expose to runtime, '@' is reserved for 'enjoy@{prepath}'
    os.environ['lab_mode'] = lab_mode.split('@')[0]
    if lab_mode == 'search':
        info_space.tick('experiment')
        Experiment(spec, info_space).run()
    elif lab_mode == 'train':
        info_space.tick('trial')
        Trial(spec, info_space).run()
    elif lab_mode.startswith('enjoy'):
        prepath = lab_mode.split('@')[1]
        spec, info_space = util.prepath_to_spec_info_space(prepath)
        Session(spec, info_space).run()
    elif lab_mode == 'generate_benchmark':
        benchmarker.generate_specs(spec, const='agent')
    elif lab_mode == 'benchmark':
        # TODO allow changing const to env
        run_benchmark(spec, const='agent')
    elif lab_mode == 'dev':
        spec = util.override_dev_spec(spec)
        info_space.tick('trial')
        Trial(spec, info_space).run()
    else:
        logger.warn('lab_mode not recognized; must be one of `search, train, enjoy, benchmark, dev`.')

def test_trial_demo(test_info_space):
    spec = spec_util.get('demo.json', 'dqn_cartpole')
    spec = spec_util.override_test_spec(spec)
    spec['env'][0]['save_frequency'] = 1
    test_info_space.tick('trial')
    trial_data = Trial(spec, test_info_space).run()
    assert isinstance(trial_data, pd.DataFrame)

def test_vanilla_dqn():
    algo_name = 'unit_test_vanilla_dqn'
    spec = spec_util.get('test.json', 'unit_test_dqn')
    spec['name'] = algo_name
    spec['agent'][0]['algorithm']['name'] = "VanillaDQN"
    spec['meta']['max_episode'] = 25
    assert generic_algo_test(spec, algo_name) > 100

def test_trial_demo(test_info_space):
    spec = spec_util.get('demo.json', 'dqn_cartpole')
    analysis.save_spec(spec, test_info_space, unit='experiment')
    spec = spec_util.override_test_spec(spec)
    spec['meta']['eval_frequency'] = 1
    test_info_space.tick('trial')
    trial_data = Trial(spec, test_info_space).run()
    assert isinstance(trial_data, pd.DataFrame)

def run_trial_test(spec_file, spec_name=False):
    spec = spec_util.get(spec_file, spec_name)
    spec = util.override_test_spec(spec)
    info_space = InfoSpace()
    info_space.tick('trial')
    trial = Trial(spec, info_space)
    trial_data = trial.run()
    assert isinstance(trial_data, pd.DataFrame)

def test_sarsa_recurrent():
    algo_name = 'unit_test_sarsa_recurrent'
    spec = spec_util.get('test.json', 'unit_test_sarsa')
    spec['agent'][0]['memory']['name'] = "OnPolicyNStepBatchReplay"
    spec['agent'][0]['memory']['length_history'] = 4
    spec['agent'][0]['net']['type'] = "RecurrentNet"
    spec['agent'][0]['net']['hid_layers'] = [64]
    assert generic_algo_test(spec, algo_name) > 100

def test_actor_critic_recurrent_episodic_shared():
    algo_name = 'unit_test_actor_critic_recurrent_episodic_shared'
    spec = spec_util.get('test.json', 'unit_test_actor_critic')
    spec['name'] = algo_name
    spec['agent'][0]['memory']['name'] = "OnPolicyNStepReplay"
    spec['agent'][0]['memory']['length_history'] = 4
    spec['agent'][0]['net']['type'] = "Recurrentshared"
    spec['agent'][0]['net']['hid_layers'] = [16]
    assert generic_algo_test(spec, algo_name) > 100

def search(self, specfile, specname):
    "runs train mode multiple times across the parameterized specs"
    logger.info(f'Running lab mode:search with specfile:{specfile} specname:{specname}')
    spec = spec_util.get(specfile, specname)
    # assert 'spec_params' in spec
    param_specs = spec_util.get_param_specs(spec)
    search.run_param_specs(param_specs)

def main():
    # logger.set_level('DEBUG')
    spec = spec_util.get('dqn.json', 'dqn_cartpole')
    # spec = spec_util.get('dqn.json', 'dqn_cartpole_cartpole')
    # spec = spec_util.get('dqn.json', 'dqn_cartpole_multitask')
    # spec = spec_util.get('dqn.json', 'dqn_cartpole_cartpole_cartpole')
    # spec = spec_util.get('dqn.json', 'dqn_acrobot_cartpole')
    # spec = spec_util.get('dqn.json', 'dqn_2dball_cartpole')
    Trial(spec).run()

def train(self, specfile, specname):
    "enjoy + optimizes agent + periodic eval"
    logger.info(f'Running lab mode:train with specfile:{specfile} specname:{specname}')
    spec = spec_util.get(specfile, specname)
    # FIXME Why does this need to be in env?
    os.environ['lab_mode'] = 'train'
    spec_util.save(spec)  # first save the new spec
    spec_util.tick(spec, 'trial')
    Trial(spec).run()

def test_prioritized_replay_memory(request):
    memspec = spec_util.get('base.json', 'base_prioritized_replay_memory')
    memspec = util.override_test_spec(memspec)
    aeb_mem_space = AEBSpace(memspec, InfoSpace())
    env_space = EnvSpace(memspec, aeb_mem_space)
    aeb_mem_space.init_body_space()
    agent_space = AgentSpace(memspec, aeb_mem_space)
    agent = agent_space.agents[0]
    body = agent.nanflat_body_a[0]
    res = (body.memory,) + request.param
    return res

def test_prioritized_replay_memory(request):
    memspec = spec_util.get('base.json', 'base_prioritized_replay_memory')
    memspec = util.override_test_spec(memspec)
    aeb_mem_space = AEBSpace(memspec, InfoSpace())
    env_space = EnvSpace(memspec, aeb_mem_space)
    agent_space = AgentSpace(memspec, aeb_mem_space)
    aeb_mem_space.init_body_space()
    aeb_mem_space.post_body_init()
    agent = agent_space.agents[0]
    body = agent.nanflat_body_a[0]
    res = (body.memory,) + request.param
    return res
def run_trial_test(spec_file, spec_name=False, distributed=False):
    spec = spec_util.get(spec_file, spec_name)
    spec = util.override_test_spec(spec)
    info_space = InfoSpace()
    info_space.tick('trial')
    if distributed:
        spec['meta']['distributed'] = True
        if os.environ.get('CI') != 'true':  # CI does not have enough CPU
            spec['meta']['max_session'] = 2
    trial = Trial(spec, info_space)
    trial_data = trial.run()
    assert isinstance(trial_data, pd.DataFrame)
def test_demo_performance():
    spec = spec_util.get('demo.json', 'dqn_cartpole')
    spec_util.save(spec, unit='experiment')
    for env_spec in spec['env']:
        env_spec['max_frame'] = 2000
    spec_util.tick(spec, 'trial')
    trial = Trial(spec)
    spec_util.tick(spec, 'session')
    session = Session(spec)
    session.run()
    last_reward = session.agent.body.train_df.iloc[-1]['total_reward']
    assert last_reward > 50, f'last_reward is too low: {last_reward}'

def read_spec_and_run(spec_file, spec_name, lab_mode):
    '''Read a spec and run it in lab mode'''
    logger.info(f'Running lab spec_file:{spec_file} spec_name:{spec_name} in mode:{lab_mode}')
    if lab_mode in TRAIN_MODES:
        spec = spec_util.get(spec_file, spec_name)
    else:  # eval mode
        lab_mode, prename = lab_mode.split('@')
        spec = spec_util.get_eval_spec(spec_file, prename)
    if 'spec_params' not in spec:
        run_spec(spec, lab_mode)
    else:  # spec is parametrized; run them in parallel using ray
        param_specs = spec_util.get_param_specs(spec)
        search.run_param_specs(param_specs)
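# Usage sketch for read_spec_and_run above (illustrative only: the spec file, spec name,
# and the `enjoy@{prename}` value are assumptions, not guaranteed on-disk artifacts):
#
#   read_spec_and_run('dqn.json', 'dqn_cartpole', 'train')                     # train mode: reads the spec fresh
#   read_spec_and_run('dqn.json', 'dqn_cartpole', 'enjoy@dqn_cartpole_t0_s0')  # eval mode: loads a saved spec via get_eval_spec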
def dev(self, specfile, specname):
    "train + limit the number of trials & sessions. Useful for iterative development."
    logger.info(f'Running lab mode:dev with specfile:{specfile} specname:{specname}')
    spec = spec_util.get(specfile, specname)
    # FIXME Why does this need to be in env?
    os.environ['lab_mode'] = 'dev'
    spec_util.save(spec)  # first save the new spec
    # spec = spec_util.override_dev_spec(spec)
    spec['meta']['max_session'] = 1
    spec['meta']['max_trial'] = 2
    spec_util.tick(spec, 'trial')
    Trial(spec).run()

def run_new_mode(spec_file, spec_name, lab_mode):
    '''Run to generate new data with `search, train, dev`'''
    spec = spec_util.get(spec_file, spec_name)
    info_space = InfoSpace()
    analysis.save_spec(spec, info_space, unit='experiment')  # first save the new spec
    if lab_mode == 'search':
        info_space.tick('experiment')
        Experiment(spec, info_space).run()
    elif lab_mode.startswith('train'):
        info_space.tick('trial')
        Trial(spec, info_space).run()
    elif lab_mode == 'dev':
        spec = spec_util.override_dev_spec(spec)
        info_space.tick('trial')
        Trial(spec, info_space).run()
    else:
        raise ValueError(f'Unrecognizable lab_mode not of {TRAIN_MODES}')

def run_trial_test_dist(spec_file, spec_name=False):
    spec = spec_util.get(spec_file, spec_name)
    spec = spec_util.override_spec(spec, 'test')
    spec_util.tick(spec, 'trial')
    spec['meta']['distributed'] = 'synced'
    spec['meta']['max_session'] = 2
    trial = Trial(spec)
    # manually run the logic to obtain global nets for testing to ensure global net gets updated
    global_nets = trial.init_global_nets()
    # only test first network
    if ps.is_list(global_nets):  # multiagent only test first
        net = list(global_nets[0].values())[0]
    else:
        net = list(global_nets.values())[0]
    session_metrics_list = trial.parallelize_sessions(global_nets)
    trial_metrics = analysis.analyze_trial(spec, session_metrics_list)
    trial.close()
    assert isinstance(trial_metrics, dict)

def run_trial_test_dist(spec_file, spec_name=False):
    spec = spec_util.get(spec_file, spec_name)
    spec = spec_util.override_test_spec(spec)
    info_space = InfoSpace()
    info_space.tick('trial')
    spec['meta']['distributed'] = True
    spec['meta']['max_session'] = 2
    trial = Trial(spec, info_space)
    # manually run the logic to obtain global nets for testing to ensure global net gets updated
    global_nets = trial.init_global_nets()
    # only test first network
    if ps.is_list(global_nets):  # multiagent only test first
        net = list(global_nets[0].values())[0]
    else:
        net = list(global_nets.values())[0]
    session_datas = trial.parallelize_sessions(global_nets)
    trial.session_data_dict = {data.index[0]: data for data in session_datas}
    trial_data = analysis.analyze_trial(trial)
    trial.close()
    assert isinstance(trial_data, pd.DataFrame)

def run_by_mode(spec_file, spec_name, run_mode):
    spec = spec_util.get(spec_file, spec_name)
    if run_mode == 'search':
        Experiment(spec).run()
    elif run_mode == 'train':
        Trial(spec).run()
    elif run_mode == 'enjoy':
        # TODO turn on save/load model mode
        # Session(spec).run()
        pass
    elif run_mode == 'benchmark':
        # TODO need to spread benchmark over spec on Experiment
        pass
    elif run_mode == 'dev':
        os.environ['PY_ENV'] = 'test'  # to not save in viz
        logger.set_level('DEBUG')
        spec = util.override_dev_spec(spec)
        Trial(spec).run()
    else:
        logger.warn('run_mode not recognized; must be one of `search, train, enjoy, benchmark, dev`.')

def test_spec():
    global spec
    spec = spec_util.get('base.json', 'base_case')
    spec['meta']['train_mode'] = True
    return spec

def test_check():
    spec = spec_util.get('base.json', 'base_case')
    assert spec_util.check(spec)

def test_spec():
    spec = spec_util.get('experimental/misc/base.json', 'base_case_openai')
    spec = spec_util.override_test_spec(spec)
    return spec

def test_get():
    spec = spec_util.get('base.json', 'base_case')
    assert spec is not None

def test_resolve_aeb(spec_name, aeb_list):
    spec = spec_util.get('base.json', spec_name)
    resolved_aeb_list = spec_util.resolve_aeb(spec)
    assert resolved_aeb_list == aeb_list

def test_spec():
    global spec
    spec = spec_util.get('base.json', 'base_case')
    spec = util.override_test_spec(spec)
    return spec

def run_trial_test(spec_file, spec_name):
    spec = spec_util.get(spec_file, spec_name)
    spec = util.override_test_spec(spec)
    trial = Trial(spec)
    trial_data = trial.run()
    assert isinstance(trial_data, pd.DataFrame)

def test_trial_demo():
    spec = spec_util.get('demo.json', 'dqn_cartpole')
    spec = util.override_test_spec(spec)
    trial_data = Trial(spec).run()
    assert isinstance(trial_data, pd.DataFrame)