) net_config = dict( hidden_units=(256, 256), activation=torch.nn.Tanh ) eg = ExperimentGrid(name=experiment_name) eg.add('env_name', env_config['env_name'], '', False) # eg.add('seed', 0) # eg.add('resume', '/home/c2/src/tmp/spinningup/data/intersection_2_agents_fine_tune_add_left_yield2/intersection_2_agents_fine_tune_add_left_yield2_s0_2020_03-23_22-40.11') # eg.add('reinitialize_optimizer_on_resume', True) # eg.add('num_inputs_to_add', 0) # eg.add('pi_lr', 3e-6) # eg.add('vf_lr', 1e-5) # eg.add('boost_explore', 5) eg.add('epochs', 20000) eg.add('steps_per_epoch', 4000) eg.add('ac_kwargs:hidden_sizes', net_config['hidden_units'], 'hid') eg.add('ac_kwargs:activation', net_config['activation'], '') eg.add('notes', notes, '') eg.add('run_filename', os.path.realpath(__file__), '') eg.add('env_config', env_config, '') def train(): eg.run(ppo_pytorch) if __name__ == '__main__': utils.run(train_fn=train, env_config=env_config, net_config=net_config)
physics_steps_per_observation=12, discrete_actions=COMFORTABLE_ACTIONS, ) net_config = dict(hidden_units=(64, 64), activation=torch.nn.Tanh) eg = ExperimentGrid(name=experiment_name) eg.add('env_name', env_config['env_name'], '', False) pso = env_config['physics_steps_per_observation'] effective_horizon_seconds = 10 eg.add( 'gamma', 1 - pso / (effective_horizon_seconds * FPS) ) # Lower gamma so seconds of effective horizon remains at 10s with current physics steps = 12 * 1/60s * 1 / (1-gamma) eg.add('epochs', 10000) eg.add('steps_per_epoch', 8000) eg.add('ac_kwargs:hidden_sizes', net_config['hidden_units'], 'hid') eg.add('ac_kwargs:activation', net_config['activation'], '') eg.add('notes', notes, '') eg.add('run_filename', os.path.realpath(__file__), '') eg.add('env_config', env_config, '') def train(): eg.run(ppo_pytorch) if __name__ == '__main__': utils.run(train_fn=train, env_config=env_config, net_config=net_config, num_eval_episodes=10)
effective_horizon_seconds = 10

# Lower gamma so the effective horizon stays at 10 s with the current number
# of physics steps per observation:
#   horizon = 12 * (1/60 s) * 1 / (1 - gamma)
# NOTE(review): `pso` and `FPS` are defined above this excerpt.
eg.add('gamma', 1 - pso / (effective_horizon_seconds * FPS))

eg.add('epochs', 417)
eg.add('steps_per_epoch', 20000)
eg.add('ac_kwargs:hidden_sizes', net_config['hidden_units'], 'hid')
eg.add('ac_kwargs:activation', net_config['activation'], '')

# Bookkeeping entries: free-form notes, the path of this script, and the
# environment configuration.
eg.add('notes', notes, '')
eg.add('run_filename', os.path.realpath(__file__), '')
eg.add('env_config', env_config, '')


def train():
    """Run ``ppo_pytorch`` over the experiment grid configured on ``eg``."""
    eg.run(ppo_pytorch)


if __name__ == '__main__':
    utils.run(
        train_fn=train,
        env_config=env_config,
        net_config=net_config,
        try_rollouts=20,
        steps_per_try_rollout=10,
        num_eval_episodes=10,
    )

# Results noted by the author (presumably one row per
# try_rollouts / steps_per_try_rollout setting - TODO confirm):
#
#                        | VALUE | REWARD
#  2 rollouts 10 steps   | 7.71  | 7.78
#  2 rollouts 20 steps   | 7.72  | 7.65
#  2 rollouts 30 steps   | 7.52  |
#  4 rollouts 10 steps   | 7.75  |
# 20 rollouts 10 steps   |       | 7.55!