コード例 #1
0
)

# Network settings. The same dict feeds the experiment grid below and is
# passed to utils.run as net_config in the __main__ guard.
net_config = dict(
    hidden_units=(256, 256),
    activation=torch.nn.Tanh
)

# Build the experiment grid. Each eg.add(key, value, ...) call contributes one
# setting. NOTE(review): the third positional argument looks like a shorthand
# label for the key ('hid' for the hidden sizes, '' for none) and the fourth
# like a flag on 'env_name' -- confirm against ExperimentGrid.add.
eg = ExperimentGrid(name=experiment_name)
eg.add('env_name', env_config['env_name'], '', False)
# Disabled overrides, apparently left from a resume / fine-tuning run; they are
# kept here so they can be switched back on.
# eg.add('seed', 0)
# eg.add('resume', '/home/c2/src/tmp/spinningup/data/intersection_2_agents_fine_tune_add_left_yield2/intersection_2_agents_fine_tune_add_left_yield2_s0_2020_03-23_22-40.11')
# eg.add('reinitialize_optimizer_on_resume', True)
# eg.add('num_inputs_to_add', 0)
# eg.add('pi_lr', 3e-6)
# eg.add('vf_lr', 1e-5)
# eg.add('boost_explore', 5)
eg.add('epochs', 20000)
eg.add('steps_per_epoch', 4000)
# 'ac_kwargs:<name>' keys presumably populate the nested ac_kwargs dict of the
# training function -- verify against ExperimentGrid.
eg.add('ac_kwargs:hidden_sizes', net_config['hidden_units'], 'hid')
eg.add('ac_kwargs:activation', net_config['activation'], '')
# Bookkeeping entries: free-form notes, the resolved path of this script, and
# the full environment config are added to the grid alongside the
# hyperparameters.
eg.add('notes', notes, '')
eg.add('run_filename', os.path.realpath(__file__), '')
eg.add('env_config', env_config, '')

def train():
    """Run the module-level experiment grid ``eg`` with ``ppo_pytorch``.

    Takes no arguments and returns nothing. It is handed to ``utils.run`` as
    ``train_fn`` in the ``__main__`` guard.
    """
    eg.run(ppo_pytorch)


if __name__ == '__main__':
    # Script entry point: hand the training callback and both config dicts to
    # utils.run, which decides how train() gets invoked.
    utils.run(train_fn=train, env_config=env_config, net_config=net_config)
    physics_steps_per_observation=12,
    discrete_actions=COMFORTABLE_ACTIONS,
)

# Network settings. The same dict feeds the experiment grid below and is
# passed to utils.run as net_config in the __main__ guard.
net_config = dict(hidden_units=(64, 64), activation=torch.nn.Tanh)

# Build the experiment grid. NOTE(review): the third positional argument of
# eg.add looks like a shorthand label for the key and the fourth like a flag
# on 'env_name' -- confirm against ExperimentGrid.add.
eg = ExperimentGrid(name=experiment_name)
eg.add('env_name', env_config['env_name'], '', False)
pso = env_config['physics_steps_per_observation']
effective_horizon_seconds = 10
# Lower gamma so the effective horizon stays at effective_horizon_seconds of
# simulated time even though each agent step spans `pso` physics steps:
#     horizon_seconds = pso * (1 / FPS) * 1 / (1 - gamma)
#  => gamma = 1 - pso / (horizon_seconds * FPS)
# The original note used pso = 12 and a 1/60 s physics step, which gives
# gamma = 0.98 (assumes FPS == 60 -- FPS is defined outside this excerpt).
eg.add(
    'gamma', 1 - pso / (effective_horizon_seconds * FPS)
)
eg.add('epochs', 10000)
eg.add('steps_per_epoch', 8000)
# 'ac_kwargs:<name>' keys presumably populate the nested ac_kwargs dict of the
# training function -- verify against ExperimentGrid.
eg.add('ac_kwargs:hidden_sizes', net_config['hidden_units'], 'hid')
eg.add('ac_kwargs:activation', net_config['activation'], '')
# Bookkeeping entries: free-form notes, the resolved path of this script, and
# the full environment config are added to the grid alongside the
# hyperparameters.
eg.add('notes', notes, '')
eg.add('run_filename', os.path.realpath(__file__), '')
eg.add('env_config', env_config, '')


def train():
    """Run the module-level experiment grid ``eg`` with ``ppo_pytorch``.

    Takes no arguments and returns nothing. It is handed to ``utils.run`` as
    ``train_fn`` in the ``__main__`` guard.
    """
    eg.run(ppo_pytorch)


if __name__ == '__main__':
    # Script entry point: hand the training callback, both config dicts and
    # the evaluation episode count to utils.run.
    utils.run(train_fn=train,
              env_config=env_config,
              net_config=net_config,
              num_eval_episodes=10)
コード例 #3
0
# NOTE: `pso`, `eg`, `net_config`, `env_config`, `notes` and `FPS` are all
# defined above this excerpt.
effective_horizon_seconds = 10
# Lower gamma so the effective horizon stays at effective_horizon_seconds of
# simulated time even though each agent step spans `pso` physics steps:
#     horizon_seconds = pso * (1 / FPS) * 1 / (1 - gamma)
#  => gamma = 1 - pso / (horizon_seconds * FPS)
# The original note used pso = 12 and a 1/60 s physics step, which gives
# gamma = 0.98 (assumes those values still apply here -- TODO confirm).
eg.add(
    'gamma', 1 - pso / (effective_horizon_seconds * FPS)
)
eg.add('epochs', 417)
eg.add('steps_per_epoch', 20000)
# 'ac_kwargs:<name>' keys presumably populate the nested ac_kwargs dict of the
# training function; the third eg.add argument looks like a shorthand label
# ('hid') -- verify against ExperimentGrid.add.
eg.add('ac_kwargs:hidden_sizes', net_config['hidden_units'], 'hid')
eg.add('ac_kwargs:activation', net_config['activation'], '')
# Bookkeeping entries: free-form notes, the resolved path of this script, and
# the full environment config are added to the grid alongside the
# hyperparameters.
eg.add('notes', notes, '')
eg.add('run_filename', os.path.realpath(__file__), '')
eg.add('env_config', env_config, '')


def train():
    """Run the module-level experiment grid ``eg`` with ``ppo_pytorch``.

    Takes no arguments and returns nothing. It is handed to ``utils.run`` as
    ``train_fn`` in the ``__main__`` guard.
    """
    eg.run(ppo_pytorch)


if __name__ == '__main__':
    # Script entry point: hand the training callback, both config dicts and
    # the rollout / evaluation settings to utils.run.
    # NOTE(review): try_rollouts and steps_per_try_rollout presumably map to
    # the "N rollouts M steps" rows of the results table that follows this
    # guard -- confirm against utils.run.
    utils.run(train_fn=train,
              env_config=env_config,
              net_config=net_config,
              try_rollouts=20,
              steps_per_try_rollout=10,
              num_eval_episodes=10)

#                     | VALUE |  REWARD
#  2 rollouts 10 steps | 7.71  |  7.78
#  2 rollouts 20 steps | 7.72  |  7.65
#  2 rollouts 30 steps | 7.52  |
#  4 rollouts 10 steps | 7.75  |
# 20 rollouts 10 steps |       | 7.55!