Exemple #1
0
 def __init__(self, cell_nb, lr=4e-3, nb_blocks=5, gamma=0.99):
     """Store the hyper-parameters and build the actor-critic model.

     Args:
         cell_nb: Kept as ``self.cell_nb``; its square is handed to
             ``ActorCritic`` as the second positional argument.
         lr: Forwarded to ``ActorCritic`` as the first positional
             argument (presumably the learning rate -- confirm against
             the ``ActorCritic`` signature).
         nb_blocks: Forwarded to ``ActorCritic`` as the third positional
             argument.
         gamma: Kept as ``self.gamma`` (presumably the discount factor
             -- confirm against where it is consumed).
     """
     self.cell_nb, self.gamma = cell_nb, gamma

     cell_nb_squared = cell_nb ** 2
     self.ActorCritic = ActorCritic(lr, cell_nb_squared, nb_blocks)

     # Starts out empty; nothing is recorded at construction time.
     self.log_probs = None
Exemple #2
0
        env = gym.make(env_id)
        env.seed(seed + rank)
        return env

    set_global_seeds(seed)
    return _init


if __name__ == "__main__":
    # One environment factory per index in range(num_envs), all handed to
    # SubprocVecEnv; a separate single environment is created alongside it.
    envs = SubprocVecEnv([make_env(env_name, i) for i in range(num_envs)])
    env = gym.make(env_name)

    # Only the first entry of each shape is used to size the model.
    num_inputs = envs.observation_space.shape
    num_outputs = envs.action_space.shape

    model = ActorCritic(num_inputs[0], num_outputs[0]).to(device)
    if os.path.isfile(modelpath):
        # map_location remaps the stored tensors onto the device in use, so a
        # checkpoint written on a GPU machine still loads on a CPU-only one.
        model.load_state_dict(torch.load(modelpath, map_location=device))

    ppo = PPO(model=model,
              envs=envs,
              device=device,
              lr=lr,
              modelpath=modelpath)
    if not play_mode:
        ppo.ppo_train(num_steps,
                      mini_batch_size,
                      ppo_epochs,
                      max_frames,
                      max_pol_updates,
                      save_interval,
Exemple #3
0
        return env

    set_global_seeds(seed)
    return _init


if __name__ == "__main__":

    # One environment factory per index in range(num_envs), all handed to
    # SubprocVecEnv; a separate single environment is created alongside it.
    envs = SubprocVecEnv([make_env(env_name, i) for i in range(num_envs)])
    env = gym.make(env_name)

    # The observation space is indexed: entry 0 is read as the image shape and
    # entry 1 as the sensor shape (going by the variable names -- confirm
    # against the environment definition).
    img_size = envs.observation_space[0].shape
    sensor_size = envs.observation_space[1].shape
    num_outputs = envs.action_space.shape

    # The first two image dimensions are passed in swapped order.
    model = ActorCritic([img_size[1], img_size[0]], sensor_size[0],
                        num_outputs[0]).to(device)

    if os.path.isfile(modelpath):
        # map_location remaps the stored tensors onto the device in use, so a
        # checkpoint written on a GPU machine still loads on a CPU-only one.
        model.load_state_dict(torch.load(modelpath, map_location=device))

        if args.onnx_converter:
            # Conversion-only run: export the loaded weights and stop before
            # any trainer is built. Status 0 because the export succeeded.
            model.export("gvsets_early_fusion.onnx")
            raise SystemExit(0)

    # NOTE(review): when --onnx_converter is requested but no checkpoint
    # exists, execution falls through to the code below exactly as before.
    ppo = PPO(model=model,
              envs=envs,
              device=device,
              lr=lr,
              modelpath=modelpath,
              tuple_ob=True)