def __init__(self, cell_nb, lr=4e-3, nb_blocks=5, gamma=0.99):
    """Store the hyperparameters and build the ``ActorCritic`` instance.

    Args:
        cell_nb: Kept on the instance as ``self.cell_nb``. Its square is
            handed to ``ActorCritic`` as the second positional argument
            (presumably the cell count of a square grid -- TODO confirm).
        lr: Forwarded to ``ActorCritic`` as its first positional argument
            (presumably the learning rate -- TODO confirm).
        nb_blocks: Forwarded to ``ActorCritic`` as its third positional
            argument.
        gamma: Kept on the instance as ``self.gamma`` (presumably the
            discount factor -- verify against the code that reads it).
    """
    self.cell_nb, self.gamma = cell_nb, gamma

    # The second constructor argument is derived from cell_nb, not passed
    # through unchanged.
    squared_cell_nb = cell_nb ** 2
    self.ActorCritic = ActorCritic(lr, squared_cell_nb, nb_blocks)

    # Placeholder: starts out empty; this method never fills it in.
    self.log_probs = None
env = gym.make(env_id) env.seed(seed + rank) return env set_global_seeds(seed) return _init if __name__ == "__main__": envs = SubprocVecEnv([make_env(env_name, i) for i in range(num_envs)]) env = gym.make(env_name) num_inputs = envs.observation_space.shape num_outputs = envs.action_space.shape model = ActorCritic(num_inputs[0], num_outputs[0]).to(device) if os.path.isfile(modelpath): model.load_state_dict(torch.load(modelpath)) ppo = PPO(model=model, envs=envs, device=device, lr=lr, modelpath=modelpath) if not play_mode: ppo.ppo_train(num_steps, mini_batch_size, ppo_epochs, max_frames, max_pol_updates, save_interval,
return env set_global_seeds(seed) return _init if __name__ == "__main__": envs = SubprocVecEnv([make_env(env_name, i) for i in range(num_envs)]) env = gym.make(env_name) img_size = envs.observation_space[0].shape sensor_size = envs.observation_space[1].shape num_outputs = envs.action_space.shape model = ActorCritic([img_size[1], img_size[0]], sensor_size[0], num_outputs[0]).to(device) if args.onnx_converter and os.path.isfile(modelpath): model.load_state_dict(torch.load(modelpath)) model.export("gvsets_early_fusion.onnx") exit(1) if os.path.isfile(modelpath): model.load_state_dict(torch.load(modelpath)) ppo = PPO(model=model, envs=envs, device=device, lr=lr, modelpath=modelpath, tuple_ob=True)