def train():
    """Train a PPO agent on the Unity Reacher environment.

    Runs up to ``n_step`` agent update steps, printing each finished episode's
    score. Once at least 100 episodes have completed and the 100-episode
    moving-average score reaches 30, saves the model weights and the score
    history, then stops early.
    """
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    # Pick the platform-specific Unity binary (headless build on Linux).
    if sys.platform == "darwin":
        binary_path = "./bin/Reacher.app"
    elif sys.platform == "linux":
        binary_path = "./bin/Reacher_Linux_NoVis/Reacher.x86_64"
    else:
        binary_path = "./bin/Reacher_Windows_x86_64/Reacher.exe"
    env = UnityEnvironment(file_name=binary_path)
    try:
        n_agent, state_dim, action_dim = get_env_info(env)
        model = GaussianActorCriticNetwork(state_dim, action_dim, hiddens=[512, 256])
        model = model.to(device)
        agent = PPOAgent(env, model, tmax=128, n_epoch=10, batch_size=128, eps=0.1, device=device)
        n_step = 2000
        n_episodes = 0
        for step in range(n_step):
            agent.step()
            scores = agent.scores_by_episode
            # A new entry in scores means at least one episode finished this step.
            if n_episodes < len(scores):
                n_episodes = len(scores)
                print(f" episode #{n_episodes} : score = {scores[-1]:.2f}", end="")
                if 100 <= n_episodes:
                    rewards_ma = np.mean(scores[-100:])
                    print(f", mean score of last 100 episodes = {rewards_ma:.2f}")
                    if 30. <= rewards_ma:
                        # NOTE(review): weights are saved as "bestmodel.pth" here,
                        # but view() loads "model.pth" — confirm which filename
                        # is intended before relying on view() after training.
                        torch.save(model.state_dict(), "bestmodel.pth")
                        with open('rewards.pickle', 'wb') as fp:
                            pickle.dump(scores, fp)
                        print("\n ==== Achieved criteria! Model is saved.")
                        break
                else:
                    print()
                sys.stdout.flush()
    finally:
        # FIX: the Unity simulator process was never shut down; close it even
        # when training raises, so the external process does not leak.
        env.close()
    print("Finished.")
def view():
    """Run a trained model in the visual Reacher simulator for inspection.

    Loads weights from ``model.pth`` and steps the environment for ``n_step``
    frames in non-training (rendered) mode, feeding the model's actions back
    into the simulator.
    """
    # FIX: `device` was used below (`model.to(device)`) but never defined in
    # this function, raising NameError at runtime. Define it as train() does.
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    # for different environments please change these parameters
    if sys.platform == "darwin":
        binary_path = "./bin/Reacher.app"
    elif sys.platform == "linux":
        binary_path = "./bin/Reacher_Linux/Reacher.x86_64"
    else:
        binary_path = "./bin/Reacher_Windows_x86_64/Reacher.exe"
    env = UnityEnvironment(file_name=binary_path)
    try:
        n_agent, state_dim, action_dim = get_env_info(env)
        model = GaussianActorCriticNetwork(state_dim, action_dim, hiddens=[512, 256])
        model = model.to(device)
        # Load best model; map_location keeps tensors on CPU regardless of
        # where they were saved, then load_state_dict moves them to `device`.
        state_dict = torch.load("model.pth", map_location=lambda storage, loc: storage)
        model.load_state_dict(state_dict)
        # Reset environment in rendered (non-training) mode.
        brain_name = env.brain_names[0]
        env_info = env.reset(train_mode=False)[brain_name]
        states = to_tensor(env_info.vector_observations)
        n_step = 5000
        model.eval()
        # Inference only: disable autograd so no graph is built per step.
        with torch.no_grad():
            for step in range(n_step):
                # draw action from model
                actions, _, _, _ = model(states)
                # one step forward
                actions_np = actions.cpu().numpy()
                env_info = env.step(actions_np)[brain_name]
                states = to_tensor(env_info.vector_observations)
    finally:
        # FIX: shut down the Unity simulator process even if stepping raises.
        env.close()