Example #1
# Imports needed by both examples. The project-specific helpers
# (get_env_info, GaussianActorCriticNetwork, PPOAgent, to_tensor) are
# assumed to be importable from the accompanying repository modules.
import pickle
import sys

import numpy as np
import torch

from unityagents import UnityEnvironment  # assumption: Udacity-style Unity wrapper


def train():
    # Train on the GPU when one is available, otherwise fall back to the CPU.
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # Pick the pre-built Unity Reacher binary that matches the host OS.
    if sys.platform == "darwin":
        binary_path = "./bin/Reacher.app"
    elif sys.platform == "linux":
        binary_path = "./bin/Reacher_Linux_NoVis/Reacher.x86_64"
    else:
        binary_path = "./bin/Reacher_Windows_x86_64/Reacher.exe"

    # Launch the Unity environment and query the number of agents plus the
    # observation and action dimensions.
    env = UnityEnvironment(file_name=binary_path)
    n_agent, state_dim, action_dim = get_env_info(env)
    model = GaussianActorCriticNetwork(state_dim,
                                       action_dim,
                                       hiddens=[512, 256])
    model = model.to(device)
    agent = PPOAgent(env,
                     model,
                     tmax=128,
                     n_epoch=10,
                     batch_size=128,
                     eps=0.1,
                     device=device)

    # Run up to n_step PPO update steps, stopping early once the task is solved.
    n_step = 2000
    n_episodes = 0
    for step in range(n_step):
        agent.step()
        scores = agent.scores_by_episode
        if n_episodes < len(scores):
            n_episodes = len(scores)
            print(f" episode #{n_episodes} : score = {scores[-1]:.2f}", end="")
            if 100 <= n_episodes:
                rewards_ma = np.mean(scores[-100:])
                print(f", mean score of last 100 episodes = {rewards_ma:.2f}")
                # The task counts as solved once the 100-episode moving
                # average reaches +30.
                if 30. <= rewards_ma:
                    torch.save(model.state_dict(), "bestmodel.pth")
                    with open('rewards.pickle', 'wb') as fp:
                        pickle.dump(scores, fp)
                    print("\n ==== Achieved criteria! Model is saved.")
                    break
            else:
                print()

        sys.stdout.flush()

    print("Finished.")
Example #2
def view():
    # Reuses the imports and project-local helpers from Example #1.
    # Select the pre-built Reacher binary for the current platform; change
    # these paths when running against a different environment build.
    if sys.platform == "darwin":
        binary_path = "./bin/Reacher.app"
    elif sys.platform == "linux":
        binary_path = "./bin/Reacher_Linux/Reacher.x86_64"
    else:
        binary_path = "./bin/Reacher_Windows_x86_64/Reacher.exe"

    env = UnityEnvironment(file_name=binary_path)
    n_agent, state_dim, action_dim = get_env_info(env)
    model = GaussianActorCriticNetwork(state_dim, action_dim, hiddens=[512, 256])
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)

    # Load the trained model weights; map_location keeps them on the CPU
    # so the checkpoint loads regardless of where it was saved.
    state_dict = torch.load("model.pth",
                            map_location=lambda storage, loc: storage)
    model.load_state_dict(state_dict)

    # Reset the environment in evaluation mode and read the initial observations.
    brain_name = env.brain_names[0]
    env_info = env.reset(train_mode=False)[brain_name]
    states = to_tensor(env_info.vector_observations)

    n_step = 5000
    model.eval()
    with torch.no_grad():
        for step in range(n_step):
            # Draw actions from the policy for all agents.
            actions, _, _, _ = model(states)

            # Step the environment forward and read the next observations.
            actions_np = actions.cpu().numpy()
            env_info = env.step(actions_np)[brain_name]
            states = to_tensor(env_info.vector_observations)
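
The to_tensor helper used in view() is also project-local and not shown. A plausible minimal version, assuming a module-level device shared with the model, is sketched below; the real helper may differ.

import numpy as np
import torch

# Assumed module-level device, matching the one the model is moved to.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

def to_tensor(x):
    # Convert NumPy observations into a float32 tensor on the shared device.
    return torch.from_numpy(np.asarray(x)).float().to(device)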