Example #1
# Imports needed by both examples. The project-specific helpers
# (get_env_info, GaussianActorCriticNetwork, PPOAgent, to_tensor) are
# assumed to be importable from the accompanying repository modules.
import pickle
import sys

import numpy as np
import torch

from unityagents import UnityEnvironment  # assumption: Udacity-style Unity wrapper


def train():
    # Train on the GPU when one is available, otherwise fall back to the CPU.
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # Pick the pre-built Unity Reacher binary that matches the host OS.
    if sys.platform == "darwin":
        binary_path = "./bin/Reacher.app"
    elif sys.platform == "linux":
        binary_path = "./bin/Reacher_Linux_NoVis/Reacher.x86_64"
    else:
        binary_path = "./bin/Reacher_Windows_x86_64/Reacher.exe"

    # Launch the Unity environment and query the number of agents plus the
    # observation and action dimensions.
    env = UnityEnvironment(file_name=binary_path)
    n_agent, state_dim, action_dim = get_env_info(env)
    model = GaussianActorCriticNetwork(state_dim,
                                       action_dim,
                                       hiddens=[512, 256])
    model = model.to(device)
    agent = PPOAgent(env,
                     model,
                     tmax=128,
                     n_epoch=10,
                     batch_size=128,
                     eps=0.1,
                     device=device)

    # Run up to n_step PPO update steps, stopping early once the task is solved.
    n_step = 2000
    n_episodes = 0
    for step in range(n_step):
        agent.step()
        scores = agent.scores_by_episode
        if n_episodes < len(scores):
            n_episodes = len(scores)
            print(f" episode #{n_episodes} : score = {scores[-1]:.2f}", end="")
            if 100 <= n_episodes:
                rewards_ma = np.mean(scores[-100:])
                print(f", mean score of last 100 episodes = {rewards_ma:.2f}")
                # The task counts as solved once the 100-episode moving
                # average reaches +30.
                if 30. <= rewards_ma:
                    torch.save(model.state_dict(), "bestmodel.pth")
                    with open('rewards.pickle', 'wb') as fp:
                        pickle.dump(scores, fp)
                    print("\n ==== Achieved criteria! Model is saved.")
                    break
            else:
                print()

        sys.stdout.flush()

    print("Finished.")
Example #2
def view():
    # Reuses the imports and project-local helpers from Example #1.
    # Select the pre-built Reacher binary for the current platform; change
    # these paths when running against a different environment build.
    if sys.platform == "darwin":
        binary_path = "./bin/Reacher.app"
    elif sys.platform == "linux":
        binary_path = "./bin/Reacher_Linux/Reacher.x86_64"
    else:
        binary_path = "./bin/Reacher_Windows_x86_64/Reacher.exe"

    env = UnityEnvironment(file_name=binary_path)
    n_agent, state_dim, action_dim = get_env_info(env)
    model = GaussianActorCriticNetwork(state_dim, action_dim, hiddens=[512, 256])
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)

    # Load the trained model weights; map_location keeps them on the CPU
    # so the checkpoint loads regardless of where it was saved.
    state_dict = torch.load("model.pth",
                            map_location=lambda storage, loc: storage)
    model.load_state_dict(state_dict)

    # Reset the environment in evaluation mode and read the initial observations.
    brain_name = env.brain_names[0]
    env_info = env.reset(train_mode=False)[brain_name]
    states = to_tensor(env_info.vector_observations)

    n_step = 5000
    model.eval()
    with torch.no_grad():
        for step in range(n_step):
            # Draw actions from the policy for all agents.
            actions, _, _, _ = model(states)

            # Step the environment forward and read the next observations.
            actions_np = actions.cpu().numpy()
            env_info = env.step(actions_np)[brain_name]
            states = to_tensor(env_info.vector_observations)
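
The to_tensor helper used in view() is also project-local and not shown. A plausible minimal version, assuming a module-level device shared with the model, is sketched below; the real helper may differ.

import numpy as np
import torch

# Assumed module-level device, matching the one the model is moved to.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

def to_tensor(x):
    # Convert NumPy observations into a float32 tensor on the shared device.
    return torch.from_numpy(np.asarray(x)).float().to(device)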