Example #1
import gym
import numpy as np
from ppo_torch import Agent
import os
from utils import plot_learning_curve


if __name__ == "__main__":
    env = gym.make("CartPole-v0")
    N = 20
    batch_size = 5
    n_epochs = 4
    alpha = 0.0003
    agent = Agent(
        n_actions=env.action_space.n,
        batch_size=batch_size,
        alpha=alpha,
        n_epochs=n_epochs,
        input_dims=env.observation_space.shape,
    )
    n_games = 300

    if not os.path.exists("plots"):
        os.makedirs("plots")
    figure_file = os.path.join("plots", "cartpole.png")


    best_score = env.reward_range[0]
    score_history = []

    learn_iters = 0
    avg_score = 0
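
The first example stops right after the bookkeeping variables are initialized. Below is a minimal sketch of how the training loop typically continues from this point, still inside the `__main__` block. The `n_steps` counter, the `agent.remember()`, `agent.learn()` and `agent.save_models()` calls, and the `plot_learning_curve(x, score_history, figure_file)` signature are assumptions about `ppo_torch` and `utils` that the snippet itself does not show.

    n_steps = 0  # assumed step counter, not defined in the snippet above

    for i in range(n_games):
        observation = env.reset()
        done = False
        score = 0
        while not done:
            action, prob, val = agent.choose_action(observation)
            observation_, reward, done, info = env.step(action)
            n_steps += 1
            score += reward
            # store the transition; remember() signature is assumed
            agent.remember(observation, action, prob, val, reward, done)
            if n_steps % N == 0:  # run a PPO update every N environment steps
                agent.learn()
                learn_iters += 1
            observation = observation_
        score_history.append(score)
        avg_score = np.mean(score_history[-100:])
        if avg_score > best_score:
            best_score = avg_score
            agent.save_models()  # assumed checkpointing helper
        print('episode', i, 'score %.1f' % score, 'avg score %.1f' % avg_score,
              'time_steps', n_steps, 'learning_iters', learn_iters)

    x = [i + 1 for i in range(len(score_history))]
    plot_learning_curve(x, score_history, figure_file)  # signature assumed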
Example #2
import gym
import numpy as np
from ppo_torch import Agent
from utils import plot_learning_curve
from ppo_torch import ActorNetwork
from newmaze import Maze
if __name__ == '__main__':
    #env = gym.make('FrozenLake-v0', is_slippery=False )
    env = Maze()
    N = 20
    batch_size = 5
    n_epochs = 4
    alpha = 0.0003
    agent = Agent(n_actions=env.action_space.n,
                  batch_size=batch_size,
                  alpha=alpha,
                  n_epochs=n_epochs,
                  input_dims=(1, ))

    agent.load_models()

    observation = env.reset()
    done = False
    env.render()
    while not done:
        action, prob, val = agent.choose_action(observation)
        observation_, reward, done, info = env.step(action)
        observation = observation_  # advance the state; without this the loop keeps re-using the first observation
        env.render()
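
The custom `Maze` environment from `newmaze` is not shown here; the example only needs it to expose the gym-style surface used above (`action_space.n`, `reset()`, `step()`, `render()`). The stub below is a hypothetical stand-in, written purely to illustrate that interface; the real `Maze` is of course more involved.

import gym
import numpy as np

class MinimalMaze:
    """Hypothetical stand-in showing the interface Example #2 relies on."""

    def __init__(self):
        self.action_space = gym.spaces.Discrete(4)  # Agent reads .n from this
        self.observation_space = gym.spaces.Box(low=0.0, high=10.0,
                                                shape=(1,), dtype=np.float32)
        self._state = 0

    def reset(self):
        self._state = 0
        return np.array([self._state], dtype=np.float32)  # matches input_dims=(1,)

    def step(self, action):
        self._state = min(self._state + 1, 10)  # toy dynamics
        done = self._state >= 10
        reward = 1.0 if done else 0.0
        obs = np.array([self._state], dtype=np.float32)
        return obs, reward, done, {}  # old gym 4-tuple, as consumed in the loop above

    def render(self):
        print('state:', self._state)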
Example #3
        N = 20
        batch_size = 5
        n_epochs = 4
        alpha = 0.0003
        best_score = env.reward_range[0]
        perturbation = p

        score_book = {}
        actor_loss_book = {}
        critic_loss_book = {}
        total_loss_book = {}

        for trial in range(n_trials):
            print('\nTrial:', trial+1)
            agent = Agent(n_actions=env.action_space.n, batch_size=batch_size,
                          alpha=alpha, n_epochs=n_epochs,
                          input_dims=env.observation_space.shape,
                          fc1_dims=256, fc2_dims=256, chkpt_dir='tmp/ppo')

            advAgent = adv_Agent(input_dims=env.observation_space.shape,
                                 layer1_size=200, layer2_size=200, env=env,
                                 n_actions=env.action_space.n, gamma=0.99,
                                 max_size=1000000, tau=0.005, ent_alpha=0.5,
                                 batch_size=100, reward_scale=1,
                                 chkpt_dir='advModels/SACd/tmp/adv')
            
            score_history = []
            avg_score_history = []
            loss = []
            actor_loss = []
            critic_loss = []
            total_loss = []

            learn_iters = 0
            avg_score = 0
Example #4
    best_score = env.reward_range[0]
    layer_1_dim = 128
    layer_2_dim = 128

    # Final results
    score_book = {}
    actor_loss_book = {}
    critic_loss_book = {}
    total_loss_book = {}

    for trial in range(n_trials):
        print('\nTrial:', trial + 1)
        agent = Agent(n_actions=env.action_space.n,
                      batch_size=batch_size,
                      alpha=alpha,
                      n_epochs=n_epochs,
                      input_dims=env.observation_space.shape,
                      fc1_dims=layer_1_dim,
                      fc2_dims=layer_2_dim,
                      chkpt_dir=chkpt_dir)

        # Initialize per-trial storage lists
        score_history = []
        avg_score_history = []
        loss = []
        actor_loss = []
        critic_loss = []
        total_loss = []

        # Initialize the run
        learn_iters = 0
        avg_score = 0
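
Example #4 also cuts off before the body of the trial loop finishes. One plausible way the final-results dictionaries declared above get filled, assuming they are simply keyed by trial index (the snippet does not show this), is:

        # ... the per-trial training loop would run here, filling the lists above ...

        # hypothetical aggregation into the result dictionaries, keyed by trial
        score_book[trial] = score_history
        actor_loss_book[trial] = actor_loss
        critic_loss_book[trial] = critic_loss
        total_loss_book[trial] = total_loss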