import os

import gym
import numpy as np

from ppo_torch import Agent
from utils import plot_learning_curve

if __name__ == "__main__":
    env = gym.make("CartPole-v0")
    N = 20              # learn every N environment steps
    batch_size = 5
    n_epochs = 4
    alpha = 0.0003      # learning rate
    agent = Agent(
        n_actions=env.action_space.n,
        batch_size=batch_size,
        alpha=alpha,
        n_epochs=n_epochs,
        input_dims=env.observation_space.shape,
    )
    n_games = 300

    if not os.path.exists("plots"):
        os.makedirs("plots")
    figure_file = os.path.join("plots", "cartpole.png")

    best_score = env.reward_range[0]
    score_history = []

    learn_iters = 0
    avg_score = 0
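    # A minimal sketch of the training loop that typically follows this setup,
    # assuming the ppo_torch Agent also exposes remember(), learn() and
    # save_models() (only choose_action() and load_models() are used in the
    # other scripts here) and that plot_learning_curve takes
    # (x, scores, figure_file). Illustrative, not the exact original loop.
    n_steps = 0
    for i in range(n_games):
        observation = env.reset()
        done = False
        score = 0
        while not done:
            action, prob, val = agent.choose_action(observation)
            observation_, reward, done, info = env.step(action)
            n_steps += 1
            score += reward
            # store the transition and learn every N environment steps
            agent.remember(observation, action, prob, val, reward, done)
            if n_steps % N == 0:
                agent.learn()
                learn_iters += 1
            observation = observation_
        score_history.append(score)
        avg_score = np.mean(score_history[-100:])
        if avg_score > best_score:
            best_score = avg_score
            agent.save_models()
        print("episode", i, "score %.1f" % score,
              "avg score %.1f" % avg_score,
              "time_steps", n_steps, "learning_steps", learn_iters)

    x = [i + 1 for i in range(len(score_history))]
    plot_learning_curve(x, score_history, figure_file)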
import gym
import numpy as np

from ppo_torch import Agent, ActorNetwork
from utils import plot_learning_curve
from newmaze import Maze

if __name__ == '__main__':
    # env = gym.make('FrozenLake-v0', is_slippery=False)
    env = Maze()
    N = 20
    batch_size = 5
    n_epochs = 4
    alpha = 0.0003
    agent = Agent(n_actions=env.action_space.n, batch_size=batch_size,
                  alpha=alpha, n_epochs=n_epochs, input_dims=(1,))
    agent.load_models()

    observation = env.reset()
    done = False
    env.render()
    while not done:
        action, prob, val = agent.choose_action(observation)
        observation_, reward, done, info = env.step(action)
        env.render()
        observation = observation_   # advance to the next state
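    # A hedged sketch of a small evaluation sweep over several episodes with
    # the same loaded agent; n_eval_episodes and eval_scores are illustrative
    # names, and the Maze env is assumed to follow the classic gym
    # reset()/step() API used above.
    n_eval_episodes = 10
    eval_scores = []
    for _ in range(n_eval_episodes):
        observation = env.reset()
        done = False
        score = 0
        while not done:
            action, prob, val = agent.choose_action(observation)
            observation, reward, done, info = env.step(action)
            score += reward
        eval_scores.append(score)
    print('mean evaluation return over %d episodes: %.2f'
          % (n_eval_episodes, np.mean(eval_scores)))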
# Fragment of an adversarial-training run: env, n_trials, p, Agent and
# adv_Agent are assumed to be defined or imported earlier in the full script.
N = 20
batch_size = 5
n_epochs = 4
alpha = 0.0003
best_score = env.reward_range[0]
perturbation = p    # adversarial perturbation strength supplied by the caller

# Final results, keyed by trial index
score_book = {}
actor_loss_book = {}
critic_loss_book = {}
total_loss_book = {}

for trial in range(n_trials):
    print('\nTrial:', trial + 1)
    agent = Agent(n_actions=env.action_space.n, batch_size=batch_size,
                  alpha=alpha, n_epochs=n_epochs,
                  input_dims=env.observation_space.shape,
                  fc1_dims=256, fc2_dims=256, chkpt_dir='tmp/ppo')
    advAgent = adv_Agent(input_dims=env.observation_space.shape,
                         layer1_size=200, layer2_size=200, env=env,
                         n_actions=env.action_space.n, gamma=0.99,
                         max_size=1000000, tau=0.005, ent_alpha=0.5,
                         batch_size=100, reward_scale=1,
                         chkpt_dir='advModels/SACd/tmp/adv')

    # Per-trial storage
    score_history = []
    avg_score_history = []
    loss = []
    actor_loss = []
    critic_loss = []
    total_loss = []

    learn_iters = 0
    avg_score = 0
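    # A hedged sketch of collecting the per-trial results into the book
    # dictionaries initialized above, once the trial's training loop (not
    # shown in this fragment) has filled the per-trial lists.
    score_book[trial] = score_history
    actor_loss_book[trial] = actor_loss
    critic_loss_book[trial] = critic_loss
    total_loss_book[trial] = total_loss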
# Fragment of a multi-trial run: env, n_trials, batch_size, n_epochs, alpha,
# chkpt_dir and Agent are assumed to be defined or imported earlier in the
# full script.
best_score = env.reward_range[0]
layer_1_dim = 128
layer_2_dim = 128

# Final results, keyed by trial index
score_book = {}
actor_loss_book = {}
critic_loss_book = {}
total_loss_book = {}

for trial in range(n_trials):
    print('\nTrial:', trial + 1)
    agent = Agent(n_actions=env.action_space.n, batch_size=batch_size,
                  alpha=alpha, n_epochs=n_epochs,
                  input_dims=env.observation_space.shape,
                  fc1_dims=layer_1_dim, fc2_dims=layer_2_dim,
                  chkpt_dir=chkpt_dir)

    # Per-trial storage lists
    score_history = []
    avg_score_history = []
    loss = []
    actor_loss = []
    critic_loss = []
    total_loss = []

    # Initialize the run
    learn_iters = 0
    avg_score = 0
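    # A hedged sketch of how the loss lists above could be filled during a
    # trial, assuming this variant's agent.learn() is modified to return its
    # actor and critic losses (an assumption not confirmed by this fragment);
    # the surrounding episode loop is omitted.
    a_loss, c_loss = agent.learn()
    actor_loss.append(a_loss)
    critic_loss.append(c_loss)
    total_loss.append(a_loss + c_loss)   # total illustratively taken as the sum
    learn_iters += 1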