def init_v_agent(state_shape, action_shape, action_max):
    mu_model = Seq_Network([state_shape, 32, 32, action_shape], nn.ReLU(), nn.Tanh())
    # p_model emits action_shape ** 2 values, reshaped into the state-dependent
    # matrix of NAF's quadratic advantage term
    p_model = Seq_Network([state_shape, 32, 32, action_shape ** 2], nn.ReLU())
    model = models.centrilized_naf.QModel(mu_model, p_model, action_shape, action_max)
    return model
def init_v_agent(state_shape, action_shape, action_max, batch_size):
    mu_model = Seq_Network([state_shape, 16, 16, action_shape], nn.ReLU(), nn.Tanh())
    p_model = Seq_Network([state_shape, 16, 16, action_shape ** 2], nn.ReLU())
    v_model = Seq_Network([state_shape, 16, 16, 1], nn.ReLU())
    noise = OUNoise(1, threshold=action_max, threshold_min=0.001, threshold_decrease=0.001 * action_max)
    agent = NAFAgent(mu_model, p_model, v_model, noise, state_shape, action_shape, action_max, batch_size, gamma=1)
    return agent
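
# Hypothetical usage sketch (not part of the original file): constructing the
# v-player agent for a small toy problem. The concrete sizes, bound, and batch
# size below are illustrative assumptions.
v_agent = init_v_agent(state_shape=2, action_shape=1, action_max=1.0, batch_size=128)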
def init_agent(state_shape, action_shape, u_max, v_max, batch_size):
    u_model = init_u_agent(state_shape, action_shape, u_max)
    v_model = init_v_agent(state_shape, action_shape, v_max)
    v_network = Seq_Network([state_shape, 16, 16, 1], nn.ReLU())
    noise = OUNoise(1, threshold=1, threshold_min=0.002, threshold_decrease=0.002)
    agent = models.centrilized_naf.CentralizedNafAgent(u_model, v_model, v_network, noise, state_shape, action_shape, u_max, v_max, batch_size)
    return agent
def init_u_agent(state_shape, action_shape, action_max, batch_size):
    mu_model = Seq_Network([state_shape, 32, 32, action_shape], nn.ReLU(), nn.Tanh())
    p_model = Seq_Network([state_shape, 32, 32, action_shape ** 2], nn.ReLU())
    v_model = Seq_Network([state_shape, 32, 32, 1], nn.ReLU())
    noise = OUNoise(1, threshold=1, threshold_min=0.001, threshold_decrease=0.003)
    agent = DoubleNAFAgent(mu_model, p_model, v_model, noise, state_shape, action_shape, action_max, batch_size, gamma=0.999)
    return agent
def init_agent(state_shape, action_shape, u_max, v_max, batch_size):
    # init_u_agent / init_v_agent in this variant take batch_size as well
    u_model = init_u_agent(state_shape, action_shape, u_max, batch_size)
    v_model = init_v_agent(state_shape, action_shape, v_max, batch_size)
    v_network = Seq_Network([state_shape, 50, 50, 1], nn.ReLU())
    noise = OUNoise(2, threshold=1, threshold_min=0.002, threshold_decrease=0.003)
    agent = CentralizedDoubleNafAgent(u_model, v_model, v_network, noise, state_shape, action_shape, u_max, v_max, batch_size)
    return agent
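
# Hypothetical usage sketch: wiring init_agent into a run script. The state and
# action sizes, bounds, and batch size are illustrative assumptions, not values
# taken from the repo.
state_shape = 2
action_shape = 1
agent = init_agent(state_shape, action_shape, u_max=1.0, v_max=1.0, batch_size=200)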
import torch
import torch.nn as nn

from models.simple_naf import SimpleNaf
from models.unlimited_naf import UnlimitedNAFAgent
from problems.nonlinear_problem.nonlinear_problem_env import NonlinearProblem
from problems.nonlinear_problem.optimal_agent import OptimalAgent
from utilities.noises import OUNoise
from utilities.sequentialNetwork import Seq_Network

env = NonlinearProblem()
state_shape = 2
action_shape = 1
episodes_n = 250

mu_model = Seq_Network([state_shape, 100, 100, 100, action_shape], nn.Sigmoid())
# note: p_model is constructed here but not passed to SimpleNaf below
p_model = Seq_Network([state_shape, 100, 100, 100, action_shape ** 2], nn.Sigmoid())
v_model = Seq_Network([state_shape, 100, 100, 100, 1], nn.Sigmoid())
noise = OUNoise(action_shape, threshold=1, threshold_min=0.001, threshold_decrease=0.004)
batch_size = 200
agent = SimpleNaf(mu_model, v_model, noise, state_shape, action_shape, batch_size, 1)


def play_and_learn(env):
    total_reward = 0
    state = env.reset()
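    # The original listing is truncated at this point. A minimal sketch of how
    # the loop might continue, assuming the agent exposes get_action/fit
    # (hypothetical method names modeled on common NAF training loops, not the
    # repo's confirmed API):
    done = False
    while not done:
        action = agent.get_action(state)
        next_state, reward, done, _ = env.step(action)
        agent.fit(state, action, reward, done, next_state)
        total_reward += reward
        state = next_state
    return total_reward
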
import gym
import torch
import torch.nn as nn
from gym import wrappers

from models.double_naf import DoubleNAFAgent
from utilities.noises import OUNoise
from utilities.sequentialNetwork import Seq_Network

env_to_wrap = gym.make("LunarLanderContinuous-v2").env
env = wrappers.Monitor(env_to_wrap, './videos/' + 'lunarLander' + '/', force=True)
state_shape = env.observation_space.shape
action_shape = env.action_space.shape
action_max = 1

mu_model = Seq_Network([state_shape[0], 50, 50, action_shape[0]], nn.ReLU(), nn.Tanh())
p_model = Seq_Network([state_shape[0], 100, 100, action_shape[0] ** 2], nn.ReLU())
v_model = Seq_Network([state_shape[0], 50, 50, 1], nn.ReLU())
# OUNoise and the agent take plain int dimensions, so the shape tuples are indexed
noise = OUNoise(action_shape[0], threshold=1, threshold_min=0.001, threshold_decrease=0.000005)
batch_size = 200
agent = DoubleNAFAgent(mu_model, p_model, v_model, noise, state_shape[0], action_shape[0], action_max, batch_size, 0.999)
# agent.Q.load_state_dict(torch.load('./result'))


def play_and_learn(env, learn=True):
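    # The body is truncated in the original. A minimal sketch under the same
    # assumptions as the loop above (get_action/fit are hypothetical names):
    total_reward = 0
    state = env.reset()
    done = False
    while not done:
        action = agent.get_action(state)
        next_state, reward, done, _ = env.step(action)
        if learn:
            agent.fit(state, action, reward, done, next_state)
        total_reward += reward
        state = next_state
    return total_reward
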
def init_v_model(state_shape):
    v_model = Seq_Network([state_shape, 25, 25, 1], nn.ReLU())
    return v_model
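
# Hypothetical usage sketch (illustrative, not from the repo): the factory
# returns a plain value network, so it can be queried directly on a batch of
# states; the input values below are made up.
import torch

v_model = init_v_model(state_shape=2)
value = v_model(torch.tensor([[0.5, -0.5]]))  # V(s) estimate for one state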