Example 1
def init_v_agent(state_shape, action_shape, action_max):
    # action-mean network; the final Tanh bounds outputs to [-1, 1]
    mu_model = Seq_Network([state_shape, 32, 32, action_shape], nn.ReLU(),
                           nn.Tanh())
    # action_shape**2 outputs: entries of the NAF advantage matrix
    p_model = Seq_Network([state_shape, 32, 32, action_shape**2], nn.ReLU())
    model = models.centrilized_naf.QModel(mu_model, p_model, action_shape,
                                          action_max)
    return model
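The mu_model / p_model / v_model triples in these examples follow the NAF (Normalized Advantage Function) decomposition: p_model emits action_shape**2 values that are reshaped into a matrix defining a quadratic advantage around the action mean produced by mu_model. The repository's QModel / NAFAgent classes are not shown on this page, so the following is only a minimal sketch of the standard NAF computation; the function name and tensor layout are assumptions, not the repository's actual implementation.

import torch

def naf_q_value(mu, p_entries, v, action, action_shape):
    # Sketch (not the repo's code): Q(s, a) = V(s) - 0.5 * (a - mu)^T P (a - mu),
    # with P = L L^T and L lower-triangular, built from p_model's output.
    # mu:        (batch, action_shape)     output of mu_model
    # p_entries: (batch, action_shape**2)  output of p_model
    # v:         (batch, 1)                output of v_model
    # action:    (batch, action_shape)
    L = p_entries.view(-1, action_shape, action_shape).tril()
    P = L @ L.transpose(1, 2)                      # positive semi-definite
    diff = (action - mu).unsqueeze(-1)             # (batch, action_shape, 1)
    advantage = -0.5 * (diff.transpose(1, 2) @ P @ diff).squeeze(-1)
    return v + advantage                           # (batch, 1)

Many NAF implementations additionally exponentiate the diagonal of L so that P is strictly positive definite; that detail is omitted in the sketch.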
Example 2
def init_v_agent(state_shape, action_shape, action_max, batch_size):
    mu_model = Seq_Network([state_shape, 16, 16, action_shape], nn.ReLU(), nn.Tanh())
    p_model = Seq_Network([state_shape, 16, 16, action_shape ** 2], nn.ReLU())
    v_model = Seq_Network([state_shape, 16, 16, 1], nn.ReLU())
    noise = OUNoise(1, threshold=action_max, threshold_min=0.001, threshold_decrease=0.001 * action_max)
    agent = NAFAgent(mu_model, p_model, v_model, noise, state_shape, action_shape, action_max, batch_size, gamma=1)
    return agent
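Every example constructs an OUNoise with a threshold that is decayed towards threshold_min by threshold_decrease. The class itself is not included on this page, so the snippet below is only a rough sketch of Ornstein-Uhlenbeck exploration noise with such a linearly decaying scale; the constructor arguments mirror the calls above, but the internals (theta, sigma, the decrease method) are assumptions.

import numpy as np

class OUNoiseSketch:
    # Hypothetical stand-in for utilities.noises.OUNoise, shown only to
    # illustrate the role of threshold / threshold_min / threshold_decrease.
    def __init__(self, action_dim, threshold=1.0, threshold_min=0.001,
                 threshold_decrease=0.001, mu=0.0, theta=0.15, sigma=0.2):
        self.action_dim = action_dim
        self.threshold = threshold
        self.threshold_min = threshold_min
        self.threshold_decrease = threshold_decrease
        self.mu, self.theta, self.sigma = mu, theta, sigma
        self.state = np.full(action_dim, mu, dtype=np.float64)

    def reset(self):
        self.state = np.full(self.action_dim, self.mu, dtype=np.float64)

    def noise(self):
        # Ornstein-Uhlenbeck update: x += theta * (mu - x) + sigma * N(0, 1)
        self.state += self.theta * (self.mu - self.state) \
                      + self.sigma * np.random.randn(self.action_dim)
        return self.threshold * self.state

    def decrease(self):
        # linear decay of the exploration scale, clipped at threshold_min
        self.threshold = max(self.threshold_min,
                             self.threshold - self.threshold_decrease)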
Example 3
def init_agent(state_shape, action_shape, u_max, v_max, batch_size):
    u_model = init_u_agent(state_shape, action_shape, u_max)
    v_model = init_v_agent(state_shape, action_shape, v_max)
    v_network = Seq_Network([state_shape, 16, 16, 1], nn.ReLU())
    noise = OUNoise(1, threshold=1, threshold_min=0.002, threshold_decrease=0.002)
    agent = models.centrilized_naf.CentralizedNafAgent(u_model, v_model, v_network, noise, state_shape, action_shape,
                                                       u_max, v_max,
                                                       batch_size)
    return agent
Example 4
def init_u_agent(state_shape, action_shape, action_max, batch_size):
    mu_model = Seq_Network([state_shape, 32, 32, action_shape], nn.ReLU(),
                           nn.Tanh())
    p_model = Seq_Network([state_shape, 32, 32, action_shape**2], nn.ReLU())
    v_model = Seq_Network([state_shape, 32, 32, 1], nn.ReLU())
    noise = OUNoise(1,
                    threshold=1,
                    threshold_min=0.001,
                    threshold_decrease=0.003)
    agent = DoubleNAFAgent(mu_model,
                           p_model,
                           v_model,
                           noise,
                           state_shape,
                           action_shape,
                           action_max,
                           batch_size,
                           gamma=0.999)
    return agent
Example 5
def init_agent(state_shape, action_shape, u_max, v_max, batch_size):
    u_model = init_u_agent(state_shape, action_shape, u_max)
    v_model = init_v_agent(state_shape, action_shape, v_max)
    v_network = Seq_Network([state_shape, 50, 50, 1], nn.ReLU())
    noise = OUNoise(2,
                    threshold=1,
                    threshold_min=0.002,
                    threshold_decrease=0.003)
    agent = CentralizedDoubleNafAgent(u_model, v_model, v_network, noise,
                                      state_shape, action_shape, u_max, v_max,
                                      batch_size)
    return agent
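For orientation only, a call to the init_agent helper from Example 5 might look like the lines below; the dimensions, bounds and batch size are made-up placeholder values, not settings taken from the repository.

# hypothetical call of init_agent from Example 5; all numbers are placeholders
state_shape = 4
action_shape = 1
u_max, v_max = 1.0, 1.0
batch_size = 128
agent = init_agent(state_shape, action_shape, u_max, v_max, batch_size)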
Example 6
import torch
import torch.nn as nn

from models.simple_naf import SimpleNaf
from models.unlimited_naf import UnlimitedNAFAgent
from problems.nonlinear_problem.nonlinear_problem_env import NonlinearProblem
from problems.nonlinear_problem.optimal_agent import OptimalAgent
from utilities.noises import OUNoise
from utilities.sequentialNetwork import Seq_Network

env = NonlinearProblem()
state_shape = 2
action_shape = 1
episodes_n = 250

# NAF networks (action mean, advantage-matrix entries, state value), all with sigmoid activations
mu_model = Seq_Network([state_shape, 100, 100, 100, action_shape],
                       nn.Sigmoid())
p_model = Seq_Network([state_shape, 100, 100, 100, action_shape**2],
                      nn.Sigmoid())
v_model = Seq_Network([state_shape, 100, 100, 100, 1], nn.Sigmoid())
noise = OUNoise(action_shape,
                threshold=1,
                threshold_min=0.001,
                threshold_decrease=0.004)
batch_size = 200
agent = SimpleNaf(mu_model, v_model, noise, state_shape, action_shape,
                  batch_size, 1)


def play_and_learn(env):
    total_reward = 0
    state = env.reset()
Example 7
import gym
import torch
import torch.nn as nn
from gym import wrappers

from models.double_naf import DoubleNAFAgent
from utilities.noises import OUNoise
from utilities.sequentialNetwork import Seq_Network

env_to_wrap = gym.make("LunarLanderContinuous-v2").env
env = wrappers.Monitor(env_to_wrap,
                       './videos/' + 'lunarLander' + '/',
                       force=True)
state_shape = env.observation_space.shape
action_shape = env.action_space.shape
action_max = 1

mu_model = Seq_Network([state_shape[0], 50, 50, action_shape[0]], nn.ReLU(),
                       nn.Tanh())
p_model = Seq_Network([state_shape[0], 100, 100, action_shape[0]**2],
                      nn.ReLU())
v_model = Seq_Network([state_shape[0], 50, 50, 1], nn.ReLU())
noise = OUNoise(action_shape,
                threshold=1,
                threshold_min=0.001,
                threshold_decrease=0.000005)
batch_size = 200
agent = DoubleNAFAgent(mu_model, p_model, v_model, noise, state_shape,
                       action_shape[0], action_max, batch_size, 0.999)

# agent.Q.load_state_dict(torch.load('./result'))


def play_and_learn(env, learn=True):
Example 8
def init_v_model(state_shape):
    v_model = Seq_Network([state_shape, 25, 25, 1], nn.ReLU())
    return v_model
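Examples 6 and 7 both define a play_and_learn helper that this listing cuts off. As rough orientation, a typical episode loop for these agents is sketched below; it assumes a gym-style environment returning (state, reward, done, info) tuples, and the agent methods get_action / fit as well as noise.decrease are hypothetical names, not the repository's actual API.

def play_and_learn_sketch(env, agent, episodes_n):
    # hypothetical training loop; agent.get_action, agent.fit and
    # agent.noise.decrease() are assumed names, not the repo's real API
    rewards = []
    for _ in range(episodes_n):
        state = env.reset()
        total_reward = 0.0
        done = False
        while not done:
            action = agent.get_action(state)
            next_state, reward, done, _ = env.step(action)
            agent.fit(state, action, reward, done, next_state)
            state = next_state
            total_reward += reward
        agent.noise.decrease()
        rewards.append(total_reward)
    return rewards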