Example #1
    def __init__(self,
                 state_size,
                 action_size,
                 random_seed,
                 fc1_units,
                 fc2_units,
                 weighted=False,
                 individual=False):
        """Initialize an Agent object.

        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            random_seed (int): random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        random.seed(random_seed)
        self.seed = random_seed
        self.epsilon = EPSILON_MAX

        # Actor Network (w/ Target Network)
        if weighted:
            self.actor_local = Weight_adapter(state_size,
                                              action_size).to(device)
            self.actor_target = Weight_adapter(state_size,
                                               action_size).to(device)
        elif individual:
            self.actor_local = IndividualModel(state_size, action_size,
                                               random_seed,
                                               fc1_units).to(device)
            self.actor_target = IndividualModel(state_size, action_size,
                                                random_seed,
                                                fc1_units).to(device)
        else:
            self.actor_local = Actor(state_size, action_size, random_seed,
                                     fc1_units, fc2_units).to(device)
            self.actor_target = Actor(state_size, action_size, random_seed,
                                      fc1_units, fc2_units).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=LR_ACTOR)

        # Critic Network (w/ Target Network)
        self.critic_local = Critic(state_size, action_size,
                                   random_seed).to(device)
        self.critic_target = Critic(state_size, action_size,
                                    random_seed).to(device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                           lr=LR_CRITIC,
                                           weight_decay=WEIGHT_DECAY)

        # Noise process
        self.noise = OUNoise(action_size,
                             random_seed,
                             mu=0,
                             theta=0.15,
                             sigma=0.2)

        # Replay memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE,
                                   random_seed)

        # Make sure target is with the same weight as the source
        self.hard_update(self.actor_target, self.actor_local)
        self.hard_update(self.critic_target, self.critic_local)

        self.t_step = 0
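
The constructor references several module-level constants and a `device` handle defined elsewhere in the file. A minimal sketch of plausible definitions follows; the names come from the snippet, but the specific values here are assumptions:

import random

import torch
import torch.optim as optim

BUFFER_SIZE = int(1e6)   # replay buffer capacity (assumed value)
BATCH_SIZE = 128         # learning minibatch size (assumed value)
LR_ACTOR = 1e-4          # actor learning rate (assumed value)
LR_CRITIC = 1e-3         # critic learning rate (assumed value)
WEIGHT_DECAY = 0         # critic L2 penalty (assumed value)
EPSILON_MAX = 1.0        # initial exploration-noise scale (assumed value)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")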
Example #2
model_1 = Actor(state_size=3, action_size=1, seed=0, fc1_units=25).to(device)
model_1.load_state_dict(torch.load("./actors/actor_0.43600.pth"))
model_1.eval()

# model_2 = IndividualModel(state_size=3, action_size=1, seed=0, fc1_units=50).to(device)
# model_2.load_state_dict(torch.load("./actors/actor_1.0_2800.pth"))
# model_2.eval()


def MController(state):
    # Polynomial feedback controller over the three state variables
    action = (0.634 * state[0] - 0.296 * state[1] - 0.153 * state[2]
              + 0.053 * state[0] ** 2 - 1.215 * state[0] ** 3)
    return action


Individual = IndividualModel(state_size=3, action_size=1, seed=0,
                             fc1_units=25).to(device)

agent = Agent(state_size=3,
              action_size=2,
              random_seed=0,
              fc1_units=None,
              fc2_units=None,
              weighted=True)


def mkdir(path):
    folder = os.path.exists(path)
    if not folder:
        os.makedirs(path)

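Either controller can then be queried for a single state. A minimal sketch of assumed usage (the exact tensor shape `Actor.forward` expects is not shown in the snippet):

import numpy as np

state = np.array([0.1, 0.0, -0.05], dtype=np.float32)

# Analytic polynomial controller
u_poly = MController(state)

# Learned actor: batch of one state, gradients disabled for inference
with torch.no_grad():
    u_nn = model_1(torch.from_numpy(state).unsqueeze(0).to(device))
u_nn = u_nn.cpu().numpy().squeeze()
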
Example #3
# this file is to record the NN controller parameters into a txt file to be used
# for Bernstein polynomial approximation by the tool of ReachNN
from Model import IndividualModel
import torch
import numpy as np

trained_model = IndividualModel(state_size=2,
                                action_size=1,
                                seed=0,
                                fc1_units=50)
trained_model.load_state_dict(torch.load('./models/Individual.pth'))
trained_model.eval()
bias_list = []
weight_list = []
for name, param in trained_model.named_parameters():
    if 'bias' in name:
        bias_list.append(param.detach().cpu().numpy())

    if 'weight' in name:
        weight_list.append(param.detach().cpu().numpy())

all_param = []

for i in range(len(bias_list)):
    for j in range(len(bias_list[i])):
        for k in range(weight_list[i].shape[1]):
            all_param.append(weight_list[i][j, k])
        all_param.append(bias_list[i][j])

np.savetxt('nn_individual_relu_tanh', np.array(all_param))
print('done')
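
The flattening loop writes, for each layer and each neuron, the neuron's incoming weights followed by its bias, producing one flat vector for the ReachNN converter. A quick self-check on the exported count, using only the arrays built above:

# Every weight and bias should be written exactly once.
expected = sum(w.size + b.size for w, b in zip(weight_list, bias_list))
assert len(all_param) == expected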
Example #4
import math
import os
from collections import deque

import torch
import torch.autograd as autograd

from Model import IndividualModel


class ReplayBuffer:
    # Only the tail of this class appears in the original snippet; a
    # minimal constructor (an assumption) is added so the fragment runs.
    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)

    def __len__(self):
        return len(self.buffer)

USE_CUDA = torch.cuda.is_available()
Variable = lambda *args, **kwargs: autograd.Variable(*args, **kwargs).cuda() if USE_CUDA else autograd.Variable(*args, **kwargs)

# DQN training hyperparameters
batch_size = 128
gamma = 0.99
epsilon_start = 1.0
epsilon_final = 0.01
epsilon_decay = 3000
replay_buffer = ReplayBuffer(int(5e3))

# Exponentially decaying epsilon-greedy exploration schedule
epsilon_by_frame = lambda frame_idx: epsilon_final + (epsilon_start - epsilon_final) * math.exp(-1. * frame_idx / epsilon_decay)
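# With these constants the schedule decays smoothly, e.g.:
#   epsilon_by_frame(0)     -> 1.0
#   epsilon_by_frame(3000)  -> ~0.37   (0.01 + 0.99 / e)
#   epsilon_by_frame(30000) -> ~0.01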

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model_1 = IndividualModel(state_size=3, action_size=1, seed=0, fc1_units=50).to(device)
model_1.load_state_dict(torch.load("./actors/actor_0.3_2400.pth"))
model_1.eval()

model_2 = IndividualModel(state_size=3, action_size=1, seed=0, fc1_units=50).to(device)
model_2.load_state_dict(torch.load("./actors/actor_1.0_2800.pth"))
model_2.eval()

Individual = IndividualModel(state_size=3, action_size=1, seed=0).to(device)

agent = Agent(state_size=3, action_size=2, random_seed=0, fc1_units=None, fc2_units=None, weighted=True)

def mkdir(path):
    folder = os.path.exists(path)
    if not folder:
        os.makedirs(path)
Example #5
model_1 = Actor(state_size=2,
                action_size=1,
                seed=0,
                fc1_units=25,
                fc2_units=None).to(device)
model_1.load_state_dict(torch.load("./models/actor_2800.pth"))
model_1.eval()

model_2 = Actor(state_size=2,
                action_size=1,
                seed=0,
                fc1_units=25,
                fc2_units=None).to(device)
model_2.load_state_dict(torch.load("./models/actor_2900.pth"))
model_2.eval()

Individual = IndividualModel(state_size=2, action_size=1, seed=0).to(device)


def mkdir(path):
    folder = os.path.exists(path)
    if not folder:
        os.makedirs(path)


def update_target(current_model, target_model):
    target_model.load_state_dict(current_model.state_dict())
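

# In a DQN training loop this hard copy is typically invoked every few
# thousand frames to keep the target network in sync (assumed usage,
# not shown in the original snippet):
#   if frame_idx % 1000 == 0:
#       update_target(current_model, target_model)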


class DQN(nn.Module):
    def __init__(self, num_inputs, num_actions):
        super(DQN, self).__init__()
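        # The original snippet is cut off after __init__ begins. One
        # plausible completion (an assumption, not the source) is a small
        # fully connected Q-network:
        self.layers = nn.Sequential(
            nn.Linear(num_inputs, 128),
            nn.ReLU(),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, num_actions),
        )

    def forward(self, x):
        # Q-values for every action given a batch of states
        return self.layers(x)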
Example #6
import numpy as np
import torch
from Model import IndividualModel
import torch.utils.data as Data
from torch.autograd import Variable
import torch.nn as nn

dataset = np.load('dataset.npy')
y = torch.from_numpy(dataset[:, -1].reshape(-1, 1)).float()
x = torch.from_numpy(dataset[:, :3]).float()

Individual = IndividualModel(state_size=3, action_size=1, seed=0)


def train(inputdata, label, net):
    optimizer = torch.optim.Adam(net.parameters(), weight_decay=1e-5)
    criterion = torch.nn.MSELoss()

    BATCH_SIZE = 100
    EPOCH = 100

    torch_dataset = Data.TensorDataset(inputdata, label)

    loader = Data.DataLoader(
        dataset=torch_dataset,
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=2,
    )
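
    # The original snippet ends here; a typical continuation (an assumed
    # sketch, not part of the source) iterates over the loader and
    # regresses the network's outputs onto the labels:
    for epoch in range(EPOCH):
        for batch_x, batch_y in loader:
            prediction = net(batch_x)
            loss = criterion(prediction, batch_y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print('epoch %d, loss %.6f' % (epoch, loss.item()))
    return net


# Assumed usage: fit the individual network to the recorded data.
# trained = train(x, y, Individual)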