def __init__(self, state_size, action_size, random_seed, fc1_units, fc2_units,
             weighted=False, individual=False):
    """Initialize an Agent object.

    Params
    ======
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        random_seed (int): random seed
        fc1_units: forwarded to the IndividualModel / Actor constructors;
            not used when ``weighted`` is true
        fc2_units: forwarded to the Actor constructor only
        weighted (bool): build the actor pair from Weight_adapter
        individual (bool): build the actor pair from IndividualModel
            (only consulted when ``weighted`` is false)
    """
    self.state_size = state_size
    self.action_size = action_size
    # random.seed() returns None, so seed the global RNG first and keep the
    # seed value itself on the instance.
    random.seed(random_seed)
    self.seed = random_seed
    self.epsilon = EPSILON_MAX

    # Actor Network (w/ Target Network): pick the architecture once, then
    # build the local and target copies with identical arguments.
    if weighted:
        actor_cls = Weight_adapter
        actor_args = (state_size, action_size)
    elif individual:
        actor_cls = IndividualModel
        actor_args = (state_size, action_size, random_seed, fc1_units)
    else:
        actor_cls = Actor
        actor_args = (state_size, action_size, random_seed, fc1_units, fc2_units)
    self.actor_local = actor_cls(*actor_args).to(device)
    self.actor_target = actor_cls(*actor_args).to(device)
    self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=LR_ACTOR)

    # Critic Network (w/ Target Network)
    self.critic_local = Critic(state_size, action_size, random_seed).to(device)
    self.critic_target = Critic(state_size, action_size, random_seed).to(device)
    self.critic_optimizer = optim.Adam(
        self.critic_local.parameters(),
        lr=LR_CRITIC,
        weight_decay=WEIGHT_DECAY,
    )

    # Noise process
    self.noise = OUNoise(action_size, random_seed, mu=0, theta=0.15, sigma=0.2)

    # Replay memory
    self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, random_seed)

    # Make sure each target starts with the same weights as its source
    self.hard_update(self.actor_target, self.actor_local)
    self.hard_update(self.critic_target, self.critic_local)

    self.t_step = 0
# Actor restored from a saved checkpoint and switched to evaluation mode.
model_1 = Actor(state_size=3, action_size=1, seed=0, fc1_units=25).to(device)
# map_location remaps the stored tensors onto `device`, so a checkpoint that
# was written on a GPU still loads on a CPU-only host.
model_1.load_state_dict(
    torch.load("./actors/actor_0.43600.pth", map_location=device))
model_1.eval()

# model_2 = IndividualModel(state_size=3, action_size=1, seed=0, fc1_units=50).to(device)
# model_2.load_state_dict(torch.load("./actors/actor_1.0_2800.pth"))
# model_2.eval()


def MController(state):
    """Return the fixed polynomial control action for ``state``.

    The action is linear in ``state[0]``, ``state[1]`` and ``state[2]`` plus
    quadratic and cubic terms in ``state[0]``; only the first three entries
    of ``state`` are read.
    """
    x0 = state[0]
    action = (0.634 * x0 - 0.296 * state[1] - 0.153 * state[2]
              + 0.053 * x0**2 - 1.215 * x0**3)
    return action


# Freshly initialised single-output model (no checkpoint is loaded here).
Individual = IndividualModel(state_size=3, action_size=1, seed=0,
                             fc1_units=25).to(device)

# Agent built with the weighted actor variant and a two-dimensional action.
agent = Agent(state_size=3, action_size=2, random_seed=0, fc1_units=None,
              fc2_units=None, weighted=True)


def mkdir(path):
    """Create directory ``path`` (with missing parents) if nothing exists there."""
    if not os.path.exists(path):
        # exist_ok closes the window where another process creates the
        # directory between the existence check and this call.
        os.makedirs(path, exist_ok=True)
# Dump the parameters of the trained NN controller to a text file so that
# ReachNN can use them for its Bernstein polynomial approximation.
from Model import IndividualModel
import torch
import numpy as np

trained_model = IndividualModel(state_size=2, action_size=1, seed=0, fc1_units=50)
# The model stays on the CPU, so map the checkpoint there as well; without
# this a checkpoint written on a GPU cannot be read on a CPU-only machine.
trained_model.load_state_dict(
    torch.load('./models/Individual.pth', map_location='cpu'))
trained_model.eval()

# Collect the bias vectors and weight matrices in declaration order.
bias_list = []
weight_list = []
for name, param in trained_model.named_parameters():
    values = param.detach().cpu().numpy()
    if 'bias' in name:
        bias_list.append(values)
    if 'weight' in name:
        weight_list.append(values)

# Flatten layer by layer, neuron by neuron: every incoming weight of a neuron
# followed by that neuron's bias.
all_param = []
for weights, biases in zip(weight_list, bias_list):
    for neuron_weights, neuron_bias in zip(weights, biases):
        all_param.extend(neuron_weights)
        all_param.append(neuron_bias)

np.savetxt('nn_individual_relu_tanh', np.array(all_param))

print('done')
# NOTE(review): __len__ looks like the last method of a class whose header is
# outside this view (presumably the replay buffer) - confirm its nesting.
def __len__(self):
    """Return the number of items currently held in ``self.buffer``."""
    return len(self.buffer)


USE_CUDA = torch.cuda.is_available()


def Variable(*args, **kwargs):
    """Build an ``autograd.Variable`` and move it to the GPU when CUDA is available."""
    var = autograd.Variable(*args, **kwargs)
    return var.cuda() if USE_CUDA else var


# Training hyper-parameters.
batch_size = 128
gamma = 0.99

# Exploration schedule: epsilon decays exponentially from start to final.
epsilon_start = 1.0
epsilon_final = 0.01
epsilon_decay = 3000

replay_buffer = ReplayBuffer(int(5e3))


def epsilon_by_frame(frame_idx):
    """Return the exploration rate for frame ``frame_idx``.

    Starts at ``epsilon_start`` for frame 0 and decays exponentially towards
    ``epsilon_final`` with time constant ``epsilon_decay``.
    """
    span = epsilon_start - epsilon_final
    return epsilon_final + span * math.exp(-1. * frame_idx / epsilon_decay)


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Two models restored from checkpoints and switched to evaluation mode.
# map_location remaps the stored tensors onto `device`, so checkpoints that
# were written on a GPU still load on a CPU-only host.
model_1 = IndividualModel(state_size=3, action_size=1, seed=0, fc1_units=50).to(device)
model_1.load_state_dict(
    torch.load("./actors/actor_0.3_2400.pth", map_location=device))
model_1.eval()

model_2 = IndividualModel(state_size=3, action_size=1, seed=0, fc1_units=50).to(device)
model_2.load_state_dict(
    torch.load("./actors/actor_1.0_2800.pth", map_location=device))
model_2.eval()

# Freshly initialised model (no checkpoint is loaded here).
Individual = IndividualModel(state_size=3, action_size=1, seed=0).to(device)

# Agent built with the weighted actor variant and a two-dimensional action.
agent = Agent(state_size=3, action_size=2, random_seed=0, fc1_units=None,
              fc2_units=None, weighted=True)


def mkdir(path):
    """Create directory ``path`` (with missing parents) if nothing exists there."""
    if not os.path.exists(path):
        # exist_ok closes the window where another process creates the
        # directory between the existence check and this call.
        os.makedirs(path, exist_ok=True)
action_size=1, seed=0, fc1_units=25, fc2_units=None).to(device)
# NOTE(review): the line above is the tail of a call that starts outside this
# view (presumably the construction of model_1) - confirm.
# Restore model_1's weights from its checkpoint and switch it to eval mode.
model_1.load_state_dict(torch.load("./models/actor_2800.pth"))
model_1.eval()

# Second actor, same constructor arguments, restored from another checkpoint.
model_2 = Actor(state_size=2, action_size=1, seed=0, fc1_units=25, fc2_units=None).to(device)
model_2.load_state_dict(torch.load("./models/actor_2900.pth"))
model_2.eval()

# Freshly initialised model (no checkpoint is loaded here).
Individual = IndividualModel(state_size=2, action_size=1, seed=0).to(device)


def mkdir(path):
    """Create ``path`` (with missing parents) unless something already exists there."""
    folder = os.path.exists(path)
    if not folder:
        os.makedirs(path)


def update_target(current_model, target_model):
    """Copy ``current_model``'s state dict into ``target_model``."""
    target_model.load_state_dict(current_model.state_dict())


# The body of this class continues beyond the visible part of the file.
class DQN(nn.Module):
    def __init__(self, num_inputs, num_actions):
        super(DQN, self).__init__()
import numpy as np
import torch
from Model import IndividualModel
import torch.utils.data as Data
from torch.autograd import Variable
import torch.nn as nn

# Load the dataset array from disk.
dataset = np.load('dataset.npy')
# Targets: the last column, reshaped to a single-column float tensor.
y = torch.from_numpy(np.reshape(dataset[:, -1], (len(dataset[:, -1]), 1))).float()
# Inputs: the first three columns as a float tensor.
x = torch.from_numpy(dataset[:, :3]).float()

# Model to be fitted; three inputs to match the three feature columns of x.
Individual = IndividualModel(state_size=3, action_size=1, seed=0)


# NOTE(review): this function appears to continue past the visible part of the
# file; only the set-up below is documented.
def train(inputdata, label, net):
    """Set up the optimiser, loss and mini-batch loader for fitting ``net``.

    ``inputdata`` and ``label`` are wrapped together in a ``TensorDataset``;
    ``net`` supplies the parameters handed to the Adam optimiser.
    """
    # Adam with a small L2 penalty (weight_decay) and otherwise default settings.
    optimizer = torch.optim.Adam(net.parameters(), weight_decay=1e-5)
    criterion = torch.nn.MSELoss()
    BATCH_SIZE = 100
    EPOCH = 100
    torch_dataset = Data.TensorDataset(inputdata, label)
    # Shuffled mini-batches, loaded by two worker processes.
    loader = Data.DataLoader(
        dataset=torch_dataset,
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=2,
    )