def __init__(self, inputWidth, num_hidden, num_action):
    """Build the 2-D convolutional actor network.

    Two identical-shaped stages (Conv2d -> BatchNorm2d -> ReLU -> MaxPool2d)
    are followed by three linear layers. ``fc0`` embeds a 2-element vector
    into 128 features and ``fc1`` takes ``featureSize() + 128`` inputs.

    Args:
        inputWidth: side length stored twice in ``self.inputShape``.
        num_hidden: output width of ``fc1``.
        num_action: output width of ``fc2``; also the size handed to ``OUNoise``.
    """
    super(ActorConvNet, self).__init__()
    self.inputShape = (inputWidth,) * 2

    def conv_stage(in_channels, out_channels):
        # kernel 2 / padding 1 / stride 1 grows each side by one pixel;
        # the 2x2 max-pool with stride 2 then halves it (rounding down).
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=2, stride=1, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

    # Input to layer1 is expected as (1, inputWidth, inputWidth) per sample.
    self.layer1 = conv_stage(1, 32)
    self.layer2 = conv_stage(32, 64)

    # Fully connected head.
    self.fc0 = nn.Linear(2, 128)
    conv_features = self.featureSize()
    self.fc1 = nn.Linear(conv_features + 128, num_hidden)
    self.fc2 = nn.Linear(num_hidden, num_action)
    self.apply(xavier_init)

    noise_settings = dict(
        seed=1,
        mu=0.0,
        theta=0.15,
        max_sigma=0.3,
        min_sigma=0.05,
        decay_period=10000,
    )
    self.noise = OUNoise(num_action, **noise_settings)
    self.noise.reset()
def __init__(self, input_size, hidden_size, output_size):
    """Create the layers, masks and exploration noise of the policy.

    Args:
        input_size: number of input features of ``linear1``.
        hidden_size: width of the two hidden layers.
        output_size: action dimension; ``L`` emits ``output_size ** 2`` values.
    """
    super(Policy, self).__init__()
    self.output_size = output_size

    # Shared trunk followed by the three heads.
    self.linear1 = nn.Linear(input_size, hidden_size)
    self.linear2 = nn.Linear(hidden_size, hidden_size)
    self.V = nn.Linear(hidden_size, 1)
    self.mu = nn.Linear(hidden_size, output_size)
    self.L = nn.Linear(hidden_size, output_size ** 2)

    # (1, n, n) masks: strictly-lower triangle and main diagonal.
    square_of_ones = torch.ones(output_size, output_size)
    self.tril_mask = torch.tril(square_of_ones, diagonal=-1).unsqueeze(0)
    self.diag_mask = torch.diag(torch.diag(square_of_ones)).unsqueeze(0)

    noise_settings = dict(
        seed=1,
        mu=0.0,
        theta=0.15,
        max_sigma=0.3,
        min_sigma=0.05,
        decay_period=10000,
    )
    self.noise = OUNoise(output_size, **noise_settings)
    self.noise.reset()

    # Weight initialisation runs last, after the noise process is created.
    self.apply(xavier_init)
class Actor(nn.Module):
    """MLP actor with two scalar heads and an ``OUNoise`` exploration process.

    The first head is squashed with a sigmoid, the second with tanh; the two
    are concatenated into a ``(batch, 2)`` action tensor.
    """

    def __init__(self, input_size, hidden_size, output_size, learning_rate=3e-4):
        """Create the layers and the noise process.

        Args:
            input_size: number of input features of ``linear1``.
            hidden_size: width of both hidden layers.
            output_size: size handed to ``OUNoise``.
            learning_rate: accepted for interface compatibility; not used here.
        """
        super(Actor, self).__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.linear2 = nn.Linear(hidden_size, hidden_size)
        self.linear3_1 = nn.Linear(hidden_size, 1)
        self.linear3_2 = nn.Linear(hidden_size, 1)
        self.apply(xavier_init)
        self.noise = OUNoise(output_size, seed=1, mu=0.0, theta=0.15,
                             max_sigma=0.3, min_sigma=0.05, decay_period=10000)
        self.noise.reset()

    def forward(self, state):
        """Map a state tensor to ``[sigmoid head, tanh head]`` along dim 1."""
        x = F.relu(self.linear1(state))
        x = F.relu(self.linear2(x))
        action0 = torch.sigmoid(self.linear3_1(x))
        action1 = torch.tanh(self.linear3_2(x))
        return torch.cat([action0, action1], dim=1)

    def select_action(self, state, noiseFlag = False):
        """Return the policy action, optionally perturbed by exploration noise.

        Args:
            state: tensor accepted by ``forward``.
            noiseFlag: when true, one sample from ``self.noise.get_noise()``
                is added to the action.

        Returns:
            The action tensor. No clamping is applied to the noisy action.
        """
        action = self.forward(state)
        if not noiseFlag:
            return action
        # Previously the noisy action was computed and then thrown away in
        # favour of a second, noise-free forward pass. The noise is created
        # on the action's own device so no module-level config is required.
        noise = torch.tensor(self.noise.get_noise(), dtype=torch.float32,
                             device=action.device).unsqueeze(0)
        return action + noise
def __init__(self, input_size, hidden_size, output_size, learning_rate=3e-4):
    """Create a three-layer MLP actor and its exploration noise.

    Args:
        input_size: number of input features of ``linear1``.
        hidden_size: width of both hidden layers.
        output_size: output width of ``linear3`` and size handed to ``OUNoise``.
        learning_rate: accepted by the signature but not used in this method.
    """
    super(Actor, self).__init__()

    self.linear1 = nn.Linear(input_size, hidden_size)
    self.linear2 = nn.Linear(hidden_size, hidden_size)
    self.linear3 = nn.Linear(hidden_size, output_size)
    self.apply(xavier_init)

    noise_settings = dict(
        seed=1,
        mu=0.0,
        theta=0.15,
        max_sigma=0.3,
        min_sigma=0.05,
        decay_period=100000,
    )
    self.noise = OUNoise(output_size, **noise_settings)
    self.noise.reset()
class ActorConvNet(nn.Module):
    """3-D convolutional actor combining a volumetric sensor input with a target vector.

    ``forward`` expects a mapping with keys ``'sensor'`` (fed to the Conv3d
    stack) and ``'target'`` (fed to ``fc0``, which takes 6 features).
    """

    def __init__(self, inputWidth, num_hidden, num_action):
        """Create the conv stack, the linear head and the noise process.

        Args:
            inputWidth: side length stored three times in ``self.inputShape``.
            num_hidden: output width of ``fc1``.
            num_action: output width of ``fc2``; also the size handed to ``OUNoise``.
        """
        super(ActorConvNet, self).__init__()
        self.inputShape = (inputWidth, inputWidth, inputWidth)
        # Each stage: kernel 2 / padding 1 / stride 1 grows every side by one,
        # then the 2x2x2 max-pool with stride 2 halves it (rounding down).
        self.layer1 = nn.Sequential(
            nn.Conv3d(1, 32, kernel_size=2, stride=1, padding=1),
            nn.BatchNorm3d(32),
            nn.ReLU(),
            nn.MaxPool3d(kernel_size=2, stride=2))
        self.layer2 = nn.Sequential(
            nn.Conv3d(32, 64, kernel_size=2, stride=1, padding=1),
            nn.BatchNorm3d(64),
            nn.ReLU(),
            nn.MaxPool3d(kernel_size=2, stride=2))
        # Fully connected head; the target/state vector has 6 dimensions.
        self.fc0 = nn.Linear(6, 256)
        self.fc1 = nn.Linear(self.featureSize() + 256, num_hidden)
        self.fc2 = nn.Linear(num_hidden, num_action)
        self.apply(xavier_init)
        self.noise = OUNoise(num_action, seed=1, mu=0.0, theta=0.15,
                             max_sigma=0.5, min_sigma=0.1, decay_period=1000000)
        self.noise.reset()

    def forward(self, state):
        """Return tanh-squashed actions for ``state['sensor']`` / ``state['target']``."""
        x = state['sensor']
        y = state['target']
        xout = self.layer1(x)
        xout = self.layer2(xout)
        xout = xout.reshape(xout.size(0), -1)
        # Debugging aid: uncomment to mask the conv features.
        # xout.fill_(0)
        yout = F.relu(self.fc0(y))
        out = torch.cat((xout, yout), 1)
        out = F.relu(self.fc1(out))
        out = self.fc2(out)
        out = torch.tanh(out)
        return out

    def featureSize(self):
        """Return the flattened feature count produced by the conv stack.

        A single all-zero sample of shape ``(1, 1, *self.inputShape)`` is
        pushed through ``layer1`` and ``layer2``. The probe runs in eval mode
        without autograd so it does not touch the BatchNorm running
        statistics, and every module's previous training flag is restored.
        """
        saved_modes = [(module, module.training) for module in self.modules()]
        probe_device = next(self.layer1.parameters()).device
        self.eval()
        try:
            with torch.no_grad():
                probe = torch.zeros(1, 1, *self.inputShape, device=probe_device)
                return self.layer2(self.layer1(probe)).view(1, -1).size(1)
        finally:
            for module, was_training in saved_modes:
                module.training = was_training

    def select_action(self, state, noiseFlag = False):
        """Return the policy action, optionally with clamped exploration noise.

        Args:
            state: mapping accepted by ``forward``.
            noiseFlag: when true, one sample from ``self.noise.get_noise()``
                is added and the result is clamped to ``[-1, 1]``.
        """
        action = self.forward(state)
        if not noiseFlag:
            return action
        # The noise lives on the action's device, so no module-level config
        # object is needed to find it.
        noise = torch.tensor(self.noise.get_noise(), dtype=torch.float32,
                             device=action.device).unsqueeze(0)
        return torch.clamp(action + noise, -1, 1)
class DDPGAgent:
    """Bundle of actor/critic networks, their target copies, optimizers and noise."""

    def __init__(self, in_actor, out_actor, in_critic, lr_actor=1.0e-2, lr_critic=1.0e-2):
        """Create local and target networks, sync the targets, build the optimizers.

        Args:
            in_actor: first argument of ``ActorNormLayers``.
            out_actor: second argument of ``ActorNormLayers``; also the noise size.
            in_critic: argument of ``Critic``.
            lr_actor: learning rate of the actor's Adam optimizer.
            lr_critic: learning rate of the critic's Adam optimizer.
        """
        super(DDPGAgent, self).__init__()

        self.actor = ActorNormLayers(in_actor, out_actor).to(device)
        self.target_actor = ActorNormLayers(in_actor, out_actor).to(device)
        self.critic = Critic(in_critic).to(device)
        self.target_critic = Critic(in_critic).to(device)
        self.noise = OUNoise(out_actor, scale=1.0)

        # Target networks start out as exact copies of the local ones.
        for target_net, source_net in (
            (self.target_actor, self.actor),
            (self.target_critic, self.critic),
        ):
            hard_update(target_net, source_net)

        actor_params = list(self.actor.parameters())
        critic_params = list(self.critic.parameters())
        self.actor_optimizer = Adam(actor_params, lr=lr_actor)
        self.critic_optimizer = Adam(critic_params, lr=lr_critic, weight_decay=1.e-5)

    def reset(self):
        """Reset the exploration noise process."""
        self.noise.reset()

    def act(self, obs, noise=None):
        """Return the local actor's action for ``obs``, clamped to ``[-1, 1]``.

        Args:
            obs: observation tensor; it is moved to ``device`` first.
            noise: optional scale factor. When not ``None`` a noise sample
                multiplied by this factor is added before clamping.
        """
        obs = obs.to(device)
        with torch.no_grad():
            was_training = self.actor.training
            self.actor.eval()
            perturbation = 0.0 if noise is None else noise * self.noise.noise().to(device)
            action = self.actor(obs) + perturbation
            # Put the actor back into whatever mode it was in before the call.
            self.actor.train(was_training)
        return torch.clamp(action, -1., 1.)

    def target_act(self, obs):
        """Return the target actor's noise-free action for ``obs``, clamped to ``[-1, 1]``."""
        obs = obs.to(device)
        with torch.no_grad():
            was_training = self.target_actor.training
            self.target_actor.eval()
            action = self.target_actor(obs)
            # Put the target actor back into its previous mode.
            self.target_actor.train(was_training)
        return torch.clamp(action, -1., 1.)
def __init__(self, input_size, hidden_size, output_size, config):
    """Create a three-layer MLP actor, its exploration noise and bookkeeping.

    Args:
        input_size: number of input features of ``linear1``.
        hidden_size: width of both hidden layers.
        output_size: output width of ``linear3`` and size handed to ``OUNoise``.
        config: configuration object stored as ``self.config``.
    """
    super(Actor, self).__init__()

    self.linear1 = nn.Linear(input_size, hidden_size)
    self.linear2 = nn.Linear(hidden_size, hidden_size)
    self.linear3 = nn.Linear(hidden_size, output_size)
    self.apply(xavier_init)

    noise_settings = dict(
        seed=1,
        mu=0.0,
        theta=0.1,
        max_sigma=0.5,
        min_sigma=0.1,
        decay_period=100000,
    )
    self.noise = OUNoise(output_size, **noise_settings)
    self.noise.reset()

    self.config = config
    self.stepCount = 0  # step counter, starts at zero
class Actor(nn.Module):
    """Three-layer MLP actor with sigmoid outputs and ``OUNoise`` exploration."""

    def __init__(self, input_size, hidden_size, output_size, learning_rate=3e-4):
        """Create the layers and the noise process.

        Args:
            input_size: number of input features of ``linear1``.
            hidden_size: width of both hidden layers.
            output_size: output width of ``linear3`` and size handed to ``OUNoise``.
            learning_rate: accepted for interface compatibility; not used here.
        """
        super(Actor, self).__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.linear2 = nn.Linear(hidden_size, hidden_size)
        self.linear3 = nn.Linear(hidden_size, output_size)
        self.apply(xavier_init)
        self.noise = OUNoise(output_size, seed=1, mu=0.0, theta=0.5,
                             max_sigma=0.05, min_sigma=0.001, decay_period=10000)
        self.noise.reset()

    def forward(self, state):
        """Map a state tensor to actions squashed into ``[0, 1]`` by a sigmoid."""
        x = F.relu(self.linear1(state))
        x = F.relu(self.linear2(x))
        return torch.sigmoid(self.linear3(x))

    def select_action(self, state, noiseFlag=False):
        """Return the policy action, optionally with clamped exploration noise.

        Args:
            state: tensor accepted by ``forward``.
            noiseFlag: when true, one sample from ``self.noise.get_noise()``
                is added and the result is clamped to ``[0, 1]``.
        """
        action = self.forward(state)
        if not noiseFlag:
            return action
        # Create the noise on the action's device; a CPU-only noise tensor
        # cannot be added once the network has been moved elsewhere.
        noise = torch.tensor(self.noise.get_noise(), dtype=torch.float32,
                             device=action.device).unsqueeze(0)
        return torch.clamp(action + noise, 0, 1)
class Actor(nn.Module):
    """Three-layer MLP actor with tanh outputs and ``OUNoise`` exploration."""

    def __init__(self, input_size, hidden_size, output_size, config):
        """Create the layers, the noise process and store the configuration.

        Args:
            input_size: number of input features of ``linear1``.
            hidden_size: width of both hidden layers.
            output_size: output width of ``linear3`` and size handed to ``OUNoise``.
            config: mapping stored as ``self.config``; ``config['device']`` is
                read when exploration noise is generated.
        """
        super(Actor, self).__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.linear2 = nn.Linear(hidden_size, hidden_size)
        self.linear3 = nn.Linear(hidden_size, output_size)
        self.apply(xavier_init)
        self.noise = OUNoise(output_size, seed=1, mu=0.0, theta=0.1,
                             max_sigma=0.5, min_sigma=0.1, decay_period=100000)
        self.noise.reset()
        self.config = config
        self.stepCount = 0

    def forward(self, state):
        """Map a state tensor to actions squashed into ``[-1, 1]`` by tanh."""
        x = F.relu(self.linear1(state))
        x = F.relu(self.linear2(x))
        return torch.tanh(self.linear3(x))

    def select_action(self, state, noiseFlag=False):
        """Return the policy action, optionally with clamped exploration noise.

        Args:
            state: tensor accepted by ``forward``.
            noiseFlag: when true, one sample from ``self.noise.get_noise()``
                is added and the result is clamped to ``[-1, 1]``.
        """
        action = self.forward(state)
        if not noiseFlag:
            return action
        # Use the configuration given to this instance rather than a
        # module-level ``config`` that may not exist or may differ.
        noise = torch.tensor(self.noise.get_noise(), dtype=torch.float32,
                             device=self.config['device']).unsqueeze(0)
        return torch.clamp(action + noise, -1, 1)
def __init__(self, in_actor, out_actor, in_critic, lr_actor=1.0e-2, lr_critic=1.0e-2):
    """Create local and target networks, sync the targets, build the optimizers.

    Args:
        in_actor: first argument of ``ActorNormLayers``.
        out_actor: second argument of ``ActorNormLayers``; also the noise size.
        in_critic: argument of ``Critic``.
        lr_actor: learning rate of the actor's Adam optimizer.
        lr_critic: learning rate of the critic's Adam optimizer.
    """
    super(DDPGAgent, self).__init__()

    self.actor = ActorNormLayers(in_actor, out_actor).to(device)
    self.target_actor = ActorNormLayers(in_actor, out_actor).to(device)
    self.critic = Critic(in_critic).to(device)
    self.target_critic = Critic(in_critic).to(device)
    self.noise = OUNoise(out_actor, scale=1.0)

    # Target networks start out as exact copies of the local ones.
    for target_net, source_net in (
        (self.target_actor, self.actor),
        (self.target_critic, self.critic),
    ):
        hard_update(target_net, source_net)

    actor_params = list(self.actor.parameters())
    critic_params = list(self.critic.parameters())
    self.actor_optimizer = Adam(actor_params, lr=lr_actor)
    self.critic_optimizer = Adam(critic_params, lr=lr_critic, weight_decay=1.e-5)
class Policy(nn.Module):
    """Network that emits a state value, a mean action and a quadratic advantage.

    ``forward`` computes ``Q = V + A`` with
    ``A = -0.5 * (u - mu)^T P (u - mu)`` and ``P = L L^T``, where ``L`` is
    lower-triangular with an exponentiated diagonal.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create the layers, the triangular/diagonal masks and the noise process.

        Args:
            input_size: number of input features of ``linear1``.
            hidden_size: width of both hidden layers.
            output_size: action dimension; ``L`` emits ``output_size ** 2`` values.
        """
        super(Policy, self).__init__()
        self.output_size = output_size
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.linear2 = nn.Linear(hidden_size, hidden_size)
        self.V = nn.Linear(hidden_size, 1)
        self.mu = nn.Linear(hidden_size, output_size)
        self.L = nn.Linear(hidden_size, output_size**2)
        # (1, n, n) masks selecting the strictly-lower triangle and the diagonal.
        # They are plain attributes (not parameters/buffers), so ``forward``
        # moves them to the working device itself.
        self.tril_mask = torch.tril(torch.ones(output_size, output_size),
                                    diagonal=-1).unsqueeze(0)
        self.diag_mask = torch.diag(
            torch.diag(torch.ones(output_size, output_size))).unsqueeze(0)
        self.noise = OUNoise(output_size, seed=1, mu=0.0, theta=0.15,
                             max_sigma=0.3, min_sigma=0.05, decay_period=10000)
        self.noise.reset()
        self.apply(xavier_init)

    def forward(self, state, action):
        """Return ``(mu, Q, V)`` for ``state``; ``Q`` is ``None`` when ``action`` is ``None``.

        Args:
            state: input tensor for ``linear1``.
            action: action tensor ``u`` with the same shape as ``mu``, or ``None``.
        """
        x = torch.tanh(self.linear1(state))
        x = torch.tanh(self.linear2(x))
        V = self.V(x)
        mu = torch.sigmoid(self.mu(x))
        Q = None
        if action is not None:
            num_outputs = mu.size(1)
            L = self.L(x).view(-1, num_outputs, num_outputs)
            # The masks do not follow ``module.to(...)``; align them with L
            # so the network also works off-CPU.
            tril_mask = self.tril_mask.to(L.device)
            diag_mask = self.diag_mask.to(L.device)
            # Keep the strictly-lower part as is and exponentiate the diagonal.
            L = L * tril_mask.expand_as(L) + torch.exp(L) * diag_mask.expand_as(L)
            P = torch.bmm(L, L.transpose(2, 1))
            u_mu = (action - mu).unsqueeze(2)
            A = -0.5 * torch.bmm(torch.bmm(u_mu.transpose(2, 1), P), u_mu)[:, :, 0]
            Q = A + V
        return mu, Q, V

    def select_action(self, state, noiseFlag=True):
        """Return the mean action, optionally with noise, clamped to ``[0, 1]``.

        Args:
            state: tensor accepted by ``forward``.
            noiseFlag: when true (the default), one sample from
                ``self.noise.get_noise()`` is added before clamping.
        """
        action, _, _ = self.forward(state, None)
        if noiseFlag:
            # Build the noise on the action's device instead of always on CPU.
            noise = torch.tensor(self.noise.get_noise(), dtype=torch.float32,
                                 device=action.device).unsqueeze(0)
            action = action + noise
        return torch.clamp(action, 0, 1)

    def eval_Q_value(self, state, action):
        """Return only ``Q`` from ``forward(state, action)``."""
        _, Q, _ = self.forward(state, action)
        return Q

    def eval_state_value(self, state):
        """Return only ``V`` from ``forward(state, None)``."""
        _, _, V = self.forward(state, None)
        return V
class Actor(nn.Module):
    """Two-headed MLP actor with scripted custom actions and ``OUNoise`` exploration.

    The first head is squashed with a sigmoid, the second with tanh; the two
    are concatenated into a ``(batch, 2)`` action tensor.

    NOTE: ``getCustomAction`` and ``stepCount`` served an earlier, currently
    disabled variant of ``select_action`` that returned custom actions while
    ``config['customExploreFlag']`` was set and ``stepCount`` had not yet
    exceeded ``config['customExploreSteps']``.
    """

    def __init__(self, input_size, hidden_size, output_size, config):
        """Create the layers, the noise process and store the configuration.

        Args:
            input_size: number of input features of ``linear1``.
            hidden_size: width of both hidden layers.
            output_size: size handed to ``OUNoise``.
            config: mapping stored as ``self.config``; ``'device'`` and
                ``'particleType'`` are read by the methods below.
        """
        super(Actor, self).__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.linear2 = nn.Linear(hidden_size, hidden_size)
        self.linear3_1 = nn.Linear(hidden_size, 1)
        self.linear3_2 = nn.Linear(hidden_size, 1)
        self.apply(xavier_init)
        self.noise = OUNoise(output_size, seed=1, mu=0.0, theta=0.15,
                             max_sigma=0.3, min_sigma=0.05, decay_period=10000)
        self.noise.reset()
        self.config = config
        self.stepCount = 0

    def forward(self, state):
        """Map a state tensor to ``[sigmoid head, tanh head]`` along dim 1."""
        x = F.relu(self.linear1(state))
        x = F.relu(self.linear2(x))
        action0 = torch.sigmoid(self.linear3_1(x))
        action1 = torch.tanh(self.linear3_2(x))
        return torch.cat([action0, action1], dim=1)

    def getCustomAction(self):
        """Return a scripted action for ``config['particleType']`` as a ``(1, k)`` tensor.

        ``'FULLCONTROL'`` and ``'SLIDER'`` pick uniformly among three fixed
        actions; ``'VANILLASP'`` and ``'CIRCLER'`` always return ``[1]``.

        Raises:
            ValueError: if the particle type is not one of the four above.
        """
        particle_type = self.config['particleType']
        if particle_type == 'FULLCONTROL':
            candidates = ([1, 0], [1, -1], [1, 1])
            action = np.array(candidates[np.random.randint(0, 3)])
        elif particle_type in ('VANILLASP', 'CIRCLER'):
            action = np.array([1])
        elif particle_type == 'SLIDER':
            candidates = ([1], [0], [-1])
            action = np.array(candidates[np.random.randint(0, 3)])
        else:
            # Fail with a clear message instead of an UnboundLocalError.
            raise ValueError("Unknown particleType: {!r}".format(particle_type))
        return torch.tensor(action, dtype=torch.float32,
                            device=self.config['device']).unsqueeze(0)

    def select_action(self, state, noiseFlag=False):
        """Return the policy action, optionally with bounded exploration noise.

        Args:
            state: tensor accepted by ``forward``.
            noiseFlag: when true, one sample from ``self.noise.get_noise()``
                is added, the result is clamped to ``[-1, 1]`` and the first
                component is additionally floored at 0.
        """
        action = self.forward(state)
        if not noiseFlag:
            return action
        # Use this instance's configuration rather than a module-level ``config``.
        noise = torch.tensor(self.noise.get_noise(), dtype=torch.float32,
                             device=self.config['device']).unsqueeze(0)
        action = torch.clamp(action + noise, -1, 1)
        # The first component comes from a sigmoid head, so keep it
        # non-negative for every row of the batch, not just the first one.
        action[:, 0].clamp_(min=0.0)
        return action