def forward(self, x):
    """Run the conv actor-critic head.

    Returns the value estimate, a sampled action, the log-probability
    of that action, and the action mean.
    """
    # actor trunk: three conv layers, flatten, one fully-connected layer
    h = F.relu(self.conv1(x))
    h = F.relu(self.conv2(h))
    h = F.relu(self.conv3(h))
    h = h.view(-1, 32 * 7 * 7)  # flatten conv features per sample
    h = F.relu(self.linear1(h))

    mean = self.mu(h)  # (N, num_actions)
    logstd = self.logstd.expand_as(mean)
    std = logstd.exp()
    # sample from the diagonal Gaussian policy
    action = torch.normal(mean, std)

    # critic shares the feature vector with the actor
    v = self.critic_linear(h)

    # log-probability of the sampled action under the policy
    logprob = log_normal_density(action, mean, std=std, log_std=logstd)
    return v, action, logprob, mean
def forward(self, x):
    """Returns value estimation, action, log_action_prob, mean.

    The sampled Gaussian action is squashed per-dimension:
    tanh for steering, sigmoid for acceleration and braking.
    """
    # actor
    act = torch.relu(self.act_fc1(x))
    act = torch.relu(self.act_fc2(act))
    mean = self.mu(act)  # N, num_actions
    logstd = self.logstd.expand_as(mean)
    std = torch.exp(logstd)
    # Sample, then squash with torch ops. .detach() replaces the
    # deprecated .data and the numpy round-trip, which silently moved
    # the action to CPU; this keeps device and float32 dtype intact.
    raw = torch.normal(mean, std).detach()[0]
    action = torch.stack((
        torch.tanh(raw[0]),     # steering in (-1, 1)
        torch.sigmoid(raw[1]),  # acceleration in (0, 1)
        torch.sigmoid(raw[2]),  # braking in (0, 1)
    ))
    action = action.view(1, -1)

    # value
    v = torch.relu(self.value_fc1(x))
    v = torch.relu(self.value_fc2(v))
    v = self.value_fc3(v)

    # NOTE(review): this evaluates the *squashed* action under the
    # unsquashed Gaussian with no tanh/sigmoid change-of-variables
    # correction. Kept as-is to preserve training behavior, but it is
    # statistically inconsistent -- confirm intent.
    logprob = log_normal_density(action, mean, std=std, log_std=logstd)
    return v, action, logprob, mean
def forward(self, x, goal, speed):
    """Returns value estimation, action, log_action_prob, mean."""
    # actor: conv feature extractor, then fuse with goal and speed
    a = F.relu(self.act_fea_cv1(x))
    a = F.relu(self.act_fea_cv2(a))
    a = a.view(a.shape[0], -1)  # flatten conv features per sample
    a = F.relu(self.act_fc1(a))
    a = torch.cat((a, goal, speed), dim=-1)
    a = F.relu(self.act_fc2(a))
    # torch.sigmoid / torch.tanh replace the deprecated F.sigmoid / F.tanh
    mean1 = torch.sigmoid(self.actor1(a))  # first action dim in (0, 1)
    mean2 = torch.tanh(self.actor2(a))     # second action dim in (-1, 1)
    mean = torch.cat((mean1, mean2), dim=-1)
    logstd = self.logstd.expand_as(mean)
    std = torch.exp(logstd)
    action = torch.normal(mean, std)

    # action prob on log scale
    logprob = log_normal_density(action, mean, std=std, log_std=logstd)

    # critic: separate conv trunk, same goal/speed fusion
    v = F.relu(self.crt_fea_cv1(x))
    v = F.relu(self.crt_fea_cv2(v))
    v = v.view(v.shape[0], -1)
    v = F.relu(self.crt_fc1(v))
    v = torch.cat((v, goal, speed), dim=-1)
    v = F.relu(self.crt_fc2(v))
    v = self.critic(v)

    return v, action, logprob, mean
def forward(self, x, goal, speed):
    """Returns action, log_action_prob, mean(sigmoid, tanh)."""
    # actor: conv feature extractor, then fuse with goal and speed
    a = F.relu(self.act_fea_cv1(x))
    a = F.relu(self.act_fea_cv2(a))
    a = a.view(a.shape[0], -1)  # flatten conv features per sample
    a = F.relu(self.act_fc1(a))
    a = torch.cat((a, goal, speed), dim=-1)
    a = F.relu(self.act_fc2(a))
    # torch.sigmoid / torch.tanh replace the deprecated F.sigmoid / F.tanh
    mean1 = torch.sigmoid(self.actor1(a)) * self.max_action  # (0, max_action)
    mean2 = torch.tanh(self.actor2(a)) * self.max_action     # (-max_action, max_action)
    mean = torch.cat((mean1, mean2), dim=-1)
    logstd = self.logstd.expand_as(mean)
    std = torch.exp(logstd)
    action = torch.normal(mean, std)

    # action prob on log scale
    logprob = log_normal_density(action, mean, std=std, log_std=logstd)
    return action, logprob, mean
def evaluate_actions(self, x, action):
    """Score given actions under the current policy.

    Returns the value estimate, the log-probability of *action*,
    and the mean entropy of the action distribution.
    """
    v, _, _, mean = self.forward(x)
    logstd = self.logstd.expand_as(mean)
    std = logstd.exp()
    logprob = log_normal_density(action, mean, log_std=logstd, std=std)
    # per-dimension diagonal-Gaussian entropy 0.5*log(2*pi*e) + logstd,
    # summed over action dims, averaged over the batch
    dist_entropy = (0.5 + 0.5 * math.log(2 * math.pi) + logstd).sum(-1).mean()
    return v, logprob, dist_entropy
def evaluate_actions(self, x, goal, speed, action):
    """Return the log-probability of *action* and the distribution entropy."""
    _, _, mean = self.forward(x, goal, speed)
    logstd = self.logstd.expand_as(mean)
    std = logstd.exp()
    logprob = log_normal_density(action, mean, log_std=logstd, std=std)
    # per-dimension diagonal-Gaussian entropy 0.5*log(2*pi*e) + logstd,
    # summed over action dims, averaged over the batch
    dist_entropy = (0.5 + 0.5 * math.log(2 * math.pi) + logstd).sum(-1).mean()
    return logprob, dist_entropy
def evaluate_actions(self, x, action):
    """Score given actions under the current policy.

    Returns the value estimate, the log-probability of *action*,
    and the mean entropy of the action distribution.
    """
    v, _, _, mean = self.forward(x)
    logstd = self.logstd.expand_as(mean)
    std = logstd.exp()
    logprob = log_normal_density(action, mean, log_std=logstd, std=std)
    # per-dimension diagonal-Gaussian entropy 0.5*log(2*pi*e) + logstd,
    # summed over action dims, averaged over the batch
    dist_entropy = (0.5 + 0.5 * math.log(2 * math.pi) + logstd).sum(-1).mean()
    return v, logprob, dist_entropy


# if __name__ == '__main__':
#     from torch.autograd import Variable
#     net = MLPPolicy(3, 2)
#     observation = Variable(torch.randn(2, 3))
#     v, action, logprob, mean = net.forward(observation)
#     print(v)
def forward(self, x):
    """Run the tanh MLP actor-critic head.

    Returns the value estimate, a sampled action, the log-probability
    of that action, and the action mean.
    """
    # actor: two tanh hidden layers feeding the action mean
    h = torch.tanh(self.act_fc1(x))
    h = torch.tanh(self.act_fc2(h))
    mean = self.mu(h)  # (N, num_actions)
    logstd = self.logstd.expand_as(mean)
    std = logstd.exp()
    # sample from the diagonal Gaussian policy
    action = torch.normal(mean, std)

    # critic: independent two-layer tanh network
    v = torch.tanh(self.value_fc1(x))
    v = torch.tanh(self.value_fc2(v))
    v = self.value_fc3(v)

    # log-probability of the sampled action under the policy
    logprob = log_normal_density(action, mean, std=std, log_std=logstd)
    return v, action, logprob, mean