def forward(self, s):
    """Shared conv trunk followed by a policy head and a value head.

    Args:
        s: raw state tensor; ``self.preprocess`` is applied to it first.

    Returns:
        tuple: ``(policy, value)`` where ``policy`` is a softmax
        distribution over actions and ``value`` is a tanh-squashed
        estimate per batch element.
    """
    x = self.preprocess(s)
    x = F.leaky_relu(self.bn1(self.conv1(x)))
    x = F.leaky_relu(self.bn2(self.conv2(x)))
    x = F.leaky_relu(self.bn3(self.conv3(x)))
    x = F.leaky_relu(self.bn4(self.conv4(x)))
    x = F.leaky_relu(self.bn5(self.conv5(x)))
    x = F.leaky_relu(self.bn6(self.conv6(x)))

    # Policy head.
    policy = F.leaky_relu(self.policy_bn(self.conv_policy(x))).view(x.size(0), -1)
    # BUG FIX: the original applied dropout twice -- once through the
    # self.policy_dropout / self.value_dropout modules and again through
    # F.dropout(..., p=0.3, training=True). The hard-coded training=True
    # forced stochastic dropout even in eval mode, making inference
    # non-deterministic. Keep only the module, which follows train()/eval().
    policy = self.policy_dropout(policy)
    policy = self.softmax(self.linear_policy(policy))

    # Value head.
    value = F.leaky_relu(self.value_bn(self.conv_value(x))).view(x.size(0), -1)
    value = self.value_dropout(value)
    value = F.leaky_relu(self.fc_value(value))
    value = torch.tanh(self.linear_output(value))
    return policy, value
def forward(self, state, action):
    r"""Forward pass for this critic at a given (s, a) pair.

    Args:
        state (torch.tensor): state of the pair to be evaluated.
        action (torch.tensor): action of the pair to be evaluated;
            concatenated into the features after the first hidden layer.

    Returns:
        torch.tensor: the critic's estimate for each pair in the batch.
    """
    h = self.bn0(state)
    h = F.leaky_relu(self.bn1(self.fc1(h)))
    # The action joins the pipeline only after the state has been embedded.
    h = torch.cat([h, action], dim=1)
    for fc, bn in ((self.fc2, self.bn2), (self.fc3, self.bn3)):
        h = F.leaky_relu(bn(fc(h)))
    return self.fc4(h)
def self_graph_att(self, x):
    """Additive self-attention weights over the node axis.

    Args:
        x: tensor of shape (batch, features, nodes); transposed internally
            to (batch, nodes, features).

    Returns:
        Attention weights of shape (batch, nodes, nodes), softmax-normalised
        over the last axis and passed through dropout.
    """
    nodes_first = x.permute(0, 2, 1).contiguous()
    batch, n_nodes, _ = nodes_first.size()
    # Project each node with the learned weights; the reshape below implies
    # wk/wq project down to a single scalar per node.
    key = torch.matmul(nodes_first, self.wk)
    query = torch.matmul(nodes_first, self.wq)
    # Pairwise additive scores: scores[b, i, j] = key[b, i] + query[b, j].
    scores = key.repeat(1, 1, n_nodes).view(batch, n_nodes * n_nodes, 1)
    scores = scores + query.repeat(1, n_nodes, 1)
    scores = scores.squeeze(2).view(batch, n_nodes, -1)
    attention = F.softmax(F.leaky_relu(scores), dim=2)
    return self.dropout(attention)
def forward(self, s):
    """Dueling-head forward pass: Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a)).

    Args:
        s: raw state tensor; ``self.preprocess`` is applied to it first.

    Returns:
        torch.tensor: per-action output values for each batch element.
    """
    x = self.preprocess(s)
    for conv, bn in ((self.conv1, self.bn1), (self.conv2, self.bn2),
                     (self.conv3, self.bn3), (self.conv4, self.bn4),
                     (self.conv5, self.bn5), (self.conv6, self.bn6)):
        x = F.leaky_relu(bn(conv(x)))
    flat = x.view(x.size(0), -1)
    value = self.value(self.value_fc(flat))
    advantage = self.advantage(self.advantage_fc(flat))
    # Centre the advantages so the value/advantage split is identifiable.
    return value + (advantage - advantage.mean(dim=1, keepdim=True))
def forward(self, s):
    """Three-conv trunk followed by a policy head and a value head.

    Args:
        s: raw state tensor; ``self.preprocess`` is applied to it first.

    Returns:
        tuple: ``(policy, value)`` where ``policy`` is a softmax
        distribution and ``value`` is tanh-squashed.
    """
    x = self.preprocess(s)
    for conv, bn in ((self.conv1, self.bn1), (self.conv2, self.bn2),
                     (self.conv3, self.bn3)):
        x = F.leaky_relu(bn(conv(x)))
    batch = x.size(0)
    # Policy head.
    policy = F.leaky_relu(self.policy_bn(self.conv_policy(x))).view(batch, -1)
    policy = self.softmax(self.linear_policy(policy))
    # Value head.
    value = F.leaky_relu(self.value_bn(self.conv_value(x))).view(batch, -1)
    value = torch.tanh(self.linear_output(F.leaky_relu(self.fc_value(value))))
    return policy, value
def forward(self, x):
    """Run six (layerN, layerNb, leaky-relu) stages, then the output layer.

    Args:
        x: input tensor.

    Returns:
        torch.tensor: output of ``self.layer7``.
    """
    stages = ((self.layer1, self.layer1b), (self.layer2, self.layer2b),
              (self.layer3, self.layer3b), (self.layer4, self.layer4b),
              (self.layer5, self.layer5b), (self.layer6, self.layer6b))
    for main, follow in stages:
        x = F.leaky_relu(follow(main(x)))
    return self.layer7(x)
def forward(self, input):
    """Leaky ReLU followed by a fixed sqrt(2) gain.

    Args:
        input: input tensor.

    Returns:
        torch.tensor: ``sqrt(2) * leaky_relu(input, self.negative_slope)``.
    """
    activated = F.leaky_relu(input, negative_slope=self.negative_slope)
    # NOTE(review): the sqrt(2) gain presumably compensates for the
    # activation's variance reduction -- confirm against the model design.
    return math.sqrt(2) * activated
def fused_leaky_relu(input, bias, negative_slope=0.2, scale=2**0.5):
    """Add a per-channel bias, apply leaky ReLU, then rescale.

    Args:
        input: tensor with the channel axis at dim 1.
        bias: 1-D tensor, broadcast over every axis except dim 1.
        negative_slope: slope for negative inputs (default 0.2).
        scale: multiplicative gain applied after the activation.

    Returns:
        torch.tensor: ``scale * leaky_relu(input + bias)``.
    """
    # Reshape bias to (1, C, 1, ..., 1) so it broadcasts along dim 1 only.
    broadcast_shape = (1, -1) + (1,) * (input.ndim - 2)
    shifted = input + bias.view(broadcast_shape)
    return F.leaky_relu(shifted, negative_slope=negative_slope) * scale