def forward(self, s):
        """Two-headed (policy, value) forward pass.

        Args:
            s: raw input state; ``self.preprocess`` shapes it for the
               convolutional trunk.

        Returns:
            tuple: ``(policy, value)`` where ``policy`` is a softmax
            distribution over actions and ``value`` is a tanh-squashed
            estimate in [-1, 1].
        """
        x = self.preprocess(s)
        # Shared trunk: six conv -> batch-norm -> leaky-ReLU stages.
        x = F.leaky_relu(self.bn1(self.conv1(x)))
        x = F.leaky_relu(self.bn2(self.conv2(x)))
        x = F.leaky_relu(self.bn3(self.conv3(x)))
        x = F.leaky_relu(self.bn4(self.conv4(x)))
        x = F.leaky_relu(self.bn5(self.conv5(x)))
        x = F.leaky_relu(self.bn6(self.conv6(x)))

        # Policy head.
        policy = F.leaky_relu(self.policy_bn(self.conv_policy(x))).view(
            x.size(0), -1)
        policy = self.policy_dropout(policy)
        # BUGFIX: the original passed training=True, which kept this dropout
        # active even in eval mode (the old "# change training method" note
        # flagged exactly this). Tie it to the module's own mode instead.
        # NOTE(review): this functional dropout stacks on top of
        # self.policy_dropout above — confirm the double dropout is intended.
        policy = F.dropout(policy, p=0.3, training=self.training)
        policy = self.softmax(self.linear_policy(policy))

        # Value head.
        value = F.leaky_relu(self.value_bn(self.conv_value(x))).view(
            x.size(0), -1)
        value = self.value_dropout(value)
        # Same fix as the policy head: respect train/eval mode.
        value = F.dropout(value, p=0.3, training=self.training)
        value = F.leaky_relu(self.fc_value(value))
        value = torch.tanh(self.linear_output(value))

        return policy, value
# ----- Beispiel #2 (example separator) -----
    def forward( self, state, action ) :
        r"""Evaluate this critic at a given (state, action) pair.

        Args:
            state (torch.tensor): batch of states to evaluate
            action (torch.tensor): batch of actions to evaluate

        Returns:
            torch.tensor: unbounded Q-value estimate per row
        """
        # Normalize the raw state before embedding it.
        hidden = self.bn0( state )
        hidden = F.leaky_relu( self.bn1( self.fc1( hidden ) ) )
        # Actions join the network only after the first hidden layer.
        hidden = torch.cat( [hidden, action], dim = 1 )
        hidden = F.leaky_relu( self.bn2( self.fc2( hidden ) ) )
        hidden = F.leaky_relu( self.bn3( self.fc3( hidden ) ) )
        # Final linear layer: no activation, Q-values are unbounded.
        return self.fc4( hidden )
# ----- Beispiel #3 (example separator) -----
 def self_graph_att(self, x):
     """Additive self-attention over graph nodes.

     Args:
         x: tensor laid out as (batch, features, nodes); transposed to
            (batch, nodes, features) internally.

     Returns:
         (batch, nodes, nodes) attention weights, softmax-normalized over
         the last dimension, with dropout applied.

     NOTE(review): the view below only works if ``self.wk`` / ``self.wq``
     project each node to a single scalar (shape (features, 1)) — confirm.
     """
     nodes_first = x.permute(0, 2, 1).contiguous()
     batch, n_nodes, _ = nodes_first.size()
     # Per-node scalar scores for the "key" and "query" roles.
     key = torch.matmul(nodes_first, self.wk)
     query = torch.matmul(nodes_first, self.wq)
     # Pairwise sum via repetition: score[i, j] = key[i] + query[j].
     pairwise = key.repeat(1, 1, n_nodes).view(batch, n_nodes * n_nodes, 1)
     pairwise = pairwise + query.repeat(1, n_nodes, 1)
     scores = pairwise.squeeze(2).view(batch, n_nodes, -1)
     attention = F.softmax(F.leaky_relu(scores), dim=2)
     return self.dropout(attention)
    def forward(self, s):
        """Dueling Q-network forward pass.

        Args:
            s: raw input state; ``self.preprocess`` shapes it for the convs.

        Returns:
            Q-values combining a state value with mean-centered advantages.
        """
        feat = self.preprocess(s)
        # Convolutional feature trunk: conv -> batch-norm -> leaky-ReLU.
        for conv, bn in ((self.conv1, self.bn1), (self.conv2, self.bn2),
                         (self.conv3, self.bn3), (self.conv4, self.bn4),
                         (self.conv5, self.bn5), (self.conv6, self.bn6)):
            feat = F.leaky_relu(bn(conv(feat)))

        flat = feat.view(feat.size(0), -1)
        state_value = self.value(self.value_fc(flat))
        advantage = self.advantage(self.advantage_fc(flat))

        # Dueling combination: subtracting the mean advantage keeps the
        # value/advantage decomposition identifiable.
        return state_value + (advantage -
                              torch.mean(advantage, dim=1, keepdim=True))
# ----- Beispiel #5 (example separator) -----
    def forward(self, s):
        """Shared conv trunk feeding policy and value heads.

        Args:
            s: raw input state; ``self.preprocess`` shapes it for the convs.

        Returns:
            tuple: ``(policy, value)`` — softmax action distribution and a
            tanh-squashed scalar value estimate.
        """
        trunk = self.preprocess(s)
        # Three conv -> batch-norm -> leaky-ReLU stages.
        for conv, bn in ((self.conv1, self.bn1), (self.conv2, self.bn2),
                         (self.conv3, self.bn3)):
            trunk = F.leaky_relu(bn(conv(trunk)))

        batch = trunk.size(0)

        # Policy head: conv projection, flatten, softmax over actions.
        policy = F.leaky_relu(self.policy_bn(self.conv_policy(trunk)))
        policy = self.softmax(self.linear_policy(policy.view(batch, -1)))

        # Value head: conv projection, flatten, squash to [-1, 1].
        value = F.leaky_relu(self.value_bn(self.conv_value(trunk)))
        value = F.leaky_relu(self.fc_value(value.view(batch, -1)))
        value = torch.tanh(self.linear_output(value))

        return policy, value
# ----- Beispiel #6 (example separator) -----
 def forward(self, x):
     """Run seven paired layers with leaky-ReLU after the first six.

     Layers come in (layerN, layerNb) pairs; the final ``layer7`` has no
     companion and no activation, so the output is unbounded.
     """
     for idx in range(1, 7):
         x = getattr(self, "layer%d" % idx)(x)
         x = getattr(self, "layer%db" % idx)(x)
         x = F.leaky_relu(x)
     return self.layer7(x)
# ----- Beispiel #7 (example separator) -----
    def forward(self, input):
        """Leaky ReLU followed by a sqrt(2) output scale.

        Uses the configured ``self.negative_slope`` for the negative branch;
        the sqrt(2) factor rescales the post-activation output.
        """
        activated = F.leaky_relu(input, negative_slope=self.negative_slope)
        return activated * math.sqrt(2)
# ----- Beispiel #8 (example separator) -----
def fused_leaky_relu(input, bias, negative_slope=0.2, scale=2**0.5):
    """Bias-add followed by leaky ReLU and an output scale.

    The bias is reshaped so it broadcasts over dim 1 (the channel axis) of
    an N-dimensional input laid out as (batch, channels, *spatial).

    Args:
        input: tensor with at least 2 dimensions; dim 1 is the channel axis.
        bias: 1-D per-channel bias.
        negative_slope: slope for the negative branch (default 0.2).
        scale: multiplier applied after the activation (default sqrt(2)).

    Returns:
        ``scale * leaky_relu(input + bias)`` with the bias broadcast.
    """
    broadcast_shape = (1, -1) + (1,) * (len(input.shape) - 2)
    shifted = input + bias.view(broadcast_shape)
    return scale * F.leaky_relu(shifted, negative_slope=negative_slope)