def forward(self, inputs):
    """Compute the network output for a single batch of states.

    Args:
        inputs: sequence holding exactly one element, the states fed to
            the first layer.

    Returns:
        The result of the last linear layer (``_fc3``) passed through tanh.

    Raises:
        AssertionError: if ``inputs`` does not contain exactly one element.
    """
    # the actor is deterministic and is conditioned on the states only
    assert len(inputs) == 1, (
        'ERROR> this network expects only one input (states)')
    states = inputs[0]

    if not self._config.useBatchnorm:
        # plain path: linear -> relu, twice, then the tanh-squashed head
        hidden = F.relu(self._fc1(states))
        hidden = F.relu(self._fc2(hidden))
        return F.tanh(self._fc3(hidden))

    # normalised path: `_bn0` is applied to the raw states and `_bn1`/`_bn2`
    # sit between each hidden linear layer and its relu
    hidden = self._bn0(states)
    hidden = F.relu(self._bn1(self._fc1(hidden)))
    hidden = F.relu(self._bn2(self._fc2(hidden)))
    return F.tanh(self._fc3(hidden))
def forward( self, state ) :
    r"""Evaluate the deterministic policy on a state (used for the max Q evaluation)

    Args:
        state (torch.tensor): state used to decide the action

    Returns:
        Output of ``fc3`` squashed by tanh, so every entry lies in [-1, 1]

    """
    hidden = state
    # both hidden layers share the same pattern: linear followed by relu
    for layer in ( self.fc1, self.fc2 ) :
        hidden = F.relu( layer( hidden ) )

    return F.tanh( self.fc3( hidden ) )
def forward( self, observation ) :
    r"""Evaluate the deterministic policy on an observation (used for the max Q evaluation)

    Args:
        observation (torch.tensor): observation used to decide the action

    Returns:
        Output of ``fc3`` squashed by tanh, so every entry lies in [-1, 1]

    """
    # the raw observation goes through `bn0` before reaching any linear layer
    hidden = self.bn0( observation )

    # each hidden stage is linear -> norm -> relu
    for linear, norm in ( ( self.fc1, self.bn1 ), ( self.fc2, self.bn2 ) ) :
        hidden = F.relu( norm( linear( hidden ) ) )

    return F.tanh( self.fc3( hidden ) )
def forward(self, x1, x2, z=None):
    """Run two inputs through the same encoder/decoder with one shared gate.

    Both inputs go through the same modules (``conv1``..``conv5``,
    ``center``, ``dec5``..``dec0``, ``final``). A single gating tensor is
    computed from the pooled bottleneck features of both streams and
    multiplies each stream's bottleneck before decoding.

    Args:
        x1: first input, fed to ``conv1``.
        x2: second input, fed to the same ``conv1``.
        z: accepted but never read by this method.

    Returns:
        Tuple ``(out_1, out_2)``: ``final`` applied to each decoded stream.
    """
    # Encoder: every stage processes stream 1 and then stream 2, and the
    # per-stage outputs are kept for the skip connections.
    enc_a, enc_b = [], []
    cur_a, cur_b = x1, x2
    for stage in (self.conv1, self.conv2, self.conv3, self.conv4, self.conv5):
        cur_a = stage(cur_a)
        cur_b = stage(cur_b)
        enc_a.append(cur_a)
        enc_b.append(cur_b)

    # Bottleneck is applied straight to the deepest encoder output.
    mid_a = self.center(cur_a)
    mid_b = self.center(cur_b)

    # Pool each bottleneck and join them along dim 1. The views hard-code
    # 512 joint values per row going into `mlp1` and 256 gate values per row
    # coming out of `mlp2`.
    pooled_a = self.global_avg_pool(mid_a)
    pooled_b = self.global_avg_pool(mid_b)
    joint = torch.cat((pooled_a, pooled_b), dim=1)
    squeezed = F.tanh(self.mlp1(joint.view(-1, 512)))
    gate = F.sigmoid(self.mlp2(squeezed)).view(-1, 256, 1, 1)

    # The same gate rescales both streams.
    up_a = mid_a * gate
    up_b = mid_b * gate

    # Decoder stages with skips, deepest first: conv5, conv4, conv3, conv2.
    # The conv1 output is not used as a skip.
    skip_stages = (self.dec5, self.dec4, self.dec3, self.dec2)
    for stage, skip_a, skip_b in zip(skip_stages,
                                     reversed(enc_a[1:]),
                                     reversed(enc_b[1:])):
        up_a = stage(torch.cat([up_a, skip_a], 1))
        up_b = stage(torch.cat([up_b, skip_b], 1))

    # The last two decoder stages take no skip input.
    for stage in (self.dec1, self.dec0):
        up_a = stage(up_a)
        up_b = stage(up_b)

    return self.final(up_a), self.final(up_b)
def forward(self, x):
    """Map ``x`` to a probability distribution over the last dimension.

    The input passes through ``l1`` and ``l2``, each followed by tanh, and
    then through ``l3``, whose output is normalised with softmax.

    Args:
        x: input tensor fed to ``l1``. Any number of leading dimensions is
            accepted as long as ``l1`` accepts the tensor.

    Returns:
        Tensor with the shape of ``l3``'s output, with non-negative entries
        that sum to 1 along the last dimension.
    """
    hidden = F.tanh(self.l1(x))
    hidden = F.tanh(self.l2(hidden))
    # Name the softmax axis explicitly. With no `dim`, PyTorch warns and
    # picks dim 0 for 0-D, 1-D and 3-D inputs, which normalises a 3-D input
    # across its leading axis instead of across the logits. `dim=-1` gives
    # the same result as before for 1-D and 2-D inputs.
    return F.softmax(self.l3(hidden), dim=-1)