def __init__(self, timestep, obs_dim, hidden_sizes, z_dim, c_dim, device=None):
    super(CPC, self).__init__()
    self.timestep = timestep
    self.c_dim = c_dim
    self.z_dim = z_dim
    self.device = device
    # Non-linear encoder: maps raw observations to latent codes z.
    # (Assumes module-level `import torch.nn as nn`, `import torch.nn.functional as F`,
    # and an `mlp` builder; a sketch of `mlp` follows this method.)
    self.encoder = mlp([obs_dim] + list(hidden_sizes) + [z_dim], nn.ReLU, nn.Identity).to(device)
    # Autoregressive model: summarizes past latents z_{<=t} into a context c_t.
    self.gru = nn.GRU(z_dim, c_dim, num_layers=1, bidirectional=False, batch_first=True).to(device)
    # One linear head per prediction step k, mapping context c_t to a
    # prediction of the future latent z_{t+k}.
    self.Wk = nn.ModuleList([nn.Linear(c_dim, z_dim) for _ in range(timestep)]).to(device)
    self.softmax = F.softmax
    self.lsoftmax = F.log_softmax

    def _weights_init(m):
        if isinstance(m, nn.Linear):
            nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        if isinstance(m, nn.Conv1d):
            nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        elif isinstance(m, nn.BatchNorm1d):
            nn.init.constant_(m.weight, 1)
            nn.init.constant_(m.bias, 0)

    # Initialize the GRU weight matrices (biases keep PyTorch defaults).
    for name, param in self.gru.named_parameters():
        if 'weight' in name:
            nn.init.kaiming_normal_(param, mode='fan_out', nonlinearity='relu')
    self.apply(_weights_init)
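# Assumption: `mlp` is not defined in this excerpt. The sketch below follows
# the common SpinningUp-style builder and matches how `mlp` is called above;
# the real repo may define it differently. The import may already exist at
# the top of the module.
import torch.nn as nn

def mlp(sizes, activation, output_activation=nn.Identity):
    # Stack a Linear layer for each consecutive pair in `sizes`,
    # interleaving `activation` and ending with `output_activation`.
    layers = []
    for j in range(len(sizes) - 1):
        act = activation if j < len(sizes) - 2 else output_activation
        layers += [nn.Linear(sizes[j], sizes[j + 1]), act()]
    return nn.Sequential(*layers)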
def __init__(self, obs_dim, act_dim, hidden_sizes, activation, output_activation=nn.Identity):
    super().__init__()
    self.net = mlp([obs_dim] + list(hidden_sizes) + [act_dim], activation, output_activation)
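# Hedged usage sketch. `CPC` is named in the first __init__'s super() call;
# the class holding the second __init__ is not named in this excerpt, so
# `MLPActor` is a hypothetical name, and all dimensions below are
# illustrative assumptions.
import torch
import torch.nn as nn

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
cpc = CPC(timestep=3, obs_dim=17, hidden_sizes=(256, 256),
          z_dim=64, c_dim=32, device=device)
obs_seq = torch.randn(8, 100, 17, device=device)  # (batch, seq_len, obs_dim)
z = cpc.encoder(obs_seq)                          # latents: (batch, seq_len, z_dim)
c, _ = cpc.gru(z)                                 # contexts: (batch, seq_len, c_dim)
pred = cpc.Wk[0](c[:, -1])                        # predicted z_{t+1}: (batch, z_dim)

actor = MLPActor(obs_dim=17, act_dim=6, hidden_sizes=(256, 256),
                 activation=nn.ReLU)              # hypothetical class name
action = actor.net(obs_seq[:, -1])                # actions: (batch, act_dim)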