def __init__(self, in_dim, size, logstd=0.0, init_scale=1.0, init_bias=0.0):
    """Diagonal-Gaussian distribution head.

    Args:
        in_dim: size of the incoming latent vector.
        size: action dimensionality (number of independent Gaussians).
        logstd: initial log standard deviation, replicated across actions.
        init_scale: weight-init scale forwarded to ``init_weight``.
        init_bias: bias-init value forwarded to ``init_weight``.
    """
    super(DiagGaussianPdType, self).__init__()
    self.in_dim = in_dim
    self.size = size
    self.fc = nn.Linear(in_dim, size)
    # Register logstd as a buffer so it follows .to(device)/.cuda() and is
    # saved in state_dict; the previous plain-tensor attribute was invisible
    # to nn.Module and stayed on the CPU. Shape (1, size): leading batch dim.
    # NOTE(review): if logstd is meant to be learnable (as in OpenAI
    # baselines' DiagGaussianPd), this should be nn.Parameter — confirm.
    self.register_buffer("logstd", torch.full((1, size), float(logstd)))
    init_weight(self.fc, init_scale, init_bias)
def __init__(self, env, network, hiddens=(256,), dueling=True, layer_norm=False, **network_kwargs):
    """Q-network: a shared base feature extractor plus an action-value MLP
    head, and optionally a separate scalar state-value head (dueling).

    Args:
        env: environment; ``env.action_space.n`` gives the number of
            discrete actions.
        network: either a registered builder name (str) or an
            already-constructed base module exposing ``out_dim``.
        hiddens: hidden-layer widths for each MLP head. (Default changed
            from the mutable ``[256]`` to the equivalent tuple ``(256,)``;
            it is only iterated, so behavior is identical.)
        dueling: if True, also build the state-value head.
        layer_norm: insert ``nn.LayerNorm`` after each hidden linear layer.
        **network_kwargs: forwarded to the network builder when ``network``
            is a string.
    """
    super(QNet, self).__init__()
    self.dueling = dueling
    self.num_actions = env.action_space.n
    if isinstance(network, str):
        self.base_net = get_network_builder(network)(
            env.observation_space.shape, **network_kwargs)
    else:
        self.base_net = network

    def build_head(final_dim):
        # One MLP head: [Linear (+LayerNorm) + ReLU] per width in `hiddens`,
        # then a final Linear to `final_dim`. All submodules initialized
        # with sqrt(2) scale, exactly as the two inlined copies did before.
        layers = []
        in_dim = self.base_net.out_dim
        for hidden in hiddens:
            layers.append(nn.Linear(in_dim, hidden))
            if layer_norm:
                layers.append(nn.LayerNorm(hidden))
            layers.append(nn.ReLU())
            in_dim = hidden
        layers.append(nn.Linear(in_dim, final_dim))
        head = nn.Sequential(*layers)
        for m in head.modules():
            init_weight(m, init_scale=np.sqrt(2.0))
        return head

    self.action_layers = build_head(self.num_actions)
    if dueling:
        self.state_layers = build_head(1)
def __init__(self, input_size, convs, **conv_kwargs):
    """Convolution-only feature extractor.

    Args:
        input_size: input shape; the last entry is taken as the channel
            count (presumably (H, W, C) — confirm against ``forward()``).
        convs: iterable of (num_outputs, kernel_size, stride) tuples, one
            per conv layer; each conv is followed by a ReLU.
        **conv_kwargs: forwarded to ``init_weight`` for every submodule.
    """
    super(cnn_convs_only, self).__init__()
    channels = input_size[-1]
    modules = []
    for num_outputs, kernel_size, stride in convs:
        modules.extend([
            nn.Conv2d(channels, num_outputs,
                      kernel_size=kernel_size, stride=stride),
            nn.ReLU(),
        ])
        channels = num_outputs
    self.convs = nn.Sequential(*modules)
    # Probe with a single dummy batch element to discover the flattened
    # output size without hand-computing conv arithmetic.
    probe = self.forward(torch.zeros((1, *input_size)))
    self.out_dim = probe.view(1, -1).size(1)
    # Initialize every submodule's weights.
    for module in self.modules():
        init_weight(module, **conv_kwargs)
def __init__(self, input_size, **conv_kwargs):
    """CNN from the Nature DQN paper (Mnih et al., 2015).

    Args:
        input_size: (H, W, C) input shape.
        **conv_kwargs: forwarded to ``init_weight`` for every submodule.

    Note: in the original, this string appeared *after* the ``super()``
    call, making it a discarded expression rather than a docstring; it is
    now the method's first statement.
    """
    super(nature_cnn, self).__init__()
    in_dim = input_size[-1]  # channel count
    self.convs = nn.Sequential(
        nn.Conv2d(in_dim, 32, kernel_size=8, stride=4),
        nn.ReLU(),
        nn.Conv2d(32, 64, kernel_size=4, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, kernel_size=3, stride=1),
        nn.ReLU(),
    )
    # Spatial output size after the three convs; the last oSize call passes
    # no stride, so it relies on oSize's default (not visible here) to
    # match the stride-1 third conv — TODO confirm.
    out_h = oSize(oSize(oSize(input_size[-3], 8, 4), 4, 2), 3)
    out_w = oSize(oSize(oSize(input_size[-2], 8, 4), 4, 2), 3)
    self.fc = nn.Sequential(
        nn.Linear(out_h * out_w * 64, 512),
        nn.ReLU(),
    )
    self.out_dim = 512
    # Generic init for every submodule, then sqrt(2) re-init of the first
    # conv and first fc only.
    # NOTE(review): the second and third convs keep the generic
    # conv_kwargs init — confirm that re-initializing only convs[0] (and
    # not all conv layers) is intended.
    for m in self.modules():
        init_weight(m, **conv_kwargs)
    init_weight(self.convs[0], init_scale=np.sqrt(2.0))
    init_weight(self.fc[0], init_scale=np.sqrt(2.0))
def __init__(self, input_size, num_layers=2, num_hidden=64, activation=nn.Tanh, layer_norm=False):
    """Stack of fully-connected layers for a policy / q-function approximator.

    Parameters:
    ----------
    input_size: (int,) input size; use ``env.observation_space.shape``.
    num_layers: int, number of fully-connected layers (default: 2).
    num_hidden: int, width of each fully-connected layer (default: 64).
    activation: activation module class (default: ``nn.Tanh``).
    layer_norm: insert ``nn.LayerNorm`` after each linear layer.

    Note: in the original, this string appeared *after* the ``super()``
    call, making it a discarded expression rather than a docstring; it is
    now the method's first statement.
    """
    super(nature_mlp, self).__init__()
    self.out_dim = num_hidden
    layers = []
    # First iteration maps input -> hidden; the rest map hidden -> hidden.
    # (Folds the previously duplicated first-layer code into one loop;
    # the produced layer sequence is identical.)
    in_dim = input_size[0]
    for _ in range(num_layers):
        layers.append(nn.Linear(in_dim, num_hidden))
        if layer_norm:
            layers.append(nn.LayerNorm(num_hidden))
        layers.append(activation())
        in_dim = num_hidden
    self.layers = nn.Sequential(*layers)
    # sqrt(2) init for every submodule.
    for m in self.modules():
        init_weight(m, init_scale=np.sqrt(2.0))
def __init__(self, env, latent, estimate_q=False, vf_latent=None, **tensors):
    """Policy network with a value (or Q-value) head.

    Parameters:
    ----------
    env: RL environment.
    latent: module producing the latent state from which policy
        distribution parameters are inferred; must expose ``out_dim``.
    estimate_q: if True, the value head outputs one value per discrete
        action (a Q-function) and is aliased as ``self.q``.
    vf_latent: latent module for the value function (if None, ``latent``
        is used).
    **tensors: torch tensors for additional attributes such as state or
        mask (the original docstring said "tensorflow"; this is a torch
        module).

    Note: in the original, this string appeared *after* the ``super()``
    call, making it a discarded expression rather than a docstring; it is
    now the method's first statement.
    """
    super(PolicyWithValue, self).__init__()
    self.state = None
    self.initial_state = None
    # Attach any extra attributes (e.g. recurrent state, mask) directly.
    self.__dict__.update(tensors)
    self.latent = latent
    self.vf_latent = vf_latent if vf_latent is not None else latent
    # Probability-distribution type is selected from the action space.
    self.pdtype = make_pdtype(self.latent.out_dim, env.action_space, init_scale=0.01)
    # NOTE(review): the value head is sized from latent.out_dim even when a
    # distinct vf_latent is supplied — if forward() feeds vf_latent features
    # into self.vf, this should be self.vf_latent.out_dim; confirm against
    # forward() before changing.
    if estimate_q:
        assert isinstance(env.action_space, gym.spaces.Discrete)
        self.vf = nn.Linear(self.latent.out_dim, env.action_space.n)
        self.q = self.vf  # Q-head is an alias of the value head
    else:
        self.vf = nn.Linear(self.latent.out_dim, 1)
    # init weight
    torch_utils.init_weight(self.vf)
def __init__(self, in_dim, ncat, init_scale=1.0, init_bias=0.0):
    """Categorical (discrete-action) distribution head.

    Args:
        in_dim: size of the incoming latent vector.
        ncat: number of categories (discrete actions).
        init_scale: weight-init scale forwarded to ``init_weight``.
        init_bias: bias-init value forwarded to ``init_weight``.
    """
    super(CategoricalPdType, self).__init__()
    self.in_dim, self.ncat = in_dim, ncat
    # Single linear layer mapping the latent vector to per-category logits.
    logits_layer = nn.Linear(in_dim, ncat)
    init_weight(logits_layer, init_scale, init_bias)
    self.fc = logits_layer