def __init__(self, in_channels=4):
    """Create three convolutions followed by one fully connected layer.

    Every layer is passed through ``layer_init`` right after construction.

    Args:
        in_channels: Channel count expected by the first convolution.
    """
    super(NatureConvBody, self).__init__()
    self.feature_dim = 512

    first_conv = nn.Conv2d(in_channels, 32, kernel_size=8, stride=4)
    self.conv1 = layer_init(first_conv)

    second_conv = nn.Conv2d(32, 64, kernel_size=4, stride=2)
    self.conv2 = layer_init(second_conv)

    third_conv = nn.Conv2d(64, 64, kernel_size=3, stride=1)
    self.conv3 = layer_init(third_conv)

    # The flattened size 7 * 7 * 64 is hard-coded. With the unpadded
    # kernel/stride settings above it corresponds to an 84x84 input --
    # NOTE(review): confirm against the caller's observation size.
    flat_features = 7 * 7 * 64
    self.fc4 = layer_init(nn.Linear(flat_features, self.feature_dim))
def __init__(self, body: nn.Module, action_dim: int, actfn=LeakyReLU):
    """Attach a scalar ``value`` head and an ``advantage`` head to ``body``.

    Args:
        body: Feature extractor; must expose ``feature_dim``, which sets the
            input width of both heads.
        action_dim: Output width of the ``advantage`` head.
        actfn: Accepted for interface compatibility; this constructor does
            not read it.
    """
    # Both base initialisers are invoked explicitly, in this order.
    BaseNet.__init__(self)
    super(CategoricalDuelingDQNet, self).__init__()

    self.action_dim = action_dim
    self.body = body

    value_head = nn.Linear(body.feature_dim, 1)
    self.value = layer_init(value_head)

    advantage_head = nn.Linear(body.feature_dim, action_dim)
    self.advantage = layer_init(advantage_head)
def __init__(self, body, action_dim, num_options):
    """Create three linear heads on top of ``body`` and move to the device.

    Args:
        body: Feature extractor; must expose ``feature_dim``, the input
            width of every head.
        action_dim: Number of actions; ``fc_pi`` emits
            ``num_options * action_dim`` values.
        num_options: Number of options; output width of ``fc_q`` and
            ``fc_beta``.
    """
    super(OptionCriticNet, self).__init__()

    q_head = nn.Linear(body.feature_dim, num_options)
    self.fc_q = layer_init(q_head)

    pi_head = nn.Linear(body.feature_dim, num_options * action_dim)
    self.fc_pi = layer_init(pi_head)

    beta_head = nn.Linear(body.feature_dim, num_options)
    self.fc_beta = layer_init(beta_head)

    self.num_options = num_options
    self.action_dim = action_dim
    # The body is registered after the heads, so it follows them in
    # ``parameters()`` order.
    self.body = body
    self.to(Config.DEVICE)
def __init__(self, state_dim, action_dim, hidden_units=(64, 64),
             gate=F.relu):
    """Create two linear layers, the second widened by ``action_dim``.

    Args:
        state_dim: Input width of ``fc1``.
        action_dim: Extra input width added to ``fc2``.
        hidden_units: Exactly two sizes: the output widths of ``fc1`` and
            ``fc2``.
        gate: Callable stored as ``self.gate``.
    """
    super(TwoLayerFCBodyWithAction, self).__init__()
    first_width, second_width = hidden_units

    self.fc1 = layer_init(nn.Linear(state_dim, first_width))

    # fc2 takes first_width + action_dim inputs -- presumably the action is
    # concatenated to fc1's output in forward(); confirm there.
    fc2_inputs = first_width + action_dim
    self.fc2 = layer_init(nn.Linear(fc2_inputs, second_width))

    self.gate = gate
    self.feature_dim = second_width
def __init__(self, input_dim, output_dim, hidden_units=(32, ),
             gate=F.relu):
    """Create a stack of linear layers plus policy and value heads.

    Args:
        input_dim: Width of the input to the first hidden layer.
        output_dim: Output width of ``policy_head_layer``.
        hidden_units: Sizes of the hidden layers. Any sequence (tuple or
            list) is accepted. If it is empty, no hidden layers are
            created and both heads take ``input_dim`` inputs.
        gate: Callable stored as ``self.gate``.
    """
    super(FullyConnectedFeedForward, self).__init__()

    # Coerce to a tuple so a list (e.g. loaded from a config file) does not
    # raise TypeError when concatenated with the one-element tuple.
    dimensions = (input_dim, ) + tuple(hidden_units)

    self.layers = nn.ModuleList([
        layer_init(nn.Linear(dim_in, dim_out))
        for dim_in, dim_out in zip(dimensions[:-1], dimensions[1:])
    ])

    # dimensions[-1] equals hidden_units[-1] whenever hidden layers exist,
    # and falls back to input_dim when there are none, instead of raising
    # IndexError.
    # NOTE(review): forward() is not visible here -- confirm it handles an
    # empty ``self.layers`` before relying on hidden_units=().
    head_inputs = dimensions[-1]
    self.policy_head_layer = layer_init(nn.Linear(head_inputs, output_dim))
    self.value_head_layer = layer_init(nn.Linear(head_inputs, 1))

    self.gate = gate
def __init__(self, action_dim, num_quantiles, body):
    """Add one linear head to ``body`` and move the network to the device.

    Args:
        action_dim: Number of actions.
        num_quantiles: Number of quantiles; the head emits
            ``action_dim * num_quantiles`` values.
        body: Feature extractor; must expose ``feature_dim``, the input
            width of the head.
    """
    super(QuantileNet, self).__init__()

    head_outputs = action_dim * num_quantiles
    quantile_head = nn.Linear(body.feature_dim, head_outputs)
    self.fc_quantiles = layer_init(quantile_head)

    self.action_dim = action_dim
    self.num_quantiles = num_quantiles
    self.body = body
    self.to(Config.DEVICE)
def __init__(self, action_dim, num_atoms, body):
    """Add one linear head to ``body`` and move the network to the device.

    Args:
        action_dim: Number of actions.
        num_atoms: Number of atoms; the head emits
            ``action_dim * num_atoms`` values.
        body: Feature extractor; must expose ``feature_dim``, the input
            width of the head.
    """
    super(CategoricalNet, self).__init__()

    head_outputs = action_dim * num_atoms
    categorical_head = nn.Linear(body.feature_dim, head_outputs)
    self.fc_categorical = layer_init(categorical_head)

    self.action_dim = action_dim
    self.num_atoms = num_atoms
    self.body = body
    self.to(Config.DEVICE)
def __init__(self, state_dim, hidden_units=(64, 64), gate=F.relu):
    """Create a stack of linear layers sized by ``hidden_units``.

    Args:
        state_dim: Input width of the first layer.
        hidden_units: Output width of each successive layer. Any sequence
            (tuple or list) is accepted. If it is empty, no layers are
            created and ``feature_dim`` equals ``state_dim``.
        gate: Callable stored as ``self.gate``.
    """
    super(FCBody, self).__init__()

    # Coerce to a tuple so a list (e.g. loaded from a config file) does not
    # raise TypeError when concatenated with the one-element tuple.
    dims = (state_dim, ) + tuple(hidden_units)

    # Consecutive pairs of ``dims`` give each layer's (in, out) widths.
    self.layers = nn.ModuleList([
        layer_init(nn.Linear(dim_in, dim_out))
        for dim_in, dim_out in zip(dims[:-1], dims[1:])
    ])

    self.gate = gate
    self.feature_dim = dims[-1]
def __init__(self, state_dim, action_dim, phi_body, actor_body,
             critic_body):
    """Assemble the three bodies, two linear heads and parameter lists.

    Any body passed as ``None`` is replaced by a ``DummyBody``.

    Args:
        state_dim: Passed to ``DummyBody`` when ``phi_body`` is ``None``;
            otherwise not read.
        action_dim: Output width of ``fc_action``.
        phi_body: First body, or ``None``.
        actor_body: Body feeding ``fc_action``, or ``None`` for a
            ``DummyBody`` sized by ``phi_body.feature_dim``.
        critic_body: Body feeding ``fc_critic``, or ``None`` for a
            ``DummyBody`` sized by ``phi_body.feature_dim``.
    """
    super(ActorCriticNet, self).__init__()

    # phi_body is resolved first because the other two defaults read its
    # feature_dim.
    phi_body = DummyBody(state_dim) if phi_body is None else phi_body
    if actor_body is None:
        actor_body = DummyBody(phi_body.feature_dim)
    if critic_body is None:
        critic_body = DummyBody(phi_body.feature_dim)

    self.phi_body = phi_body
    self.actor_body = actor_body
    self.critic_body = critic_body

    # Both heads pass 1e-3 as layer_init's second argument -- presumably a
    # weight scale; TODO confirm against layer_init.
    action_head = nn.Linear(actor_body.feature_dim, action_dim)
    self.fc_action = layer_init(action_head, 1e-3)
    critic_head = nn.Linear(critic_body.feature_dim, 1)
    self.fc_critic = layer_init(critic_head, 1e-3)

    # Plain lists of parameters, grouped by sub-network.
    self.actor_params = [
        *self.actor_body.parameters(),
        *self.fc_action.parameters(),
    ]
    self.critic_params = [
        *self.critic_body.parameters(),
        *self.fc_critic.parameters(),
    ]
    self.phi_params = [*self.phi_body.parameters()]
def __init__(self, in_channels=4):
    """Create two initialised convolutions and record the feature size.

    Args:
        in_channels: Channel count expected by the first convolution.
    """
    super(DDPGConvBody, self).__init__()

    # The flattened size is hard-coded as 39 * 39 * 32. With the unpadded
    # kernel/stride settings below it corresponds to an 84x84 input --
    # NOTE(review): confirm against the caller's observation size.
    self.feature_dim = 39 * 39 * 32

    first_conv = nn.Conv2d(in_channels, 32, kernel_size=3, stride=2)
    self.conv1 = layer_init(first_conv)

    second_conv = nn.Conv2d(32, 32, kernel_size=3)
    self.conv2 = layer_init(second_conv)
def __init__(self, state_dim, action_dim, hidden_units, gate=F.relu):
    """Create two linear layers of equal output width.

    Args:
        state_dim: Input width of ``fc_s``.
        action_dim: Input width of ``fc_a``.
        hidden_units: Output width shared by both layers.
        gate: Callable stored as ``self.gate``.
    """
    super(OneLayerFCBodyWithAction, self).__init__()

    state_layer = nn.Linear(state_dim, hidden_units)
    self.fc_s = layer_init(state_layer)

    action_layer = nn.Linear(action_dim, hidden_units)
    self.fc_a = layer_init(action_layer)

    self.gate = gate
    # Twice the per-layer width -- presumably the two outputs are
    # concatenated in forward(); confirm there.
    self.feature_dim = hidden_units * 2