Ejemplo n.º 1
0
def DQN(state_dim,
        action_dim,
        n_actions,
        gamma,
        layers=(32, ),
        initial_params=None,
        target_update_freq=500):
    """Build an MLP Q-function together with its DQN operator.

    Two ``MLPQFunction`` instances are created with the same dimensions,
    layer sizes and ``initial_params``: the one returned to the caller and
    a second one that is handed to ``DQNOperator`` as its target.

    Args:
        state_dim: Forwarded to both ``MLPQFunction`` and ``DQNOperator``.
        action_dim: Forwarded to ``DQNOperator`` only.
        n_actions: Forwarded to ``MLPQFunction`` as its second argument.
        gamma: Forwarded to ``DQNOperator`` (presumably the discount
            factor -- confirm against ``DQNOperator``).
        layers: Hidden-layer sizes for the MLPs. The target network
            receives ``tuple(layers)``, the returned network receives
            ``layers`` as given.
        initial_params: Optional initial parameters passed to both
            networks.
        target_update_freq: Forwarded to ``DQNOperator`` as its last
            positional argument.

    Returns:
        A ``(Q, operator)`` pair: the primary ``MLPQFunction`` and the
        ``DQNOperator`` wrapping the target network.
    """
    online_net = MLPQFunction(
        state_dim, n_actions, layers=layers, initial_params=initial_params)
    target_net = MLPQFunction(
        state_dim, n_actions, layers=tuple(layers),
        initial_params=initial_params)

    # Without explicit initial parameters the two networks would otherwise
    # keep whatever weights each constructor produced, so the target's
    # ``_w`` is rebound to the very same object as the primary network's.
    # NOTE(review): this shares the object rather than copying it --
    # confirm that aliasing is intended.
    if initial_params is None:
        target_net._w = online_net._w

    return online_net, DQNOperator(
        state_dim, action_dim, gamma, target_net, target_update_freq)
Ejemplo n.º 2
0
    n_actions = 10

    # torch.manual_seed(300)
    # np.random.seed(300)
    # Create BellmanOperator
    operator = MellowBellmanOperator(kappa, tau, xi, gamma, state_dim,
                                     action_dim)
    operator2 = mellow(kappa, tau, xi, gamma, state_dim, action_dim)
    # Create Q Function
    layers = [l1]
    if l2 > 0:
        layers.append(l2)
    Q = MLPQFunction(state_dim, n_actions, layers=layers)
    Q2 = mlp(state_dim, n_actions, layers)

    Q._w = np.random.randn(Q._w.size)

    w = Q._w
    w = torch.randn(w.size).numpy()
    Q._w = w
    Q2._w = w
    weights = torch.randn(5, w.shape[0], requires_grad=True)

    samples = np.random.randn(10,
                              1 + state_dim + action_dim + 1 + state_dim + 1)
    samples[:, -1] = 0.
    samples[:, action_dim + state_dim] = np.random.random_integers(
        0, n_actions - 1, size=samples.shape[0])

    val = Q.value_weights(samples[:, 1:1 + state_dim + action_dim],
                          weights.detach().numpy(),