def DQN(state_dim, action_dim, n_actions, gamma, layers=(32, ), initial_params=None, target_update_freq=500):
    """Build a DQN agent: an online Q-network and its Bellman operator.

    Constructs two MLP Q-functions — the online network ``Q`` and a target
    network ``Q_target`` — and wraps the target in a ``DQNOperator`` that
    refreshes it every ``target_update_freq`` steps.

    Parameters
    ----------
    state_dim : int
        Dimensionality of the state space.
    action_dim : int
        Dimensionality of the action space (forwarded to the operator).
    n_actions : int
        Number of discrete actions (output units of the Q-network).
    gamma : float
        Discount factor.
    layers : sequence of int, optional
        Hidden-layer sizes for both MLPs (normalized to a tuple).
    initial_params : optional
        Initial weights passed to both networks; if None, each network is
        randomly initialized and the target is then synced to the online net.
    target_update_freq : int, optional
        How often (in steps) the operator refreshes the target network.

    Returns
    -------
    (MLPQFunction, DQNOperator)
        The online Q-function and the DQN Bellman operator holding the target.
    """
    # Normalize once so both networks are built with an identical layer spec.
    hidden = tuple(layers)
    Q = MLPQFunction(state_dim, n_actions, layers=hidden, initial_params=initial_params)
    Q_target = MLPQFunction(state_dim, n_actions, layers=hidden, initial_params=initial_params)
    if initial_params is None:
        # Sync the target to the online net with a COPY, not an alias:
        # the original `Q_target._w = Q._w` shared one array, so the target
        # would track Q continuously, defeating target_update_freq.
        Q_target._w = Q._w.copy()
    operator = DQNOperator(state_dim, action_dim, gamma, Q_target, target_update_freq)
    return Q, operator
n_actions = 10 # torch.manual_seed(300) # np.random.seed(300) # Create BellmanOperator operator = MellowBellmanOperator(kappa, tau, xi, gamma, state_dim, action_dim) operator2 = mellow(kappa, tau, xi, gamma, state_dim, action_dim) # Create Q Function layers = [l1] if l2 > 0: layers.append(l2) Q = MLPQFunction(state_dim, n_actions, layers=layers) Q2 = mlp(state_dim, n_actions, layers) Q._w = np.random.randn(Q._w.size) w = Q._w w = torch.randn(w.size).numpy() Q._w = w Q2._w = w weights = torch.randn(5, w.shape[0], requires_grad=True) samples = np.random.randn(10, 1 + state_dim + action_dim + 1 + state_dim + 1) samples[:, -1] = 0. samples[:, action_dim + state_dim] = np.random.random_integers( 0, n_actions - 1, size=samples.shape[0]) val = Q.value_weights(samples[:, 1:1 + state_dim + action_dim], weights.detach().numpy(),