def DQN(state_dim, action_dim, n_actions, gamma, layers=(32, ), initial_params=None, target_update_freq=500):
    """Build the pieces of a DQN learner: an online Q-network, a target
    Q-network, and the Bellman operator that ties them together.

    Parameters
    ----------
    state_dim : int
        Dimensionality of the state vector.
    action_dim : int
        Dimensionality of the action encoding passed to the operator.
    n_actions : int
        Number of discrete actions (output heads of the Q-network).
    gamma : float
        Discount factor.
    layers : sequence of int, optional
        Hidden-layer sizes for both MLPs.
    initial_params : optional
        Initial weight vector for both networks; when None the two
        networks are synchronised by sharing the online weights.
    target_update_freq : int, optional
        How often (in steps) the operator refreshes the target network.

    Returns
    -------
    (online_q, dqn_operator) : tuple
        The online Q-function and the DQNOperator driving its updates.
    """
    online_q = MLPQFunction(state_dim, n_actions, layers=layers,
                            initial_params=initial_params)
    target_q = MLPQFunction(state_dim, n_actions, layers=tuple(layers),
                            initial_params=initial_params)

    if initial_params is None:
        # NOTE(review): this aliases the target's weight vector to the online
        # network's (no copy is taken) — presumably DQNOperator rebinds the
        # target's weights on its update schedule; confirm `_w` semantics.
        target_q._w = online_q._w

    dqn_operator = DQNOperator(state_dim, action_dim, gamma, target_q,
                               target_update_freq)
    return online_q, dqn_operator
elif env == "three-room-gw": mdps = [ ThreeRoomGridworld(np.array([gw_size, gw_size]), door_x=(d1, d2)) for (d1, d2) in zip(doors, doors2) ] eval_states = [np.array([0., 0.]) for _ in range(10)] state_dim = mdps[0].state_dim action_dim = 1 n_actions = mdps[0].action_space.n K = n_basis**2 # Create BellmanOperator operator = MellowBellmanOperator(kappa, tau, xi, mdps[0].gamma, K, action_dim) # Create Q Function Q = MLPQFunction(K, n_actions, layers=None) # Create RBFs rbf = build_features_gw_state(gw_size, n_basis, state_dim) def run(mdp, seed=None): return learn(mdp, Q, operator, max_iter=max_iter, buffer_size=buffer_size, batch_size=batch_size, alpha_adam=alpha_adam, alpha_sgd=alpha_sgd, lambda_=lambda_, n_weights=n_weights,
] mdps = [MountainCarEnv(vel[i]) for i in range(n_runs)] n_eval_episodes = 5 state_dim = mdps[0].state_dim action_dim = 1 n_actions = mdps[0].action_space.n # Create BellmanOperator operator = MellowBellmanOperator(kappa, tau, xi, mdps[0].gamma, state_dim, action_dim) # Create Q Function layers = [l1] if l2 > 0: layers.append(l2) Q = MLPQFunction(state_dim, n_actions, layers=layers) def run(mdp, seed=None): return learn(mdp, Q, operator, max_iter=max_iter, buffer_size=buffer_size, batch_size=batch_size, alpha_adam=alpha_adam, alpha_sgd=alpha_sgd, lambda_=lambda_, n_weights=n_weights, train_freq=train_freq, eval_freq=eval_freq,
# --- Problem constants -------------------------------------------------------
gamma = 0.99
state_dim = 2
action_dim = 1
n_actions = 10
# torch.manual_seed(300)
# np.random.seed(300)

# Two implementations of the mellow Bellman operator, built with identical
# hyper-parameters so their outputs can be cross-checked against each other.
# (kappa, tau, xi and the layer sizes l1/l2 are defined earlier in the file.)
operator = MellowBellmanOperator(kappa, tau, xi, gamma, state_dim, action_dim)
operator2 = mellow(kappa, tau, xi, gamma, state_dim, action_dim)

# Q-function architecture: one hidden layer, plus a second when l2 > 0.
layers = [l1]
if l2 > 0:
    layers.append(l2)
Q = MLPQFunction(state_dim, n_actions, layers=layers)
Q2 = mlp(state_dim, n_actions, layers)

# Give both Q-functions the *same* random weight vector so they are directly
# comparable.  The first randn draw is immediately overwritten below; it is
# kept to preserve the original RNG stream consumption.
Q._w = np.random.randn(Q._w.size)
w = Q._w
w = torch.randn(w.size).numpy()
Q._w = w
Q2._w = w
weights = torch.randn(5, w.shape[0], requires_grad=True)

# Synthetic transition batch of 10 rows.  Column layout appears to be
# [t, s (state_dim), a (action_dim), r, s' (state_dim), absorbing] — TODO
# confirm against the operator's sample parser.  The last column (absorbing
# flag) is zeroed, and the action column is filled with random action indices.
samples = np.random.randn(10, 1 + state_dim + action_dim + 1 + state_dim + 1)
samples[:, -1] = 0.
# FIX: np.random.random_integers is deprecated (and removed from recent
# NumPy).  randint(0, n_actions) draws the same inclusive range [0, n_actions-1]
# (random_integers(lo, hi) was defined as randint(lo, hi + 1)).
samples[:, action_dim + state_dim] = np.random.randint(
    0, n_actions, size=samples.shape[0])