TwoRoomGridworld(np.array([gw_size, gw_size]), door_x=d) for d in doors ] elif env == "three-room-gw": mdps = [ ThreeRoomGridworld(np.array([gw_size, gw_size]), door_x=(d1, d2)) for (d1, d2) in zip(doors, doors2) ] eval_states = [np.array([0., 0.]) for _ in range(10)] state_dim = mdps[0].state_dim action_dim = 1 n_actions = mdps[0].action_space.n K = n_basis**2 # Create BellmanOperator operator = MellowBellmanOperator(kappa, tau, xi, mdps[0].gamma, K, action_dim) # Create Q Function Q = MLPQFunction(K, n_actions, layers=None) # Create RBFs rbf = build_features_gw_state(gw_size, n_basis, state_dim) def run(mdp, seed=None): return learn(mdp, Q, operator, max_iter=max_iter, buffer_size=buffer_size, batch_size=batch_size, alpha_adam=alpha_adam, alpha_sgd=alpha_sgd,
temp_lake = Lakecomo(None, None, min_env_flow, None, None, seed=seed) temp_inflow = list(como_data.loc[como_data['year'] == 1946, 'in']) temp_mdp = LakeEnv(temp_inflow, demand, temp_lake) # Load tasks tasks_data = utils.load_object(tasks_file) n_eval_episodes = 5 state_dim = temp_mdp.observation_space.shape[0] action_dim = 1 n_actions = temp_mdp.N_DISCRETE_ACTIONS # Create BellmanOperator operator = MellowBellmanOperator(kappa, tau, xi, temp_mdp.gamma, state_dim, action_dim) # Create Q Function layers = [l1] if l2 > 0: layers.append(l2) Q = MLPQFunction(state_dim, n_actions, layers=layers, activation=activation) def run(seed=None): return learn(Q, operator, tasks_data, demand, min_env_flow, max_iter=max_iter, buffer_size=buffer_size,