# --- Example no. 1 ---
        TwoRoomGridworld(np.array([gw_size, gw_size]), door_x=d) for d in doors
    ]
elif env == "three-room-gw":
    mdps = [
        ThreeRoomGridworld(np.array([gw_size, gw_size]), door_x=(d1, d2))
        for (d1, d2) in zip(doors, doors2)
    ]
# Fixed evaluation start states: ten copies of the origin (0., 0.).
# NOTE(review): all entries are identical — presumably a placeholder;
# confirm whether distinct/randomized evaluation states were intended.
eval_states = [np.array([0., 0.]) for _ in range(10)]

# Environment properties are read from the first task; all tasks are
# assumed to share the same state/action spaces — TODO confirm.
state_dim = mdps[0].state_dim
action_dim = 1
n_actions = mdps[0].action_space.n
# Number of features fed to the Q-function; presumably n_basis RBF
# centers per spatial axis on the 2-D gridworld — verify against
# build_features_gw_state.
K = n_basis**2

# Create BellmanOperator
operator = MellowBellmanOperator(kappa, tau, xi, mdps[0].gamma, K, action_dim)
# Create Q Function
# layers=None: no hidden layers, so Q is presumably linear in the K
# features — confirm against MLPQFunction.
Q = MLPQFunction(K, n_actions, layers=None)
# Create RBFs
rbf = build_features_gw_state(gw_size, n_basis, state_dim)


def run(mdp, seed=None):
    return learn(mdp,
                 Q,
                 operator,
                 max_iter=max_iter,
                 buffer_size=buffer_size,
                 batch_size=batch_size,
                 alpha_adam=alpha_adam,
                 alpha_sgd=alpha_sgd,
# --- Example no. 2 ---
# Temporary Lake Como environment, built only so its static properties
# (observation space, action count, gamma) can be read below.
temp_lake = Lakecomo(None, None, min_env_flow, None, None, seed=seed)
# Inflow series taken from year 1946 of the historical data.
# NOTE(review): presumably an arbitrary reference year used just to
# instantiate the env — confirm it does not bias the setup.
temp_inflow = list(como_data.loc[como_data['year'] == 1946, 'in'])
temp_mdp = LakeEnv(temp_inflow, demand, temp_lake)

# Load tasks
tasks_data = utils.load_object(tasks_file)

# Number of episodes used per evaluation round.
n_eval_episodes = 5

state_dim = temp_mdp.observation_space.shape[0]
action_dim = 1
n_actions = temp_mdp.N_DISCRETE_ACTIONS

# Create BellmanOperator
operator = MellowBellmanOperator(kappa, tau, xi, temp_mdp.gamma, state_dim,
                                 action_dim)
# Create Q Function
# Hidden-layer sizes: l1 always; l2 is appended only when positive, so
# l2 <= 0 selects a single-hidden-layer network.
layers = [l1]
if l2 > 0:
    layers.append(l2)
Q = MLPQFunction(state_dim, n_actions, layers=layers, activation=activation)


def run(seed=None):
    return learn(Q,
                 operator,
                 tasks_data,
                 demand,
                 min_env_flow,
                 max_iter=max_iter,
                 buffer_size=buffer_size,