Example #1
# Demonstrates Sequential.model, taken from a function named _model_evaluation.
import numpy as np
from keras.models import Sequential
from keras.layers import Dense

# Small MLP mapping a Q-function weight vector to an updated weight vector;
# ACTIVATION and n_q_regressors_weights are defined elsewhere in the project.
rho_regressor = Sequential()
rho_regressor.add(Dense(4, input_dim=n_q_regressors_weights, init='uniform',
                        activation=ACTIVATION))
rho_regressor.add(
    Dense(n_q_regressors_weights, init='uniform', activation='linear'))
rho_regressor.compile(loss='mse', optimizer='rmsprop')
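
# Hedged sketch (not in the original): a quick shape sanity check. The net
# maps weight vectors to weight vectors, so inputs and targets share the
# shape (batch, n_q_regressors_weights); nb_epoch is the Keras 1.x argument.
_dummy = np.random.uniform(size=(32, n_q_regressors_weights)).astype('float32')
rho_regressor.fit(_dummy, _dummy, nb_epoch=1, batch_size=8, verbose=0)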

import theano
import theano.tensor as T

# Symbolic batch of Q-regressor weight vectors.
theta = T.matrix()

# Sequential.model is the underlying Model built at compile time; calling it
# on a Theano tensor returns the network's symbolic output.
res = rho_regressor.model(theta)
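
# Hedged sketch (not in the original): the symbolic output can be compiled
# into a callable Theano function and evaluated on concrete weight vectors.
rho_fn = theano.function([theta], res, allow_input_downcast=True)
print(rho_fn(np.zeros((1, n_q_regressors_weights))).shape)
# -> (1, n_q_regressors_weights)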


# rho_regressor.fit(None, None)
##########################################

def terminal_evaluation(old_theta, new_theta, tol_theta=1e-2):
    # Once the weight increment is below tol_theta, build the Q-function
    # estimator from the final weights and evaluate the resulting policy.
    if increment_base_termination(old_theta, new_theta, 2, tol_theta):
        estimator = LQG_Q()
        estimator.omega = new_theta[0]
        agent = Algorithm(estimator, state_dim, action_dim,
                          discrete_actions, mdp.gamma, mdp.horizon)
        agent._iteration = 1
        initial_states = np.array([[1, 2, 5, 7, 10]]).T
        values = evaluation.evaluate_policy(mdp, agent,
                                            initial_states=initial_states)
        return values
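
# Hedged sketch (assumption, not the project's code): increment_base_termination
# plausibly tests the size of the weight update, with the third argument as the
# norm order. A hypothetical stand-in:
def _increment_termination_sketch(old_theta, new_theta, order, tol):
    delta = np.asarray(new_theta) - np.asarray(old_theta)
    return np.linalg.norm(delta.ravel(), ord=order) < tol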