# Monkey-patch Sequential so that calling `model` on an instance builds the
# symbolic evaluation graph of the network (used below to obtain a Theano
# expression for the rho regressor).
Sequential.model = _model_evaluation

# rho regressor: small MLP mapping a Q-regressor weight vector to a new
# weight vector of the same dimension.
rho_regressor = Sequential()
rho_regressor.add(Dense(4, input_dim=n_q_regressors_weights,
                        init='uniform', activation=ACTIVATION))
rho_regressor.add(Dense(n_q_regressors_weights,
                        init='uniform', activation='linear'))
rho_regressor.compile(loss='mse', optimizer='rmsprop')

import theano
import theano.tensor as T

# Symbolic forward pass of the rho regressor on a matrix of theta vectors
# (see the compiled-function sketch at the end of this section).
theta = T.matrix()
res = rho_regressor.model(theta)
# rho_regressor.fit(None, None)

##########################################


def terminal_evaluation(old_theta, new_theta, tol_theta=1e-2):
    # Evaluate the current policy only once the increment-based termination
    # condition on two consecutive theta vectors is satisfied.
    if increment_base_termination(old_theta, new_theta, 2, tol_theta):
        estimator = LQG_Q()
        estimator.omega = new_theta[0]
        agent = Algorithm(estimator, state_dim, action_dim, discrete_actions,
                          mdp.gamma, mdp.horizon)
        agent._iteration = 1
        initial_states = np.array([[1, 2, 5, 7, 10]]).T
        values = evaluation.evaluate_policy(mdp, agent,
                                            initial_states=initial_states)
        # Return the evaluation result so the caller can log or inspect it.
        return values
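
# A minimal sketch (not part of the original script): compile the symbolic
# output `res` of the patched model call above into a callable Theano function,
# so the rho regressor can be applied directly to batches of theta vectors.
# This assumes `res` is a Theano expression and that `np` and
# `n_q_regressors_weights` are defined earlier in the script.
rho_fn = theano.function([theta], res, allow_input_downcast=True)

# Illustrative call (hypothetical data): map a batch of Q-regressor weight
# vectors through the rho network.
# sample_thetas = np.random.uniform(size=(5, n_q_regressors_weights))
# print(rho_fn(sample_thetas).shape)  # expected: (5, n_q_regressors_weights)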