selected_actions_dim = 1  # Assuming monodimensional, discrete action space

# Split dataset for FQI
sast, r = split_data_for_fqi(reduced_dataset, selected_states_dim,
                             selected_actions_dim, reward_dim)

# Action regressor of ExtraTreesRegressor for FQI
fqi_regressor_params = {'n_estimators': 50,
                        'criterion': 'mse',
                        'min_samples_split': 5,
                        'min_samples_leaf': 2,
                        'input_scaled': False,
                        'output_scaled': False,
                        'n_jobs': args.njobs}
regressor = Regressor(regressor_class=ExtraTreesRegressor,
                      **fqi_regressor_params)
regressor = ActionRegressor(regressor,
                            discrete_actions=selected_actions_values,
                            tol=0.5,
                            **fqi_regressor_params)

# Create FQI model
fqi_params = {'estimator': regressor,
              'state_dim': selected_states_dim,
              'action_dim': selected_actions_dim,
              'discrete_actions': selected_actions_values,
              'gamma': mdp.gamma,
              'horizon': args.iterations,
              'verbose': True}
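
# Added sketch (assumptions): the fragment above only builds `fqi_params`.
# A plausible continuation constructs the FQI object and runs the iterations
# on the split dataset; the import path `ifqi.algorithms.fqi.FQI` and the
# `fit(sast, r)` call are assumptions, not taken from the original fragment.
from ifqi.algorithms.fqi import FQI

fqi = FQI(**fqi_params)
fqi.fit(sast, r)
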
    def get_weights(self):
        return self.w

    def get_k(self, omega):
        b = omega[:, 0]
        k = omega[:, 1]
        return - b * b / k

    def set_weights(self, w):
        self.w = np.array(w)

    def count_params(self):
        return self.w.size


q_regressor_params = dict()
q_regressor = Regressor(LQG_Q, **q_regressor_params)

ACTIVATION = 'sigmoid'
INCREMENTAL = False

### F_RHO REGRESSOR ######################
n_q_regressors_weights = q_regressor._regressor.count_params()
rho_regressor_params = {'n_input': n_q_regressors_weights,
                        'n_output': n_q_regressors_weights,
                        'hidden_neurons': [20],
                        'init': 'uniform',
                        'loss': 'mse',
                        'activation': ACTIVATION,
                        'optimizer': 'rmsprop',
                        'metrics': ['accuracy'],
                        'input_scaled': 1}
rho_regressor = Regressor(MLP, **rho_regressor_params)
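
# Added usage sketch: a self-contained check of the weight helper above. For a
# batch of Q-weight vectors omega = [b, k] per row, get_k returns -b*b/k for
# each row; the values below are placeholders, not taken from the experiments.
import numpy as np

omega = np.array([[1.0, -0.5],
                  [2.0, -1.0]])
print(-omega[:, 0] * omega[:, 0] / omega[:, 1])  # -> [2. 4.]
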
    def _generate_model(self, iteration):
        return Regressor(self._regressor_class, **self._regr_args)
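
# Added illustration (hypothetical): one way a growing ensemble could use the
# factory method above, creating one fresh regressor per FQI iteration. The
# class and attribute names below are illustrative, not the library's own.
class GrowingEnsemble(object):
    def __init__(self, regressor_class, **regr_args):
        self._regressor_class = regressor_class
        self._regr_args = regr_args
        self._models = []

    def _generate_model(self, iteration):
        return Regressor(self._regressor_class, **self._regr_args)

    def add_model(self, iteration):
        # append a new, untrained regressor for this iteration
        self._models.append(self._generate_model(iteration))
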
from ifqi.algorithms.lspi import LSPI
from ifqi.envs.utils import get_space_info
from ifqi.evaluation import evaluation
from ifqi.evaluation.utils import check_dataset, split_data_for_fqi
from ifqi.models.linear import Linear
from ifqi.models.regressor import Regressor
from ifqi import envs as env  # CarOnHill is defined in ifqi.envs

mdp = env.CarOnHill()
state_dim, action_dim, reward_dim = get_space_info(mdp)
nextstate_idx = state_dim + action_dim + reward_dim
reward_idx = action_dim + state_dim

# dataset: s, a, r, s'
dataset = evaluation.collect_episodes(mdp, n_episodes=500)
check_dataset(dataset, state_dim, action_dim, reward_dim)

regressor_params = dict(features=dict(name='poly', params=dict(degree=5)))
regressor = Regressor(Linear, **regressor_params)

lspi = LSPI(regressor, state_dim, action_dim,
            mdp.action_space.values, mdp.gamma)

sast, r = split_data_for_fqi(dataset, state_dim, action_dim, reward_dim)
lspi.fit(sast, r)

values = evaluation.evaluate_policy(mdp, lspi,
                                    initial_states=mdp.initial_states)
print(values)
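
# Added usage sketch: with the layout noted above (s, a, r, s' per row), the
# indices computed earlier slice the collected dataset as follows. Whether any
# extra columns (e.g. episode flags) follow the next state is an assumption,
# not shown in the fragment.
states = dataset[:, :state_dim]
actions = dataset[:, state_dim:reward_idx]
rewards = dataset[:, reward_idx:nextstate_idx]
next_states = dataset[:, nextstate_idx:nextstate_idx + state_dim]
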
mdp = envs.CarOnHill()
state_dim, action_dim, reward_dim = envs.get_space_info(mdp)
assert reward_dim == 1

regressor_params = {'n_estimators': 50,
                    'criterion': 'mse',
                    'min_samples_split': 5,
                    'min_samples_leaf': 2,
                    'input_scaled': False,
                    'output_scaled': False}
discrete_actions = mdp.action_space.values

# ExtraTrees
regressor = Regressor(ExtraTreesRegressor, **regressor_params)
# Action regressor of Ensemble of ExtraTreesEnsemble
# regressor = Ensemble(ExtraTreesRegressor, **regressor_params)
regressor = ActionRegressor(regressor,
                            discrete_actions=discrete_actions,
                            tol=5,
                            **regressor_params)

dataset = evaluation.collect_episodes(mdp, n_episodes=1000)
check_dataset(dataset, state_dim, action_dim, reward_dim)  # this is just a
# check, it can be removed in experiments
print('Dataset has %d samples' % dataset.shape[0])

# reward_idx = state_dim + action_dim
# sast = np.append(dataset[:, :reward_idx],
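
# Added sketch: the commented-out lines above start assembling `sast` by hand;
# an equivalent route, used in the other examples in this document, is the
# helper from ifqi.evaluation.utils.
sast, r = split_data_for_fqi(dataset, state_dim, action_dim, reward_dim)
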
    seed = None
else:
    seed = config['experiment_setting']['evaluation']['seed']
mdp.seed(seed)

state_dim, action_dim, reward_dim = envs.get_space_info(mdp)
assert reward_dim == 1
reward_idx = state_dim + action_dim
discrete_actions = mdp.action_space.values

# Load model
regressor_params = config['model']['params']
regressor_class = get_model(config['model']['name'])
if config['model']['ensemble']:
    regressor = Ensemble(regressor_class, **regressor_params)
else:
    regressor = Regressor(regressor_class, **regressor_params)
if not config['model']['fit_actions']:
    regressor = ActionRegressor(regressor,
                                discrete_actions=discrete_actions,
                                tol=5,
                                **regressor_params)

results = list()
# Run
for e in range(config['experiment_setting']['evaluation']['n_experiments']):
    print('Experiment: %d' % (e + 1))
    experiment_results = list()
    # Load dataset
    dataset = evaluation.collect_episodes(
        mdp,
        n_episodes=np.sort(config['experiment_setting']['evaluation']
                           ['n_episodes'])[-1])
    print('Dataset has %d samples' % dataset.shape[0])
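
# Added example (placeholder values): a minimal config dict exposing the keys
# read by the loader above. Every value here is illustrative, not taken from
# the original experiment files.
config = {
    'model': {
        'name': 'extra_trees',        # resolved through get_model()
        'ensemble': False,
        'fit_actions': False,
        'params': {'n_estimators': 50},
    },
    'experiment_setting': {
        'evaluation': {
            'seed': 0,
            'n_experiments': 5,
            'n_episodes': [100, 500, 1000],
        },
    },
}
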
class LQG_Q():
    def __init__(self, theta):
        self.theta = theta

    def predict(self, sa, **opt_pars):
        if 'f_rho' in opt_pars:
            k, b = opt_pars['f_rho']
        else:
            k, b = self.theta
        return b * sa[:, 1] ** 2 - (sa[:, 1] - k * sa[:, 0]) ** 2


theta = np.array([1., 0.])
regressor_params = {'theta': theta}
regressor = Regressor(LQG_Q, **regressor_params)

pbo = PBO(estimator=regressor,
          state_dim=state_dim,
          action_dim=action_dim,
          discrete_actions=discrete_actions,
          gamma=mdp.gamma,
          horizon=mdp.horizon,
          features=None,
          verbose=True)

epsilon = 1e-5
delta = np.inf

theta, _ = pbo.fit(sast, r)
while delta > epsilon:
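    # Added sketch (assumed continuation, not in the original fragment):
    # keep re-fitting PBO until the Q parameters stop changing, reusing the
    # pbo.fit(sast, r) call shown above and measuring delta as the change
    # in theta between iterations.
    theta_new, _ = pbo.fit(sast, r)
    delta = np.linalg.norm(np.asarray(theta_new) - np.asarray(theta))
    theta = theta_new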