                    'min_samples_split': 5,
                    'min_samples_leaf': 2,
                    'input_scaled': False,
                    'output_scaled': False}
discrete_actions = mdp.action_space.values

# ExtraTrees
regressor = Regressor(ExtraTreesRegressor, **regressor_params)
# Action regressor of Ensemble of ExtraTreesEnsemble
# regressor = Ensemble(ExtraTreesRegressor, **regressor_params)
regressor = ActionRegressor(regressor, discrete_actions=discrete_actions,
                            tol=5, **regressor_params)

dataset = evaluation.collect_episodes(mdp, n_episodes=1000)
check_dataset(dataset, state_dim, action_dim, reward_dim)  # this is just a
# check, it can be removed in experiments
print('Dataset has %d samples' % dataset.shape[0])

# reward_idx = state_dim + action_dim
# sast = np.append(dataset[:, :reward_idx],
#                  dataset[:, reward_idx + reward_dim:-1],
#                  axis=1)
# r = dataset[:, reward_idx]
sast, r = split_data_for_fqi(dataset, state_dim, action_dim, reward_dim)

fqi_iterations = mdp.horizon  # this is usually less than the horizon
fqi = FQI(estimator=regressor,
          state_dim=state_dim,
          action_dim=action_dim,
          discrete_actions=discrete_actions,
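# Hedged sketch (not in the original fragment): once the FQI object is
# fully constructed, the usual ifqi fitting loop bootstraps Q_1 from the
# immediate reward and then performs one Bellman iteration per call,
# assuming the estimator exposes the partial_fit protocol used in ifqi's
# examples:
#
#   fqi.partial_fit(sast, r)          # iteration 1: fit Q_1 to the reward
#   for t in range(fqi_iterations - 1):
#       fqi.partial_fit(None, None)   # iterations 2..fqi_iterations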
from ifqi.models.regressor import Regressor
from ifqi.models.mlp import MLP
from ifqi.models.linear import Ridge
from ifqi.algorithms.pbo.pbo import PBO

"""
Simple script to quickly run pbo. It solves the LQG environment.
"""

mdp = envs.LQG1D()
state_dim, action_dim, reward_dim = envs.get_space_info(mdp)
reward_idx = state_dim + action_dim
discrete_actions = np.linspace(-8, 8, 20)
dataset = evaluation.collect_episodes(mdp, n_episodes=100)
check_dataset(dataset, state_dim, action_dim, reward_dim)
sast, r = split_data_for_fqi(dataset, state_dim, action_dim, reward_dim)


### Q REGRESSOR ##########################
class LQG_Q():
    def __init__(self):
        self.w = np.array([1., 0.])

    def predict(self, sa):
        k, b = self.w
        # print(k, b)
        return - b * b * sa[:, 0] * sa[:, 1] \
               - 0.5 * k * sa[:, 1] ** 2 \
               - 0.4 * k * sa[:, 0] ** 2

    def get_weights(self):
        return self.w
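# Usage sketch (illustrative; the sa values below are made up): LQG_Q encodes
# the two-parameter quadratic family
#   Q_w(s, a) = -b^2 * s * a - 0.5 * k * a^2 - 0.4 * k * s^2,  with w = (k, b),
# which can represent the optimal LQG Q-function, itself quadratic in (s, a).
q = LQG_Q()
sa = np.array([[1.0, -0.5],   # rows are (state, action) pairs
               [0.3, 0.2]])
print(q.predict(sa))          # Q-values under the initial weights w = (1, 0)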
dest="INDEPENDENT", default=False, help="Independent.") op.add_option("--activ", default="tanh", dest="ACTIVATION", type="str", help="NN activation") (opts, args) = op.parse_args() # np.random.seed(6652) mdp = envs.LQG1D() # mdp.seed(2897270658018522815) state_dim, action_dim, reward_dim = envs.get_space_info(mdp) reward_idx = state_dim + action_dim discrete_actions = np.linspace(-8, 8, 20) dataset = evaluation.collect_episodes(mdp, n_episodes=100) check_dataset(dataset, state_dim, action_dim, reward_dim) INCREMENTAL = opts.INCREMENTAL ACTIVATION = opts.ACTIVATION STEPS_AHEAD = opts.STEPS_HEAD UPDATE_EVERY = opts.UPDATE_EVERY INDEPENDENT = opts.INDEPENDENT EPOCH = opts.EPOCH NORM_VALUE = np.inf print('INCREMENTAL: {}'.format(INCREMENTAL)) print('ACTIVATION: {}'.format(ACTIVATION)) print('STEPS_AHEAD: {}'.format(STEPS_AHEAD)) print('UPDATE_EVERY: {}'.format(UPDATE_EVERY)) print('INDEPENDENT: {}'.format(INDEPENDENT)) print('NORM_VALUE: {}'.format(NORM_VALUE))