Example 1
    selected_actions_dim = 1  # Assuming a one-dimensional, discrete action space
    # Split dataset for FQI
    sast, r = split_data_for_fqi(reduced_dataset, selected_states_dim,
                                 selected_actions_dim, reward_dim)

    # Action regressor of ExtraTreesRegressor for FQI
    fqi_regressor_params = {
        'n_estimators': 50,
        'criterion': 'mse',
        'min_samples_split': 5,
        'min_samples_leaf': 2,
        'input_scaled': False,
        'output_scaled': False,
        'n_jobs': args.njobs
    }
    regressor = Regressor(regressor_class=ExtraTreesRegressor,
                          **fqi_regressor_params)
    regressor = ActionRegressor(regressor,
                                discrete_actions=selected_actions_values,
                                tol=0.5,
                                **fqi_regressor_params)

    # Create FQI model
    fqi_params = {
        'estimator': regressor,
        'state_dim': selected_states_dim,
        'action_dim': selected_actions_dim,
        'discrete_actions': selected_actions_values,
        'gamma': mdp.gamma,
        'horizon': args.iterations,
        'verbose': True
    }
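The fragment stops at the parameter dictionary. A minimal continuation, assuming the FQI class from ifqi.algorithms.fqi accepts exactly these keyword arguments and exposes a fit(sast, r) interface analogous to LSPI's in Example 4, could look like this:

    # Sketch only (assuming: from ifqi.algorithms.fqi import FQI):
    # instantiate FQI with the parameters above and fit it on the
    # (s, a, s') / reward split computed earlier.
    fqi = FQI(**fqi_params)
    fqi.fit(sast, r)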
Example 2
    def get_weights(self):
        return self.w

    def get_k(self, omega):
        b = omega[:, 0]
        k = omega[:, 1]
        return - b * b / k

    def set_weights(self, w):
        self.w = np.array(w)

    def count_params(self):
        return self.w.size

q_regressor_params = dict()
q_regressor = Regressor(LQG_Q, **q_regressor_params)

ACTIVATION = 'sigmoid'
INCREMENTAL = False
### F_RHO REGRESSOR ######################
n_q_regressors_weights = q_regressor._regressor.count_params()
rho_regressor_params = {'n_input': n_q_regressors_weights,
                        'n_output': n_q_regressors_weights,
                        'hidden_neurons': [20],
                        'init': 'uniform',
                        'loss': 'mse',
                        'activation': ACTIVATION,
                        'optimizer': 'rmsprop',
                        'metrics': ['accuracy'],
                        'input_scaled': True}
rho_regressor = Regressor(MLP, **rho_regressor_params)
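The two regressors are meant to cooperate: the rho network takes the Q-model's weight vector as input and returns an updated weight vector of the same size (hence n_input == n_output above). A minimal usage sketch, assuming the Regressor wrapper exposes the usual predict(X) method and reusing the weight helpers from the class fragment above:

# Sketch only: push the current Q weights through rho and write the result back.
w = q_regressor._regressor.get_weights()           # current Q weights, shape (n,)
w_new = rho_regressor.predict(w.reshape(1, -1))    # assumed predict(X) signature
q_regressor._regressor.set_weights(w_new.ravel())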
Example 3
    def _generate_model(self, iteration):
        return Regressor(self._regressor_class, **self._regr_args)
Example 4
import ifqi.envs as env  # assumed location of the CarOnHill environment used below
from ifqi.algorithms.lspi import LSPI
from ifqi.envs.utils import get_space_info
from ifqi.evaluation import evaluation
from ifqi.evaluation.utils import check_dataset, split_data_for_fqi
from ifqi.models.linear import Linear
from ifqi.models.regressor import Regressor

mdp = env.CarOnHill()
state_dim, action_dim, reward_dim = get_space_info(mdp)
nextstate_idx = state_dim + action_dim + reward_dim
reward_idx = action_dim + state_dim

# dataset: s, a, r, s'
dataset = evaluation.collect_episodes(mdp, n_episodes=500)
check_dataset(dataset, state_dim, action_dim, reward_dim)

regressor_params = dict(features=dict(name='poly', params=dict(degree=5)))
regressor = Regressor(Linear, **regressor_params)
lspi = LSPI(regressor, state_dim, action_dim, mdp.action_space.values,
            mdp.gamma)

sast, r = split_data_for_fqi(dataset, state_dim, action_dim, reward_dim)

lspi.fit(sast, r)

values = evaluation.evaluate_policy(mdp,
                                    lspi,
                                    initial_states=mdp.initial_states)

print(values)
Example 5
mdp = envs.CarOnHill()
state_dim, action_dim, reward_dim = envs.get_space_info(mdp)
assert reward_dim == 1
regressor_params = {
    'n_estimators': 50,
    'criterion': 'mse',
    'min_samples_split': 5,
    'min_samples_leaf': 2,
    'input_scaled': False,
    'output_scaled': False
}
discrete_actions = mdp.action_space.values

# ExtraTrees
regressor = Regressor(ExtraTreesRegressor, **regressor_params)

# Action regressor of an Ensemble of ExtraTreesRegressor (alternative, commented out)
# regressor = Ensemble(ExtraTreesRegressor, **regressor_params)
regressor = ActionRegressor(regressor,
                            discrete_actions=discrete_actions,
                            tol=5,
                            **regressor_params)

dataset = evaluation.collect_episodes(mdp, n_episodes=1000)
# This is just a sanity check; it can be removed in experiments.
check_dataset(dataset, state_dim, action_dim, reward_dim)
print('Dataset has %d samples' % dataset.shape[0])

# reward_idx = state_dim + action_dim
# sast = np.append(dataset[:, :reward_idx],
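The snippet is cut off while assembling sast by hand; the same split can be obtained with the helper already used in Examples 1 and 4 (assuming split_data_for_fqi is imported from ifqi.evaluation.utils):

# Sketch: split the dataset into (s, a, s') and r with the library helper
# instead of slicing the array manually.
sast, r = split_data_for_fqi(dataset, state_dim, action_dim, reward_dim)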
Example 6
    seed = None
else:
    seed = config['experiment_setting']['evaluation']['seed']
mdp.seed(seed)
state_dim, action_dim, reward_dim = envs.get_space_info(mdp)
assert reward_dim == 1
reward_idx = state_dim + action_dim
discrete_actions = mdp.action_space.values

# Load model
regressor_params = config['model']['params']
regressor_class = get_model(config['model']['name'])
if config['model']['ensemble']:
    regressor = Ensemble(regressor_class, **regressor_params)
else:
    regressor = Regressor(regressor_class, **regressor_params)
if not config['model']['fit_actions']:
    regressor = ActionRegressor(regressor, discrete_actions=discrete_actions,
                                tol=5, **regressor_params)

results = list()
# Run
for e in range(config['experiment_setting']['evaluation']['n_experiments']):
    print('Experiment: %d' % (e + 1))
    experiment_results = list()

    # Load dataset
    dataset = evaluation.collect_episodes(
        mdp, n_episodes=np.sort(config['experiment_setting']['evaluation']
                                ['n_episodes'])[-1])
    print('Dataset has %d samples' % dataset.shape[0])
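For reference, a minimal config dictionary consistent with the keys read in this snippet might look like the following; the concrete values and the 'ExtraTrees' model name are hypothetical, not taken from an actual experiment file:

config = {
    'model': {
        'name': 'ExtraTrees',          # hypothetical identifier passed to get_model
        'params': {'n_estimators': 50},
        'ensemble': False,
        'fit_actions': False
    },
    'experiment_setting': {
        'evaluation': {
            'seed': 1,
            'n_experiments': 5,
            'n_episodes': [100, 500, 1000]
        }
    }
}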
Example 7
class LQG_Q():
    def __init__(self, theta):
        self.theta = theta

    def predict(self, sa, **opt_pars):
        if 'f_rho' in opt_pars:
            k, b = opt_pars['f_rho']
        else:
            k, b = self.theta
        return b * sa[:, 1]**2 - (sa[:, 1] - k * sa[:, 0])**2


theta = np.array([1., 0.])
regressor_params = {'theta': theta}
regressor = Regressor(LQG_Q, **regressor_params)

pbo = PBO(estimator=regressor,
          state_dim=state_dim,
          action_dim=action_dim,
          discrete_actions=discrete_actions,
          gamma=mdp.gamma,
          horizon=mdp.horizon,
          features=None,
          verbose=True)

epsilon = 1e-5
delta = np.inf

theta, _ = pbo.fit(sast, r)
while delta > epsilon:
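    # Hypothetical continuation (the original snippet ends at the line above):
    # refit PBO and measure how much theta moved, stopping once the change
    # drops below epsilon. This is a sketch, not the library's reference loop.
    theta_old = theta
    theta, _ = pbo.fit(sast, r)
    delta = np.linalg.norm(theta - theta_old)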