# Example #1 (0)
# Wrap the base regressor in an ActionRegressor (one model per discrete
# action value).  NOTE(review): `regressor` on the right-hand side must
# already be defined above this fragment — only the commented-out
# Ensemble line below hints at how it was built; confirm upstream.
# Action regressor of Ensemble of ExtraTreesEnsemble
# regressor = Ensemble(ExtraTreesRegressor, **regressor_params)
regressor = ActionRegressor(regressor, discrete_actions=discrete_actions,
                            tol=5, **regressor_params)

# Collect 1000 episodes from the MDP to build the training dataset.
dataset = evaluation.collect_episodes(mdp, n_episodes=1000)
check_dataset(dataset, state_dim, action_dim, reward_dim) # this is just a
# check, it can be removed in experiments
print('Dataset has %d samples' % dataset.shape[0])

# Manual split kept for reference; split_data_for_fqi presumably does the
# same column slicing (sast = state/action/next-state/terminal columns,
# r = reward column) — verify against its implementation.
# reward_idx = state_dim + action_dim
# sast = np.append(dataset[:, :reward_idx],
#                  dataset[:, reward_idx + reward_dim:-1],
#                  axis=1)
# r = dataset[:, reward_idx]
sast, r = split_data_for_fqi(dataset, state_dim, action_dim, reward_dim)

# Number of FQI iterations is bounded by the environment horizon.
fqi_iterations = mdp.horizon  # this is usually less than the horizon
fqi = FQI(estimator=regressor,
          state_dim=state_dim,
          action_dim=action_dim,
          discrete_actions=discrete_actions,
          gamma=mdp.gamma,
          horizon=fqi_iterations,
          verbose=True)

# Extra keyword arguments forwarded to the estimator's fit(); the
# commented block below shows typical neural-network settings.
fit_params = {}
# fit_params = {
#     "n_epochs": 300,
#     "batch_size": 50,
#     "validation_split": 0.1,
# Example #2 (0)
                        ]  # Assuming monodimensional, discrete action space
    # NOTE(review): this fragment starts mid-statement — the list closed
    # above and the names `header`, `reduced_dataset`, `selected_actions`
    # are defined before this view; confirm in the full file.
    action_idx = header.index('A0')
    selected_states = header[:action_idx]  # All states were selected
    # NOTE(review): integer indexing selects a ROW on a numpy 2-D array
    # but a COLUMN on a pandas DataFrame — confirm reduced_dataset's type
    # before trusting these unique action values.
    selected_actions_values = np.unique(reduced_dataset[action_idx])

print('Reduced dataset has %d samples' % reduced_dataset.shape[0])
print('Selected states: %s' % selected_states)
print('Selected actions: %s' % selected_actions)

### FQI ###
if args.fqi:
    # Dataset parameters for FQI
    selected_states_dim = len(selected_states)
    selected_actions_dim = 1  # Assuming monodimensional, discrete action space
    # Split dataset for FQI
    sast, r = split_data_for_fqi(reduced_dataset, selected_states_dim,
                                 selected_actions_dim, reward_dim)

    # Action regressor of ExtraTreesRegressor for FQI
    fqi_regressor_params = {
        'n_estimators': 50,
        'criterion': 'mse',  # NOTE(review): renamed 'squared_error' in newer scikit-learn
        'min_samples_split': 5,
        'min_samples_leaf': 2,
        'input_scaled': False,
        'output_scaled': False,
        'n_jobs': args.njobs
    }
    regressor = Regressor(regressor_class=ExtraTreesRegressor,
                          **fqi_regressor_params)
    # One copy of the regressor per discrete action value.
    # NOTE(review): this call is truncated here — its closing arguments
    # lie outside this fragment.
    regressor = ActionRegressor(regressor,
                                discrete_actions=selected_actions_values,
# Example #3 (0)
from ifqi.models.mlp import MLP
from ifqi.models.linear import Ridge
from ifqi.algorithms.pbo.pbo import PBO

"""
Simple script to quickly run pbo. It solves the LQG environment.

"""

mdp = envs.LQG1D()
state_dim, action_dim, reward_dim = envs.get_space_info(mdp)
reward_idx = state_dim + action_dim
discrete_actions = np.linspace(-8, 8, 20)
dataset = evaluation.collect_episodes(mdp, n_episodes=100)
check_dataset(dataset, state_dim, action_dim, reward_dim)
sast, r = split_data_for_fqi(dataset, state_dim, action_dim, reward_dim)

### Q REGRESSOR ##########################
class LQG_Q():
    """Hand-crafted quadratic Q-function model for the 1-D LQG task.

    Parameterized by a two-element weight vector ``w = (k, b)``.
    """

    def __init__(self):
        # Initial weights: k = 1.0, b = 0.0.
        self.w = np.array([1., 0.])

    def predict(self, sa):
        # Quadratic form in state and action.
        # NOTE(review): assumes sa is a 2-column array with sa[:, 0] the
        # state and sa[:, 1] the action — confirm against the caller.
        k, b = self.w
        #print(k,b)
        return - b * b * sa[:, 0] * sa[:, 1] - 0.5 * k * sa[:, 1] ** 2 - 0.4 * k * sa[:, 0] ** 2

    def get_weights(self):
        # Expose the current weight vector.
        return self.w

    # NOTE(review): the body of this method continues beyond this fragment.
    def get_k(self, omega):
# Example #4 (0)
    # Monodimensional discrete action: the single action column is 'A0'.
    selected_actions = ['A0']  # Assuming monodimensional, discrete action space
    action_idx = header.index('A0')
    selected_states = header[:action_idx]  # All states were selected
    # NOTE(review): integer indexing selects a ROW on a numpy 2-D array
    # but a COLUMN on a pandas DataFrame — confirm reduced_dataset's type
    # before trusting these unique action values.
    selected_actions_values = np.unique(reduced_dataset[action_idx])

print('Reduced dataset has %d samples' % reduced_dataset.shape[0])
print('Selected states: %s' % selected_states)
print('Selected actions: %s' % selected_actions)

### FQI ###
if args.fqi:
    # Dataset parameters for FQI
    selected_states_dim = len(selected_states)
    selected_actions_dim = 1  # Assuming monodimensional, discrete action space
    # Split dataset for FQI
    sast, r = split_data_for_fqi(reduced_dataset, selected_states_dim, selected_actions_dim, reward_dim)

    # Action regressor of ExtraTreesRegressor for FQI
    fqi_regressor_params = {'n_estimators': 50,
                            'criterion': 'mse',  # NOTE(review): renamed 'squared_error' in newer scikit-learn
                            'min_samples_split': 5,
                            'min_samples_leaf': 2,
                            'input_scaled': False,
                            'output_scaled': False,
                            'n_jobs': args.njobs}
    regressor = Regressor(regressor_class=ExtraTreesRegressor,
                          **fqi_regressor_params)
    # One copy of the regressor per discrete action value.
    # NOTE(review): the same ExtraTrees params are forwarded to
    # ActionRegressor too — confirm it accepts/ignores extra kwargs.
    regressor = ActionRegressor(regressor,
                                discrete_actions=selected_actions_values,
                                tol=0.5,
                                **fqi_regressor_params)