'criterion': 'mse', 'min_samples_split': 5, 'min_samples_leaf': 2, 'input_scaled': False, 'output_scaled': False} discrete_actions = mdp.action_space.values # ExtraTrees regressor = Regressor(ExtraTreesRegressor, **regressor_params) # Action regressor of Ensemble of ExtraTreesEnsemble # regressor = Ensemble(ExtraTreesRegressor, **regressor_params) regressor = ActionRegressor(regressor, discrete_actions=discrete_actions, tol=5, **regressor_params) dataset = evaluation.collect_episodes(mdp, n_episodes=1000) check_dataset(dataset, state_dim, action_dim, reward_dim) # this is just a # check, it can be removed in experiments print('Dataset has %d samples' % dataset.shape[0]) # reward_idx = state_dim + action_dim # sast = np.append(dataset[:, :reward_idx], # dataset[:, reward_idx + reward_dim:-1], # axis=1) # r = dataset[:, reward_idx] sast, r = split_data_for_fqi(dataset, state_dim, action_dim, reward_dim) fqi_iterations = mdp.horizon # this is usually less than the horizon fqi = FQI(estimator=regressor, state_dim=state_dim, action_dim=action_dim,
**regressor_params) # Load FQI fqi = FQI(estimator=regressor, state_dim=state_dim, action_dim=action_dim, discrete_actions=discrete_actions, gamma=config['fqi']['gamma'], horizon=config['fqi']['horizon'], features=config['fqi']['features'], verbose=config['fqi']['verbose']) fit_params = config['fit_params'] # Load dataset dataset = evaluation.collect_episodes( mdp, n_episodes=config['experiment_setting']['evaluation']['n_episodes'][-1]) print('Dataset has %d samples' % dataset.shape[0]) # Load initial state to start evaluation episodes. This is the only setting # to be chosen outside the configuration file. # IF MULTIPLE EXPERIMENTS ARE TO BE PERFORMED STARTING FROM THE SAME # INITIAL STATE, USE AN ARRAY WITH THE SAME INITIAL STATE REPEATED FOR THE # DESIRED NUMBER OF EVALUATION RUNS. initial_states = np.zeros((41, 4)) initial_states[:, 0] = np.linspace(-2, 2, 41) ###################################################################### ###################################################################### experiment_results = list() results = list()
from ifqi.evaluation.utils import check_dataset, split_data_for_fqi
from ifqi.models.regressor import Regressor
from ifqi.models.mlp import MLP
from ifqi.models.linear import Ridge
from ifqi.algorithms.pbo.pbo import PBO

""" Simple script to quickly run pbo. It solves the LQG environment. """

# Environment and the dimensions reported for it.
mdp = envs.LQG1D()
state_dim, action_dim, reward_dim = envs.get_space_info(mdp)

# Offset just past the state and action entries (presumably the reward
# column of a dataset row -- TODO confirm against the dataset layout).
reward_idx = state_dim + action_dim

# 20 evenly spaced action values from -8 to 8, both ends included.
discrete_actions = np.linspace(-8, 8, 20)

# Collect 100 episodes, validate them, then split them for fitting.
dataset = evaluation.collect_episodes(mdp, n_episodes=100)
check_dataset(dataset, state_dim, action_dim, reward_dim)
sast, r = split_data_for_fqi(dataset, state_dim, action_dim, reward_dim)


### Q REGRESSOR ##########################
class LQG_Q:
    """Quadratic function of two input columns, driven by two weights.

    The weights are stored in ``self.w`` and unpacked as ``(k, b)`` when
    predicting.
    """

    def __init__(self):
        # Initial weights: k = 1, b = 0.
        self.w = np.array([1.0, 0.0])

    def predict(self, sa):
        """Evaluate the quadratic form on every row of ``sa``.

        ``sa`` is indexed as ``sa[:, 0]`` and ``sa[:, 1]``, so it must be
        two-dimensional with at least two columns (presumably state then
        action -- TODO confirm with the caller).

        Returns ``-b*b*c0*c1 - 0.5*k*c1**2 - 0.4*k*c0**2`` where ``c0`` and
        ``c1`` are the first and second column.
        """
        k, b = self.w
        col0 = sa[:, 0]
        col1 = sa[:, 1]

        # Same operand order as the single-expression form, so the
        # floating-point result is unchanged.
        cross_term = - b * b * col0 * col1
        col1_quadratic = 0.5 * k * col1 ** 2
        col0_quadratic = 0.4 * k * col0 ** 2
        return cross_term - col1_quadratic - col0_quadratic

    def get_weights(self):
        """Return the weight array itself (not a copy)."""
        return self.w
regressor = Ensemble(regressor_class, **regressor_params) else: regressor = Regressor(regressor_class, **regressor_params) if not config['model']['fit_actions']: regressor = ActionRegressor(regressor, discrete_actions=discrete_actions, tol=5, **regressor_params) results = list() # Run for e in range(config['experiment_setting']['evaluation']['n_experiments']): print('Experiment: %d' % (e + 1)) experiment_results = list() # Load dataset dataset = evaluation.collect_episodes( mdp, n_episodes=np.sort(config['experiment_setting']['evaluation'] ['n_episodes'])[-1]) print('Dataset has %d samples' % dataset.shape[0]) # Load FQI fqi = FQI(estimator=regressor, state_dim=state_dim, action_dim=action_dim, discrete_actions=discrete_actions, gamma=config['fqi']['gamma'], horizon=config['fqi']['horizon'], verbose=config['fqi']['verbose']) fit_params = config['fit_params'] if config['experiment_setting']['evaluation']['metric'] == 'n_episodes': for i in config['experiment_setting']['evaluation']['n_episodes']: