state_dim=state_dim, action_dim=action_dim, discrete_actions=discrete_actions, gamma=config['fqi']['gamma'], horizon=config['fqi']['horizon'], verbose=config['fqi']['verbose']) fit_params = config['fit_params'] if config['experiment_setting']['evaluation']['metric'] == 'n_episodes': for i in config['experiment_setting']['evaluation']['n_episodes']: episode_end_idxs = np.argwhere(dataset[:, -1] == 1).ravel() last_el = episode_end_idxs[i - 1] sast, r = split_data_for_fqi(dataset, state_dim, action_dim, reward_dim, last_el + 1) fqi.fit(sast, r, **fit_params) experiment_results.append(evaluate(mdp, fqi, mdp.initial_states, args)) results.append(experiment_results) elif config['experiment_setting']['evaluation']['metric'] == 'fqi_iteration': sast, r = split_data_for_fqi(dataset, state_dim, action_dim, reward_dim) fqi.partial_fit(sast, r, **fit_params) for i in range(2, fqi.horizon + 1): fqi.partial_fit(None, None, **fit_params) if not i % config['experiment_setting']['evaluation']['n_steps_to_evaluate']: experiment_results.append(evaluate(mdp, fqi, mdp.initial_states, args)) results.append(experiment_results) else:
class EpsilonFQI: def __init__(self, fqi, fe, epsilon=0.05): """ Creates an epsilon-greedy policy from the given FQI policy object. :param fqi: an FQI instance from the ifqi package :param fe: a feature extractor (method s_features(x) is expected) :param epsilon: exploration rate for the policy (0 <= epsilon <= 1) """ self.epsilon = epsilon self.fqi = None self.actions = None self.fe = None self.load_fe(fe) if isinstance(fqi, dict): self.fqi = FQI(**fqi) self.actions = fqi['discrete_actions'] else: self.load_fqi(fqi) def fit(self, sast, r, **kwargs): """ Fits the policy on the given sast, r dataset, for the amounts of iterations defined when creating FQI. :param sast: a dataset of (state, action, state, terminal) transitions :param r: the rewards corresponding to the transitions in sast :param kwargs: additional arguments for the fit() function of FQI """ self.fqi.fit(sast, r, **kwargs) def partial_fit(self, sast=None, r=None, **kwargs): """ Fits the policy on the given sast, r dataset, for one iteration. :param sast: a dataset of (state, action, state, terminal) transitions :param r: the rewards corresponding to the transitions in sast :param kwargs: additional arguments for the partial_fit() function of FQI """ self.fqi.partial_fit(sast, r, **kwargs) def draw_action(self, state, absorbing, evaluation=False, fully_deterministic=False): """ Picks an action according to the epsilon-greedy choice :param state: a state :param absorbing: bool, whether the state is absorbing :param evaluation: bool, whether to use the epsilon defined for evaluation :param fully_deterministic: whether to use FQI, deterministically, to select the action :return: the selected action """ if not fully_deterministic and random() <= self.epsilon: return choice(self.actions) else: preprocessed_state = self.fe.s_features(state) return self.fqi.draw_action(preprocessed_state, absorbing, evaluation=evaluation) def set_epsilon(self, epsilon): """ :param epsilon: the exploration rate to use """ self.epsilon = epsilon def get_epsilon(self): """ :return: the current exploration rate """ return self.epsilon def load_fqi(self, fqi): """ Loads an FQI policy from file, sets the action space accordingly :param fqi: str or file-like object from which to load the policy """ if isinstance(fqi, str): self.fqi = joblib.load(fqi) else: self.fqi = fqi # Set the correct action space self.actions = self.fqi._actions def save_fqi(self, filename): """ Saves the FQI object to file :param filename: filename to which save the model """ joblib.dump(self.fqi, filename) def load_fe(self, fe): """ Loads the feature extractor from file :param fe: str or file-like object from which to load the model """ if isinstance(fe, str): self.fe = joblib.load(fe) self.fe.load(fe) else: self.fe = fe def save_fe(self, filename): """ Saves the feature extractor to file :param filename: filename to which save the model """ if hasattr(self.fe, 'save'): self.fe.save(filename) else: joblib.dump(self.fe, filename)