def init(self, taskspec):
    """Initializes the agent.

    Parses the task specification, builds one action per discrete action
    value (each with its own ``CsmlApproximator`` model), and creates the
    planner and the learner that is stored in ``self._learner``.

    Parameters
    ----------
    taskspec : str
        The task specification.

    Raises
    ------
    TaskSpecError
        If the parser reports the task specification as invalid.

    """
    ts = TaskSpecParser(taskspec)
    if not ts.valid:
        raise TaskSpecError('TaskSpec Error: Invalid task spec version')

    _, maxval = ts.get_reward_range()

    extra = ts.get_extra()

    # Keep only the keywords that occur in the extra section and remember
    # where each one was found.
    v = ['OBSDESCR', 'ACTDESCR', 'COPYRIGHT']
    pos = []
    for id_ in list(v):  # iterate over a copy, `v` is modified in the loop
        try:
            pos.append(extra.index(id_))
        except ValueError:
            v.remove(id_)
    sorted_v = sorted(zip(pos, v))

    act_desc = {}
    for i, (_, id_) in enumerate(sorted_v):
        # NOTE(review): `i` indexes the position-sorted keywords while `v` is
        # still in its original order -- confirm this is what get_value expects.
        val = ts.get_value(i, extra, v)
        if id_ == 'OBSDESCR':
            pass
        elif id_ == 'ACTDESCR':
            # NOTE(review): eval() executes arbitrary code found in the task
            # spec; only safe if the task spec comes from a trusted source.
            act_desc = eval(val)

    obs = ts.get_double_obs()
    dimensions = [1.0] * ts.get_num_int_obs()
    # Materialise the transposed limits once: a zip object cannot be
    # subscripted on Python 3.
    obs_bounds = list(zip(*obs))
    obs_range = np.asarray(obs_bounds[1]) - np.asarray(obs_bounds[0])
    dimensions += (1.0 / obs_range).tolist()
    MDPState.set_feature_limits(obs)

    act_limits = ts.get_int_act()
    act_limits += ts.get_double_act()

    discrete_dim = ts.get_num_int_act()
    assert (discrete_dim > 0)
    continuous_dim = ts.get_num_double_act()
    assert (continuous_dim == 0)

    if discrete_dim > 1:
        act_bounds = list(zip(*act_limits))
        min_ = list(act_bounds[0])
        max_ = (np.asarray(list(act_bounds[1])) + 1).tolist()
        actions = [range(*a) for a in zip(min_, max_)]

        import itertools
        # NOTE(review): `act` becomes a list of tuples here, which
        # `range(*act)` below does not accept -- this branch looks broken.
        act = list(itertools.product(*actions))
    else:
        act = act_limits[0][:]
        act[1] += 1  # make the upper limit inclusive for range()

    bb = self._config['modelbreadth'] * self._config['modelbreadth']
    # Distance at which the kernel below drops to `minweight`.
    maxd = np.sqrt(-bb * np.log(self._config['minweight']))
    kernelfn = lambda x: np.exp(-x * x / bb)

    def model_inclusion(approximator, state, delta):
        """Return whether `delta` observed in `state` should be added.

        The result is False only if some neighbor of `state` already holds
        a delta within ``model_sigma`` of `delta` and the first element of
        the first neighbor (presumably its distance to `state`) does not
        exceed ``model_tau``.
        """
        try:
            approx = approximator._queries[state]
        except KeyError:
            approx = approximator.Approximation(approximator, state, approximator._kernelfn)

        do_add = True
        for _, _, d in approx._neighbors:
            delta_error = np.linalg.norm(d - delta)
            if delta_error <= self._config['model_sigma']:
                # At least one of the cases in the case base correctly estimated the query case,
                # the query case does not add any new information, do not add.
                do_add = False
                break

        do_add = do_add or approx._neighbors[0][0] > self._config['model_tau']
        return do_add

    actions = []
    for i in range(*act):
        model_approximator = CsmlApproximator(self._config['minfraction'], maxd, dimensions, kernelfn,
                                              self._config['n_components'], self._config['n_iter'],
                                              model_inclusion)
        actions.append(
            MDPAction.create(i, self._config['explorationthreshold'], maxval, model_approximator,
                             name=act_desc[i] if i in act_desc else None, feature_limits=act_limits))
    # shuffle(actions)
    # NOTE(review): requires at least three actions and keeps only the first
    # three -- confirm this restriction is intended.
    actions = [actions[0], actions[1], actions[2]]

    # def value_inclusion(approximator, state):
    #     try:
    #         approx = approximator._queries[state]
    #     except KeyError:
    #         approx = approximator.Approximation(approximator, state, approximator._kernelfn)
    #
    #     return len(approx._weights) <= 0 or approx._neighbors[0][0] > self._config['value_tau']
    #
    # value_approximator = KernelApproximator(0.01, maxd, dimensions, lambda x: np.exp(-x ** 2), value_inclusion)
    value_approximator = InterpolationApproximator(self._config['resolutionfactor'], dimensions)

    mdp = MDP(actions, value_approximator)
    planner = PrioritizedSweeping(mdp, lambda x: False, lambda x: 0, self._config['discountfactor'],
                                  self._config['epsilon'])

    self._learner = Learner(planner)
def init(self, taskspec):
    """Initializes the agent.

    Parses the task specification, assembles the feature metadata for the
    ``CasmlApproximator`` model shared by all actions, builds one action
    per discrete action value, and creates the planner and the learner
    that is stored in ``self._learner``.

    Parameters
    ----------
    taskspec : str
        The task specification.

    Raises
    ------
    TaskSpecError
        If the parser reports the task specification as invalid.

    """
    ts = TaskSpecParser(taskspec)
    if not ts.valid:
        raise TaskSpecError('TaskSpec Error: Invalid task spec version')

    _, maxval = ts.get_reward_range()

    extra = ts.get_extra()

    # Keep only the keywords that occur in the extra section and remember
    # where each one was found.
    v = ['OBSDESCR', 'ACTDESCR', 'COPYRIGHT']
    pos = []
    for id_ in list(v):  # iterate over a copy, `v` is modified in the loop
        try:
            pos.append(extra.index(id_))
        except ValueError:
            v.remove(id_)
    sorted_v = sorted(zip(pos, v))

    act_desc = {}
    for i, (_, id_) in enumerate(sorted_v):
        # NOTE(review): `i` indexes the position-sorted keywords while `v` is
        # still in its original order -- confirm this is what get_value expects.
        val = ts.get_value(i, extra, v)
        if id_ == 'OBSDESCR':
            pass
        elif id_ == 'ACTDESCR':
            # NOTE(review): eval() executes arbitrary code found in the task
            # spec; only safe if the task spec comes from a trusted source.
            act_desc = eval(val)

    obs = ts.get_double_obs()
    dimensions = [1.0] * ts.get_num_int_obs()
    # Materialise the transposed limits once: a zip object cannot be
    # subscripted on Python 3.
    obs_bounds = list(zip(*obs))
    obs_range = np.asarray(obs_bounds[1]) - np.asarray(obs_bounds[0])
    dimensions += (1.0 / obs_range).tolist()
    MDPState.set_feature_limits(obs)

    act_limits = ts.get_int_act()
    act_limits += ts.get_double_act()

    discrete_dim = ts.get_num_int_act()
    assert (discrete_dim > 0)
    continuous_dim = ts.get_num_double_act()
    assert (continuous_dim == 0)

    if discrete_dim > 1:
        act_bounds = list(zip(*act_limits))
        min_ = list(act_bounds[0])
        max_ = (np.asarray(list(act_bounds[1])) + 1).tolist()
        actions = [range(*a) for a in zip(min_, max_)]

        import itertools
        # NOTE(review): `act` becomes a list of tuples here, which
        # `range(*act)` below does not accept -- this branch looks broken.
        act = list(itertools.product(*actions))
    else:
        act = act_limits[0][:]
        act[1] += 1  # make the upper limit inclusive for range()

    bb = self._config['modelbreadth'] * self._config['modelbreadth']
    # Distance at which the kernel exp(-x * x / bb) drops to `minweight`.
    maxd = np.sqrt(-bb * np.log(self._config['minweight']))

    feature_metadata = {}
    feature_names = ['state', 'act', 'delta_state']
    for i, n in enumerate(feature_names):
        meta = feature_metadata[n] = {
            'type': 'float',
            'is_index': n != 'delta_state',
            'order': i,
        }

        # Retrieval settings are optional; a feature without a configured
        # method keeps only the basic entries above.
        try:
            meta['retrieval_method'] = self._config['retrieval'][n]['method']
        except KeyError:
            continue

        try:
            meta['retrieval_method_params'] = self._config['retrieval'][n]['method_params']
        except KeyError:
            pass
        else:
            if n == 'state':
                if isinstance(meta['retrieval_method_params'], dict):
                    # Fall back to the observation scaling when no scale is configured.
                    if 'scale' not in meta['retrieval_method_params']:
                        meta['retrieval_method_params']['scale'] = dimensions
                else:
                    meta['retrieval_method_params'] = listify(meta['retrieval_method_params'])
                    # The first positional parameter is overridden with `maxd`.
                    meta['retrieval_method_params'][0] = maxd
                    if len(meta['retrieval_method_params']) <= 1:
                        meta['retrieval_method_params'].insert(0, dimensions)

    # A single model approximator is shared by all actions.
    model_approximator = CasmlApproximator(feature_metadata, self._config['minfraction'], dimensions,
                                           lambda x: np.exp(-x * x / bb), self._config['model_tau'],
                                           self._config['model_sigma'], self._config['n_components'],
                                           self._config['n_iter'])

    actions = []
    for i in range(*act):
        actions.append(
            MDPAction.create(i, self._config['explorationthreshold'], maxval, model_approximator,
                             name=act_desc[i] if i in act_desc else None, feature_limits=act_limits))
    # shuffle(actions)
    # NOTE(review): requires at least three actions and keeps only the first
    # three -- confirm this restriction is intended.
    actions = [actions[0], actions[1], actions[2]]

    # def inclusion(approximator, state):
    #     try:
    #         approx = approximator._queries[state]
    #     except KeyError:
    #         approx = approximator.Approximation(approximator, state, approximator._kernelfn)
    #
    #     return len(approx._weights) <= 0 or approx._neighbors[0][0] > self._config['value_tau']
    #
    # value_approximator = KernelApproximator(0.01, maxd, dimensions, lambda x: np.exp(-x ** 2), inclusion)
    value_approximator = InterpolationApproximator(self._config['resolutionfactor'], dimensions)

    mdp = MDP(actions, value_approximator)
    planner = PrioritizedSweeping(mdp, lambda x: False, lambda x: 0, self._config['discountfactor'],
                                  self._config['epsilon'])

    self._learner = Learner(planner)
def init(self, taskspec):
    """Initializes the agent.

    Parses the task specification, builds one action per discrete action
    value (each with its own ``KernelApproximator`` model), and creates
    the planner and the learner that is stored in ``self._learner``.

    Parameters
    ----------
    taskspec : str
        The task specification.

    Raises
    ------
    TaskSpecError
        If the parser reports the task specification as invalid.

    """
    ts = TaskSpecParser(taskspec)
    if not ts.valid:
        raise TaskSpecError('TaskSpec Error: Invalid task spec version')

    _, maxval = ts.get_reward_range()

    extra = ts.get_extra()

    # Keep only the keywords that occur in the extra section and remember
    # where each one was found.
    v = ['OBSDESCR', 'ACTDESCR', 'COPYRIGHT']
    pos = []
    for id_ in list(v):  # iterate over a copy, `v` is modified in the loop
        try:
            pos.append(extra.index(id_))
        except ValueError:
            v.remove(id_)
    sorted_v = sorted(zip(pos, v))

    act_desc = {}
    for i, (_, id_) in enumerate(sorted_v):
        # NOTE(review): `i` indexes the position-sorted keywords while `v` is
        # still in its original order -- confirm this is what get_value expects.
        val = ts.get_value(i, extra, v)
        if id_ == 'OBSDESCR':
            pass
        elif id_ == 'ACTDESCR':
            # NOTE(review): eval() executes arbitrary code found in the task
            # spec; only safe if the task spec comes from a trusted source.
            act_desc = eval(val)

    obs = ts.get_double_obs()
    dimensions = [1.0] * ts.get_num_int_obs()
    # Materialise the transposed limits once: a zip object cannot be
    # subscripted on Python 3.
    obs_bounds = list(zip(*obs))
    obs_range = np.asarray(obs_bounds[1]) - np.asarray(obs_bounds[0])
    dimensions += (1.0 / obs_range).tolist()
    MDPState.set_feature_limits(obs)

    act_limits = ts.get_int_act()
    act_limits += ts.get_double_act()

    discrete_dim = ts.get_num_int_act()
    assert (discrete_dim > 0)
    continuous_dim = ts.get_num_double_act()
    assert (continuous_dim == 0)

    if discrete_dim > 1:
        act_bounds = list(zip(*act_limits))
        min_ = list(act_bounds[0])
        max_ = (np.asarray(list(act_bounds[1])) + 1).tolist()
        actions = [range(*a) for a in zip(min_, max_)]

        import itertools
        # NOTE(review): `act` becomes a list of tuples here, which
        # `range(*act)` below does not accept -- this branch looks broken.
        act = list(itertools.product(*actions))
    else:
        act = act_limits[0][:]
        act[1] += 1  # make the upper limit inclusive for range()

    bb = self._config['modelbreadth'] * self._config['modelbreadth']
    # Distance at which the kernel below drops to `minweight`.
    maxd = np.sqrt(-bb * np.log(self._config['minweight']))
    kernelfn = lambda x: np.exp(-x * x / bb)

    actions = []
    for i in range(*act):
        model_approximator = KernelApproximator(self._config['minfraction'], maxd, dimensions, kernelfn)
        actions.append(
            MDPAction.create(i, self._config['explorationthreshold'], maxval, model_approximator,
                             name=act_desc[i] if i in act_desc else None, feature_limits=act_limits))
    # shuffle(actions)
    # NOTE(review): requires at least three actions and keeps only the first
    # three -- confirm this restriction is intended.
    actions = [actions[0], actions[1], actions[2]]

    value_approximator = InterpolationApproximator(self._config['resolutionfactor'], dimensions)

    mdp = MDP(actions, value_approximator)
    planner = PrioritizedSweeping(mdp, lambda x: False, lambda x: 0,
                                  self._config['discountfactor'], self._config['epsilon'])

    self._learner = Learner(planner)