def init(self, taskspec):
    """Initialize the body-motion agent from an RL-Glue task specification.

    Parses the EXTRA section of the task spec and, based on the
    ``FEATUREREP`` value, installs key-to-index mapping functions on
    ``MDPState`` and ``MDPAction`` (3-DOF for the 'larm' representation,
    6-DOF otherwise).

    Parameters
    ----------
    taskspec : str
        The RL-Glue task specification string.
    """
    super(BodyMotionAgent, self).init(taskspec)

    ts = TaskSpecParser(taskspec)
    if ts.valid:
        extra = ts.get_extra()

        # Keep only the identifiers actually present in the EXTRA string,
        # ordered by their position of appearance.
        v = ['FEATUREREP', 'STATESPERDIM', 'STATEDESCR', 'ACTIONDESCR', 'COPYRIGHT']
        pos = []
        for i, id_ in enumerate(list(v)):
            try:
                pos.append(extra.index(id_))
            except ValueError:  # fix: was a bare `except:` (siblings use ValueError)
                v.remove(id_)
        sorted_v = sorted(zip(pos, v))
        v = [s[1] for s in sorted_v]

        for i, id_ in enumerate(v):
            val = ts.get_value(i, extra, v)
            if id_ == 'FEATUREREP':
                self._feature_rep = val

                if self._feature_rep == 'larm':
                    # Left-arm representation: positional features only.
                    def map_state_key(key):
                        return {
                            "x": 0,
                            "y": 1,
                            "z": 2,
                        }[key]

                    def map_action_key(key):
                        return {
                            "dx": 0,
                            "dy": 1,
                            "dz": 2,
                        }[key]
                else:
                    # Full representation: position plus orientation features.
                    def map_state_key(key):
                        return {
                            "x": 0,
                            "y": 1,
                            "z": 2,
                            "wx": 3,
                            "wy": 4,
                            "wz": 5,
                        }[key]

                    def map_action_key(key):
                        return {
                            "dx": 0,
                            "dy": 1,
                            "dz": 2,
                            "dwx": 3,
                            "dwy": 4,
                            "dwz": 5
                        }[key]

                # Install the mappers class-wide so all states/actions share them.
                MDPState.key_to_index = staticmethod(map_state_key)
                MDPAction.key_to_index = staticmethod(map_action_key)
def init(self, taskspec):
    """Initialize the agent from an RL-Glue task specification.

    Builds per-feature metadata for a CASML model approximator, creates one
    MDP action per discrete action id, and wires up an MDP, a prioritized
    sweeping planner, and the learner.

    Parameters
    ----------
    taskspec : str
        The RL-Glue task specification string.

    Raises
    ------
    TaskSpecError
        If the task specification version is invalid.
    """
    ts = TaskSpecParser(taskspec)
    if not ts.valid:
        raise TaskSpecError('TaskSpec Error: Invalid task spec version')

    _, maxval = ts.get_reward_range()
    extra = ts.get_extra()

    # Keep only the identifiers actually present in EXTRA, ordered by
    # their position of appearance in the string.
    v = ['OBSDESCR', 'ACTDESCR', 'COPYRIGHT']
    pos = []
    for i, id_ in enumerate(list(v)):
        try:
            pos.append(extra.index(id_))
        except ValueError:
            v.remove(id_)
    sorted_v = sorted(zip(pos, v))

    act_desc = {}
    for i, (_, id_) in enumerate(sorted_v):
        val = ts.get_value(i, extra, v)
        if id_ == 'OBSDESCR':
            pass
        elif id_ == 'ACTDESCR':
            # NOTE(review): eval() on task-spec text — trusts the environment
            # side of the RL-Glue connection; consider ast.literal_eval.
            act_desc = eval(val)

    # Per-dimension scaling: 1.0 for integer features, 1/range for doubles.
    # NOTE(review): zip(...)[0] indexing assumes Python 2 (zip returns a
    # list) — this code will not run under Python 3 as written.
    obs = ts.get_double_obs()
    dimensions = [1.0] * ts.get_num_int_obs()
    dimensions += (1.0 / np.asarray(zip(*obs)[1] - np.asarray(zip(*obs)[0]))).tolist()
    MDPState.set_feature_limits(obs)

    act_limits = ts.get_int_act()
    act_limits += ts.get_double_act()

    # Only purely discrete action spaces are supported.
    discrete_dim = ts.get_num_int_act()
    assert (discrete_dim > 0)
    continuous_dim = ts.get_num_double_act()
    assert (continuous_dim == 0)

    if discrete_dim > 1:
        # Cartesian product of every discrete dimension's value range.
        min_ = list(zip(*act_limits)[0])
        max_ = (np.asarray(list(zip(*act_limits)[1])) + 1).tolist()
        actions = [range(*a) for a in zip(min_, max_)]
        import itertools
        act = list(itertools.product(*actions))
    else:
        # Single dimension: [min, max+1] so range(*act) covers max inclusively.
        act = act_limits[0][:]
        act[1] += 1

    # Gaussian kernel breadth; maxd is the distance at which the kernel
    # weight drops to the configured minimum.
    bb = self._config['modelbreadth'] * self._config['modelbreadth']
    maxd = np.sqrt(-bb * np.log(self._config['minweight']))

    # Assemble retrieval metadata for the three CASML case features.
    feature_metadata = {}
    feature_names = ['state', 'act', 'delta_state']
    for i, n in enumerate(feature_names):
        feature_metadata[n] = {
            'type': 'float',
            'is_index': False if n == 'delta_state' else True,
            'order': i,
        }
        try:
            feature_metadata[n]['retrieval_method'] = self._config['retrieval'][n]['method']
        except KeyError:
            # No retrieval method configured for this feature; skip params.
            continue
        try:
            feature_metadata[n]['retrieval_method_params'] = self._config['retrieval'][n]['method_params']
        except KeyError:
            pass
        else:
            if n == 'state':
                if isinstance(feature_metadata[n]['retrieval_method_params'], dict):
                    # Fill in the default per-dimension scale if absent.
                    if 'scale' not in feature_metadata[n]['retrieval_method_params']:
                        feature_metadata[n]['retrieval_method_params']['scale'] = dimensions
                else:
                    # Positional params: [scale, max-distance].
                    feature_metadata[n]['retrieval_method_params'] = listify(
                        feature_metadata[n]['retrieval_method_params'])
                    feature_metadata[n]['retrieval_method_params'][0] = maxd
                    if len(feature_metadata[n]['retrieval_method_params']) <= 1:
                        feature_metadata[n]['retrieval_method_params'].insert(0, dimensions)

    model_approximator = CasmlApproximator(feature_metadata, self._config['minfraction'], dimensions,
                                           lambda x: np.exp(-x * x / bb), self._config['model_tau'],
                                           self._config['model_sigma'], self._config['n_components'],
                                           self._config['n_iter'])

    # One MDPAction per discrete action id, all sharing the model approximator.
    actions = []
    for i in range(*act):
        actions.append(
            MDPAction.create(i, self._config['explorationthreshold'], maxval, model_approximator,
                             name=act_desc[i] if i in act_desc else None,
                             feature_limits=act_limits))
    # shuffle(actions)
    # NOTE(review): hard-coded to the first three actions — presumably the
    # task only uses three; verify against the environment.
    actions = [actions[0], actions[1], actions[2]]

    # def inclusion(approximator, state):
    #     try:
    #         approx = approximator._queries[state]
    #     except KeyError:
    #         approx = approximator.Approximation(approximator, state, approximator._kernelfn)
    #
    #     return len(approx._weights) <= 0 or approx._neighbors[0][0] > self._config['value_tau']
    #
    # value_approximator = KernelApproximator(0.01, maxd, dimensions, lambda x: np.exp(-x ** 2), inclusion)
    value_approximator = InterpolationApproximator(self._config['resolutionfactor'], dimensions)

    mdp = MDP(actions, value_approximator)
    planner = PrioritizedSweeping(mdp, lambda x: False, lambda x: 0,
                                  self._config['discountfactor'], self._config['epsilon'])
    self._learner = Learner(planner)
def init(self, taskspec):
    """Initialize the agent from an RL-Glue task specification.

    Creates one CSML model approximator per discrete action (each with a
    shared case-inclusion predicate), then wires up an MDP, a prioritized
    sweeping planner, and the learner.

    Parameters
    ----------
    taskspec : str
        The RL-Glue task specification string.

    Raises
    ------
    TaskSpecError
        If the task specification version is invalid.
    """
    ts = TaskSpecParser(taskspec)
    if not ts.valid:
        raise TaskSpecError('TaskSpec Error: Invalid task spec version')

    _, maxval = ts.get_reward_range()
    extra = ts.get_extra()

    # Keep only the identifiers actually present in EXTRA, ordered by
    # their position of appearance in the string.
    v = ['OBSDESCR', 'ACTDESCR', 'COPYRIGHT']
    pos = []
    for i, id_ in enumerate(list(v)):
        try:
            pos.append(extra.index(id_))
        except ValueError:
            v.remove(id_)
    sorted_v = sorted(zip(pos, v))

    act_desc = {}
    for i, (_, id_) in enumerate(sorted_v):
        val = ts.get_value(i, extra, v)
        if id_ == 'OBSDESCR':
            pass
        elif id_ == 'ACTDESCR':
            # NOTE(review): eval() on task-spec text — trusts the environment
            # side of the RL-Glue connection; consider ast.literal_eval.
            act_desc = eval(val)

    # Per-dimension scaling: 1.0 for integer features, 1/range for doubles.
    # NOTE(review): zip(...)[0] indexing assumes Python 2 (zip returns a
    # list) — this code will not run under Python 3 as written.
    obs = ts.get_double_obs()
    dimensions = [1.0] * ts.get_num_int_obs()
    dimensions += (1.0 / np.asarray(zip(*obs)[1] - np.asarray(zip(*obs)[0]))).tolist()
    MDPState.set_feature_limits(obs)

    act_limits = ts.get_int_act()
    act_limits += ts.get_double_act()

    # Only purely discrete action spaces are supported.
    discrete_dim = ts.get_num_int_act()
    assert (discrete_dim > 0)
    continuous_dim = ts.get_num_double_act()
    assert (continuous_dim == 0)

    if discrete_dim > 1:
        # Cartesian product of every discrete dimension's value range.
        min_ = list(zip(*act_limits)[0])
        max_ = (np.asarray(list(zip(*act_limits)[1])) + 1).tolist()
        actions = [range(*a) for a in zip(min_, max_)]
        import itertools
        act = list(itertools.product(*actions))
    else:
        # Single dimension: [min, max+1] so range(*act) covers max inclusively.
        act = act_limits[0][:]
        act[1] += 1

    # Gaussian kernel breadth; maxd is the distance at which the kernel
    # weight drops to the configured minimum.
    bb = self._config['modelbreadth'] * self._config['modelbreadth']
    maxd = np.sqrt(-bb * np.log(self._config['minweight']))
    kernelfn = lambda x: np.exp(-x * x / bb)

    def model_inclusion(approximator, state, delta):
        # Decide whether a new case (state, delta) should be added to the
        # approximator's case base.
        # NOTE(review): reaches into approximator privates (_queries,
        # _neighbors, _kernelfn) — tightly coupled to its internals.
        try:
            approx = approximator._queries[state]
        except KeyError:
            approx = approximator.Approximation(approximator, state, approximator._kernelfn)

        do_add = True
        for _, s, d in approx._neighbors:
            delta_error = np.linalg.norm(d - delta)
            if delta_error <= self._config['model_sigma']:
                # At least one of the cases in the case base correctly estimated the query case,
                # the query case does not add any new information, do not add.
                do_add = False
                break
        # Even if some neighbor predicted the delta, still add the case when
        # the nearest neighbor is farther than model_tau (sparse region).
        do_add = do_add or approx._neighbors[0][0] > self._config['model_tau']
        return do_add

    # One MDPAction per discrete action id, each with its own approximator.
    actions = []
    for i in range(*act):
        model_approximator = CsmlApproximator(self._config['minfraction'], maxd, dimensions, kernelfn,
                                              self._config['n_components'], self._config['n_iter'],
                                              model_inclusion)
        actions.append(
            MDPAction.create(i, self._config['explorationthreshold'], maxval, model_approximator,
                             name=act_desc[i] if i in act_desc else None,
                             feature_limits=act_limits))
    # shuffle(actions)
    # NOTE(review): hard-coded to the first three actions — presumably the
    # task only uses three; verify against the environment.
    actions = [actions[0], actions[1], actions[2]]

    # def value_inclusion(approximator, state):
    #     try:
    #         approx = approximator._queries[state]
    #     except KeyError:
    #         approx = approximator.Approximation(approximator, state, approximator._kernelfn)
    #
    #     return len(approx._weights) <= 0 or approx._neighbors[0][0] > self._config['value_tau']
    #
    # value_approximator = KernelApproximator(0.01, maxd, dimensions, lambda x: np.exp(-x ** 2), value_inclusion)
    value_approximator = InterpolationApproximator(self._config['resolutionfactor'], dimensions)

    mdp = MDP(actions, value_approximator)
    planner = PrioritizedSweeping(mdp, lambda x: False, lambda x: 0,
                                  self._config['discountfactor'], self._config['epsilon'])
    self._learner = Learner(planner)
def init(self, taskspec):
    """Initialize the penalty-kick agent from an RL-Glue task specification.

    Reads the feature representation and support-leg from the EXTRA section,
    derives state-feature limits from the robot's leg geometry, and installs
    representation-specific ``is_valid``/``encode``/``decode`` methods on
    ``MDPState``.

    Parameters
    ----------
    taskspec : str
        The RL-Glue task specification string.
    """
    super(PenaltyKickAgent, self).init(taskspec)

    ts = TaskSpecParser(taskspec)
    if ts.valid:
        extra = ts.get_extra()

        # Keep only the identifiers actually present in EXTRA, ordered by
        # their position of appearance.
        v = ['FEATUREREP', 'SUPPORTLEG', 'STATEDESCR', 'ACTIONDESCR', 'COPYRIGHT']
        pos = []
        for i, id_ in enumerate(list(v)):
            try:
                pos.append(extra.index(id_))
            except ValueError:  # fix: was a bare `except:` (siblings use ValueError)
                v.remove(id_)
        sorted_v = sorted(zip(pos, v))
        v = [s[1] for s in sorted_v]

        for i, id_ in enumerate(v):
            val = ts.get_value(i, extra, v)
            if id_ == 'FEATUREREP':
                self._feature_rep = val
            if id_ == 'SUPPORTLEG':
                # Standing on the left leg means kicking with the right one,
                # hence the right-side joints — TODO confirm against the task.
                if val == 'left':
                    self._ankle_roll = "RAnkleRoll"
                    self._hip_roll = "RHipRoll"

        min_hip_roll, max_hip_roll = NaoWorldModel().get_robot_info(self._hip_roll)
        leg_length = NaoWorldModel().get_robot_info("TibiaLength") + NaoWorldModel().get_robot_info("ThighLength")

        MDPState.dtype = MDPState.DTYPE_INT

        if self._feature_rep == 'rl':
            # 2-D state: ball image location x kick-leg displacement.
            try:
                max_location = NaoWorldModel().get_object("ball").resolution[0]
            except AttributeError:
                # Ball object not available yet; fall back to a zero range.
                max_location = 0

            MDPState.set_minmax_features([0, max_location],
                                         [math.floor(leg_length * math.sin(min_hip_roll)),
                                          math.ceil(leg_length * math.sin(max_hip_roll))])
            MDPState.set_states_per_dim([int((MDPState.max_features[0] - MDPState.min_features[0]) / 2),
                                         int(math.ceil((MDPState.max_features[1] - MDPState.min_features[1]) / 4))])

            # noinspection PyShadowingNames
            def is_valid(self):
                """Return False if any feature lies outside its limits."""
                real_state = True
                if MDPState.min_features is not None:
                    for (feature, min_feature, max_feature) in zip(self, MDPState.min_features,
                                                                   MDPState.max_features):
                        if feature < (min_feature - eps) or feature > (max_feature + eps):
                            real_state = False
                            self._logger.debug("\t\t\t\tNext state is not valid (feature %d out of range)",
                                               feature)
                            break
                return real_state

            MDPState.is_valid = is_valid
        else:
            # One-hot binned representation of the leg displacement.
            MDPState.set_minmax_features([math.floor(leg_length * math.sin(min_hip_roll)),
                                          math.ceil(leg_length * math.sin(max_hip_roll))])
            MDPState.set_nfeatures(
                int(math.ceil((MDPState.max_features - MDPState.min_features + 1) / self._bin_width)))

            # noinspection PyShadowingNames
            def is_valid(self):
                """A state is valid iff it is one-hot: exactly one 1, rest 0."""
                num_ones = len(np.where(self.get()[0:len(self)] == 1)[0])
                # fix: `num_ones > 1 or num_ones < 1` simplified to `!= 1`
                if num_ones != 1 or not all(i == 0 or i == 1 for i in self.get()):
                    return False
                return True

            # noinspection PyShadowingNames
            def encode(self):
                """Return the index of the active (one-hot) bin."""
                return np.where(self.get()[0:len(self)] == 1)[0]

            def decode(cls, state_repr):
                """Build a one-hot state from a bin index (int) or raw value (float).

                Out-of-range bins are clamped to the first/last bin. Note the
                closure over `self` (the agent) for `_bin_width`.
                """
                decoded = [0] * cls.nfeatures
                bin_num = 0
                if isinstance(state_repr[0], int):
                    bin_num = state_repr[0]
                elif isinstance(state_repr[0], float):
                    bin_num = int(math.floor((state_repr[0] - cls.min_features) / self._bin_width))
                if 0 <= bin_num <= cls.nfeatures - 1:
                    decoded[bin_num] = 1
                elif bin_num < 0:
                    decoded[0] = 1
                else:
                    decoded[cls.nfeatures - 1] = 1
                return cls(decoded)

            MDPState.is_valid = is_valid
            MDPState.encode = encode
            MDPState.decode = decode
def init(self, taskspec):
    """Initializes the agent.

    Parameters
    ----------
    taskspec : str
        The task specification.
    """
    spec = TaskSpecParser(taskspec)
    if not spec.valid:
        raise TaskSpecError('TaskSpec Error: Invalid task spec version')

    _, maxval = spec.get_reward_range()
    extra = spec.get_extra()

    # Retain only the tags present in EXTRA, sorted by where they appear.
    tags = ['OBSDESCR', 'ACTDESCR', 'COPYRIGHT']
    positions = []
    for tag in list(tags):
        try:
            positions.append(extra.index(tag))
        except ValueError:
            tags.remove(tag)

    act_desc = {}
    for idx, (_, tag) in enumerate(sorted(zip(positions, tags))):
        value = spec.get_value(idx, extra, tags)
        if tag == 'ACTDESCR':
            act_desc = eval(value)

    # Per-dimension scale: 1.0 for integer observations, 1/range for doubles.
    obs = spec.get_double_obs()
    dimensions = [1.0] * spec.get_num_int_obs()
    dimensions += (1.0 / np.asarray(zip(*obs)[1] - np.asarray(zip(*obs)[0]))).tolist()
    MDPState.set_feature_limits(obs)

    act_limits = spec.get_int_act()
    act_limits += spec.get_double_act()

    # Only fully discrete action spaces are supported.
    n_discrete = spec.get_num_int_act()
    assert (n_discrete > 0)
    n_continuous = spec.get_num_double_act()
    assert (n_continuous == 0)

    if n_discrete > 1:
        # Enumerate the cartesian product of each dimension's value range.
        lows = list(zip(*act_limits)[0])
        highs = (np.asarray(list(zip(*act_limits)[1])) + 1).tolist()
        import itertools
        act = list(itertools.product(*[range(*bounds) for bounds in zip(lows, highs)]))
    else:
        # Single dimension: make the upper bound inclusive for range().
        act = act_limits[0][:]
        act[1] += 1

    # Gaussian kernel: breadth_sq controls its width, max_dist is where the
    # weight falls to the configured minimum.
    breadth_sq = self._config['modelbreadth'] * self._config['modelbreadth']
    max_dist = np.sqrt(-breadth_sq * np.log(self._config['minweight']))

    def kernelfn(x):
        return np.exp(-x * x / breadth_sq)

    # One action per discrete id, each with its own kernel approximator.
    actions = [
        MDPAction.create(a, self._config['explorationthreshold'], maxval,
                         KernelApproximator(self._config['minfraction'], max_dist,
                                            dimensions, kernelfn),
                         name=act_desc.get(a), feature_limits=act_limits)
        for a in range(*act)
    ]
    # shuffle(actions)
    actions = [actions[j] for j in (0, 1, 2)]

    value_approximator = InterpolationApproximator(self._config['resolutionfactor'], dimensions)
    mdp = MDP(actions, value_approximator)
    self._learner = Learner(PrioritizedSweeping(mdp, lambda x: False, lambda x: 0,
                                                self._config['discountfactor'],
                                                self._config['epsilon']))