def choose_action(self, env, state):
    """Follow (and lazily rebuild) a best-first plan toward the room's goal position."""
    # Entering a new room invalidates the current plan.
    if self.room != state.r:
        self.room = state.r
        self.plan = None
    pos = (state.x, state.y)
    # Replan if there is no plan yet or the agent has strayed off the planned path.
    if self.plan is None or pos not in self.plan:
        priority_func = lambda s: manhattan_dist(s, env.goal)
        expand_partial = lambda s: expand_state(env.states, s)
        self.plan = search.best_first_graph_search(pos, env.goal, priority_func, expand_partial)
    for i, pathpos in enumerate(self.plan):
        if i == len(self.plan) - 1:
            # Reached the end of the door plan: choose a random option.
            return np.array([choice(env.actions)])
        elif pos == pathpos:
            # On the path: move in the direction of the next position in the plan.
            fx, fy = self.plan[i + 1]
            dx, dy = (fx - state.x, fy - state.y)
            return np.array([env.movemap[dx, dy]])
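# --- Hedged sketch (not part of the original source): plausible stand-ins for the
# `manhattan_dist` and `expand_state` helpers used above, assuming positions are
# (x, y) tuples and that each entry of `states` carries x/y coordinates. The names
# below are hypothetical (suffixed with `_sketch`) so they do not shadow the real
# helpers; they only illustrate the contract the planner appears to rely on.
def manhattan_dist_sketch(a, b):
    """Manhattan (L1) distance between two (x, y) positions; used as the best-first priority."""
    return abs(a[0] - b[0]) + abs(a[1] - b[1])

def expand_state_sketch(states, s):
    """4-connected neighbors of position `s`, restricted to positions occupied by some state."""
    x, y = s
    occupied = {(st.x, st.y) for st in states}
    return [p for p in ((x + 1, y), (x - 1, y), (x, y + 1), (x, y - 1)) if p in occupied]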
def choose_action_parameterized(self, env, state, field, action):
    """Plan toward the first state matching `field`, then execute `action` at the end of the plan."""
    # Entering a new room invalidates the current plan.
    if self.room != state.r:
        self.room = state.r
        self.plan = None
    pos = (state.x, state.y)
    # Replan if there is no plan yet or the agent has strayed off the planned path.
    if self.plan is None or pos not in self.plan:
        key_pos = filter_states(env.states, field)
        # Not positive if this is the right move here: fall back to a random
        # action when no state in the room matches the requested field.
        if not key_pos:
            return np.array([choice(env.actions)])
        priority_func = lambda s: manhattan_dist(s, key_pos[0])
        expand_partial = lambda s: expand_state(env.states, s)
        self.plan = search.best_first_graph_search(pos, key_pos[0], priority_func, expand_partial)
    for i, pathpos in enumerate(self.plan):
        if i == len(self.plan) - 1:
            # Reached the end of the plan for `field`: execute the parameterized action.
            return np.array([action])
        elif pos == pathpos:
            # On the path: move in the direction of the next position in the plan.
            fx, fy = self.plan[i + 1]
            dx, dy = (fx - state.x, fy - state.y)
            return np.array([env.movemap[dx, dy]])
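# --- Hedged sketch (not part of the original source): one plausible reading of the
# `filter_states` helper used above, assuming each entry of `env.states` has x/y
# coordinates plus boolean feature flags, and that `field` names the flag the plan
# should navigate toward. The real helper may differ; `filter_states_sketch` is a
# hypothetical name and only illustrates the assumed contract (it must return
# (x, y) positions, since `key_pos[0]` is used as a search goal above).
def filter_states_sketch(states, field):
    """Return the (x, y) positions of states whose `field` attribute is set."""
    return [(s.x, s.y) for s in states if getattr(s, field, False)]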