# Imports required by the methods in this excerpt.
import numpy as np
from random import choice

import search  # module providing best_first_graph_search


def set_room(self, room):
    assert 0 <= room < len(self.rooms)
    self.room = room
    roomarr = self.rooms[room]
    self.states = roomarr.copy()
    self.goal = filter_states(self.states, self.field.DOOR)[0]
    self.initPos = filter_states(self.states, self.field.INIT)
    self.lock_pos = filter_states(self.states, self.field.LOCK)[0]
    # Not every room contains a key, so key_pos may be empty.
    key_pos = filter_states(self.states, self.field.KEY)
    self.key_pos = key_pos[0] if key_pos else None
    self.agent_holding_key = False
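# filter_states is used throughout this excerpt but not shown. The
# commented-out boolean mask in perform_action below
# (self.states == self.field.DOOR) suggests self.states is a 2-D numpy
# array, so a minimal sketch under that assumption could look like
# this; the body is illustrative, not the original helper.
def filter_states(states, field):
    """Return the (x, y) index of every cell whose value equals `field`."""
    return [tuple(int(i) for i in idx) for idx in np.argwhere(states == field)]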
def perform_action(self, state, action):
    assert action.shape == (1,)
    action = action[0]  # in this case action is a scalar
    pos2 = (state.x, state.y)
    a = self.actions
    reward = self.step_reward
    if action in [a.N, a.S, a.E, a.W]:
        # Apply the movement delta; stay put if the target cell is blocked.
        delta = self.movemap[action]
        pos2 = tuple(map(sum, zip(pos2, delta)))
        if self.forbidden_pos(pos2):
            pos2 = (state.x, state.y)
        if pos2 == self.goal:
            state = State(*[0]*17)._replace(x=pos2[0], y=pos2[1])
            if not self.episode_finished(state):
                # Reaching the door advances the agent to the next room.
                self.set_room(self.room + 1)
                reward = self.task_reward
                state2 = self.new_state()
                pos2 = (state2.x, state2.y)
            else:
                reward = self.final_reward
    elif action == a.G:
        # Grab: pick up the key if the agent is standing on it.
        if self.states[pos2] == self.field.KEY:
            self.agent_holding_key = True
            self.states[pos2] = self.field.EMPTY
            reward = self.task_reward
    elif action == a.P:
        # Put: open the lock; no key left on the map implies the agent
        # has already picked it up.
        if (self.states[pos2] == self.field.LOCK
                and not filter_states(self.states, self.field.KEY)):
            self.agent_holding_key = False
            self.states[self.goal] = self.field.EMPTY
            # self.states[self.states == self.field.DOOR] = self.field.EMPTY
            reward = self.task_reward
    return (self.calculate_state(pos2), reward)
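# movemap is indexed by an action in perform_action above and by a
# (dx, dy) delta in choose_action_parameterized below, which suggests a
# two-way lookup table. A minimal sketch under that assumption; the
# concrete deltas are illustrative and may not match the original's
# coordinate convention.
def build_movemap(a):
    deltas = {a.N: (0, -1), a.S: (0, 1), a.E: (1, 0), a.W: (-1, 0)}
    movemap = dict(deltas)
    # Reverse entries: look up the action that produces a given delta.
    movemap.update({delta: act for act, delta in deltas.items()})
    return movemap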
def choose_action_parameterized(self, env, state, field, action):
    # Invalidate the cached plan when the agent enters a new room.
    if self.room != state.r:
        self.room = state.r
        self.plan = None
    pos = (state.x, state.y)
    if self.plan is None or pos not in self.plan:
        key_pos = filter_states(env.states, field)
        # Not positive this is the right move here.
        if not key_pos:
            # print('RANDOM action for plan', action)
            return np.array([choice(env.actions)])
        priority_func = lambda s: manhattan_dist(s, key_pos[0])
        expand_partial = lambda s: expand_state(env.states, s)
        self.plan = search.best_first_graph_search(
            pos, key_pos[0], priority_func, expand_partial)
    for i, pathpos in enumerate(self.plan):
        if i == len(self.plan) - 1:
            # At the target cell: execute the parameterized action itself.
            # print('action', action, 'for plan', field)
            return np.array([action])
        elif pos == pathpos:
            # Follow the plan: step toward the next position on the path.
            fx, fy = self.plan[i + 1]
            dx, dy = (fx - state.x, fy - state.y)
            # print('move', dx, dy, 'for plan', field)
            return np.array([env.movemap[dx, dy]])
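# manhattan_dist and expand_state feed the best-first search above. A
# minimal sketch, assuming a 4-connected grid stored as a 2-D numpy
# array; WALL is a hypothetical field code standing in for the
# original's self.field values.
WALL = 1  # hypothetical: whatever field value marks impassable cells

def manhattan_dist(a, b):
    """L1 distance between two (x, y) positions."""
    return abs(a[0] - b[0]) + abs(a[1] - b[1])

def expand_state(states, pos):
    # Yield the in-bounds, passable 4-neighbours of pos.
    x, y = pos
    for dx, dy in ((0, -1), (0, 1), (1, 0), (-1, 0)):
        nx, ny = x + dx, y + dy
        if (0 <= nx < states.shape[0] and 0 <= ny < states.shape[1]
                and states[nx, ny] != WALL):
            yield (nx, ny)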