def reset(self):
    """Reset the task and randomly pick one of the two goal cells.

    After delegating to ``MazeTask.reset``, a boolean is drawn uniformly
    with ``choice``. It is stored in ``self.specialObs`` and decides
    whether the goal is placed in row 3 or row 1 of column
    ``self.length + 1``.
    """
    MazeTask.reset(self)
    go_up = choice([True, False])
    self.specialObs = go_up
    # Both candidate goals share the same column; only the row differs.
    self.env.goal = (3 if go_up else 1, self.length + 1)
def performAction(self, action):
    """Execute ``action``, sometimes replaced by a randomly chosen one.

    A list of candidate actions is built from ``range(self.actions)``,
    leaving out every index that differs from ``action`` by 2 modulo 4
    (presumably the opposite direction -- confirm against the action
    encoding used by the environment). With probability
    ``self.stochAction * len(candidates)`` a uniformly drawn candidate is
    executed instead of ``action``; otherwise ``action`` itself is passed
    on to ``MazeTask.performAction``.

    :param action: index of the action requested by the agent.
    """
    # The difference must be parenthesised: ``%`` binds tighter than ``-``,
    # so ``action - a % 4`` reduced only ``a`` and missed the case where
    # ``action - a`` is -2 (e.g. action 0 vs. a 2).
    poss = [a for a in range(self.actions) if (action - a) % 4 != 2]
    if random() < self.stochAction * len(poss):
        MazeTask.performAction(self, choice(poss))
    else:
        MazeTask.performAction(self, action)
def __init__(self, **args):
    """Build the maze topology and initialise the underlying task.

    The start position list is set to ``[(2, 1)]`` and the keyword
    arguments are applied through ``self.setArgs`` before ``self.length``
    is read. The topology is assembled column by column and then
    transposed:

    * one column of all ones,
    * ``self.length`` columns ``[1, 1, 0, 1, 1]``,
    * one column ``[1, 0, 0, 0, 1]``,
    * one closing column of all ones.

    Finally ``MazeTask.__init__`` is called with the same keyword
    arguments.
    """
    self.initPos = [(2, 1)]
    self.setArgs(**args)
    layout = [[1] * 5]
    # A fresh list per corridor column, as in an explicit append loop.
    layout.extend([1, 1, 0, 1, 1] for _ in range(self.length))
    layout.append([1, 0, 0, 0, 1])
    layout.append([1] * 5)
    self.topology = array(layout).T
    MazeTask.__init__(self, **args)
def getReward(self):
    """Return the reward for the current position.

    When the agent (``self.env.perseus``) is in column
    ``self.length + 1`` and its row is exactly two away from the goal's
    row, the bad choice was taken: the agent is moved onto the goal and
    ``self.bangPenalty`` is returned. In every other case the reward is
    delegated to ``MazeTask.getReward``.
    """
    env = self.env
    in_last_column = env.perseus[1] == self.length + 1
    if in_last_column and abs(env.perseus[0] - env.goal[0]) == 2:
        # Bad choice taken: relocate the agent to the goal and penalise.
        env.perseus = env.goal
        return self.bangPenalty
    return MazeTask.getReward(self)
def reset(self):
    """Reset the task and set the environment's ``perseusDir`` to 1."""
    MazeTask.reset(self)
    environment = self.env
    # Applied after the base reset, so every episode starts with this value.
    environment.perseusDir = 1
def getReward(self):
    """Return ``self.minReward`` while ``self.bad`` is set.

    When ``self.bad`` is falsy the reward is delegated to
    ``MazeTask.getReward``.
    """
    if not self.bad:
        return MazeTask.getReward(self)
    return self.minReward
def reset(self):
    """Reset the task and clear the ``bad`` flag."""
    MazeTask.reset(self)
    # Cleared after the base reset, so each episode starts unflagged.
    self.bad = False