def T(self, state, action):
    """Return the weighted outcomes of taking ``action`` in ``state``.

    The result is a list of ``(probability, outcome)`` tuples, where each
    outcome is whatever ``self.go`` returns (presumably the resulting
    state -- confirm against ``go``).

    * ``action is None``: a single entry ``(0.0, state)`` is returned, i.e.
      the state itself with weight zero.
    * otherwise: the requested action gets weight 0.8, and the actions
      produced by ``turn_right(action)`` and ``turn_left(action)`` get
      weight 0.1 each.
    """
    if action is None:
        return [(0.0, state)]

    # Evaluate the three outcomes in the order: intended, right, left.
    intended = self.go(state, action)
    veer_right = self.go(state, turn_right(action))
    veer_left = self.go(state, turn_left(action))
    return [(0.8, intended), (0.1, veer_right), (0.1, veer_left)]
def T(self, state, action):
    """Return the weighted outcomes of taking ``action`` in ``state``.

    The weights follow the transition matrix from question 2: 0.7 for the
    requested action and 0.15 each for the actions produced by
    ``turn_right(action)`` and ``turn_left(action)``.

    The result is a list of ``(probability, outcome)`` tuples, where each
    outcome is whatever ``self.go`` returns. When ``action`` is ``None``
    the list holds the single entry ``(0.0, state)``.
    """
    if action is None:
        return [(0.0, state)]

    outcomes = []
    # Intended direction.
    outcomes.append((0.7, self.go(state, action)))
    # Slip to the right-turned direction.
    outcomes.append((0.15, self.go(state, turn_right(action))))
    # Slip to the left-turned direction.
    outcomes.append((0.15, self.go(state, turn_left(action))))
    return outcomes
def T(self, state, action):
    """Return the weighted outcomes of taking ``action`` in ``state``.

    The result is a list of ``(probability, outcome)`` tuples, where each
    outcome is whatever ``self.go`` returns.

    * ``action is None`` (no action taken): the state stays as it is and
      the single entry ``(0.0, state)`` is returned.
    * otherwise: the requested action is weighted 0.8 and the actions
      produced by ``turn_right(action)`` and ``turn_left(action)`` are
      weighted 0.1 each. A candidate is kept only if ``self.state_check``
      accepts the element-wise sum of ``state`` and that candidate (passed
      as a list).

    Note: rejected candidates are omitted together with their weight, so
    the returned probabilities can sum to less than 1 (and the list can be
    empty).
    """
    if action is None:
        # No action was taken: leave the state unchanged.
        return [(0.0, state)]

    # Intended direction first, then the directions to its right and left.
    candidates = (action, turn_right(action), turn_left(action))
    weights = (0.8, 0.1, 0.1)
    return [
        (weight, self.go(state, move))
        for move, weight in zip(candidates, weights)
        if self.state_check([pos + step for pos, step in zip(state, move)])
    ]