Esempio n. 1
0
 def T(self, state, action):
     """Return the weighted outcomes of attempting `action` from `state`.

     The result is a list of ``(weight, state)`` pairs. The weights are
     presumably transition probabilities (0.8 + 0.1 + 0.1); the resulting
     states are whatever ``self.go`` returns for each direction.
     """
     # Guard clause: with no action the only entry is the current state,
     # carried with a weight of 0.0.
     if action is None:
         return [(0.0, state)]

     # Requested direction first, then the directions produced by
     # turn_right and turn_left, evaluated in that order.
     intended = self.go(state, action)
     veer_right = self.go(state, turn_right(action))
     veer_left = self.go(state, turn_left(action))
     return [(0.8, intended), (0.1, veer_right), (0.1, veer_left)]
Esempio n. 2
0
 def T(self, state, action):
     """List the ``(weight, state)`` pairs reachable from `state` via `action`.

     When an action is given, the requested direction gets weight 0.8 and
     the directions from ``turn_right`` / ``turn_left`` get 0.1 each; every
     target state is obtained from ``self.go``. Without an action the list
     holds only the current state, with weight 0.0.
     """
     if action is not None:
         return [
             (0.8, self.go(state, action)),
             (0.1, self.go(state, turn_right(action))),
             (0.1, self.go(state, turn_left(action))),
         ]
     # No action supplied: nothing moves.
     return [(0.0, state)]
Esempio n. 3
0
 def T(self, state, action):
     """Build the list of ``(weight, state)`` outcomes for `action` in `state`.

     The weights (0.7 for the requested direction, 0.15 for each of the
     ``turn_right`` / ``turn_left`` directions) follow the transition
     matrix from question 2. Target states come from ``self.go``.
     """
     if action is None:
         # No action: a single entry for the unchanged state, weight 0.0.
         return [(0.0, state)]

     # Transition matrix from question 2: 0.7 / 0.15 / 0.15.
     outcomes = [(0.7, self.go(state, action))]
     outcomes.append((0.15, self.go(state, turn_right(action))))
     outcomes.append((0.15, self.go(state, turn_left(action))))
     return outcomes
Esempio n. 4
0
 def T(self, state, action):
     """Return the ``(weight, state)`` outcomes of `action` taken in `state`.

     With no action the state is returned unchanged (weight 0.0).
     Otherwise the requested direction is weighted 0.8 and the directions
     on either side of it (``turn_right`` / ``turn_left``) 0.1 each. A
     direction is kept only if ``self.state_check`` accepts the
     component-wise sum of `state` and that direction, so the returned
     list may hold fewer than three entries.
     """
     if action is None:
         # No action was taken: stay as is.
         return [(0.0, state)]

     # An action was taken: 0.8 towards the requested direction and 0.1
     # towards each of its two neighbouring directions.
     candidates = (
         (action, 0.8),
         (turn_right(action), 0.1),
         (turn_left(action), 0.1),
     )
     return [
         (weight, self.go(state, direction))
         for direction, weight in candidates
         # The candidate target is passed to state_check as a list.
         if self.state_check([s + d for s, d in zip(state, direction)])
     ]