def decode_action(self, a_m, a_t, state, mode): if mode == "max_probability": a_m = np.argmax(a_m) a_t = np.argmax(a_t) elif mode == "sample": #a_m += 0.01 a_m /= a_m.sum() a_m = np.random.choice(range(3), p=a_m) #a_t += 0.01 a_t /= a_t.sum() a_t = np.random.choice(range(3), p=a_t) action = Action() if a_m == 0: # left action.v_n = -1.0 elif a_m == 1: # ahead action.v_t = +1.0 elif a_m == 2: # right action.v_n = +1.0 if a_t == 0: # left action.angular = +1.0 elif a_t == 1: # stay action.angular = 0.0 elif a_t == 2: # right action.angular = -1.0 if state.detect: action.shoot = +1.0 else: action.shoot = 0.0 return action