def decode_action(self, a_m, a_t, state, mode): if mode == "max_probability": a_m = np.argmax(a_m) a_t = np.argmax(a_t) elif mode == "sample": #a_m += 0.01 a_m /= a_m.sum() a_m = np.random.choice(range(3), p=a_m) #a_t += 0.01 a_t /= a_t.sum() a_t = np.random.choice(range(3), p=a_t) action = Action() if a_m == 0: # left action.v_n = -1.0 elif a_m == 1: # ahead action.v_t = +1.0 elif a_m == 2: # right action.v_n = +1.0 if a_t == 0: # left action.angular = +1.0 elif a_t == 1: # stay action.angular = 0.0 elif a_t == 2: # right action.angular = -1.0 if state.detect: action.shoot = +1.0 else: action.shoot = 0.0 return action
def select_action(self, state: RobotState): action = Action() pos = state.pos vel = state.velocity angle = state.angle if state.detect: action.shoot = +1.0 else: action.shoot = 0.0 if ((pos[0]-self.target[0])**2 + (pos[1]-self.target[1])**2 < 0.1): self.index = random.choice(self.connected[self.index]) self.target = self.avaiable_pos[self.index] #print(self.target) #self.index = (self.index + 1) % len(self.path) #self.target = self.path[self.index] v, omega = self.move.moveTo(pos, vel, angle, self.target) action.v_t = v[0] action.v_n = v[1] action.omega = omega return action