Esempio n. 1
0
    def decode_action(self, a_m, a_t, state, mode):
        if mode == "max_probability":
            a_m = np.argmax(a_m)
            a_t = np.argmax(a_t)
        elif mode == "sample":
            #a_m += 0.01
            a_m /= a_m.sum()
            a_m = np.random.choice(range(3), p=a_m)
            #a_t += 0.01
            a_t /= a_t.sum()
            a_t = np.random.choice(range(3), p=a_t)

        action = Action()
        if a_m == 0:  # left
            action.v_n = -1.0
        elif a_m == 1:  # ahead
            action.v_t = +1.0
        elif a_m == 2:  # right
            action.v_n = +1.0

        if a_t == 0:  # left
            action.angular = +1.0
        elif a_t == 1:  # stay
            action.angular = 0.0
        elif a_t == 2:  # right
            action.angular = -1.0

        if state.detect:
            action.shoot = +1.0
        else:
            action.shoot = 0.0

        return action
Esempio n. 2
0
    def select_action(self, state: RobotState):
        action = Action()
        pos = state.pos
        vel = state.velocity
        angle = state.angle
        if state.detect:
            action.shoot = +1.0
        else:
            action.shoot = 0.0

        if ((pos[0]-self.target[0])**2 + (pos[1]-self.target[1])**2 < 0.1):
            self.index = random.choice(self.connected[self.index])
            self.target = self.avaiable_pos[self.index]
            #print(self.target)
            #self.index = (self.index + 1) % len(self.path)
            #self.target = self.path[self.index]

        v, omega = self.move.moveTo(pos, vel, angle, self.target)
        action.v_t = v[0]
        action.v_n = v[1]
        action.omega = omega
        return action