    def step(self, action):
        roll, pitch, yaw, thrust = action[0], action[1], action[2], action[3]

        # update hunter
        # dronesim.simcontrol([roll, pitch, yaw, thrust])
        dronesim.simrun(
            int(1e9 / self.fps),  # frame duration, converted from seconds to nanoseconds
            [roll, pitch, yaw, thrust])

        # update state
        self.state = self.get_state()

        # calculate reward and check if done
        # even when closing at maximum speed, the per-step reward is about 10 / 100 * 500 = 50
        reward = (self.previous_distance - self.distance) * 500
        self.previous_distance = self.distance

        done = False
        reason = None
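        # the target counts as "in view" when its normalized image coordinates fall inside the camera frame bounds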
        is_in_view = [
            (self.min_relative_x < coordinate[0] < self.max_relative_x
             and self.min_relative_y < coordinate[1] < self.max_relative_y)
            for coordinate in self.coordinate_queue
        ]
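        # NOTE: losing sight of the target does not terminate the episode in this version; the check below is kept disabled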
        '''
        if True not in is_in_view:
            done = True
            reward = 0
            self.episodes += 1
            self.iteration = 0
        '''
        if (self.distance > self.max_detect_distance
                or self.distance < self.min_detect_distance
                or self.iteration > 100000):
            done = True
            reward = 0
            if self.distance < self.min_detect_distance:
                reward = 200
                reason = 1  # caught the target
            if self.distance > self.max_detect_distance:
                reason = 2  # target escaped beyond detection range
            if self.iteration > 100000:
                reason = 3  # episode timed out
            self.episodes += 1
            self.iteration = 0  # reset only after the termination reason has been recorded

        # advance the per-episode step counter
        self.iteration += 1

        return self.state, reward, done, {
            'distance': self.distance,
            'reason': reason
        }

    def step(self, action):
        roll, pitch, yaw, thrust = action[0], action[1], action[2], action[3]

        # every 50 steps, the target drone's own command takes a clipped Gaussian random-walk step in [-1, 1]
        if self.iteration % 50 == 0:
            self.roll_target = np.clip(self.roll_target + 0.33 * np.random.randn(), -1, 1)
            self.pitch_target = np.clip(self.pitch_target + 0.33 * np.random.randn(), -1, 1)
            self.yaw_target = np.clip(self.yaw_target + 0.33 * np.random.randn(), -1, 1)
            self.thrust_target = np.clip(self.thrust_target + 0.33 * np.random.randn(), -1, 1)

        # update hunter
        # dronesim.simcontrol([roll, pitch, yaw, thrust], [self.roll_target, self.pitch_target, self.yaw_target, self.thrust_target])
        dronesim.simrun(
            int(1e9 / self.fps),  # frame duration, converted from seconds to nanoseconds
            [roll, pitch, yaw, thrust],
            [self.roll_target, self.pitch_target, self.yaw_target, self.thrust_target])

        # store the most recent hunter command for later inspection
        self.test_roll_hunter = roll
        self.test_pitch_hunter = pitch
        self.test_yaw_hunter = yaw
        self.test_thrust_hunter = thrust

        # update state
        self.state = self.get_state()

        # calculate reward and check if done
        # even when closing at maximum speed, the per-step reward is about 10 / 100 * 1000 = 100
        reward = (self.previous_distance - self.distance) * 1000
        self.previous_distance = self.distance

        done = False
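        # image coordinates of -1 mean the target was not detected in that frame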
        is_in_view = [
            (self.min_relative_x < coordinate[0] < self.max_relative_x
             and self.min_relative_y < coordinate[1] < self.max_relative_y)
            if coordinate[0] != -1 else False
            for coordinate in self.coordinate_queue
        ]
        if True not in is_in_view:
            done = True
            reward = 0
            self.episodes += 1
            self.iteration = 0

        # penalize the target being off-center in the image, scaled by 1 / distance;
        # a complete loss of detection gets a flat -100
        if self.coordinate_queue[-1][0] != -1:
            error_x = abs(self.coordinate_queue[-1][0] - 0.5)
            error_y = abs(self.coordinate_queue[-1][1] - 0.5)
            # reward = reward - 20*error_x - 10*error_y - 20*(error_x*error_y) # so the largest possible punishment is 20
            reward = reward - 300 * np.linalg.norm(
                [error_x / self.distance, error_y / self.distance])
            # reward = -100 * np.linalg.norm([error_x/self.distance, error_y/self.distance, (1/self.distance - 1)/5])
        else:
            reward = -100

        if (self.distance > self.max_detect_distance
                or self.distance < self.min_detect_distance
                or self.iteration > self.max_iteration):
            done = True
            reward = 0
            if self.distance < self.min_detect_distance:
                reward = 200  # caught the target
            elif self.iteration > self.max_iteration:
                reward = 0  # timed out without catching
            self.episodes += 1
            self.iteration = 0  # reset only after the reward has been settled

        if self.flag:
            done = True
            reward = 0
            self.iteration = 0

        # advance the per-episode step counter
        self.iteration += 1

        return self.state, reward, done, {'distance': self.distance}

    def step(self, action):
        roll, pitch, yaw, thrust = action[0], action[1], action[2], action[3]
        # print('norm: ', roll, pitch, yaw)
        # _, ori_hunter, _, pos_target, ori_target, _, _ = dronesim.siminfo()

        # roll_target, pitch_target, yaw_target = ori_target[0], ori_target[1], ori_target[2]
        # print('target: ', roll_target, pitch_target, yaw_target)
        # print('hunter: ', ori_hunter[0], ori_hunter[1], ori_hunter[2])
        # print('diff: ', ori_hunter[0] - roll_target, ori_hunter[1] - pitch_target, ori_hunter[2] - yaw_target)
        if self.iteration % 50 == 0:
            self.roll_target = np.clip(self.roll_target + 0.33 * np.random.randn(), -1, 1)
            self.pitch_target = np.clip(self.pitch_target + 0.33 * np.random.randn(), -1, 1)
            self.yaw_target = np.clip(self.yaw_target + 0.33 * np.random.randn(), -1, 1)
            self.thrust_target = np.clip(self.thrust_target + 0.33 * np.random.randn(), -1, 1)
        # print('norm_target: ', self.roll_target, self.pitch_target, self.yaw_target)

        # update hunter
        # dronesim.simcontrol([roll, pitch, yaw, thrust], [self.roll_target, self.pitch_target, self.yaw_target, self.thrust_target])
        dronesim.simrun(
            int(1e9 / self.fps),  # frame duration, converted from seconds to nanoseconds
            [roll, pitch, yaw, thrust],
            [self.roll_target, self.pitch_target, self.yaw_target, self.thrust_target])

        # update state
        self.state = self.get_state()

        # calculate reward and check if done
        # even when closing at maximum speed, the per-step reward is about 10 / 100 * 1000 = 100
        reward = (self.previous_distance - self.distance) * 1000
        self.previous_distance = self.distance

        # print('reward: ', reward)

        done = False
        reason = None
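        # reason codes reported in the info dict: 1 = caught the target, 2 = target escaped detection range, 3 = timed out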
        is_in_view = [
            (self.min_relative_x < coordinate[0] < self.max_relative_x
             and self.min_relative_y < coordinate[1] < self.max_relative_y)
            if coordinate[0] != -1 else False
            for coordinate in self.coordinate_queue
        ]
        '''
        if True not in is_in_view:
            done = True
            reward = 0
            self.episodes += 1
            self.iteration = 0
        '''
        # print('reward of first term: ', reward)
        # print(self.coordinate_queue[-1])
        if self.coordinate_queue[-1][0] != -1:
            error_x = abs(self.coordinate_queue[-1][0] - 0.5)
            error_y = abs(self.coordinate_queue[-1][1] - 0.5)
            # reward = reward - 20*error_x - 10*error_y - 20*(error_x*error_y) # so the largest possible punishment is 20
            reward = reward - 200 * np.linalg.norm(
                [error_x / self.distance, error_y / self.distance])
        else:
            reward = -100

        if (self.distance > self.max_detect_distance * 2
                or self.distance < self.min_detect_distance
                or self.iteration > 100000):
            done = True
            reward = 0
            if self.distance < self.min_detect_distance:
                reward = 200
                reason = 1
            if self.distance > self.max_detect_distance:
                reason = 2
            if self.iteration > 100000:
                reason = 3
            self.episodes += 1
            self.iteration = 0  # reset only after the termination reason has been recorded

        # advance the per-episode step counter
        self.iteration += 1
        # print('reward: ', reward)
        return self.state, reward, done, {
            'distance': self.distance,
            'reason': reason
        }
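
    # ------------------------------------------------------------------
    # Rollout sketch (illustration only): one way to drive step() from a
    # training loop. The environment class name and reset() below are
    # assumptions for this example and are not defined in this snippet.
    #
    #   env = HunterEnv()                # hypothetical class name
    #   state = env.reset()              # assumed Gym-style reset()
    #   while True:
    #       # normalized commands in [-1, 1]: roll, pitch, yaw, thrust
    #       action = np.random.uniform(-1, 1, size=4)
    #       state, reward, done, info = env.step(action)
    #       if done:
    #           state = env.reset()
    # ------------------------------------------------------------------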