Code example #1
 def detect(self, image, threshold, anchors):
     image_data = self.trans(image).to(self.device)
     image_data = image_data.unsqueeze(dim=0)
     # yolov3
     output_13, output_26, output_52 = self.net(image_data)
     output_13 = output_13.cpu().detach()
     output_26 = output_26.cpu().detach()
     output_52 = output_52.cpu().detach()
     indexs_13, outputs_13 = self.filter(output_13, threshold)
     boxes_13 = self.backToImage(indexs_13, outputs_13, anchors[13], 32)
     indexs_26, outputs_26 = self.filter(output_26, threshold)
     boxes_26 = self.backToImage(indexs_26, outputs_26, anchors[26], 16)
     indexs_52, outputs_52 = self.filter(output_52, threshold)
     boxes_52 = self.backToImage(indexs_52, outputs_52, anchors[52], 8)
     boxes_all = torch.cat((boxes_13, boxes_26, boxes_52), dim=0)
     # NMS: remove overlapping boxes
     result_box = []
     if boxes_all.shape[0] == 0:
         return boxes_all
     else:
         # run NMS over the first 4 classes only; this only works for images under the "data/garbage_img" training path
         for i in range(4):
             # for i in range(10):
             boxes_nms = boxes_all[boxes_all[:, 5] == i]
             if boxes_nms.size(0) > 0:
                 result_box.extend(NMS(boxes_nms, 0.3, 2))
         return torch.stack(result_box)
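Every example in this listing calls an NMS helper whose implementation is not shown, and whose signature varies by project (example #1 passes a third mode argument, example #3 passes 'm', example #9 expects four return values). For reference only, below is a minimal sketch of a standard greedy IoU-based NMS in NumPy; the name nms_sketch and the [x1, y1, x2, y2, score, ...] row layout are assumptions, not the projects' actual helpers.

import numpy as np

def nms_sketch(boxes, iou_threshold):
    # boxes: (N, 5+) array of [x1, y1, x2, y2, score, ...]
    # returns indices of the rows that survive suppression
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    areas = (x2 - x1) * (y2 - y1)
    order = boxes[:, 4].argsort()[::-1]  # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # intersection of the current best box with all remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
        iou = inter / (areas[i] + areas[order[1:]] - inter + 1e-10)
        order = order[1:][iou <= iou_threshold]
    return keep

Example #1 feeds PyTorch tensors into its NMS, so the project's own version presumably operates on tensors rather than NumPy arrays.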
Code example #2
    def o_onet(self, img, bboxes):

        h, w, c = img.shape
        bboxes = change_box(bboxes)
        bboxes[:, 0:4] = np.round(bboxes[:, 0:4])

        [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = pad(bboxes, w, h)
        # crop each (possibly clipped) box into a zero-padded patch,
        # resize it to the 48x48 ONet input and normalize to roughly [-1, 1]
        crop_img = np.zeros((bboxes.shape[0], 48, 48, 3), dtype=np.float32)
        for i in range(bboxes.shape[0]):
            tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
            tmp[dy[i]:edy[i], dx[i]:edx[i], :] = img[y[i]:ey[i], x[i]:ex[i], :]
            crop_img[i, :, :, :] = (cv2.resize(tmp, (48, 48)) - 127.5) / 128
        scores, box, landmark = self.load_onet(crop_img)
        scores = scores[:, 1]
        keep_index = np.where(scores > 0.7)[0]
        bboxes = bboxes[keep_index]
        bboxes[:, 4] = scores[keep_index]
        box = box[keep_index]
        landmark = landmark[keep_index]

        # map the normalized landmark offsets back to absolute image coordinates
        w = bboxes[:, 2] - bboxes[:, 0]
        h = bboxes[:, 3] - bboxes[:, 1]
        landmark[:, 0::2] = (np.tile(w, (5, 1)) * landmark[:, 0::2].T +
                             np.tile(bboxes[:, 0], (5, 1))).T
        landmark[:, 1::2] = (np.tile(h, (5, 1)) * landmark[:, 1::2].T +
                             np.tile(bboxes[:, 1], (5, 1))).T

        res_boxes = calibrate_box(bboxes, box)

        nms = NMS(res_boxes, 0.6)
        res_boxes = res_boxes[nms]
        landmark = landmark[nms]

        return res_boxes, landmark
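Examples #2 and #6 both call a pad helper that computes, for each box, the source window inside the image and the destination window inside a zero-padded patch, so boxes sticking past the border can still be cropped. The project's implementation is not shown; this is a hypothetical reconstruction using 0-based, exclusive-end slicing (MTCNN ports often use an inclusive +1 convention instead, so details may differ).

import numpy as np

def pad(bboxes, w, h):
    # img[y:ey, x:ex] is copied into a zero patch at [dy:edy, dx:edx];
    # the patch has size (tmph, tmpw), the box's full unclipped extent
    x1 = bboxes[:, 0].astype(np.int32)
    y1 = bboxes[:, 1].astype(np.int32)
    x2 = bboxes[:, 2].astype(np.int32)
    y2 = bboxes[:, 3].astype(np.int32)
    tmpw = x2 - x1
    tmph = y2 - y1
    x = np.maximum(x1, 0)        # clip to the image on the left/top
    y = np.maximum(y1, 0)
    ex = np.minimum(x2, w)       # clip on the right/bottom
    ey = np.minimum(y2, h)
    dx = x - x1                  # offsets of the clipped crop in the patch
    dy = y - y1
    edx = dx + (ex - x)
    edy = dy + (ey - y)
    return [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph]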
Code example #3
File: test2.py  Project: huangbo1221/mtcnn_opencv
    def detect_face(self,img):
        pnet_boxes = self.pnet_detect(img,self.thresh[0])
        if pnet_boxes is None:
            return None,None
        pnet_num_boxes = pnet_boxes.shape[0]
        iters = int(np.ceil(pnet_num_boxes / self.batch_size))
        total_rnet_boxes = []
        for i in range(iters):
            start = i * self.batch_size
            end = min(start + self.batch_size, pnet_num_boxes)
            tmp = self.rnet_detect(img, pnet_boxes[start:end], self.thresh[1])
            if len(total_rnet_boxes) == 0 and len(tmp) > 0:
                total_rnet_boxes = tmp
            elif len(total_rnet_boxes) > 0 and len(tmp) > 0:
                total_rnet_boxes = np.concatenate((total_rnet_boxes, tmp),axis=0)
        if len(total_rnet_boxes) == 0:
            return None,None
        rnet_sorted_total_boxes = total_rnet_boxes[np.lexsort(-total_rnet_boxes.T)]  # sort by the last column (score), descending
        rnet_nms_boxes,_ = NMS(rnet_sorted_total_boxes, 0.4, 'm')
        rnet_reg_boxes = BoxRegression(rnet_nms_boxes)
        rnet_pad_boxes = BBoxPadSquare(rnet_reg_boxes, img.shape[1], img.shape[0])

        rnet_num_boxes = rnet_pad_boxes.shape[0]
        iters = int(np.ceil(rnet_num_boxes / self.batch_size))
        total_onet_boxes = []
        total_landmarks = []
        for i in range(iters):
            start = i * self.batch_size
            end = min(start + self.batch_size, rnet_num_boxes)
            tmp_box,tmp_landmarks = self.onet_detect(img, rnet_pad_boxes[start:end], self.thresh[2])
            if len(total_onet_boxes) == 0 and len(tmp_box) > 0:
                total_onet_boxes = tmp_box
                total_landmarks = tmp_landmarks
            elif len(total_onet_boxes) > 0 and len(tmp_box) > 0:
                total_onet_boxes = np.concatenate((total_onet_boxes, tmp_box), axis=0)
                total_landmarks = np.concatenate((total_landmarks, tmp_landmarks), axis=0)
        if len(total_onet_boxes) == 0:
            return None,None
        onet_reg_boxes = BoxRegression(total_onet_boxes)
        onet_nms_boxes,valid_index = NMS(onet_reg_boxes,0.4, 'm')
        # valid_index appears to flag suppressed boxes (0 = kept)
        total_landmarks = total_landmarks[valid_index < 1]
        onet_pad_boxes = BBoxPad(onet_nms_boxes, img.shape[1], img.shape[0])
        return onet_pad_boxes,total_landmarks
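BoxRegression, BBoxPadSquare and BBoxPad are project helpers defined elsewhere in test2.py. For orientation, here is a minimal sketch of what a square-padding step typically does before the next stage crops fixed-size patches (a hypothetical reconstruction; the project's own function may differ):

import numpy as np

def BBoxPadSquare(bboxes, img_w, img_h):
    # expand each box to a square around its center, then clip to the image
    boxes = bboxes.copy()
    w = boxes[:, 2] - boxes[:, 0]
    h = boxes[:, 3] - boxes[:, 1]
    side = np.maximum(w, h)
    boxes[:, 0] = np.maximum(boxes[:, 0] + 0.5 * w - 0.5 * side, 0)
    boxes[:, 1] = np.maximum(boxes[:, 1] + 0.5 * h - 0.5 * side, 0)
    boxes[:, 2] = np.minimum(boxes[:, 0] + side, img_w)
    boxes[:, 3] = np.minimum(boxes[:, 1] + side, img_h)
    return boxes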
Code example #4
File: test2.py  Project: huangbo1221/mtcnn_opencv
    def pnet_detect(self,img,thresh):
        scales = self.generate_scales(img)
        h,w = img.shape[:2]
        pnet_boxes = []
        for idx in range(len(scales)):
            ws = int(np.ceil(w * scales[idx]))
            hs = int(np.ceil(h * scales[idx]))
            resized_img = cv2.resize(img, (ws, hs), interpolation=cv2.INTER_LINEAR)
            blob = cv2.dnn.blobFromImage(resized_img, 1.0 / 255.0, None, (0, 0, 0), False)

            self.pnet.setInput(blob)
            detections = self.pnet.forward(["conv4-2", "prob1"])
            reg = np.squeeze(detections[0])
            score = np.squeeze(detections[1][:, 1, :, :])

            score_h, score_w = score.shape
            total_boxes = []
            for i in range(score_h):
                for j in range(score_w):
                    if score[i, j] < 1 - thresh:  # score appears to hold the non-face probability here
                        tmp = []
                        xmin = j * 2 / scales[idx]
                        ymin = i * 2 / scales[idx]
                        xmax = (j * 2 + 12 - 1) / scales[idx]
                        ymax = (i * 2 + 12 - 1) / scales[idx]
                        tmp.extend([xmin, ymin, xmax, ymax])
                        tmp.extend(reg[:, i, j])
                        tmp.append(score[i, j])
                        total_boxes.append(tmp)
            if len(pnet_boxes) == 0 and len(total_boxes) > 0:
                pnet_boxes = np.array(total_boxes)
            elif len(pnet_boxes) > 0 and len(total_boxes) > 0:
                pnet_boxes = np.concatenate((pnet_boxes,np.array(total_boxes)),axis=0)
        if len(pnet_boxes) == 0:
            return None
        pnet_sorted_total_boxes = pnet_boxes[np.lexsort(-pnet_boxes.T)]  # sort by the last column (score), descending
        pnet_nms_boxes,_ = NMS(pnet_sorted_total_boxes)
        pnet_reg_boxes = BoxRegression(pnet_nms_boxes)
        pnet_pad_boxes = BBoxPadSquare(pnet_reg_boxes, w, h)
        return pnet_pad_boxes
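generate_scales is not shown either; the usual MTCNN image pyramid starts at net_size / min_face and shrinks by a fixed factor until the resized image would no longer fit the PNet input. A minimal sketch, where min_face, factor and the 12-pixel PNet size are assumed defaults rather than values read from this project:

def generate_scales(img, min_face=20, factor=0.709, net_size=12):
    h, w = img.shape[:2]
    scale = net_size / min_face
    min_side = min(h, w) * scale
    scales = []
    while min_side >= net_size:
        scales.append(scale)
        scale *= factor
        min_side *= factor
    return scales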
Code example #5
    def p_pent(self, img):

        scale = float(self.pnet_size / self.min_face)
        _img = process_img(img, scale)
        h, w, _ = _img.shape
        all_boxes = []

        while min(h, w) > self.pnet_size:

            # print(_img.shape)
            p_cls, p_box = self.load_pnet(np.expand_dims(_img, axis=0))

            boxes = generate_box(p_cls[:, :, 1], p_box, scale, 0.6)
            scale *= self.factor
            _img = process_img(img, scale)
            h, w, _ = _img.shape

            nms = NMS(boxes[:, :5], 0.5)
            boxes = boxes[nms]

            all_boxes.append(boxes)

        all_boxes = np.vstack(all_boxes)
        # box = all_boxes[:,:5]
        box_w = all_boxes[:, 2] - all_boxes[:, 0]
        box_h = all_boxes[:, 3] - all_boxes[:, 1]

        res_boxes = np.vstack([
            all_boxes[:, 0] + all_boxes[:, 5] * box_w,
            all_boxes[:, 1] + all_boxes[:, 6] * box_h,
            all_boxes[:, 2] + all_boxes[:, 7] * box_w,
            all_boxes[:, 3] + all_boxes[:, 8] * box_h, all_boxes[:, 4]
        ])  #[5,NUM]  --->  [NUM,5]

        print(res_boxes.shape)
        res_boxes = res_boxes.T
        print(res_boxes.shape)

        return res_boxes
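generate_box maps PNet's stride-2 feature map back to 12x12 windows in the original image and attaches the raw regression offsets, which p_pent later applies when building res_boxes from columns 5:9. A minimal sketch, assuming cls_map is an (H, W) face-probability map and reg is (H, W, 4):

import numpy as np

def generate_box(cls_map, reg, scale, threshold, stride=2, cell=12):
    ys, xs = np.where(cls_map > threshold)
    if ys.size == 0:
        return np.empty((0, 9), dtype=np.float32)
    boxes = np.vstack([
        np.round(stride * xs / scale),           # x1
        np.round(stride * ys / scale),           # y1
        np.round((stride * xs + cell) / scale),  # x2
        np.round((stride * ys + cell) / scale),  # y2
        cls_map[ys, xs],                         # score
    ]).T
    # columns 5:9 carry the raw offsets, matching all_boxes[:, 5:9] above
    return np.hstack([boxes, reg[ys, xs]]).astype(np.float32)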
Code example #6
    def p_rent(self, img, bboxes):

        h, w, _ = img.shape
        bboxes = change_box(bboxes)

        bboxes[:, 0:4] = np.round(bboxes[:, 0:4])

        [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = pad(bboxes, w, h)
        # print(edy - dy == ey - y)

        # keep only boxes whose padded patch is at least 24x24
        # (note: range(num) assumes the qualifying boxes come first)
        num = int(np.sum(np.minimum(tmpw, tmph) >= 24))
        _img = np.zeros((num, 24, 24, 3), dtype=np.float32)

        for i in range(num):

            # zero-padded (tmph, tmpw) patch filled from the clipped crop
            tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.float32)
            tmp[dy[i]:edy[i], dx[i]:edx[i], :] = img[y[i]:ey[i], x[i]:ex[i], :]
            _img[i, :, :, :] = (cv2.resize(tmp, (24, 24)) - 127.5) / 127.5

        cls_score, box = self.load_rnet(_img)
        cls_score = cls_score[:, 1]
        keep_index = np.where(cls_score > 0.7)[0]
        boxes = bboxes[keep_index]
        boxes[:, 4] = cls_score[keep_index]
        box = box[keep_index]

        nms = NMS(boxes, 0.7)
        boxes = boxes[nms]
        box = box[nms]

        result = calibrate_box(boxes, box)

        return result
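calibrate_box, used by examples #2 and #6, refines each box by the network's regression offsets scaled by the box width and height. A minimal sketch of the standard MTCNN form (hypothetical reconstruction; the project's own helper may differ in details):

import numpy as np

def calibrate_box(bboxes, reg):
    boxes = bboxes.copy()
    w = (boxes[:, 2] - boxes[:, 0]).reshape(-1, 1)
    h = (boxes[:, 3] - boxes[:, 1]).reshape(-1, 1)
    # offsets are fractions of the box size: [dx1, dy1, dx2, dy2]
    boxes[:, 0:4] = boxes[:, 0:4] + reg * np.hstack([w, h, w, h])
    return boxes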
Code example #7
    def inference(self, image):
        """
        get images list of arbitrary length, separate into small enough 
        batches and doing batch inference
        """
        skip_scale_branch_list = []
        if image.ndim != 3 or image.shape[2] != 3:
            print('Only RGB images are supported.')
            return None
        input_height = self.input_shape[2]
        input_width = self.input_shape[3]
        input_batch = np.zeros((1, input_height, input_width, self.input_shape[1]), dtype=np.float32)
        left_pad = 0
        top_pad = 0
        if image.shape[0] / image.shape[1] > input_height / input_width:
            resize_scale = input_height / image.shape[0]
            input_image = cv2.resize(image, (0, 0), fx=resize_scale, fy=resize_scale)
            left_pad = int((input_width - input_image.shape[1]) / 2)
            input_batch[0, :, left_pad:left_pad + input_image.shape[1], :] = input_image
        else:
            resize_scale = input_width / image.shape[1]
            input_image = cv2.resize(image, (0, 0), fx=resize_scale, fy=resize_scale)
            top_pad = int((input_height - input_image.shape[0]) / 2)
            input_batch[0, top_pad:top_pad + input_image.shape[0], :, :] = input_image

        input_batch = input_batch.transpose([0, 3, 1, 2])
        input_batch = np.array(input_batch, dtype=np.float32, order='C')
        self.inputs[0].host = input_batch

        outputs = common.do_inference(self.context, bindings=self.bindings, inputs=self.inputs, outputs=self.outputs, stream=self.stream, batch_size=self.engine.max_batch_size)
        outputs = [np.squeeze(output.reshape(shape)) for output, shape in zip(outputs, self.output_shapes)]

        bbox_collection = []
        for i in range(self.num_output_scales):
            if i in skip_scale_branch_list:
                continue

            score_map = np.squeeze(outputs[i * 2])
            bbox_map = np.squeeze(outputs[i * 2 + 1])

            RF_center_Xs = np.array([self.receptive_field_center_start[i] + self.receptive_field_stride[i] * x for x in range(score_map.shape[1])])
            RF_center_Xs_mat = np.tile(RF_center_Xs, [score_map.shape[0], 1])
            RF_center_Ys = np.array([self.receptive_field_center_start[i] + self.receptive_field_stride[i] * y for y in range(score_map.shape[0])])
            RF_center_Ys_mat = np.tile(RF_center_Ys, [score_map.shape[1], 1]).T

            x_lt_mat = RF_center_Xs_mat - bbox_map[0, :, :] * self.constant[i]
            y_lt_mat = RF_center_Ys_mat - bbox_map[1, :, :] * self.constant[i]
            x_rb_mat = RF_center_Xs_mat - bbox_map[2, :, :] * self.constant[i]
            y_rb_mat = RF_center_Ys_mat - bbox_map[3, :, :] * self.constant[i]

            x_lt_mat[x_lt_mat < 0] = 0
            y_lt_mat[y_lt_mat < 0] = 0
            x_rb_mat[x_rb_mat > input_width] = input_width
            y_rb_mat[y_rb_mat > input_height] = input_height

            select_index = np.where(score_map > self.score_threshold)
            for idx in range(select_index[0].size):
                bbox_collection.append((
                    x_lt_mat[select_index[0][idx], select_index[1][idx]] - left_pad,
                    y_lt_mat[select_index[0][idx], select_index[1][idx]] - top_pad,
                    x_rb_mat[select_index[0][idx], select_index[1][idx]] - left_pad,
                    y_rb_mat[select_index[0][idx], select_index[1][idx]] - top_pad,
                    score_map[select_index[0][idx], select_index[1][idx]]
                ))

        # NMS
        bbox_collection = sorted(bbox_collection, key=lambda item: item[-1], reverse=True)
        if len(bbox_collection) > self.top_k:
            bbox_collection = bbox_collection[0:self.top_k]
        bbox_collection_np = np.array(bbox_collection, dtype=np.float32)
        # rescale only the coordinates; dividing the whole array would
        # also scale the score column
        bbox_collection_np[:, 0:4] /= resize_scale

        s = time.time()
        final_bboxes = NMS(bbox_collection_np, self.NMS_threshold)
        print("NMS time: ", time.time() - s)
        final_bboxes_ = [
            [
                final_bboxes[i, 0], final_bboxes[i, 1], 
                final_bboxes[i, 2], final_bboxes[i, 3], 
                final_bboxes[i, 4]
            ]
            for i in range(final_bboxes.shape[0])
        ]
        return final_bboxes_
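The decoding in examples #7 and #10 is anchor-free: every score-map cell corresponds to a receptive-field center, and the network regresses constant-scaled distances from that center to the box corners. Restated for a single cell (a hypothetical helper for clarity; the loops above do exactly this, vectorized):

def decode_cell(y, x, bbox_map, center_start, stride, constant):
    cx = center_start + stride * x   # receptive-field center of cell (y, x)
    cy = center_start + stride * y
    x1 = cx - bbox_map[0, y, x] * constant
    y1 = cy - bbox_map[1, y, x] * constant
    x2 = cx - bbox_map[2, y, x] * constant
    y2 = cy - bbox_map[3, y, x] * constant
    return x1, y1, x2, y2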
Code example #8
    def detect(
        cas_dir,
        subset,
        out_file_name,
        global_score_thrh,
        metric_type,
        thrh_type,
        thrh_value,
        interpolate_type,
        proc_type,
        proc_value,
        sample_offset,
        weight_inner,
        weight_outter,
        weight_global,
        att_filtering_value=None,
    ):

        assert (metric_type in ['score', 'multiply', 'att-filtering'])
        assert (thrh_type in ['mean', 'max'])
        assert (interpolate_type in ['quadratic', 'linear', 'nearest'])
        assert (proc_type in ['dilation', 'median'])

        out_detections = []

        dataset_dict = dataset_dicts[subset]

        for video_name in dataset_dict.keys():

            rgb_weight = 2
            flow_weight = 1
            avg_score, att_weight, branch_scores, global_score = get_late_fusion_cas(
                cas_dir, video_name, rgb_weight, flow_weight)

            duration = dataset_dict[video_name]['duration']
            fps = dataset_dict[video_name]['frame_rate']
            frame_cnt = dataset_dict[video_name]['frame_cnt']

            global_score = softmax(global_score, dim=0)

            ################ Thresholding ################
            for class_id in range(action_class_num):

                if global_score[class_id] <= global_score_thrh:
                    continue

                if metric_type == 'score':

                    # metric = softmax(avg_score, dim=1)[:, class_id:class_id + 1]
                    metric = avg_score[:, class_id:class_id + 1]
                    # metric = smooth(metric)
                    metric = normalize(metric)

                elif metric_type == 'multiply':

                    _score = softmax(avg_score, dim=1)[:,
                                                       class_id:class_id + 1]
                    metric = att_weight * _score
                    # metric = smooth(metric)
                    metric = normalize(metric)

                elif metric_type == 'att-filtering':
                    assert (att_filtering_value is not None)

                    metric = softmax(avg_score, dim=1)[:,
                                                       class_id:class_id + 1]
                    # metric = smooth(metric)
                    metric = normalize(metric)
                    metric[att_weight < att_filtering_value] = 0
                    metric = normalize(metric)

                #########################################

                # print(metric.shape)
                metric = interpolate(metric[:, 0],
                                     feature_type,
                                     frame_cnt,
                                     sample_rate,
                                     snippet_size=base_snippet_size,
                                     kind=interpolate_type)

                # add smooth
                metric = smooth(metric)
                metric = np.expand_dims(metric, axis=1)

                thres_list = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
                temp_out = []
                # sweep a fixed list of thresholds (this shadows and
                # effectively ignores the thrh_value argument)
                for thres in thres_list:
                    mask = detect_with_thresholding(metric, thrh_type,
                                                    thres, proc_type,
                                                    proc_value)

                    temp_out.extend(
                        mask_to_detections(mask, metric, weight_inner,
                                           weight_outter))

                # NMS
                nms_threshold = 0.65
                temp_out = NMS(temp_out, nms_threshold)

                #########################################

                for entry in temp_out:
                    entry[2] = class_id

                    entry[3] += global_score[class_id] * weight_global

                    entry[0] = (entry[0] + sample_offset) / fps
                    entry[1] = (entry[1] + sample_offset) / fps

                    entry[0] = max(0, entry[0])
                    entry[1] = max(0, entry[1])
                    entry[0] = min(duration, entry[0])
                    entry[1] = min(duration, entry[1])

                #########################################

                for entry_id in range(len(temp_out)):
                    temp_out[entry_id] = [video_name] + temp_out[entry_id]

                out_detections += temp_out

        # add soft flag

        soft_flag = True
        if dataset_name == 'thumos14':
            output_detections_thumos14(out_detections, out_file_name)
        elif dataset_name in ['ActivityNet12', 'ActivityNet13']:
            if soft_flag:
                soft_output_detections_anet(out_detections, out_file_name,
                                            dataset_name, feature_type)
            else:
                output_detections_anet(out_detections, out_file_name,
                                       dataset_name, feature_type)

        return out_detections
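Unlike the other examples, the NMS here runs on temporal segments: each entry is a [start, end, class_id, score] list and overlap is one-dimensional IoU along the time axis. A minimal sketch of such a segment-level NMS (the name temporal_nms and the entry layout are inferred from how temp_out is used above, not taken from the project):

def temporal_nms(detections, threshold):
    # greedily keep the highest-scoring segment, drop overlapping ones
    dets = sorted(detections, key=lambda d: d[3], reverse=True)
    keep = []
    while dets:
        best = dets.pop(0)
        keep.append(best)
        survivors = []
        for d in dets:
            inter = max(0.0, min(best[1], d[1]) - max(best[0], d[0]))
            union = (best[1] - best[0]) + (d[1] - d[0]) - inter
            if union <= 0 or inter / union <= threshold:
                survivors.append(d)
        dets = survivors
    return keep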
Code example #9
    def detect_image(self, image):

        image = Image.fromarray(image)
        # caution: re-running the global initializer on every call resets
        # all variables and would overwrite any restored weights
        self.sess.run(tf.global_variables_initializer())

        ratio = np.array([image.size[0] / _SIZE, image.size[1] / _SIZE])

        boxed_image, image_shape = resize_image(image, self.input_size)
        inputs = np.array(boxed_image, dtype='float32') / 255.
        inputs = np.expand_dims(inputs, 0)

        boxes, scores = self.sess.run([self.boxes, self.scores],
                                      feed_dict={
                                          self.inputs: inputs,
                                          self.ratio: ratio
                                      })

        mask = scores >= _SCORE_THRESHOLD

        boxes_ = []
        scores_ = []
        classes_ = []

        for Class in range(len(self.class_names)):

            cls_boxes = boxes[np.array(mask[:, Class]), :]
            cls_scores = scores[np.array(mask[:, Class]), Class]

            while cls_boxes.shape[0] != 0:
                cls_boxes, cls_scores, max_box, max_score = NMS(
                    cls_boxes, cls_scores, _IOU_THRESHOLD)
                boxes_.append(max_box)
                scores_.append(max_score)
                classes_.append(np.ones_like(max_score, dtype=int) * Class)

        out_boxes = np.reshape(boxes_, [-1, 4])
        out_scores = np.reshape(scores_, [-1])
        out_classes = np.reshape(classes_, [-1])

        print('Found {} boxes for {}'.format(len(out_boxes), 'img'))

        # Visualisation#################################################################################################

        colors = []
        cls = ''
        color = tuple(np.random.randint(0, 256, 3))
        for i in out_classes:
            if cls != i:
                color = tuple(np.random.randint(0, 256, 3))
                cls = i
                colors.append(color)
            else:
                colors.append(color)

        font = ImageFont.truetype(font='./font/FiraMono-Medium.otf',
                                  size=np.floor(3e-2 * image.size[1] +
                                                0.5).astype(np.int32))
        thickness = (image.size[0] + image.size[1]) // 500  # thickness of the bounding box

        for i, c in list(enumerate(out_classes)):
            predicted_class = self.class_names[c]
            box = out_boxes[i]
            score = out_scores[i]

            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)

            top, left, bottom, right = box
            top = max(0, np.floor(top + 0.5).astype(np.int32))
            left = max(0, np.floor(left + 0.5).astype(np.int32))
            bottom = min(image.size[1],
                         np.floor(bottom + 0.5).astype(np.int32))
            right = min(image.size[0], np.floor(right + 0.5).astype(np.int32))
            print(label, (left, top), (right, bottom))

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for j in range(thickness):
                draw.rectangle([left + j, top + j, right - j, bottom - j],
                               outline=colors[i])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=colors[i])
            draw.text(text_origin, label, fill=(0, 0, 0), font=font)
            del draw

        cv2.imwrite(FLAGS.output_img, np.array(image))
Code example #10
    def predict(self,
                image,
                resize_scale=1,
                score_threshold=0.8,
                top_k=100,
                NMS_threshold=0.3,
                NMS_flag=True,
                skip_scale_branch_list=[]):

        if image.ndim != 3 or image.shape[2] != 3:
            print('Only RGB images are supported.')
            return None

        bbox_collection = []

        shorter_side = min(image.shape[:2])
        if shorter_side * resize_scale < 128:
            resize_scale = float(128) / shorter_side

        input_image = cv2.resize(image, (0, 0),
                                 fx=resize_scale,
                                 fy=resize_scale)

        input_image = input_image.astype(dtype=np.float32)
        input_image = input_image[:, :, :, np.newaxis]
        input_image = input_image.transpose([3, 2, 0, 1])

        data_batch = DataBatch()
        data_batch.data = [mx.ndarray.array(input_image, self.ctx)]

        # tic = time.time()
        self.module.forward(data_batch=data_batch, is_train=False)
        results = self.module.get_outputs()
        outputs = []
        for output in results:
            outputs.append(output.asnumpy())
        # toc = time.time()
        # infer_time = (toc - tic) * 1000

        for i in range(self.num_output_scales):
            if i in skip_scale_branch_list:
                continue

            score_map = np.squeeze(outputs[i * 2], (0, 1))

            # score_map_show = score_map * 255
            # score_map_show[score_map_show < 0] = 0
            # score_map_show[score_map_show > 255] = 255
            # cv2.imshow('score_map' + str(i), cv2.resize(score_map_show.astype(dtype=np.uint8), (0, 0), fx=2, fy=2))
            # cv2.waitKey()

            bbox_map = np.squeeze(outputs[i * 2 + 1], 0)

            RF_center_Xs = np.array([
                self.receptive_field_center_start[i] +
                self.receptive_field_stride[i] * x
                for x in range(score_map.shape[1])
            ])
            RF_center_Xs_mat = np.tile(RF_center_Xs, [score_map.shape[0], 1])
            RF_center_Ys = np.array([
                self.receptive_field_center_start[i] +
                self.receptive_field_stride[i] * y
                for y in range(score_map.shape[0])
            ])
            RF_center_Ys_mat = np.tile(RF_center_Ys, [score_map.shape[1], 1]).T

            x_lt_mat = RF_center_Xs_mat - bbox_map[0, :, :] * self.constant[i]
            y_lt_mat = RF_center_Ys_mat - bbox_map[1, :, :] * self.constant[i]
            x_rb_mat = RF_center_Xs_mat - bbox_map[2, :, :] * self.constant[i]
            y_rb_mat = RF_center_Ys_mat - bbox_map[3, :, :] * self.constant[i]

            x_lt_mat = x_lt_mat / resize_scale
            x_lt_mat[x_lt_mat < 0] = 0
            y_lt_mat = y_lt_mat / resize_scale
            y_lt_mat[y_lt_mat < 0] = 0
            x_rb_mat = x_rb_mat / resize_scale
            x_rb_mat[x_rb_mat > image.shape[1]] = image.shape[1]
            y_rb_mat = y_rb_mat / resize_scale
            y_rb_mat[y_rb_mat > image.shape[0]] = image.shape[0]

            select_index = np.where(score_map > score_threshold)
            for idx in range(select_index[0].size):
                bbox_collection.append(
                    (x_lt_mat[select_index[0][idx], select_index[1][idx]],
                     y_lt_mat[select_index[0][idx], select_index[1][idx]],
                     x_rb_mat[select_index[0][idx], select_index[1][idx]],
                     y_rb_mat[select_index[0][idx], select_index[1][idx]],
                     score_map[select_index[0][idx], select_index[1][idx]]))

        # NMS
        bbox_collection = sorted(bbox_collection,
                                 key=lambda item: item[-1],
                                 reverse=True)
        if len(bbox_collection) > top_k:
            bbox_collection = bbox_collection[0:top_k]
        bbox_collection_numpy = np.array(bbox_collection, dtype=np.float32)

        final_bboxes = NMS(bbox_collection_numpy, NMS_threshold)
        final_bboxes_ = []
        for i in range(final_bboxes.shape[0]):
            # bbox: (x1, y1, x2, y2, score, -1)
            final_bboxes_.append([
                final_bboxes[i, 0], final_bboxes[i, 1], final_bboxes[i, 2],
                final_bboxes[i, 3], final_bboxes[i, 4], -1
            ])

        return final_bboxes_  # , infer_time
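A short usage sketch for this predictor; detector stands for an instance of the class above and the file names are placeholders:

import cv2

img = cv2.imread('test.jpg')  # any 3-channel image
bboxes = detector.predict(img, score_threshold=0.8, NMS_threshold=0.3)
for x1, y1, x2, y2, score, _ in bboxes:
    cv2.rectangle(img, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
cv2.imwrite('result.jpg', img)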