Example #1
    def get_FPS(self, image, test_interval):
        image_shape = np.array(np.shape(image)[0:2])
        #---------------------------------------------------------#
        #   Add gray bars to the image for a distortion-free resize
        #---------------------------------------------------------#
        crop_img = letterbox_image(image, [self.input_shape[0],self.input_shape[1]])
        #----------------------------------------------------------------------------------#
        #   Convert RGB to BGR, since the original centernet_hourglass weights were trained on BGR images
        #----------------------------------------------------------------------------------#
        photo = np.array(crop_img,dtype = np.float32)[:,:,::-1]
        #-----------------------------------------------------------#
        #   Preprocess and normalize the image; the resulting photo has shape [1, 512, 512, 3]
        #-----------------------------------------------------------#
        photo = np.reshape(preprocess_image(photo), [1, self.input_shape[0], self.input_shape[1], self.input_shape[2]])

        preds = self.centernet.predict(photo)
        
        if self.nms:
            preds = np.array(nms(preds, self.nms_threhold))

        if len(preds[0])>0:
            preds[0][:, 0:4] = preds[0][:, 0:4] / (self.input_shape[0] / 4)
            
            det_label = preds[0][:, -1]
            det_conf = preds[0][:, -2]
            det_xmin, det_ymin, det_xmax, det_ymax = preds[0][:, 0], preds[0][:, 1], preds[0][:, 2], preds[0][:, 3]

            top_indices = [i for i, conf in enumerate(det_conf) if conf >= self.confidence]
            top_conf = det_conf[top_indices]
            top_label_indices = det_label[top_indices].tolist()
            top_xmin = np.expand_dims(det_xmin[top_indices], -1)
            top_ymin = np.expand_dims(det_ymin[top_indices], -1)
            top_xmax = np.expand_dims(det_xmax[top_indices], -1)
            top_ymax = np.expand_dims(det_ymax[top_indices], -1)
            
            boxes = centernet_correct_boxes(top_ymin,top_xmin,top_ymax,top_xmax,np.array([self.input_shape[0],self.input_shape[1]]),image_shape)

         
        t1 = time.time()
        for _ in range(test_interval):
            preds = self.centernet.predict(photo)
            
            if self.nms:
                preds = np.array(nms(preds, self.nms_threhold))

            if len(preds[0])>0:
                preds[0][:, 0:4] = preds[0][:, 0:4] / (self.input_shape[0] / 4)
                
                det_label = preds[0][:, -1]
                det_conf = preds[0][:, -2]
                det_xmin, det_ymin, det_xmax, det_ymax = preds[0][:, 0], preds[0][:, 1], preds[0][:, 2], preds[0][:, 3]

                top_indices = [i for i, conf in enumerate(det_conf) if conf >= self.confidence]
                top_conf = det_conf[top_indices]
                top_label_indices = det_label[top_indices].tolist()
                top_xmin = np.expand_dims(det_xmin[top_indices], -1)
                top_ymin = np.expand_dims(det_ymin[top_indices], -1)
                top_xmax = np.expand_dims(det_xmax[top_indices], -1)
                top_ymax = np.expand_dims(det_ymax[top_indices], -1)
                
                boxes = centernet_correct_boxes(top_ymin,top_xmin,top_ymax,top_xmax,np.array([self.input_shape[0],self.input_shape[1]]),image_shape)

        t2 = time.time()
        tact_time = (t2 - t1) / test_interval
        return tact_time
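The first, untimed pass above acts as a warm-up so graph construction and memory allocation do not pollute the measurement; the returned tact_time is seconds per image, so FPS is its reciprocal. A minimal usage sketch, assuming a hypothetical CenterNet wrapper class that exposes this method:

from PIL import Image

centernet = CenterNet()                      # hypothetical wrapper exposing get_FPS
image = Image.open("street.jpg")             # any test image
tact_time = centernet.get_FPS(image, test_interval=100)
print('%.4f seconds per image, %.2f FPS' % (tact_time, 1 / tact_time))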
Example #2
    def __get_bbox(self, image):
        """
        :param image: the image to run prediction on
        :return: bboxes after NMS, stored as (xmin, ymin, xmax, ymax, score, class)
        """
        org_image = np.copy(image)
        org_h, org_w, _ = org_image.shape

        yolo_input = utils.img_preprocess2(image, None, (self.__test_input_size, self.__test_input_size), False)
        yolo_input = yolo_input[np.newaxis, ...]

        pred_sbbox, pred_mbbox, pred_lbbox = self.__sess.run(
            [self.__pred_sbbox, self.__pred_mbbox, self.__pred_lbbox],
            feed_dict={
                self.__input_data: yolo_input,
                self.__training: False
            }
        )

        sbboxes = self.__convert_pred(pred_sbbox, (org_h, org_w), self.__valid_scales[0])
        mbboxes = self.__convert_pred(pred_mbbox, (org_h, org_w), self.__valid_scales[1])
        lbboxes = self.__convert_pred(pred_lbbox, (org_h, org_w), self.__valid_scales[2])

        # sbboxes = self.__valid_scale_filter(sbboxes, self.__valid_scales[0])
        # mbboxes = self.__valid_scale_filter(mbboxes, self.__valid_scales[1])
        # lbboxes = self.__valid_scale_filter(lbboxes, self.__valid_scales[2])

        bboxes = np.concatenate([sbboxes, mbboxes, lbboxes], axis=0)
        bboxes = utils.nms(bboxes, self.__score_threshold, self.__iou_threshold, method='nms')
        return bboxes
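Several examples on this page delegate to a utils.nms(bboxes, score_threshold, iou_threshold, method='nms') helper operating on rows of (xmin, ymin, xmax, ymax, score, class), as the docstring above states. The project's own implementation is not shown here; the following is a minimal greedy, class-wise sketch under that interface assumption:

import numpy as np

def nms_sketch(bboxes, score_threshold, iou_threshold):
    # bboxes: (N, 6) array of (xmin, ymin, xmax, ymax, score, class)
    bboxes = bboxes[bboxes[:, 4] >= score_threshold]
    keep = []
    for cls in np.unique(bboxes[:, 5]):
        cls_boxes = bboxes[bboxes[:, 5] == cls]
        cls_boxes = cls_boxes[cls_boxes[:, 4].argsort()[::-1]]  # high score first
        while len(cls_boxes):
            best, rest = cls_boxes[0], cls_boxes[1:]
            keep.append(best)
            # IoU between the kept box and the remaining candidates
            x1 = np.maximum(best[0], rest[:, 0])
            y1 = np.maximum(best[1], rest[:, 1])
            x2 = np.minimum(best[2], rest[:, 2])
            y2 = np.minimum(best[3], rest[:, 3])
            inter = np.clip(x2 - x1, 0, None) * np.clip(y2 - y1, 0, None)
            area_best = (best[2] - best[0]) * (best[3] - best[1])
            area_rest = (rest[:, 2] - rest[:, 0]) * (rest[:, 3] - rest[:, 1])
            iou = inter / (area_best + area_rest - inter + 1e-9)
            cls_boxes = rest[iou < iou_threshold]               # drop overlaps
    return np.array(keep)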
Example #3
    def __call__(self, loc, score, anchor, img_size, scale=1.):
        if self.mode == "training":
            n_pre_nms = self.n_train_pre_nms
            n_post_nms = self.n_train_post_nms
        else:
            n_pre_nms = self.n_test_pre_nms
            n_post_nms = self.n_test_post_nms
        # Convert the RPN regression output into proposal boxes
        roi = loc2bbox(anchor, loc)

        # Clip with slices so the proposals stay inside the image
        roi[:, slice(0, 4, 2)] = np.clip(roi[:, slice(0, 4, 2)], 0,
                                         img_size[1])
        roi[:, slice(1, 4, 2)] = np.clip(roi[:, slice(1, 4, 2)], 0,
                                         img_size[0])

        # The minimum width/height may not be less than 16
        min_size = self.min_size * scale
        # Compute the widths and heights
        ws = roi[:, 2] - roi[:, 0]
        hs = roi[:, 3] - roi[:, 1]
        # Discard proposals that are too small
        keep = np.where((hs >= min_size) & (ws >= min_size))[0]
        roi = roi[keep, :]
        score = score[keep]
        # Keep the highest-scoring proposals
        order = score.ravel().argsort()[::-1]
        if n_pre_nms > 0:
            order = order[:n_pre_nms]
        roi = roi[order, :]
        roi = nms(roi, self.nms_thresh)
        roi = torch.Tensor(roi)
        roi = roi[:n_post_nms]
        return roi
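A readability note on the clipping above: slice(0, 4, 2) picks columns 0 and 2 (the x coordinates, clipped to the image width img_size[1]) and slice(1, 4, 2) picks columns 1 and 3 (the ys, clipped to the height img_size[0]). For a 4-column roi the same thing can be written with step slicing (an equivalent sketch, not the project's code):

roi[:, 0::2] = np.clip(roi[:, 0::2], 0, img_size[1])   # clip x to image width
roi[:, 1::2] = np.clip(roi[:, 1::2], 0, img_size[0])   # clip y to image height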
Example #4
 def __get_bbox(self, image):
     """
     :param image: the image to run prediction on
     :return: bboxes after NMS, stored as (xmin, ymin, xmax, ymax, score, class)
     """
     if self.__multi_test:
         test_input_sizes = self.__train_input_sizes[::3]
         bboxes_list = []
         for test_input_size in test_input_sizes:
             valid_scale = (0, np.inf)
             bboxes_list.append(
                 self.__predict(image, test_input_size, valid_scale))
             if self.__flip_test:
                 bboxes_flip = self.__predict(image[:, ::-1, :],
                                              test_input_size, valid_scale)
                 bboxes_flip[:,
                             [0, 2]] = image.shape[1] - bboxes_flip[:,
                                                                    [2, 0]]
                 bboxes_list.append(bboxes_flip)
         bboxes = np.row_stack(bboxes_list)
     else:
         bboxes = self.__predict(image, self.__test_input_size, (0, np.inf))
     bboxes = utils.nms(bboxes,
                        self.__score_threshold,
                        self.__iou_threshold,
                        method='nms')
     return bboxes
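In the flip-test branch, boxes predicted on the mirrored image are mapped back with bboxes_flip[:, [0, 2]] = image.shape[1] - bboxes_flip[:, [2, 0]]; xmin and xmax swap roles under the mirror, which is why the column order is reversed on the right-hand side. A minimal standalone check of that remap (made-up values, not project code):

import numpy as np

w = 100                                             # image width
box = np.array([[10., 20., 30., 40., 0.9, 0.]])     # xmin, ymin, xmax, ymax, score, class
box[:, [0, 2]] = w - box[:, [2, 0]]                 # xmin' = w - xmax, xmax' = w - xmin
print(box[0, :4])                                   # -> [70. 20. 90. 40.]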
Example #5
def generate_det(args):
    ckpt_path = args.checkpoint_path
    try:
        names = os.listdir(ckpt_path)
        for name in names:
            out = re.findall("ResNet_.*", name)
            if out != []:
                ckpt_path = out[0]
                break
        ckpt_path = os.path.join(args.checkpoint_path, ckpt_path)
    except Exception:
        print("There is no checkpoint in ", args.checkpoint)
        exit
    model = RC3D_resnet.RC3D(num_classes, cfg.Test.Image_shape,
                             args.feature_path)
    model = model.cuda()
    model.zero_grad()
    model.load(ckpt_path)
    test_batch = utils.new_Batch_Generator(name_to_id, num_classes,
                                           args.image_path,
                                           args.annotation_path, 'test')
    fp = []
    det = []
    for i in range(1, num_classes):
        f = open(
            os.path.join(args.json_path, "detection_{}.json".format(str(i))),
            'w')
        fp.append(f)
        det.append({})
        det[i - 1]['object'] = []
    try:
        while True:
            with torch.no_grad():
                data, gt = next(test_batch)
                _, _, object_cls_score, object_offset = model.forward(data)
                # bbox is sorted by score in descending order
                bbox = utils.nms(model.proposal_bbox, object_cls_score,
                                 object_offset, model.num_classes,
                                 model.im_info)
                if bbox is None:
                    continue
                #pdb.set_trace()
                for _cls, score, proposal in zip(bbox['cls'], bbox['score'],
                                                 bbox['bbox']):
                    if proposal[:, 0] == proposal[:, 1]:
                        continue
                    temp_dict = {}
                    temp_dict['file_name'] = data
                    temp_dict['start'] = float(proposal[:, 0])
                    temp_dict['end'] = float(proposal[:, 1])
                    temp_dict['score'] = float(score)
                    det[int(_cls[0]) - 1]['object'].append(temp_dict)
                torch.cuda.empty_cache()
    except StopIteration:
        for i in range(num_classes - 1):
            json.dump(det[i], fp[i])
            fp[i].close()
    print("generate_gt Done!")
Example #6
    def detect_image(self, image_id, image):
        f = open("./input/detection-results/" + image_id + ".txt", "w")
        self.confidence = 0.01
        self.nms_threhold = 0.5

        image_shape = np.array(np.shape(image)[0:2])
        crop_img = letterbox_image(image,
                                   [self.input_shape[0], self.input_shape[1]])
        # Convert RGB to BGR, since the original centernet_hourglass weights were trained on BGR images
        photo = np.array(crop_img, dtype=np.float32)[:, :, ::-1]

        # Preprocess and normalize the image
        photo = np.reshape(
            preprocess_image(photo),
            [1, self.input_shape[0], self.input_shape[1], self.input_shape[2]])
        preds = self.centernet.predict(photo)

        if self.nms:
            preds = np.array(nms(preds, self.nms_threhold))

        if len(preds[0]) <= 0:
            return image

        preds[0][:, 0:4] = preds[0][:, 0:4] / (self.input_shape[0] / 4)

        # Keep only boxes scoring above the confidence threshold
        det_label = preds[0][:, -1]
        det_conf = preds[0][:, -2]
        det_xmin, det_ymin, det_xmax, det_ymax = preds[0][:, 0], preds[0][:, 1], preds[0][:, 2], preds[0][:, 3]

        top_indices = [
            i for i, conf in enumerate(det_conf) if conf >= self.confidence
        ]
        top_conf = det_conf[top_indices]
        top_label_indices = det_label[top_indices].tolist()
        top_xmin = np.expand_dims(det_xmin[top_indices], -1)
        top_ymin = np.expand_dims(det_ymin[top_indices], -1)
        top_xmax = np.expand_dims(det_xmax[top_indices], -1)
        top_ymax = np.expand_dims(det_ymax[top_indices], -1)

        # Remove the gray-bar padding
        boxes = centernet_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.input_shape[0], self.input_shape[1]]), image_shape)

        for i, c in enumerate(top_label_indices):
            predicted_class = self.class_names[int(c)]
            score = str(top_conf[i])

            top, left, bottom, right = boxes[i]
            f.write("%s %s %s %s %s %s\n" %
                    (predicted_class, score[:6], str(int(left)), str(
                        int(top)), str(int(right)), str(int(bottom))))

        f.close()
        return
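The file written above contains one detection per line as "class_name score xmin ymin xmax ymax", e.g.:

person 0.9532 12 34 256 480

The ./input/detection-results/ directory layout suggests the output feeds a standard mAP evaluation script (such as the widely used Cartucho/mAP tool); that is an inference from the path, not something stated in the code.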
Example #7
def reconstruct(Iorig, I, Y, out_size, threshold=.9):

    net_stride = 2**4
    side = ((208. + 40.) / 2.) / net_stride  # 7.75

    Probs = Y[..., 0]
    Affines = Y[..., 2:]
    rx, ry = Y.shape[:2]
    ywh = Y.shape[1::-1]
    iwh = np.array(I.shape[1::-1], dtype=float).reshape((2, 1))

    xx, yy = np.where(Probs > threshold)

    WH = getWH(I.shape)
    MN = WH / net_stride

    vxx = vyy = 0.5  #alpha

    base = lambda vx, vy: np.matrix([[-vx, -vy, 1.], [vx, -vy, 1.],
                                     [vx, vy, 1.], [-vx, vy, 1.]]).T
    labels = []

    for i in range(len(xx)):
        y, x = xx[i], yy[i]
        affine = Affines[y, x]
        prob = Probs[y, x]

        mn = np.array([float(x) + .5, float(y) + .5])

        A = np.reshape(affine, (2, 3))
        A[0, 0] = max(A[0, 0], 0.)
        A[1, 1] = max(A[1, 1], 0.)

        pts = np.array(A * base(vxx, vyy))  #*alpha
        pts_MN_center_mn = pts * side
        pts_MN = pts_MN_center_mn + mn.reshape((2, 1))

        pts_prop = pts_MN / MN.reshape((2, 1))

        labels.append(DLabel(0, pts_prop, prob))

    final_labels = nms(labels, .1)
    TLps = []

    if len(final_labels):
        final_labels.sort(key=lambda x: x.prob(), reverse=True)
        for i, label in enumerate(final_labels):

            t_ptsh = getRectPts(0, 0, out_size[0], out_size[1])
            ptsh = np.concatenate((label.pts * getWH(Iorig.shape).reshape(
                (2, 1)), np.ones((1, 4))))
            H = find_T_matrix(ptsh, t_ptsh)
            Ilp = cv2.warpPerspective(Iorig, H, out_size, borderValue=.0)

            TLps.append(Ilp)

    return final_labels, TLps
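In reconstruct, Y[..., 2:] holds one 2x3 affine matrix per output cell, and pts = A * base(vxx, vyy) maps the four corners of a unit cell through it. base relies on np.matrix, which is deprecated; an equivalent written with plain arrays and the @ operator would be (an editorial sketch, not the original code):

import numpy as np

def base(vx, vy):
    # four corners of a unit cell in homogeneous coordinates, shape (3, 4)
    return np.array([[-vx, -vy, 1.], [vx, -vy, 1.],
                     [vx, vy, 1.], [-vx, vy, 1.]]).T

# with this version the product becomes: pts = A @ base(vxx, vyy)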
Example #8
 def __get_bbox(self, image):
     """
     :param image: the image to run prediction on
     :return: bboxes after NMS, stored as (xmin, ymin, xmax, ymax, score, class)
     """
     bboxes = self.__predict(image, self.__test_input_size, (0, np.inf))
     bboxes = utils.nms(bboxes,
                        self.__score_threshold,
                        self.__iou_threshold,
                        method='nms')
     return bboxes
Example #9
    def _predict_pil(self, pil_img, **kwargs):
        '''
        Args:
            pil_img: PIL.Image.Image
            input_size: int, input resolution
            conf_thres: float, confidence threshold
        '''
        input_size = kwargs.get('input_size', self.input_size)
        conf_thres = kwargs.get('conf_thres', self.conf_thres)
        assert isinstance(pil_img, Image.Image), 'input must be a PIL.Image'
        assert input_size is not None, 'Please specify the input resolution'
        assert conf_thres is not None, 'Please specify the confidence threshold'

        # pad to square
        input_img, _, pad_info = utils.rect_to_square(pil_img, None,
                                                      input_size, 0)

        input_ori = tvf.to_tensor(input_img)
        input_ = input_ori.unsqueeze(0)

        assert input_.dim() == 4
        device = next(self.model.parameters()).device
        input_ = input_.to(device=device)
        with torch.no_grad():
            dts = self.model(input_).cpu()

        dts = dts.squeeze()
        # post-processing
        dts = dts[dts[:, 5] >= conf_thres]
        if len(dts) > 1000:
            _, idx = torch.topk(dts[:, 5], k=1000)
            dts = dts[idx, :]
        if kwargs.get('debug', False):
            np_img = np.array(input_img)
            visualization.draw_dt_on_np(np_img, dts)
            plt.imshow(np_img)
            plt.show()
        dts = utils.nms(dts,
                        is_degree=True,
                        nms_thres=0.45,
                        img_size=input_size)
        dts = utils.detection2original(dts, pad_info.squeeze())
        if kwargs.get('debug', False):
            np_img = np.array(pil_img)
            visualization.draw_dt_on_np(np_img, dts)
            plt.imshow(np_img)
            plt.show()
        return dts
Example #10
    def run_result(org_img, input_size, params):
        original_image_size = org_img.shape[:2]
        img = image_preporcess(np.copy(org_img), [input_size, input_size],
                               canny=params.canny)

        input_data = [img.astype(np.float32)]
        interpreter.set_tensor(input_details[0]['index'], input_data)
        interpreter.invoke()
        bboxes = interpreter.get_tensor(merge_branch["index"])

        pred_bbox = np.reshape(bboxes, (-1, 5 + params.class_num))

        bboxes = postprocess_boxes(pred_bbox, original_image_size, input_size,
                                   0.3)
        bboxes = nms(bboxes, 0.3, method='nms')
        draw_boxes(params, org_img, bboxes)
        return bboxes
Example #11
    def run_result(org_img, input_size, params):
        original_image_size = org_img.shape[:2]
        img = image_preporcess(np.copy(org_img), [input_size, input_size],
                               canny=params.canny)
        pred_mbbox, pred_lbbox = sess.run(rtensor[1:],
                                          feed_dict={rtensor[0]: [img]})
        pred_bbox = np.concatenate([
            np.reshape(pred_mbbox, (-1, 5 + params.class_num)),
            np.reshape(pred_lbbox, (-1, 5 + params.class_num))
        ],
                                   axis=0)

        bboxes = postprocess_boxes(pred_bbox, original_image_size, input_size,
                                   0.3)
        bboxes = nms(bboxes, 0.3, method='nms')
        draw_boxes(params, org_img, bboxes)
        return bboxes
Example #12
File: api.py Project: aowais2/RAPiD
def detect_once(model, pil_img, conf_thres, nms_thres=0.45, input_size=608):
    '''
    Run the model on the pil_img and return the detections.
    '''
    ori_w, ori_h = pil_img.width, pil_img.height
    input_img, _, pad_info = utils.rect_to_square(pil_img, None, input_size, 0)

    input_img = tvf.to_tensor(input_img).cuda()
    with torch.no_grad():
        dts = model(input_img[None]).cpu().squeeze()
    dts = dts[dts[:,5] >= conf_thres].cpu()
    dts = utils.nms(dts, is_degree=True, nms_thres=nms_thres)
    dts = utils.detection2original(dts, pad_info.squeeze())
    # np_img = np.array(pil_img)
    # api_utils.draw_dt_on_np(np_img, detections)
    # plt.imshow(np_img)
    # plt.show()
    return dts
Example #13
def test(args):
    runtime = AverageMeter()
    ckpt_path = args.checkpoint_path
    try:
        names = os.listdir(ckpt_path)
        for name in names:
            out = re.findall("ResNet_.*", name)
            if out != []:
                ckpt_path = out[0]
                break
        ckpt_path = os.path.join(args.checkpoint_path, ckpt_path)
    except Exception:
        print("There is no checkpoint in ", args.checkpoint)
        exit
    model = RC3D_resnet.RC3D(num_classes, cfg.Test.Image_shape,
                             args.feature_path)
    model = model.cuda()
    model.zero_grad()
    model.load(ckpt_path)
    #test_batch = utils.Batch_Generator(name_to_id, num_classes, args.image_path, args.annotation_path, mode = 'test')
    test_batch = utils.new_Batch_Generator(name_to_id, num_classes,
                                           args.image_path,
                                           args.annotation_path)
    tic = time.time()
    data, gt = next(test_batch)
    with torch.no_grad():
        # pdb.set_trace()  # debug breakpoint, disabled so the timing below is meaningful
        print(gt)
        _, _, object_cls_score, object_offset = model.forward(data)
        bbox = utils.nms(model.proposal_bbox, object_cls_score, object_offset,
                         model.num_classes, model.im_info)
        toc = time.time()
        torch.cuda.empty_cache()
        runtime.update(toc - tic)
        print('Time {runtime.val:.3f} ({runtime.avg:.3f})\t'.format(
            runtime=runtime))
        for _cls, score, proposal in zip(bbox['cls'], bbox['score'],
                                         bbox['bbox']):
            print(
                "class:{:}({:})\t   score:{:.6f}\t   start:{:.2f}\t  end:{:.2f}\t"
                .format(id_to_name[int(_cls[0])], _cls[0], score[0],
                        proposal[0, 0], proposal[0, 1]))
Example #14
    def __get_bbox(self, image):
        """
        :param image: the image to run prediction on
        :return: bboxes after NMS, stored as (xmin, ymin, xmax, ymax, score, class)
        """
        org_image = np.copy(image)
        org_h, org_w, _ = org_image.shape
        s0 = time.time()
        yolo_input = img_preprocess2(
            image, None, (self.__test_input_size, self.__test_input_size),
            False)
        yolo_input = yolo_input[np.newaxis, ...]
        s1 = time.time()
        print("process img time:", s1 - s0)
        pred_sbbox, pred_mbbox, pred_lbbox = sess.run(
            [self.__pred_sbbox, self.__pred_mbbox, self.__pred_lbbox],
            feed_dict={self.__input_data: yolo_input})
        s2 = time.time()
        print("inference time:", s2 - s1)

        sbboxes = self.__convert_pred(pred_sbbox, (org_h, org_w),
                                      self.__valid_scales[0])
        mbboxes = self.__convert_pred(pred_mbbox, (org_h, org_w),
                                      self.__valid_scales[1])
        lbboxes = self.__convert_pred(pred_lbbox, (org_h, org_w),
                                      self.__valid_scales[2])
        s3 = time.time()
        print("conver pred time:", s3 - s2)

        # sbboxes = self.__valid_scale_filter(sbboxes, self.__valid_scales[0])
        # mbboxes = self.__valid_scale_filter(mbboxes, self.__valid_scales[1])
        # lbboxes = self.__valid_scale_filter(lbboxes, self.__valid_scales[2])

        bboxes = np.concatenate([sbboxes, mbboxes, lbboxes], axis=0)
        bboxes = utils.nms(bboxes,
                           self.__score_threshold,
                           self.__iou_threshold,
                           method='nms')
        print("nms time:", time.time() - s3)
        return bboxes
Example #15
    def detect(self, img):

        img2 = utils.pred_img(img)

        resize_img = np.array(img2, dtype=np.float32)
        resize_img /= 255.0
        resize_img = np.transpose(resize_img, (1, 2, 0))
        # resize_img = np.astype(np.float32)
        images = []
        images.append(resize_img)
        images = np.asarray(images)
        images = images.transpose((0, 3, 1, 2))
        images = t.from_numpy(images)

        outputlist = []
        if t.cuda.is_available():
            print("cuda is on")
            self.Detection.cuda()
            images = images.cuda()
            out = self.Detection(images)
            for i in range(3):
                outputlist.append(self.Decode(out[i]))
                # outputlist.append(self.Decode(out[i]))
        else:
            print("cuda is off")
            out = self.Detection(images)
            for i in range(3):
                outputlist.append(self.Decode(out[i]))
        for i in outputlist:
            print(i.shape)
        output = t.cat(outputlist, 1)

        batch_detection = utils.nms(output,
                                    cfg["class_num"],
                                    conf_thres=self.conf,
                                    nms_thres=0.4)
        print(output[0, 0, 0])
        return output
Example #16
def filter_prediction(boxes, probs, cls_idx):
    """
    Filter bounding boxes with probability threshold and nms
    Args:
        boxes: [BATCH, 4], (cx, cy, w, h)
        probs: [BATCH, CLASS_NUM], class probability
        cls_idx: array of class indices
    Return:
        final_boxes: filtered bounding boxes
        final_probs: filtered probabilities
        final_cls_idx: filtered class indices
    """
    if cfg.TOP_N_DETECTION < len(probs) and cfg.TOP_N_DETECTION > 0:
        order = probs.argsort()[:-cfg.TOP_N_DETECTION - 1:-1]
        probs = probs[order]
        boxes = boxes[order]
        cls_idx = cls_idx[order]
    else:
        filtered_idx = np.nonzero(probs > cfg.PROB_THRESHOLD)[0]
        probs = probs[filtered_idx]
        boxes = boxes[filtered_idx]
        cls_idx = cls_idx[filtered_idx]

    final_boxes = []
    final_probs = []
    final_cls_idx = []

    for c in range(cfg.NUM_CLASSES):
        idx_per_class = [i for i in range(len(probs)) if cls_idx[i] == c]
        keep = nms(boxes[idx_per_class], probs[idx_per_class],
                   cfg.NMS_THRESHOLD)
        for i in range(len(keep)):
            if keep[i]:
                final_boxes.append(boxes[idx_per_class[i]])
                final_probs.append(probs[idx_per_class[i]])
                final_cls_idx.append(c)
    return final_boxes, final_probs, final_cls_idx
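Unlike the other examples on this page, this nms variant returns a boolean keep mask aligned with its input order rather than a filtered array, which is why the loop re-indexes idx_per_class. A minimal sketch consistent with that contract and with the (cx, cy, w, h) box format from the docstring (an assumption about the helper's body, not the project's code):

import numpy as np

def nms_mask(boxes, probs, threshold):
    # boxes: (N, 4) as (cx, cy, w, h); returns a keep mask in input order
    x1 = boxes[:, 0] - boxes[:, 2] / 2
    y1 = boxes[:, 1] - boxes[:, 3] / 2
    x2 = boxes[:, 0] + boxes[:, 2] / 2
    y2 = boxes[:, 1] + boxes[:, 3] / 2
    order = probs.argsort()[::-1]            # highest score first
    keep = np.ones(len(boxes), dtype=bool)
    for a in range(len(order)):
        i = order[a]
        if not keep[i]:
            continue
        for b in range(a + 1, len(order)):
            j = order[b]
            iw = min(x2[i], x2[j]) - max(x1[i], x1[j])
            ih = min(y2[i], y2[j]) - max(y1[i], y1[j])
            if iw <= 0 or ih <= 0:
                continue
            inter = iw * ih
            union = ((x2[i] - x1[i]) * (y2[i] - y1[i]) +
                     (x2[j] - x1[j]) * (y2[j] - y1[j]) - inter)
            if inter / union > threshold:
                keep[j] = False              # suppress the lower-scored box
    return keep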
Example #17
def predict_batch(region_weight, edge_map_weight, junctions_weight):

    metrics = Metrics()
    for _id in _ids:

        # load detections
        fname = '{}/{}.jpg_5.pkl'.format(res_dir, _id)
        with open(fname, 'rb') as f:
            c = p.load(f, encoding='latin1')

        # apply non-maximum suppression
        cs, cs_c, th, th_c = nms(c['junctions'],
                                 c['junc_confs'],
                                 c['thetas'],
                                 c['theta_confs'],
                                 nms_thresh=8.0)

        # load annotations
        p_path = '{}/{}.npy'.format(annot_dir, _id)
        v_set = np.load(open(p_path, 'rb'), encoding='bytes')
        graph_annot = dict(v_set[()])
        cs_annot, es_annot = load_annots(graph_annot)

        # load edge map
        edge_map_path = '{}/{}.jpg'.format(edge_dir, _id)
        im_path = '{}/{}.jpg'.format(rgb_dir, _id)
        edge_map = np.array(Image.open(edge_map_path).convert('L')) / 255.0

        # load region masks
        region_path = '{}/{}.npy'.format(region_dir, _id)
        region_mks = np.load(region_path)
        region_mks = filter_regions(region_mks)

        # compute edge scores from classifier
        lw_from_cls = get_edge_scores(cs, region_mks, rgb_dir, _id)

        # Reconstruct
        junctions, juncs_on, lines_on, regs_sm_on = reconstructBuildingBaseline(
            cs,
            edge_map,
            use_junctions_with_var=True,
            use_regions=True,
            thetas=th,
            regions=region_mks,
            angle_thresh=5,
            with_corner_edge_confidence=True,
            corner_confs=cs_c,
            corner_edge_thresh=0.125,
            theta_confs=th_c,
            theta_threshold=0.25,
            region_hit_threshold=0.1,
            lw_from_cls=lw_from_cls,
            use_edge_classifier=True,
            closed_region_lowerbound=True,
            closed_region_upperbound=True,
            with_corner_variables=True,
            corner_min_degree_constraint=True,
            junctions_soft=True,
            region_intersection_constraint=True,
            inter_region_constraint=True,
            post_process=True,
            region_weight=region_weight,
            edge_map_weight=edge_map_weight,
            junctions_weight=junctions_weight,
        )
        dwg = svgwrite.Drawing('../result/svg/{}.svg'.format(_id), (128, 128))
        dwg.add(svgwrite.image.Image(edge_map_path, size=(128, 128)))
        im_path = os.path.join(rgb_dir, _id + '.jpg')
        draw_building(dwg, junctions, juncs_on, lines_on)
        dwg.save()
        metrics.forward(graph_annot, junctions, juncs_on, lines_on, _id)

    return metrics.edge_f_score()
Example #18
for _id in _ids:

    # if '1548206121.73' not in _id:
    #     continue
    # # # 1553980237.28

    # load detections
    fname = '{}/{}.jpg_5.pkl'.format(res_dir, _id)
    with open(fname, 'rb') as f:
        c = p.load(f, encoding='latin1')

    # apply non-maximum suppression
    cs, cs_c, th, th_c = nms(c['junctions'],
                             c['junc_confs'],
                             c['thetas'],
                             c['theta_confs'],
                             nms_thresh=8.0)

    # load annotations
    p_path = '{}/{}.npy'.format(annot_dir, _id)
    v_set = np.load(open(p_path, 'rb'), encoding='bytes')
    graph_annot = dict(v_set[()])
    cs_annot, es_annot = load_annots(graph_annot)

    # load edge map
    edge_map_path = '{}/{}.jpg'.format(edge_dir, _id)
    im_path = '{}/{}.jpg'.format(rgb_dir, _id)
    edge_map = np.array(Image.open(edge_map_path).convert('L')) / 255.0

    # load region masks
Example #19
    def detect_image(self, image_id, image):
        f = open("./input/detection-results/" + image_id + ".txt", "w")
        self.confidence = 0.01
        self.nms_threhold = 0.5

        image_shape = np.array(np.shape(image)[0:2])
        #---------------------------------------------------------#
        #   Add gray bars to the image for a distortion-free resize
        #---------------------------------------------------------#
        crop_img = letterbox_image(image,
                                   [self.input_shape[0], self.input_shape[1]])
        #----------------------------------------------------------------------------------#
        #   Convert RGB to BGR, since the original centernet_hourglass weights were trained on BGR images
        #----------------------------------------------------------------------------------#
        photo = np.array(crop_img, dtype=np.float32)[:, :, ::-1]
        #-----------------------------------------------------------#
        #   Preprocess and normalize the image; the resulting photo has shape [1, 512, 512, 3]
        #-----------------------------------------------------------#
        photo = np.reshape(
            preprocess_image(photo),
            [1, self.input_shape[0], self.input_shape[1], self.input_shape[2]])

        preds = self.centernet.predict(photo)
        #--------------------------------------------------------------------------#
        #   For CenterNet, pinning down the object center is critical.
        #   A large object carries a lot of local evidence, so its center
        #   point is hard to determine, and the max-pooling style of
        #   non-maximum suppression cannot remove the resulting local boxes.
        #   That is why an extra box-level NMS pass is applied here.
        #   In practice the extra NMS matters little with an hourglass
        #   backbone, but noticeably more with a resnet backbone.
        #---------------------------------------------------------------------------#
        if self.nms:
            preds = np.array(nms(preds, self.nms_threhold))

        if len(preds[0]) <= 0:
            return

        #-----------------------------------------------------------#
        #   Convert the predictions into fractional (normalized) form
        #-----------------------------------------------------------#
        preds[0][:, 0:4] = preds[0][:, 0:4] / (self.input_shape[0] / 4)

        #-----------------------------------------------------------#
        #   Keep only boxes scoring above the confidence threshold
        #-----------------------------------------------------------#
        det_label = preds[0][:, -1]
        det_conf = preds[0][:, -2]
        det_xmin, det_ymin, det_xmax, det_ymax = preds[0][:, 0], preds[0][:, 1], preds[0][:, 2], preds[0][:, 3]

        top_indices = [
            i for i, conf in enumerate(det_conf) if conf >= self.confidence
        ]
        top_conf = det_conf[top_indices]
        top_label_indices = det_label[top_indices].tolist()
        top_xmin = np.expand_dims(det_xmin[top_indices], -1)
        top_ymin = np.expand_dims(det_ymin[top_indices], -1)
        top_xmax = np.expand_dims(det_xmax[top_indices], -1)
        top_ymax = np.expand_dims(det_ymax[top_indices], -1)

        #-----------------------------------------------------------#
        #   Remove the gray-bar padding
        #-----------------------------------------------------------#
        boxes = centernet_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.input_shape[0], self.input_shape[1]]), image_shape)

        for i, c in enumerate(top_label_indices):
            predicted_class = self.class_names[int(c)]
            score = str(top_conf[i])

            top, left, bottom, right = boxes[i]
            f.write("%s %s %s %s %s %s\n" %
                    (predicted_class, score[:6], str(int(left)), str(
                        int(top)), str(int(right)), str(int(bottom))))

        f.close()
        return
Example #20
    features = tf.convert_to_tensor(net_out_reshaped)
    x = tf.Session().run(
        yolo_boxes_and_scores(features, anchors[anchor_mask[0]], nb_classes,
                              model_image_size, org_image_shape))
    boxes = np.concatenate(
        [x[0],
         np.reshape(x[2][0], (n_shape[1] * n_shape[1] * 3, 1)), x[1]],
        axis=1)
    all_boxes.extend(boxes)
boxes_, scores_, classes_ = postprocess_boxes_tf(all_boxes, score_threshold=.3)
image = draw_boxes_tf(boxes_, scores_, classes_, classes, org_image)
image.show()

#########################################################################################################
bboxes = postprocess_boxes(all_boxes, org_image, model_image_size[0], 0.3)
bboxes = nms(bboxes, 0.45, method='nms')
image = draw_bbox(org_image, bboxes, classes)
image = fromarray(image)
image.show()

#########################################################################################################
pred_bbox = np.concatenate([
    np.reshape(predictions[0], (-1, 5 + nb_classes)),
    np.reshape(predictions[0], (-1, 5 + nb_classes)),
    np.reshape(predictions[0], (-1, 5 + nb_classes))
],
                           axis=0)

bboxes = postprocess_boxes(pred_bbox, org_image, model_image_size[0], 0.3)
bboxes = nms(bboxes, 0.45, method='nms')
image = draw_bbox(org_image, bboxes, classes)
Example #21
    def detect_face_limited(self, img, det_type=2):
        height, width, _ = img.shape
        if det_type >= 2:
            total_boxes = np.array(
                [[0.0, 0.0, img.shape[1], img.shape[0], 0.9]],
                dtype=np.float32)
            num_box = total_boxes.shape[0]

            # pad the bbox
            [dy, edy, dx, edx, y, ey, x, ex, tmpw,
             tmph] = self.pad(total_boxes, width, height)
            # (3, 24, 24) is the input shape for RNet
            input_buf = np.zeros((num_box, 3, 24, 24), dtype=np.float32)

            for i in range(num_box):
                tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
                tmp[dy[i]:edy[i] + 1,
                    dx[i]:edx[i] + 1, :] = img[y[i]:ey[i] + 1,
                                               x[i]:ex[i] + 1, :]
                input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (24, 24)))

            output = self.RNet.predict(input_buf)

            # filter the total_boxes with threshold
            passed = np.where(output[1][:, 1] > self.threshold[1])
            total_boxes = total_boxes[passed]

            if total_boxes.size == 0:
                return None

            total_boxes[:, 4] = output[1][passed, 1].reshape((-1, ))
            reg = output[0][passed]

            # nms
            pick = nms(total_boxes, 0.7, 'Union')
            total_boxes = total_boxes[pick]
            total_boxes = self.calibrate_box(total_boxes, reg[pick])
            total_boxes = self.convert_to_square(total_boxes)
            total_boxes[:, 0:4] = np.round(total_boxes[:, 0:4])
        else:
            total_boxes = np.array(
                [[0.0, 0.0, img.shape[1], img.shape[0], 0.9]],
                dtype=np.float32)
        num_box = total_boxes.shape[0]
        [dy, edy, dx, edx, y, ey, x, ex, tmpw,
         tmph] = self.pad(total_boxes, width, height)
        # (3, 48, 48) is the input shape for ONet
        input_buf = np.zeros((num_box, 3, 48, 48), dtype=np.float32)

        for i in range(num_box):
            tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.float32)
            tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = img[y[i]:ey[i] + 1,
                                                             x[i]:ex[i] + 1, :]
            input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (48, 48)))

        output = self.ONet.predict(input_buf)
        # print(output[2])

        # filter the total_boxes with threshold
        passed = np.where(output[2][:, 1] > self.threshold[2])
        total_boxes = total_boxes[passed]

        if total_boxes.size == 0:
            return None

        total_boxes[:, 4] = output[2][passed, 1].reshape((-1, ))
        reg = output[1][passed]
        points = output[0][passed]

        # compute landmark points
        bbw = total_boxes[:, 2] - total_boxes[:, 0] + 1
        bbh = total_boxes[:, 3] - total_boxes[:, 1] + 1
        points[:, 0:5] = np.expand_dims(
            total_boxes[:, 0], 1) + np.expand_dims(bbw, 1) * points[:, 0:5]
        points[:, 5:10] = np.expand_dims(
            total_boxes[:, 1], 1) + np.expand_dims(bbh, 1) * points[:, 5:10]

        # nms
        total_boxes = self.calibrate_box(total_boxes, reg)
        pick = nms(total_boxes, 0.7, 'Min')
        total_boxes = total_boxes[pick]
        points = points[pick]

        if not self.accurate_landmark:
            return total_boxes, points

        #############################################
        # extended stage
        #############################################
        num_box = total_boxes.shape[0]
        patchw = np.maximum(total_boxes[:, 2] - total_boxes[:, 0] + 1,
                            total_boxes[:, 3] - total_boxes[:, 1] + 1)
        patchw = np.round(patchw * 0.25)

        # make it even
        patchw[np.where(np.mod(patchw, 2) == 1)] += 1

        input_buf = np.zeros((num_box, 15, 24, 24), dtype=np.float32)
        for i in range(5):
            x, y = points[:, i], points[:, i + 5]
            x, y = np.round(x - 0.5 * patchw), np.round(y - 0.5 * patchw)
            [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(
                np.vstack([x, y, x + patchw - 1, y + patchw - 1]).T, width,
                height)
            for j in range(num_box):
                tmpim = np.zeros((tmpw[j], tmpw[j], 3), dtype=np.float32)
                tmpim[dy[j]:edy[j] + 1,
                      dx[j]:edx[j] + 1, :] = img[y[j]:ey[j] + 1,
                                                 x[j]:ex[j] + 1, :]
                input_buf[j, i * 3:i * 3 + 3, :, :] = adjust_input(
                    cv2.resize(tmpim, (24, 24)))

        output = self.LNet.predict(input_buf)

        pointx = np.zeros((num_box, 5))
        pointy = np.zeros((num_box, 5))

        for k in range(5):
            # do not make a large movement
            tmp_index = np.where(np.abs(output[k] - 0.5) > 0.35)
            output[k][tmp_index[0]] = 0.5

            pointx[:, k] = np.round(points[:, k] -
                                    0.5 * patchw) + output[k][:, 0] * patchw
            pointy[:, k] = np.round(points[:, k + 5] -
                                    0.5 * patchw) + output[k][:, 1] * patchw

        points = np.hstack([pointx, pointy])
        points = points.astype(np.int32)

        return total_boxes, points
Example #22
    # get input image
    #im_arr = xs.squeeze(0).cpu().numpy().transpose(1, 2, 0) * 255.0
    im_path = os.path.join(RGB_FOLDER, valid_list[k] + '.jpg')
    im = Image.open(im_path)
    #im = Image.fromarray(im_arr.astype('uint8'))

    # update metric
    pos_gt_ind = prob_gt > 0
    pos_pred_ind = prob > .5
    dets_gt = dets_gt[pos_gt_ind]
    dets = dets[pos_pred_ind]
    prob_gt = prob_gt[pos_gt_ind]
    prob = prob[pos_pred_ind]

    # apply nms
    dets, prob = nms(dets.detach().cpu().numpy(), prob.detach().cpu().numpy())
    #dets, prob = dets.detach().cpu().numpy(), prob.detach().cpu().numpy()
    # draw outputs
    seg_im = compose_im(np.array(im), seg)
    draw = ImageDraw.Draw(seg_im)
    for p, det in zip(prob, dets):
        x, y = det
        draw.ellipse((x - 2, y - 2, x + 2, y + 2), fill='red')

    # draw ground truth - Debug
    for p, det in zip(prob_gt, dets_gt):
        x, y = det
        draw.ellipse((x - 1, y - 1, x + 1, y + 1), fill='blue')

    seg_im = seg_im.resize((512, 512))
    mt.forward(valid_list[k], dets_gt.cpu().numpy(), dets)
Example #23
def default_rule(det_df, **kwargs):
    assert 'prio_weight' in kwargs.keys(), 'Must input priority weight'
    assert 'prio_file' in kwargs.keys(), 'Must input priority file'

    # out dir
    out_dir = 'detection result images'
    if out_dir is not None:
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

    det_df['bbox_score'] = det_df.bbox + det_df.score.map(lambda x: [x])

    prio_weight = kwargs['prio_weight']
    prio_file = kwargs['prio_file']

    if len(det_df) == 0:
        if kwargs['draw_imgs']:
            show_and_save_images(kwargs['img_path'],
                                 kwargs['img_name'],
                                 det_df.bbox_score.values,
                                 det_df.category.values,
                                 out_dir=out_dir)
        return kwargs['false_name'], 1
    else:
        filtered = det_df[det_df['score'] >= kwargs['other_thr']]
        if len(filtered) == 0:
            return kwargs['other_name'], 1

        # filtering
        filtered = filter_code(filtered, 'RES06', 0.9)
        filtered = filter_code(filtered, 'RES03', 0.85, 'RES05')
        filtered = filter_code(filtered, 'AZ08', 0.6)
        filtered = filter_code(filtered, 'STR02', 0.9, 'COM01')
        filtered = filter_code(filtered, 'STR04', 0.8, 'COM01')
        filtered = filter_code(filtered, 'COM03', 0.9)
        filtered = filter_code(filtered, 'PLN01', 0.8, 'RES05')
        filtered = filter_code(filtered, 'REP01', 0.9)
        # filtered = filter_code(filtered, 'COM01', 0.4)

        # # check in
        # if len(filtered) > 1:
        #     if np.sum(filtered.category.values == 'QS') > 1:
        #         code_df = filtered[filtered['category'] == 'QS']
        #         filtered = check_in_filter(filtered, code_df, 0.9)

        # nms
        if len(filtered) != 0:
            lst = []
            for i in range(len(filtered)):
                lst.append(filtered.iloc[i, -1])
            arr = np.array(lst)
            best_bboxes = nms(arr, 0.5)
            filtered = filtered[filtered['bbox_score'].map(
                lambda x: x in best_bboxes)]

        # judge res04
        df_res05 = filtered[(filtered['category'] == 'RES05')
                            & (filtered['score'] >= 0.5)]
        if len(df_res05) >= 3:
            filtered.loc[filtered['category'] == 'RES05', 'category'] = 'RES04'

        if len(filtered) == 0:
            if kwargs['draw_imgs']:
                show_and_save_images(kwargs['img_path'],
                                     kwargs['img_name'],
                                     filtered.bbox_score.values,
                                     filtered.category.values,
                                     out_dir=out_dir)
            return kwargs['false_name'], 1

        Max_conf = max(filtered['score'].values)
        prio_thr = Max_conf * prio_weight
        filtered_final = filtered[filtered['score'] >= prio_thr]

        prio = pd.read_excel(prio_file)
        prio_lst = list(prio.values)
        final_code = prio_check(prio_lst,
                                list(filtered_final['category'].values))
        defect_score = max(
            filtered_final.loc[filtered['category'] == final_code,
                               'score'].values)

        # draw images
        if kwargs['draw_imgs']:
            show_and_save_images(kwargs['img_path'],
                                 kwargs['img_name'],
                                 filtered.bbox_score.values,
                                 filtered.category.values,
                                 out_dir=out_dir)

        return final_code, defect_score
Example #24
    def detect_image(self, image):
        image_shape = np.array(np.shape(image)[0:2])
        #---------------------------------------------------------#
        #   Add gray bars to the image for a distortion-free resize
        #---------------------------------------------------------#
        crop_img = letterbox_image(image,
                                   [self.input_shape[0], self.input_shape[1]])
        #----------------------------------------------------------------------------------#
        #   Convert RGB to BGR, since the original centernet_hourglass weights were trained on BGR images
        #----------------------------------------------------------------------------------#
        photo = np.array(crop_img, dtype=np.float32)[:, :, ::-1]
        #-----------------------------------------------------------#
        #   Preprocess and normalize the image; the resulting photo has shape [1, 512, 512, 3]
        #-----------------------------------------------------------#
        photo = np.reshape(
            preprocess_image(photo),
            [1, self.input_shape[0], self.input_shape[1], self.input_shape[2]])

        preds = self.get_pred(photo).numpy()
        #-------------------------------------------------------#
        #   For CenterNet, pinning down the object center is critical.
        #   A large object carries a lot of local evidence, so its center
        #   point is hard to determine, and the max-pooling style of
        #   non-maximum suppression cannot remove the resulting local boxes.
        #   That is why an extra box-level NMS pass is applied here.
        #   In practice the extra NMS matters little with an hourglass
        #   backbone, but noticeably more with a resnet backbone.
        #-------------------------------------------------------#
        if self.nms:
            preds = np.array(nms(preds, self.nms_threhold))

        if len(preds[0]) <= 0:
            return image

        #-----------------------------------------------------------#
        #   Convert the predictions into fractional (normalized) form
        #-----------------------------------------------------------#
        preds[0][:, 0:4] = preds[0][:, 0:4] / (self.input_shape[0] / 4)

        det_label = preds[0][:, -1]
        det_conf = preds[0][:, -2]
        det_xmin, det_ymin, det_xmax, det_ymax = preds[0][:, 0], preds[0][:, 1], preds[0][:, 2], preds[0][:, 3]
        #-----------------------------------------------------------#
        #   Keep only boxes scoring above the confidence threshold
        #-----------------------------------------------------------#
        top_indices = [
            i for i, conf in enumerate(det_conf) if conf >= self.confidence
        ]
        top_conf = det_conf[top_indices]
        top_label_indices = det_label[top_indices].tolist()
        top_xmin = np.expand_dims(det_xmin[top_indices], -1)
        top_ymin = np.expand_dims(det_ymin[top_indices], -1)
        top_xmax = np.expand_dims(det_xmax[top_indices], -1)
        top_ymax = np.expand_dims(det_ymax[top_indices], -1)

        #-----------------------------------------------------------#
        #   Remove the gray-bar padding
        #-----------------------------------------------------------#
        boxes = centernet_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.input_shape[0], self.input_shape[1]]), image_shape)

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))

        thickness = max(
            (np.shape(image)[0] + np.shape(image)[1]) // self.input_shape[0],
            1)

        for i, c in enumerate(top_label_indices):
            predicted_class = self.class_names[int(c)]
            score = top_conf[i]

            top, left, bottom, right = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))

            # Draw the box and its label
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label, top, left, bottom, right)

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for i in range(thickness):
                draw.rectangle([left + i, top + i, right - i, bottom - i],
                               outline=self.colors[int(c)])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[int(c)])
            draw.text(text_origin,
                      str(label, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)
            del draw
        return image
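One portability note: ImageDraw.textsize, used above to size the label background, was removed in Pillow 10. On current Pillow the equivalent measurement is (a drop-in sketch for the same label and font):

bbox = draw.textbbox((0, 0), label, font=font)
label_size = (bbox[2] - bbox[0], bbox[3] - bbox[1])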
Example #25
            else:

                # retrieve detections
                grid_size = 2.0
                dets = grid_size * coords[0, :, :2] + anchors[0]
                dets_gt = grid_size * ys[0, :, :2] + anchors[0]

                # select detections
                prob_gt = ys[0, :, 2]
                prob = prob.view(-1)
                pos_gt_ind = prob_gt > 0
                pos_pred_ind = prob > .5
                dets_gt = dets_gt[pos_gt_ind]
                dets = dets[pos_pred_ind]
                prob = prob[pos_pred_ind]
                dets, prob = nms(np.array(dets), np.array(prob))

                # update metric
                mt.forward(valid_list[i], np.array(dets_gt), np.array(dets))

        # print epoch loss
        print('[%d] %s lr: %f \nloss: %.5f' %
              (epoch + 1, phase, optimizer.param_groups[0]['lr'],
               running_loss / len(dset_loader[phase])))

        # track the best model
        if phase == 'val':

            recall, precision = mt.calc_metrics()
            f_score = 2.0 * precision * recall / (precision + recall + 1e-8)
            print('val f_score %.5f' % f_score)
Example #26
    def detect_face(self, img):
        """
            detect face over img
        Parameters:
        ----------
            img: numpy array, BGR order, of shape (h, w, 3)
                input image
        Returns:
        -------
            bboxes: numpy array, n x 5 (x1, y1, x2, y2, score)
                bboxes
            points: numpy array, n x 10 (x1, x2, ..., x5, y1, y2, ..., y5)
                landmarks
        """

        # check input
        if img is None:
            return None

        # only works for color images
        if len(img.shape) != 3:
            return None

        height, width, _ = img.shape
        MIN_DET_SIZE = 12

        # detected boxes
        total_boxes = []

        minl = min(height, width)

        # get all the valid scales
        scales = []
        m = MIN_DET_SIZE / self.minsize
        minl *= m
        factor_count = 0
        while minl > MIN_DET_SIZE:
            scales.append(m * self.factor**factor_count)
            minl *= self.factor
            factor_count += 1

        sliced_index = self.slice_index(len(scales))
        total_boxes = []
        for batch in sliced_index:
            local_boxes = map(
                detect_first_stage_warpper,
                zip(repeat(img), self.PNets[:len(batch)],
                    [scales[i] for i in batch], repeat(self.threshold[0])))
            total_boxes.extend(local_boxes)

        # remove the Nones
        total_boxes = [i for i in total_boxes if i is not None]

        if len(total_boxes) == 0:
            return None

        total_boxes = np.vstack(total_boxes)

        if total_boxes.size == 0:
            return None

        # merge the detection from first stage
        pick = nms(total_boxes[:, 0:5], 0.7, 'Union')
        total_boxes = total_boxes[pick]

        bbw = total_boxes[:, 2] - total_boxes[:, 0] + 1
        bbh = total_boxes[:, 3] - total_boxes[:, 1] + 1

        # refine the bboxes
        total_boxes = np.vstack([
            total_boxes[:, 0] + total_boxes[:, 5] * bbw,
            total_boxes[:, 1] + total_boxes[:, 6] * bbh,
            total_boxes[:, 2] + total_boxes[:, 7] * bbw,
            total_boxes[:, 3] + total_boxes[:, 8] * bbh, total_boxes[:, 4]
        ])

        total_boxes = total_boxes.T
        total_boxes = self.convert_to_square(total_boxes)
        total_boxes[:, 0:4] = np.round(total_boxes[:, 0:4])

        #############################################
        # second stage
        #############################################
        num_box = total_boxes.shape[0]

        # pad the bbox
        [dy, edy, dx, edx, y, ey, x, ex, tmpw,
         tmph] = self.pad(total_boxes, width, height)
        # (3, 24, 24) is the input shape for RNet
        input_buf = np.zeros((num_box, 3, 24, 24), dtype=np.float32)

        for i in range(num_box):
            tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
            tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = img[y[i]:ey[i] + 1,
                                                             x[i]:ex[i] + 1, :]
            input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (24, 24)))

        output = self.RNet.predict(input_buf)

        # filter the total_boxes with threshold
        passed = np.where(output[1][:, 1] > self.threshold[1])
        total_boxes = total_boxes[passed]

        if total_boxes.size == 0:
            return None

        total_boxes[:, 4] = output[1][passed, 1].reshape((-1, ))
        reg = output[0][passed]

        # nms
        pick = nms(total_boxes, 0.7, 'Union')
        total_boxes = total_boxes[pick]
        total_boxes = self.calibrate_box(total_boxes, reg[pick])
        total_boxes = self.convert_to_square(total_boxes)
        total_boxes[:, 0:4] = np.round(total_boxes[:, 0:4])

        #############################################
        # third stage
        #############################################
        num_box = total_boxes.shape[0]

        # pad the bbox
        [dy, edy, dx, edx, y, ey, x, ex, tmpw,
         tmph] = self.pad(total_boxes, width, height)
        # (3, 48, 48) is the input shape for ONet
        input_buf = np.zeros((num_box, 3, 48, 48), dtype=np.float32)

        for i in range(num_box):
            tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.float32)
            tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = img[y[i]:ey[i] + 1,
                                                             x[i]:ex[i] + 1, :]
            input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (48, 48)))

        output = self.ONet.predict(input_buf)

        # filter the total_boxes with threshold
        passed = np.where(output[2][:, 1] > self.threshold[2])
        total_boxes = total_boxes[passed]

        if total_boxes.size == 0:
            return None

        total_boxes[:, 4] = output[2][passed, 1].reshape((-1, ))
        reg = output[1][passed]
        points = output[0][passed]

        # compute landmark points
        bbw = total_boxes[:, 2] - total_boxes[:, 0] + 1
        bbh = total_boxes[:, 3] - total_boxes[:, 1] + 1
        points[:, 0:5] = np.expand_dims(
            total_boxes[:, 0], 1) + np.expand_dims(bbw, 1) * points[:, 0:5]
        points[:, 5:10] = np.expand_dims(
            total_boxes[:, 1], 1) + np.expand_dims(bbh, 1) * points[:, 5:10]

        # nms
        total_boxes = self.calibrate_box(total_boxes, reg)
        pick = nms(total_boxes, 0.7, 'Min')
        total_boxes = total_boxes[pick]
        points = points[pick]

        if not self.accurate_landmark:
            return total_boxes, points

        #############################################
        # extended stage
        #############################################
        num_box = total_boxes.shape[0]
        patchw = np.maximum(total_boxes[:, 2] - total_boxes[:, 0] + 1,
                            total_boxes[:, 3] - total_boxes[:, 1] + 1)
        patchw = np.round(patchw * 0.25)

        # make it even
        patchw[np.where(np.mod(patchw, 2) == 1)] += 1

        input_buf = np.zeros((num_box, 15, 24, 24), dtype=np.float32)
        for i in range(5):
            x, y = points[:, i], points[:, i + 5]
            x, y = np.round(x - 0.5 * patchw), np.round(y - 0.5 * patchw)
            [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(
                np.vstack([x, y, x + patchw - 1, y + patchw - 1]).T, width,
                height)
            for j in range(num_box):
                tmpim = np.zeros((tmpw[j], tmpw[j], 3), dtype=np.float32)
                tmpim[dy[j]:edy[j] + 1,
                      dx[j]:edx[j] + 1, :] = img[y[j]:ey[j] + 1,
                                                 x[j]:ex[j] + 1, :]
                input_buf[j, i * 3:i * 3 + 3, :, :] = adjust_input(
                    cv2.resize(tmpim, (24, 24)))

        output = self.LNet.predict(input_buf)

        pointx = np.zeros((num_box, 5))
        pointy = np.zeros((num_box, 5))

        for k in range(5):
            # do not make a large movement
            tmp_index = np.where(np.abs(output[k] - 0.5) > 0.35)
            output[k][tmp_index[0]] = 0.5

            pointx[:, k] = np.round(points[:, k] -
                                    0.5 * patchw) + output[k][:, 0] * patchw
            pointy[:, k] = np.round(points[:, k + 5] -
                                    0.5 * patchw) + output[k][:, 1] * patchw

        points = np.hstack([pointx, pointy])
        points = points.astype(np.int32)

        return total_boxes, points
Example #27
    def get_FPS(self, image, test_interval):
        image_shape = np.array(np.shape(image)[0:2])
        #---------------------------------------------------------#
        #   Add gray bars to the image for a distortion-free resize
        #---------------------------------------------------------#
        crop_img = letterbox_image(image,
                                   [self.image_size[0], self.image_size[1]])
        #----------------------------------------------------------------------------------#
        #   Convert RGB to BGR, because the original centernet_hourglass weights were trained on BGR images
        #----------------------------------------------------------------------------------#
        photo = np.array(crop_img, dtype=np.float32)[:, :, ::-1]
        #-----------------------------------------------------------#
        #   Preprocess and normalize the image; the resulting photo has shape [1, 3, 512, 512]
        #-----------------------------------------------------------#
        photo = np.reshape(
            np.transpose(preprocess_image(photo), (2, 0, 1)),
            [1, self.image_size[2], self.image_size[0], self.image_size[1]])

        with torch.no_grad():
            images = Variable(
                torch.from_numpy(np.asarray(photo)).type(torch.FloatTensor))
            if self.cuda:
                images = images.cuda()
            outputs = self.centernet(images)

            if self.backbone == 'hourglass':
                outputs = [
                    outputs[-1]["hm"].sigmoid(), outputs[-1]["wh"],
                    outputs[-1]["reg"]
                ]
            outputs = decode_bbox(outputs[0], outputs[1], outputs[2],
                                  self.image_size, self.confidence, self.cuda)

            try:
                if self.nms:
                    outputs = np.array(nms(outputs, self.nms_threhold))

                output = outputs[0]
                if len(output) > 0:
                    batch_boxes = output[:, :4]
                    det_conf = output[:, 4]
                    det_label = output[:, 5]

                    det_xmin, det_ymin = batch_boxes[:, 0], batch_boxes[:, 1]
                    det_xmax, det_ymax = batch_boxes[:, 2], batch_boxes[:, 3]
                    top_indices = [
                        i for i, conf in enumerate(det_conf)
                        if conf >= self.confidence
                    ]
                    top_conf = det_conf[top_indices]
                    top_label_indices = det_label[top_indices].tolist()
                    top_xmin = np.expand_dims(det_xmin[top_indices], -1)
                    top_ymin = np.expand_dims(det_ymin[top_indices], -1)
                    top_xmax = np.expand_dims(det_xmax[top_indices], -1)
                    top_ymax = np.expand_dims(det_ymax[top_indices], -1)

                    boxes = centernet_correct_boxes(
                        top_ymin, top_xmin, top_ymax, top_xmax,
                        np.array([self.image_size[0], self.image_size[1]]),
                        image_shape)
            except Exception:
                pass

        t1 = time.time()
        for _ in range(test_interval):
            with torch.no_grad():
                outputs = self.centernet(images)

                if self.backbone == 'hourglass':
                    outputs = [
                        outputs[-1]["hm"].sigmoid(), outputs[-1]["wh"],
                        outputs[-1]["reg"]
                    ]
                outputs = decode_bbox(outputs[0], outputs[1], outputs[2],
                                      self.image_size, self.confidence,
                                      self.cuda)

                try:
                    if self.nms:
                        outputs = np.array(nms(outputs, self.nms_threhold))

                    output = outputs[0]
                    if len(output) > 0:
                        batch_boxes = output[:, :4]
                        det_conf = output[:, 4]
                        det_label = output[:, 5]

                        det_xmin, det_ymin = batch_boxes[:, 0], batch_boxes[:, 1]
                        det_xmax, det_ymax = batch_boxes[:, 2], batch_boxes[:, 3]
                        top_indices = [
                            i for i, conf in enumerate(det_conf)
                            if conf >= self.confidence
                        ]
                        top_conf = det_conf[top_indices]
                        top_label_indices = det_label[top_indices].tolist()
                        top_xmin = np.expand_dims(det_xmin[top_indices], -1)
                        top_ymin = np.expand_dims(det_ymin[top_indices], -1)
                        top_xmax = np.expand_dims(det_xmax[top_indices], -1)
                        top_ymax = np.expand_dims(det_ymax[top_indices], -1)

                        boxes = centernet_correct_boxes(
                            top_ymin, top_xmin, top_ymax, top_xmax,
                            np.array([self.image_size[0], self.image_size[1]]),
                            image_shape)
                except Exception:
                    pass
        t2 = time.time()
        tact_time = (t2 - t1) / test_interval
        return tact_time
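
`get_FPS` above returns the average latency in seconds per forward pass over `test_interval` runs, so throughput is simply its reciprocal. A hedged usage sketch, where the constructed `centernet` object and the image path are assumptions about the surrounding project:

from PIL import Image

image = Image.open("img/street.jpg")  # hypothetical test image
tact_time = centernet.get_FPS(image, test_interval=100)
print("%.4f seconds per image, %.2f FPS" % (tact_time, 1.0 / tact_time))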
Ejemplo n.º 28
0
    def detect_image(self, image_id, image):
        f = open("./input/detection-results/" + image_id + ".txt", "w")
        self.confidence = 0.01
        self.nms_threhold = 0.5

        image_shape = np.array(np.shape(image)[0:2])

        crop_img = letterbox_image(image,
                                   [self.image_size[0], self.image_size[1]])

        # Convert RGB to BGR, because the original centernet_hourglass weights were trained on BGR images
        photo = np.array(crop_img, dtype=np.float32)[:, :, ::-1]

        # Preprocess and normalize the image
        photo = np.reshape(
            np.transpose(preprocess_image(photo), (2, 0, 1)),
            [1, self.image_size[2], self.image_size[0], self.image_size[1]])

        with torch.no_grad():
            photo = np.asarray(photo)

            images = Variable(torch.from_numpy(photo).type(torch.FloatTensor))
            if self.cuda:
                images = images.cuda()

            outputs = self.centernet(images)
            if self.backbone == 'hourglass':
                outputs = [
                    outputs[-1]["hm"].sigmoid(), outputs[-1]["wh"],
                    outputs[-1]["reg"]
                ]
            outputs = decode_bbox(outputs[0], outputs[1], outputs[2],
                                  self.image_size, self.confidence, self.cuda)

        try:
            if self.nms:
                outputs = np.array(nms(outputs, self.nms_threhold))
        except Exception:
            pass

        output = outputs[0]
        if len(output) <= 0:
            # close the results file before returning on empty output
            f.close()
            return

        batch_boxes = output[:, :4]
        det_conf = output[:, 4]
        det_label = output[:, 5]

        # Select the boxes whose score is above the confidence threshold
        det_xmin, det_ymin = batch_boxes[:, 0], batch_boxes[:, 1]
        det_xmax, det_ymax = batch_boxes[:, 2], batch_boxes[:, 3]

        top_indices = [
            i for i, conf in enumerate(det_conf) if conf >= self.confidence
        ]
        top_conf = det_conf[top_indices]
        top_label_indices = det_label[top_indices].tolist()
        top_xmin = np.expand_dims(det_xmin[top_indices], -1)
        top_ymin = np.expand_dims(det_ymin[top_indices], -1)
        top_xmax = np.expand_dims(det_xmax[top_indices], -1)
        top_ymax = np.expand_dims(det_ymax[top_indices], -1)

        # Remove the gray bars
        boxes = centernet_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.image_size[0], self.image_size[1]]), image_shape)

        for i, c in enumerate(top_label_indices):
            predicted_class = self.class_names[int(c)]
            score = str(top_conf[i])

            top, left, bottom, right = boxes[i]
            f.write("%s %s %s %s %s %s\n" %
                    (predicted_class, score[:6], str(int(left)), str(int(top)),
                     str(int(right)), str(int(bottom))))

        f.close()
        return
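
Each line written above follows the `<class> <score> <left> <top> <right> <bottom>` layout that common mAP evaluation tools consume from `./input/detection-results/`. A small sketch of parsing such a file back, assuming single-token class names (the helper is illustrative, not part of the project):

def read_detections(path):
    # parse "<class> <score> <left> <top> <right> <bottom>" lines
    detections = []
    with open(path) as fh:
        for line in fh:
            cls, score, left, top, right, bottom = line.split()
            detections.append((cls, float(score), int(left), int(top),
                               int(right), int(bottom)))
    return detections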
Ejemplo n.º 29
0
    def detect_image(self, image):
        image_shape = np.array(np.shape(image)[0:2])

        crop_img = letterbox_image(image,
                                   [self.image_size[0], self.image_size[1]])

        # Convert RGB to BGR, because the original centernet_hourglass weights were trained on BGR images
        photo = np.array(crop_img, dtype=np.float32)[:, :, ::-1]

        # Preprocess and normalize the image
        photo = np.reshape(
            np.transpose(preprocess_image(photo), (2, 0, 1)),
            [1, self.image_size[2], self.image_size[0], self.image_size[1]])

        with torch.no_grad():
            photo = np.asarray(photo)

            images = Variable(torch.from_numpy(photo).type(torch.FloatTensor))
            if self.cuda:
                images = images.cuda()

            outputs = self.centernet(images)
            if self.backbone == 'hourglass':
                outputs = [
                    outputs[-1]["hm"].sigmoid(), outputs[-1]["wh"],
                    outputs[-1]["reg"]
                ]
            outputs = decode_bbox(outputs[0], outputs[1], outputs[2],
                                  self.image_size, self.confidence, self.cuda)

        #-------------------------------------------------------#
        #   For CenterNet, locating the object center is crucial.
        #   A large object carries a lot of local information, so for
        #   one large object the center point is hard to pin down.
        #   The max-pooling-based non-maximum suppression cannot remove
        #   these local boxes, so an extra box-level NMS pass is applied here.
        #   In practice, the extra NMS makes little difference with the
        #   hourglass backbone, but a larger one with resnet.
        #-------------------------------------------------------#
        try:
            if self.nms:
                outputs = np.array(nms(outputs, self.nms_threhold))
        except Exception:
            pass

        output = outputs[0]
        if len(output) <= 0:
            return image

        batch_boxes = output[:, :4]
        det_conf = output[:, 4]
        det_label = output[:, 5]

        # Select the boxes whose score is above the confidence threshold
        det_xmin, det_ymin = batch_boxes[:, 0], batch_boxes[:, 1]
        det_xmax, det_ymax = batch_boxes[:, 2], batch_boxes[:, 3]

        top_indices = [
            i for i, conf in enumerate(det_conf) if conf >= self.confidence
        ]
        top_conf = det_conf[top_indices]
        top_label_indices = det_label[top_indices].tolist()
        top_xmin = np.expand_dims(det_xmin[top_indices], -1)
        top_ymin = np.expand_dims(det_ymin[top_indices], -1)
        top_xmax = np.expand_dims(det_xmax[top_indices], -1)
        top_ymax = np.expand_dims(det_ymax[top_indices], -1)

        # Remove the gray bars
        boxes = centernet_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.image_size[0], self.image_size[1]]), image_shape)

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))

        thickness = (np.shape(image)[0] +
                     np.shape(image)[1]) // self.image_size[0]

        for i, c in enumerate(top_label_indices):
            predicted_class = self.class_names[int(c)]
            score = top_conf[i]

            top, left, bottom, right = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))

            # Draw the bounding boxes
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label)

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # use a separate index so the outer enumerate variable i is not shadowed
            for t in range(thickness):
                draw.rectangle([left + t, top + t, right - t, bottom - t],
                               outline=self.colors[int(c)])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[int(c)])
            draw.text(text_origin,
                      str(label, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)
            del draw
        return image
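
The long comment in this example contrasts CenterNet's pooling-based suppression with the extra box-level `nms` call. The pooling trick keeps a heatmap location only if it equals the max-pooled response of its neighbourhood, so nearby weaker peaks are zeroed out. A minimal PyTorch sketch of that standard step (the function name and tensor shapes are illustrative):

import torch.nn.functional as F

def heatmap_nms(heatmap, kernel=3):
    # heatmap: (B, C, H, W) class heatmap after sigmoid
    pad = (kernel - 1) // 2
    hmax = F.max_pool2d(heatmap, kernel, stride=1, padding=pad)
    keep = (hmax == heatmap).float()  # 1.0 only at local maxima
    return heatmap * keep

Note also that `draw.textsize` used above was removed in Pillow 10; if you update the drawing code, `bbox = draw.textbbox((0, 0), label, font=font)` gives the size as `(bbox[2] - bbox[0], bbox[3] - bbox[1])`.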
Ejemplo n.º 30
0
    def detect_image(self, image_id, image):
        f = open("./input/detection-results/" + image_id + ".txt", "w")
        self.confidence = 0.01
        self.nms_threhold = 0.5

        image_shape = np.array(np.shape(image)[0:2])
        #---------------------------------------------------------#
        #   Add gray bars to the image for a distortion-free resize
        #---------------------------------------------------------#
        crop_img = letterbox_image(image,
                                   [self.image_size[0], self.image_size[1]])
        #----------------------------------------------------------------------------------#
        #   Convert RGB to BGR, because the original centernet_hourglass weights were trained on BGR images
        #----------------------------------------------------------------------------------#
        photo = np.array(crop_img, dtype=np.float32)[:, :, ::-1]
        #-----------------------------------------------------------#
        #   Preprocess and normalize the image; the resulting photo has shape [1, 3, 512, 512]
        #-----------------------------------------------------------#
        photo = np.reshape(
            np.transpose(preprocess_image(photo), (2, 0, 1)),
            [1, self.image_size[2], self.image_size[0], self.image_size[1]])

        with torch.no_grad():
            images = Variable(
                torch.from_numpy(np.asarray(photo)).type(torch.FloatTensor))
            if self.cuda:
                images = images.cuda()

            outputs = self.centernet(images)
            if self.backbone == 'hourglass':
                outputs = [
                    outputs[-1]["hm"].sigmoid(), outputs[-1]["wh"],
                    outputs[-1]["reg"]
                ]
            #-----------------------------------------------------------#
            #   Decode the raw outputs into boxes
            #-----------------------------------------------------------#
            outputs = decode_bbox(outputs[0], outputs[1], outputs[2],
                                  self.image_size, self.confidence, self.cuda)

            #-------------------------------------------------------#
            #   For CenterNet, locating the object center is crucial.
            #   A large object carries a lot of local information, so for
            #   one large object the center point is hard to pin down.
            #   The max-pooling-based non-maximum suppression cannot remove
            #   these local boxes, so an extra box-level NMS pass is applied here.
            #   In practice, the extra NMS makes little difference with the
            #   hourglass backbone, but a larger one with resnet.
            #-------------------------------------------------------#
            try:
                if self.nms:
                    outputs = np.array(nms(outputs, self.nms_threhold))
            except Exception:
                pass

            output = outputs[0]
            if len(output) <= 0:
                # close the results file before returning on empty output
                f.close()
                return

            batch_boxes = output[:, :4]
            det_conf = output[:, 4]
            det_label = output[:, 5]

            det_xmin, det_ymin = batch_boxes[:, 0], batch_boxes[:, 1]
            det_xmax, det_ymax = batch_boxes[:, 2], batch_boxes[:, 3]
            #-----------------------------------------------------------#
            #   Select the boxes whose score is above the confidence threshold
            #-----------------------------------------------------------#
            top_indices = [
                i for i, conf in enumerate(det_conf) if conf >= self.confidence
            ]
            top_conf = det_conf[top_indices]
            top_label_indices = det_label[top_indices].tolist()
            top_xmin = np.expand_dims(det_xmin[top_indices], -1)
            top_ymin = np.expand_dims(det_ymin[top_indices], -1)
            top_xmax = np.expand_dims(det_xmax[top_indices], -1)
            top_ymax = np.expand_dims(det_ymax[top_indices], -1)

            #-----------------------------------------------------------#
            #   Remove the gray-bar region
            #-----------------------------------------------------------#
            boxes = centernet_correct_boxes(
                top_ymin, top_xmin, top_ymax, top_xmax,
                np.array([self.image_size[0], self.image_size[1]]),
                image_shape)

        for i, c in enumerate(top_label_indices):
            predicted_class = self.class_names[int(c)]
            score = str(top_conf[i])

            top, left, bottom, right = boxes[i]
            f.write("%s %s %s %s %s %s\n" %
                    (predicted_class, score[:6], str(int(left)), str(int(top)),
                     str(int(right)), str(int(bottom))))

        f.close()
        return
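
A hedged sketch of how this variant is typically driven when producing detection-results files for a whole test split; the VOC-style paths, the image-ID list, and the constructed `centernet` object are all assumptions:

import os
from PIL import Image

os.makedirs("./input/detection-results", exist_ok=True)
image_ids = open("VOCdevkit/VOC2007/ImageSets/Main/test.txt").read().strip().split()

for image_id in image_ids:
    image = Image.open("VOCdevkit/VOC2007/JPEGImages/" + image_id + ".jpg")
    # writes ./input/detection-results/<image_id>.txt
    centernet.detect_image(image_id, image)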