Ejemplo n.º 1
0
    def _pnet_detect(self, inputs, minsize=20, scale_factor=0.709):
        """Run P-Net over an image pyramid and return candidate boxes and scores.

        Args:
            inputs: image array; only ``shape`` is read here, everything else is
                delegated to ``self._img_resize`` / ``self.pnet``.
            minsize: the image is first rescaled by ``12 / minsize``.
            scale_factor: shrink factor applied between pyramid levels.

        Returns:
            ``(bboxes, scores)`` after a per-level NMS (0.5, 'union') and a final
            NMS across all levels (0.7, 'min'). When no level yields a box the
            result is ``(np.empty((0, 4)), np.empty((0, 1)))``.
        """
        level_boxes, level_scores = [], []

        # Treat 12x12 as one unit: rescale so the smallest face of interest
        # (minsize) maps onto a 12-pixel cell, then keep shrinking until the
        # image itself is down to 12 pixels on its shorter side.
        pyramid_img = self._img_resize(inputs, 12/minsize)

        # Walk the pyramid and gather every candidate box.
        while min(pyramid_img.shape[:2]) >= 12:
            cls, reg = self.pnet.predict(tf.expand_dims(pyramid_img, 0))
            ratio = pyramid_img.shape[0]/inputs.shape[0]
            cand_boxes, cand_scores = self._get_box(reg, cls[0, :, :, 1], ratio)

            # Advance to the next level before deciding whether this level
            # contributed anything, so the loop always makes progress.
            pyramid_img = self._img_resize(pyramid_img, scale_factor)

            if len(cand_boxes) != 0:
                picked = nms(cand_boxes, cand_scores, 0.5, 'union')
                level_boxes.append(cand_boxes[picked])
                level_scores.append(cand_scores[picked])

        if not level_boxes:
            return np.empty((0, 4)), np.empty((0, 1))

        all_boxes = np.vstack(level_boxes)
        all_scores = np.hstack(level_scores)

        # One more suppression over the merged levels, mainly to remove overlap.
        survivors = nms(all_boxes, all_scores, 0.7, 'min')
        return all_boxes[survivors], all_scores[survivors]
Ejemplo n.º 2
0
    def detOnet(self, img, boxes):
        """Refine ``boxes`` with ``self._ro_net`` and suppress duplicates twice.

        ``self._ro_net`` is called with a size argument of 48 (presumably the
        O-Net crop size — TODO confirm). Its output goes through
        ``utils.nms`` with threshold 0.7 and then again with threshold 0.3 and
        ``is_min=True``. The result of the second pass is returned.
        """
        refined = self._ro_net(img, boxes, 48)
        first_pass = utils.nms(refined, 0.7)
        return utils.nms(first_pass, 0.3, is_min=True)
Ejemplo n.º 3
0
def textline_extract(image, prediction, threshold=0.3):
    """Turn raw network outputs into a list of text-line boxes.

    Args:
        image: array whose ``shape`` unpacks as ``(h, w, channels)``.
        prediction: indexable with at least three entries; entry 1 is used as
            the regression output and entry 2 as the class probabilities
            (entry 0 is converted to an array but not used further).
        threshold: minimum foreground probability (``cls_prod[0, :, 1]``) for
            an anchor to be kept.

    Returns:
        ``list`` over the int32 array returned by
        ``TextProposalConnector.get_text_lines``.
    """
    img_h, img_w, _ = image.shape
    cls, regr, cls_prod = (np.array(prediction[k]) for k in range(3))

    # Decode anchors on the stride-16 grid and clip them to the image.
    anchors = utils.gen_anchor((int(img_h / 16), int(img_w / 16)), 16)
    boxes = utils.clip_box(utils.bbox_transfor_inv(anchors, regr), [img_h, img_w])

    # Keep anchors whose foreground probability exceeds the threshold.
    fg_idx = np.where(cls_prod[0, :, 1] > threshold)[0]
    fg_boxes = boxes[fg_idx, :].astype('int32')
    fg_scores = cls_prod[0, fg_idx, 1]

    # Size filter.
    size_ok = utils.filter_bbox(fg_boxes, 16)
    fg_boxes = fg_boxes[size_ok]
    fg_scores = fg_scores[size_ok]
    fg_scores = np.reshape(fg_scores, (fg_scores.shape[0], 1))

    # Non-maximum suppression on [box, score] rows.
    nms_keep = utils.nms(np.hstack((fg_boxes, fg_scores)), 0.3)
    fg_boxes, fg_scores = fg_boxes[nms_keep], fg_scores[nms_keep]

    # Connect the surviving proposals into text lines.
    connector = text_connect.TextProposalConnector()
    lines = connector.get_text_lines(fg_boxes, fg_scores, [img_h, img_w])
    return list(lines.astype('int32'))
    def detect_pnet(self,image):
        """Run ``self.pnet`` over a shrinking image pyramid and collect boxes.

        ``image`` must provide ``size`` (unpacked as ``w, h``) and ``resize``
        (presumably a PIL image — TODO confirm). The loop runs while the
        shorter side is above 12, shrinking by a factor of 0.7 per level.
        Per level, positions with confidence above 0.6 are converted to boxes
        by ``self.offset_to_boxes`` and filtered with ``utils.nms`` (i=0.5).

        Returns:
            ``np.array`` built from the boxes of every level.
        """
        scale = 1
        w ,h = image.size
        _w,_h = w,h
        min_side_len = min(_w,_h)
        boxes = []
        img = image

        while min_side_len>12:
            img_data = transform(img)
            img_data.unsqueeze_(0)  # reshape to (1, C, H, W)
            img_data = img_data.to(self.device)
            cond, offset = self.pnet(img_data)
            offset=offset.detach()  # drop the autograd graph; author notes layout (N, 4, H, W)
            cond = cond.detach()  # author notes layout (N, 1, H, W)
            _cond,_offset = cond[0][0].cpu(),offset[0].cpu()
            index = torch.gt(_cond,0.6)
            __cond = _cond[index]  # mask-index the confidences that pass the threshold
            indexs  = torch.nonzero(index)  # (N, 2)
            __offset = _offset[:,indexs[:,0],indexs[:,1]].T  # (N, 4)
            indexs,__cond ,__offset= indexs.numpy(),__cond.numpy(),__offset.numpy()
            offset_boxes = self.offset_to_boxes(indexs,__cond,__offset,scale)
            scale *= 0.7  # the shrink factor matters: with 0.9 some faces were missed
            _w,_h = int(w*scale),int(h*scale)
            min_side_len = min(_w,_h)
            img= img.resize((_w, _h))
            # Suppress duplicates within this pyramid level only.
            _boxes= utils.nms(offset_boxes,i=0.5,isMin=False)
            boxes.extend(_boxes)
        p_boxes = np.array(boxes)

        return p_boxes
Ejemplo n.º 5
0
    def __pnet_detect(self, img):
        """Collect P-Net candidate boxes over a shrinking image pyramid.

        ``img`` must provide ``size`` and ``resize`` (presumably a PIL image —
        TODO confirm). The network is fully convolutional, so any image size
        can be fed in. The pyramid shrinks by 0.7 per level and stops once the
        shorter side is no longer above 12 pixels.

        Returns:
            The per-level ``utils.nms`` survivors stacked into one array, or
            ``np.array([])`` when no level produced a box.
        """
        full_w, full_h = img.size
        shortest = min(full_w, full_h)
        collected = np.array([])  # starts empty, grows by vstack
        scale = 1

        while shortest > 12:
            batch = self.__image_transform(img)  # image -> tensor
            if self.isCuda:
                batch = batch.cuda()
            batch.unsqueeze_(0)  # prepend a leading dimension of size 1

            _cls, _offest, _ = self.pnet(batch)
            conf_map = _cls[0][0].cpu().data
            offset_map = _offest[0].cpu().data

            # Positions whose confidence beats the module-level threshold.
            above = torch.gt(conf_map, p_cls)
            positions = torch.nonzero(above, as_tuple=False)
            level_boxes = self.__box(positions, offset_map[:, above], conf_map[above], scale)
            level_boxes = utils.nms(np.array(level_boxes), p_nms)

            # Move on to the next, smaller pyramid level.
            scale *= 0.7
            next_w, next_h = int(full_w * scale), int(full_h * scale)
            img = img.resize((next_w, next_h))
            shortest = min(next_w, next_h)

            if level_boxes.shape[0] == 0:
                continue
            if collected.size:
                collected = np.vstack([collected, level_boxes])
            else:
                collected = level_boxes

        return collected
Ejemplo n.º 6
0
def show_boxes(curimg, ancs, thresh, name):
    """Draw the NMS-filtered boxes encoded in ``ancs`` and show the image.

    Args:
        curimg: image array; converted to float32 before drawing.
        ancs: 3-D array. Channels 0..2 hold per-anchor scores; for anchor
            ``a`` channels ``3 + 4*a`` to ``3 + 4*a + 3`` hold centre offsets
            and sizes in cell units (as read by the indexing below).
        thresh: score threshold for a cell/anchor to produce a box.
        name: unused (the ``imwrite`` call that consumed it is disabled).

    Returns:
        Always ``1``. Blocks in ``cv2.waitKey()`` until a key is pressed.
    """
    hits = zip(*np.where(ancs[:3, :, :] > thresh))
    curimg = curimg.astype(np.float32)

    # Cell size along image axes 0 and 1 (non-OpenCV axis order). These do not
    # depend on the individual hit, so compute them once.
    xstep = float(curimg.shape[0]) / float(ancs.shape[1])
    ystep = float(curimg.shape[1]) / float(ancs.shape[2])

    rects = []
    for anchor, row, col in hits:
        base = 3 + anchor * 4
        xcenter = xstep * row + xstep * ancs[base, row, col]
        ycenter = ystep * col + ystep * ancs[base + 1, row, col]
        dx = xstep * ancs[base + 2, row, col]
        dy = ystep * ancs[base + 3, row, col]
        top_left = (int(xcenter - dx / 2), int(ycenter - dy / 2))
        bottom_right = (int(xcenter + dx / 2), int(ycenter + dy / 2))
        rects.append(np.array([[top_left[0], top_left[1],
                                bottom_right[0], bottom_right[1],
                                ancs[anchor, row, col]]]))

    if len(rects) > 0:
        rects = np.concatenate(rects, 0)
        rects = rects[utils.nms(rects, 0.18)]
        rects = rects.astype(int)
        for rect in rects:
            # cv2 expects (x, y) points, hence the swapped column order.
            cv2.rectangle(curimg, (rect[1], rect[0]), (rect[3], rect[2]), (255, 0, 0), 4)

    curimg = cv2.cvtColor(curimg, cv2.COLOR_BGR2RGB)
    cv2.imshow('img', curimg)
    cv2.waitKey()
    return 1
Ejemplo n.º 7
0
def is_img(img_cv, color):
    """Send each image to the YOLOv3 serving endpoint and save busy results.

    Each image is resized to 1300x414, converted to grayscale, blurred
    (Gaussian then median) and preprocessed to the module-level ``input_size``
    before being posted to the prediction endpoint. When more than 6 boxes
    survive NMS, the annotated resized image is written to
    ``./pre_out/<color>im<i>.jpg``.

    Args:
        img_cv: sequence of BGR images; nothing happens when it is empty.
        color: prefix used in the output file name.

    Returns:
        None.
    """
    if len(img_cv) == 0:
        return

    print("---1312--------------")
    # Request constants are the same for every image.
    headers = {"content-type": "application/json"}
    num_classes = 65

    for i, crop in enumerate(img_cv):
        im_cv_r = cv2.resize(crop, (1300, 414))
        gray = cv2.cvtColor(im_cv_r, cv2.COLOR_BGR2GRAY)
        # The original also computed cv2.equalizeHist(gray) here and discarded
        # the result; that dead call has been dropped.
        gaussian = cv2.GaussianBlur(gray, (3, 3), 0, 0, cv2.BORDER_DEFAULT)
        original_image = cv2.medianBlur(gaussian, 3)
        original_image_size = original_image.shape[:2]

        image_data = utils.image_preporcess(np.copy(original_image), [input_size, input_size])
        image_data = image_data[np.newaxis, ...]
        data = json.dumps({"signature_name": "serving_default",
                           "instances": image_data.tolist()})
        json_response = requests.post(
            'http://tf:port/v1/models/yolov3:predict', data=data, headers=headers)
        predictions = json.loads(json_response.text)['predictions']

        # Flatten the three detection scales into rows of (5 + num_classes).
        first = predictions[0]
        pred_bbox = np.concatenate(
            [np.reshape(first[key], (-1, 5 + num_classes))
             for key in ('pred_sbbox', 'pred_mbbox', 'pred_lbbox')],
            axis=0)
        bboxes = utils.postprocess_boxes(pred_bbox, original_image_size, input_size, 0.3)
        bboxes = utils.nms(bboxes, 0.45, method='nms')

        if np.array(bboxes).shape[0] > 6:
            image = utils.draw_bbox(im_cv_r, bboxes)
            name = color + 'im' + str(i) + '.jpg'
            path = os.path.join("./pre_out/", name)
            cv2.imwrite(path, image)
            print("-------------")
Ejemplo n.º 8
0
def evaluate(path,nms_threshold,conf_threshold):
    """Evaluate the predictions in ``path`` against ``data/val.json``.

    AP is computed at IoU thresholds 0.5 to 0.75 in steps of 0.05 and averaged
    into ``mAP``. For thresholds listed in the module-level ``plot``, AP,
    precision and recall are also reported. All metrics are printed.

    Args:
        path: JSON file mapping image keys to predictions, each reshaped here
            to rows of 5 values.
        nms_threshold: passed as the third argument of ``nms``.
        conf_threshold: passed as the second argument of ``nms``.

    Returns:
        The mAP averaged over all IoU thresholds.
    """
    # Context managers close both files even if parsing fails.
    with open('data/val.json') as gt_file:
        gts = json.load(gt_file)
    with open(path) as pred_file:
        pds = json.load(pred_file)

    thresholds = np.around(np.arange(0.5,0.76,0.05),2)
    mAP = 0
    batch_metrics = {th: [] for th in thresholds}
    n_gt = 0
    for img in tqdm(gts.keys()):
        pred = torch.tensor(pds[img])
        pred = pred.reshape(-1,5)
        gt = gen_gts(gts[img])
        n_gt += gt.shape[0]
        pred_nms = nms(pred,conf_threshold, nms_threshold)
        for th in batch_metrics:
            batch_metrics[th].append(cal_tp_per_item(pred_nms,gt,th))

    metrics = {}
    for th in batch_metrics:
        # Regroup the per-image tuples into one array per field.
        tps,scores= [np.concatenate(x, 0) for x in list(zip(*batch_metrics[th]))]
        precision, recall, AP= ap_per_class(tps, scores, n_gt)
        mAP += np.mean(AP)
        if th in plot:
            metrics['AP/'+str(th)] = np.mean(AP)
            metrics['Precision/'+str(th)] = np.mean(precision)
            metrics['Recall/'+str(th)] = np.mean(recall)
    metrics['mAP'] = mAP/len(thresholds)
    for k in metrics:
        print(k,':',metrics[k])
    return metrics['mAP']
Ejemplo n.º 9
0
def test():
    """Evaluate the module-level ``model`` on ``test_loader`` and log metrics.

    Relies on module-level names not visible here (``model``, ``test_loader``,
    ``use_cuda``, ``conf_thresh``, ``nms_thresh``, ``iou_thresh``, ``eps``,
    ``logging`` and the box helpers). Logs the number of correct detections,
    precision, recall and F-score; returns nothing.
    """
    def truths_length(truths):
        # Count ground-truth rows: the first row whose column 1 is 0 marks the
        # end. Assumes targets are padded to 50 rows — TODO confirm.
        for i in range(50):
            if truths[i][1] == 0:
                return i
        return 50

    model.eval()
    num_classes = model.num_classes
    total = 0.0  # number of ground-truth boxes seen
    proposals = 0.0  # number of predictions above conf_thresh
    correct = 0.0  # ground truths matched by a prediction
    device = torch.device("cuda" if use_cuda else "cpu")

    if model.net_name() == 'region':  # region_layer
        shape = (0, 0)
    else:
        shape = (model.width, model.height)
    for data, target, org_w, org_h in test_loader:
        print("======")
        data = data.to(device)
        output = model(data)
        all_boxes = get_all_boxes(output,
                                  shape,
                                  conf_thresh,
                                  num_classes,
                                  use_cuda=use_cuda)

        for k in range(len(all_boxes)):
            boxes = all_boxes[k]
            # Called for its effect on `boxes`; the return value is not used.
            correct_yolo_boxes(boxes, org_w[k], org_h[k], model.width,
                               model.height)
            boxes = np.array(nms(boxes, nms_thresh))
            truths = target[k].view(-1, 5)
            num_gts = truths_length(truths)
            total = total + num_gts
            num_pred = len(boxes)
            if num_pred == 0:
                continue

            proposals += int((boxes[:, 4] > conf_thresh).sum())
            for i in range(num_gts):
                # Ground-truth row is read as (class, then four box values);
                # it is rebuilt in the 7-column layout of a prediction.
                gt_boxes = torch.FloatTensor([
                    truths[i][1], truths[i][2], truths[i][3], truths[i][4],
                    1.0, 1.0, truths[i][0]
                ])
                gt_boxes = gt_boxes.repeat(num_pred, 1).t()
                pred_boxes = torch.FloatTensor(boxes).t()
                best_iou, best_j = torch.max(
                    multi_bbox_ious(gt_boxes, pred_boxes, x1y1x2y2=False), 0)
                # pred_boxes and gt_boxes are transposed for torch.max
                # A match needs enough overlap and the same class (row 6).
                if best_iou > iou_thresh and pred_boxes[6][best_j] == gt_boxes[
                        6][0]:
                    correct += 1

    # eps keeps the ratios finite when a denominator is zero.
    precision = 1.0 * correct / (proposals + eps)
    recall = 1.0 * correct / (total + eps)
    fscore = 2.0 * precision * recall / (precision + recall + eps)
    logging("correct: %d, precision: %f, recall: %f, fscore: %f" %
            (correct, precision, recall, fscore))
Ejemplo n.º 10
0
 def visualize(self, image_name, depth_name, flow_name, box_name,
               figure_path):
     """Predict on one sample and visualise ground truth, prediction and boxes.

     Args:
         image_name, depth_name, flow_name, box_name: passed through to
             ``self.data.get_one_sample``. ``image_name`` is also split on
             ``'/'`` to derive the output file prefix.
         figure_path: output directory. An empty string means the figures are
             shown interactively instead of being saved.
     """
     im, orig_im, dp, orig_dp, fl, orig_fl, box, lb, of = \
         self.data.get_one_sample(image_name, depth_name, flow_name, box_name)
     pred, pred_of, loss = self.predict(im, dp, fl, lb, of)
     pred_box = nms(pred, pred_of, self.data.orig_im_size[0],
                    self.data.orig_im_size[1])
     # Compare by value: `is ''` tests object identity, which is an
     # implementation detail and triggers a SyntaxWarning on Python 3.8+.
     if figure_path == '':
         self.visualize_groundtruth(orig_im, im, orig_dp, dp, orig_fl, fl,
                                    box, lb, of)
         self.visualize_prediction(im, dp, fl, pred, pred_of, loss)
         self.visualize_box(orig_im, orig_dp, orig_fl, pred_box, loss)
         plt.show()
         plt.close('all')
     else:
         if not os.path.exists(figure_path):
             os.makedirs(figure_path)
         # Prefix is "<parent dir>_<leading id of the file name>".
         dirs = image_name.split('/')
         sub_dir, image_name = dirs[-2], dirs[-1]
         file_name, file_ext = os.path.splitext(image_name)
         file_id = file_name.split('_')[0]
         figure_prefix = os.path.join(figure_path, sub_dir + '_' + file_id)
         self.visualize_groundtruth(orig_im, im, orig_dp, dp, orig_fl, fl,
                                    box, lb, of, figure_prefix)
         self.visualize_prediction(im, dp, fl, pred, pred_of, loss,
                                   figure_prefix)
         self.visualize_box(orig_im, orig_dp, orig_fl, pred_box, loss,
                            figure_prefix)
Ejemplo n.º 11
0
 def model_reponse(self, data_string, original_image_size):
     """Query the serving model over gRPC and return NMS-filtered boxes.

     Args:
         data_string: image data packed into a tensor proto of shape
             ``[1, 416, 416, 3]``.
         original_image_size: forwarded to ``utils.postprocess_boxes``.

     Returns:
         The result of ``utils.nms`` (threshold 0.15, method ``'nms'``).
     """
     # Create the channel credentials.
     channel = implementations.insecure_channel(self.host, int(self.port))
     # Build the service stub from the classes generated out of the .proto file.
     stub = prediction_service_pb2_grpc.PredictionServiceStub(channel._channel)

     request = predict_pb2.PredictRequest()  # request type
     request.model_spec.name = self.model_name  # name of the model to query
     request.model_spec.signature_name = 'serving_default'  # its signature
     # Convert the input data into the wire format.
     tensor_proto = tf.contrib.util.make_tensor_proto(data_string,
                                                      shape=[1, 416, 416, 3])
     request.inputs['images'].CopyFrom(tensor_proto)

     result = stub.Predict(request, 10.0)

     # The three outputs are flattened into rows of 85 values each.
     branches = [
         np.reshape(np.array(list(result.outputs[key].float_val)), (-1, 85))
         for key in ('out1', 'out2', 'out3')
     ]
     pred_bbox = np.concatenate(branches, axis=0)
     bboxes = utils.postprocess_boxes(pred_bbox, original_image_size, 416,
                                      0.3)
     return utils.nms(bboxes, 0.15, method='nms')
Ejemplo n.º 12
0
 def nms_box(self, boxes):
     """Run NMS separately for every predicted class.

     Args:
         boxes: tensor of shape ``(n, 5 + num_classes)``. Columns 1..4 are
             read as centre x, centre y, width, height. Columns from 5 on are
             per-class scores.

     Returns:
         ``(m, 6)`` NumPy array of survivors with columns 1..4 converted to
         corner form (x1, y1, x2, y2) and column 5 holding the class index.
         An empty ``(0, 6)`` array is returned when there is no input or
         nothing survives.
     """
     if boxes.shape[0] == 0:
         return np.array([]).reshape(-1, 6)

     # Replace the per-class scores by the index of the best class: (n, 6).
     cls_index = torch.argmax(boxes[:, 5:], dim=1).float().reshape(-1, 1)
     _boxes = torch.cat((boxes[:, 0:5], cls_index), dim=1)
     _boxes = _boxes.cpu().detach().numpy()

     kept = []
     # Iterate over the classes actually present (ascending) instead of a
     # hard-coded range(10), so classes >= 10 are no longer silently dropped
     # and NMS is never called on an empty group.
     for cls_id in np.unique(_boxes[:, 5]):
         group = _boxes[_boxes[:, 5] == cls_id]
         corners = group.copy()
         # Centre/size -> corner coordinates. Columns 0 and 5 stay as copied.
         corners[:, 1] = group[:, 1] - group[:, 3] * 0.5
         corners[:, 2] = group[:, 2] - group[:, 4] * 0.5
         corners[:, 3] = corners[:, 1] + group[:, 3]
         corners[:, 4] = corners[:, 2] + group[:, 4]
         # Author's note: with this IoU of 0.3, one of two deer overlapping
         # at IoU 0.55 gets dropped.
         survivors = utils.nms(corners, i=0.3, isMin=False)
         if survivors.shape[0] > 0:
             kept.extend(survivors)

     if not kept:
         # np.stack raises on an empty list; mirror the empty-input result.
         return np.array([]).reshape(-1, 6)

     nms_boxes = np.stack(kept)
     print(nms_boxes)
     return nms_boxes
Ejemplo n.º 13
0
    def __detect_pnet(self, image):
        """Run P-Net over an image pyramid and return the NMS-filtered boxes.

        ``image`` must provide ``size`` and ``resize`` (presumably a PIL
        image — TODO confirm). The pyramid shrinks by 0.7 per level while the
        shorter side is above 12. Positions with confidence above 0.6 are
        turned into boxes by the module-level ``_box`` helper.

        Returns:
            ``nms(np.stack(boxes), 0.6)``, or ``np.array([])`` when no level
            produced a candidate.
        """
        boxes = []
        img = image
        w, h = img.size
        min_side_len = min(w, h)
        scale = 1

        while min_side_len > 12:
            img_data = self.__image_transform(img)
            img_data.unsqueeze_(0)
            img_data = img_data.to(self.device)
            _cls, _offset, _landmark = self.pnet(img_data)
            cls, offset, landmark = _cls[0][0].cpu().data, _offset[0].cpu().data, _landmark[0].cpu().data
            idxs = torch.nonzero(torch.gt(cls, 0.6))
            boxes.extend(_box(idxs, offset, landmark, cls[idxs[:, 0], idxs[:, 1]], scale))

            scale *= 0.7
            _w, _h = int(w * scale), int(h * scale)
            img = img.resize((_w, _h))
            min_side_len = min(_w, _h)

        # Decide "nothing found" only after every level has been tried. The
        # check used to sit inside the loop and aborted whenever the first
        # (full-resolution) level was empty, skipping all smaller levels.
        if len(boxes) == 0:
            return np.array([])

        return nms(np.stack(boxes), 0.6)
Ejemplo n.º 14
0
    def predict(self):
        """Run detection on ``./414162.jpg`` and write the result to ``./test.jpg``.

        The image is preprocessed to ``self.input_size`` and fed through the
        session. Boxes are post-processed with score threshold 0.5 and NMS
        threshold 0.45, then drawn on the image.

        Raises:
            FileNotFoundError: if the input image cannot be read.
        """
        np.set_printoptions(threshold=np.inf)
        image_path = './414162.jpg'
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure with None rather than raising.
            raise FileNotFoundError("cannot read image: " + image_path)
        image = np.array(image)
        image_shape = image.shape
        print("image_shape: ", image_shape)
        image = np.copy(image)
        image_data = utils.image_preprocess(image,
                                            [self.input_size, self.input_size])
        image_data = image_data[np.newaxis, ...]

        pred_bbox = self.sess.run([self.pred_bbox],
                                  feed_dict={
                                      self.input: image_data,
                                      self.training: False
                                  })
        pred_bbox = np.array(pred_bbox[0])
        # Undo the preprocessing with the size it actually used, not a
        # hard-coded 416.
        pred_bbox = utils.postprocess_boxes(pred_bbox, image_shape,
                                            self.input_size, 0.5)
        print("pred_bbox shape: ", pred_bbox.shape)

        pred_bbox = utils.nms(pred_bbox, 0.45)
        print("pred_bbox after: ", pred_bbox)

        image = utils.draw_bbox(image, pred_bbox, show_label=True)
        cv2.imwrite('./test.jpg', image)
Ejemplo n.º 15
0
    def decode(self, loc_preds, cls_preds, input_size):
        """
        Decode outputs back to bounding box locations and class labels.

        Args:
          loc_preds: (tensor) predicted locations, sized [#anchors, 4].
          cls_preds: (tensor) predicted class labels, sized [#anchors, #classes].
          input_size: (int/tuple) model input size of (w,h).

        Returns:
          boxes: (tensor) decoded box locations, sized [#obj,4].
          labels: (tensor) class labels for each box, sized [#obj,].
        """
        CLS_THRESH = 0.5
        NMS_THRESH = 0.5

        input_size = torch.FloatTensor([input_size,input_size]) if isinstance(input_size, int) \
            else torch.FloatTensor(input_size)
        anchor_boxes = self._get_anchor_boxes(input_size)

        loc_xy = loc_preds[:, :2]
        loc_wh = loc_preds[:, 2:]

        # Anchors are (cx, cy, w, h): offsets are scaled by the anchor size,
        # sizes are predicted in log space.
        xy = loc_xy * anchor_boxes[:, 2:] + anchor_boxes[:, :2]
        wh = loc_wh.exp() * anchor_boxes[:, 2:]
        boxes = torch.cat([xy - wh / 2, xy + wh / 2], 1)  # [#anchors,4]

        score, labels = cls_preds.sigmoid().max(1)  # [#anchors,]
        # nonzero() gives [#obj, 1]. Squeeze only that trailing axis so a
        # single detection stays a 1-element index vector instead of
        # collapsing to a 0-d tensor, which would make boxes[ids] 1-D.
        ids = (score > CLS_THRESH).nonzero().squeeze(1)  # [#obj,]
        keep = nms(boxes[ids], score[ids], threshold=NMS_THRESH)
        return boxes[ids][keep], labels[ids][keep]
Ejemplo n.º 16
0
    def forward(self, input, thresh, anchors):
        """Detect boxes on a batch and apply per-image, per-class NMS.

        Args:
            input: batch tensor; ``input.size(0)`` is the number of images.
            thresh: confidence threshold handed to ``self._filter``.
            anchors: mapping with keys 13, 26 and 52 (one anchor set per
                feature-map size).

        Returns:
            A list with one tensor per image holding the boxes kept by NMS.
            An image without any surviving box contributes an empty tensor
            with the same column layout.
        """
        # Network outputs for the three scales (NCHW).
        output_13, output_26, output_52 = self.net(input.to(device))

        # _filter returns the positions whose confidence exceeds the threshold
        # and the prediction vectors at those positions. _parse turns them
        # into rows of x1, y1, x2, y2, confidence, class, image index.
        idxs_13, vecs_13 = self._filter(output_13, thresh)
        boxes_13 = self._parse(idxs_13, vecs_13, 32, anchors[13])

        idxs_26, vecs_26 = self._filter(output_26, thresh)
        boxes_26 = self._parse(idxs_26, vecs_26, 16, anchors[26])

        idxs_52, vecs_52 = self._filter(output_52, thresh)
        boxes_52 = self._parse(idxs_52, vecs_52, 8, anchors[52])

        boxes_all = torch.cat([boxes_13, boxes_26, boxes_52], dim=0)

        # NMS is run separately for every class within every image.
        last_boxes = []
        for n in range(input.size(0)):
            n_boxes = []
            boxes_n = boxes_all[boxes_all[:, 6] == n]
            print(boxes_n)  # debug output kept from the original
            for cls in range(cfg.class_num):
                boxes_c = boxes_n[boxes_n[:, 5] == cls]
                if boxes_c.size(0) > 0:
                    n_boxes.extend(utils.nms(boxes_c, 0.3))
            if n_boxes:
                last_boxes.append(torch.stack(n_boxes))
            else:
                # torch.stack raises on an empty list; keep the list aligned
                # with the batch by adding an empty slice instead.
                last_boxes.append(boxes_n[:0])

        return last_boxes
Ejemplo n.º 17
0
    def forward(self, face_conf, face_locdata):
        """Decode face detections for a batch and keep the top results after NMS.

        Args:
            face_conf: tensor indexed as ``[n, prior_num, 4]``. The maximum of
                channels 0..2 and channel 3 are soft-maxed into a 2-way score.
            face_locdata: per-image location predictions passed to ``decode``.

        Returns:
            Tensor of shape ``(n, self.top_k, 5)``. Each filled row is
            ``(score, box...)``; rows beyond the number of kept boxes, and
            images without a confident prior, stay zero.
        """
        priors = pyramidAnchors(640)

        face_confdata_0, _ = torch.max(face_conf[:, :, 0:3], dim=2, keepdim=True)
        face_confdata_1 = face_conf[:, :, 3:4]
        face_confdata = F.softmax(torch.cat((face_confdata_0, face_confdata_1), dim=2), dim=2)      # [n, prior_num, 2]
        conf_pred = face_confdata.transpose(2, 1)

        num = face_conf.size(0)
        output = torch.zeros(num, self.top_k, 5)

        # Concatenate the per-level priors into one tensor.
        prs = torch.Tensor(priors[0]).to(self.device)
        for i in range(1, len(priors)):
            prs = torch.cat((prs, torch.Tensor(priors[i]).to(self.device)), 0)     # [prior_num, 4]

        for i in range(num):
            conf_scores = conf_pred[i].clone()
            c_mask = conf_scores[0].gt(self.confidence_thred)
            scores = conf_scores[0][c_mask]

            # A boolean-mask selection is 1-D even when nothing is selected,
            # so test the element count. The previous `dim() == 0` check
            # never fired and let empty inputs reach nms.
            if scores.numel() == 0:
                continue

            decoded_boxes = decode(face_locdata[i], prs)
            l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
            boxes = decoded_boxes[l_mask].view(-1, 4)
            ids, count = nms(boxes, scores, self.nms_thred, self.top_k)

            output[i, :count] = \
                torch.cat((scores[ids[:count]].unsqueeze(1),
                           boxes[ids[:count]]), 1)
        return output
Ejemplo n.º 18
0
    def __pnet_detect(self, image):
        """Gather P-Net boxes over an image pyramid and suppress duplicates.

        ``image`` must provide ``size`` and ``resize`` (presumably a PIL
        image — TODO confirm). Levels shrink by 0.7 while the shorter side is
        above 12. Every position with confidence above 0.6 becomes a box via
        ``self.__box``.

        Returns:
            ``utils.nms`` (threshold 0.5) over the boxes of all levels.
        """
        full_w, full_h = image.size
        shortest = min(full_w, full_h)
        scale = 1
        candidates = []
        current = image

        while shortest > 12:
            batch = self.__image_transform(current)
            if self.isCuda:
                batch = batch.cuda()
            batch.unsqueeze_(0)

            _cls, _offest = self.pnet(batch)
            conf_map = _cls[0][0].cpu().data
            offset_map = _offest[0].cpu().data

            hits = torch.nonzero(torch.gt(conf_map, 0.6))
            candidates.extend(
                self.__box(hit, offset_map, conf_map[hit[0], hit[1]], scale)
                for hit in hits)

            # Next, smaller pyramid level.
            scale *= 0.7
            next_w, next_h = int(full_w * scale), int(full_h * scale)
            current = current.resize((next_w, next_h))
            shortest = min(next_w, next_h)

        return utils.nms(np.array(candidates), 0.5)
Ejemplo n.º 19
0
    def save_json(self, epoch, test_split, nms_mode, nms_thresh):
        """Apply NMS to ``self.preds`` and dump one JSON file per entry.

        Files are written to
        ``<out_dir>/<test_split>/<epoch>/<key>/results_spotting.json``.

        Args:
            epoch: used (stringified) as a directory name and in the log line.
            test_split: sub-directory of ``self.out_dir``.
            nms_mode: ``"new"`` selects ``nms``, ``"standard"`` selects
                ``standard_nms``. Anything else prints an error and exits.
            nms_thresh: threshold passed to the selected NMS function.

        Returns:
            The directory the predictions were written under.
        """
        def _make_dir(path):
            # Create `path`, tolerating only "already exists".
            try:
                os.makedirs(path)
            except OSError as err:
                if err.errno != errno.EEXIST:
                    raise

        predictions_path = os.path.join(self.out_dir, test_split, str(epoch))
        _make_dir(predictions_path)

        if nms_mode not in ("new", "standard"):
            print(
                "Error: invalid NMS mode specified, must be 'standard' or 'new'"
            )
            sys.exit()
        suppress = nms if nms_mode == "new" else standard_nms
        preds_after_nms = suppress(self.preds, nms_thresh)

        print("Saving prediction json at epoch: " + str(epoch) + "...")
        for f, pred in preds_after_nms.items():
            target_dir = os.path.join(predictions_path, f)
            _make_dir(target_dir)
            with open(os.path.join(target_dir, "results_spotting.json"),
                      "w") as outfile:
                json.dump(pred, outfile)
        return predictions_path
Ejemplo n.º 20
0
    def yuNetDetection(self, frame):
        """Detect faces in ``frame`` and return them as ``(x, y, w, h)`` rows.

        On the first call a ``PriorBox`` is created for the frame size and
        cached on ``self.pb``. The frame is fed to the detector as a 640x480
        blob. Detections with a score (last column) above ``self.confidence``
        are filtered with ``nms``.

        Returns:
            Integer array with one ``(x, y, width, height)`` row per face, or
            an empty tuple when no detection passes the confidence threshold.
        """
        if self.init == 0:
            # shape[:2] is (rows, cols), i.e. (height, width).
            frameHeight, frameWidth = frame.shape[:2]
            self.pb = PriorBox(input_shape=(640, 480),
                               output_shape=(frameWidth, frameHeight))
            self.init = 1

        blob = cv2.dnn.blobFromImage(frame, size=(640, 480))
        outputNames = ['loc', 'conf', 'iou']
        self.detector.setInput(blob)
        loc, conf, iou = self.detector.forward(outputNames)
        dets = self.pb.decode(np.squeeze(loc, axis=0),
                              np.squeeze(conf, axis=0),
                              np.squeeze(iou, axis=0))
        idx = np.where(dets[:, -1] > self.confidence)[0]
        dets = dets[idx]

        if not dets.shape[0]:
            return ()

        facess = nms(dets, self.threshold)
        # np.int was only an alias of the builtin int and has been removed
        # from NumPy; the builtin gives the same dtype.
        faces = np.array(facess[:, :4]).astype(int)
        faceStartXY = faces[:, :2]
        faceEndXY = faces[:, 2:4]
        # Corner pairs -> top-left corner plus width/height.
        faceWH = faceEndXY - faceStartXY
        return np.hstack((faceStartXY, faceWH))
Ejemplo n.º 21
0
def main(_argv):
    """Build YOLOv3, run it on ``FLAGS.image`` and write the annotated image.

    Weights come from ``FLAGS.weights``, the network input size from
    ``FLAGS.size``. The result is written to ``FLAGS.output``.
    """
    input_layer = tf.keras.layers.Input([FLAGS.size, FLAGS.size, 3])
    feature_maps = YOLOv3(input_layer)

    # One decoded box tensor per feature map.
    bbox_tensors = [decode(fm, i) for i, fm in enumerate(feature_maps)]

    model = tf.keras.Model(input_layer, bbox_tensors)
    utils.load_weights(model, FLAGS.weights)

    # Read through a context manager so the file handle is closed promptly.
    with open(FLAGS.image, 'rb') as image_file:
        test_img = tf.image.decode_image(image_file.read(), channels=3)
    img_size = test_img.shape[:2]
    test_img = tf.expand_dims(test_img, 0)
    test_img = utils.transform_images(test_img, FLAGS.size)

    pred_bbox = model.predict(test_img)
    # Flatten every scale to (num_boxes, channels) and merge them.
    pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
    pred_bbox = tf.concat(pred_bbox, axis=0)
    boxes = utils.postprocess_boxes(pred_bbox, img_size, FLAGS.size, 0.3)
    boxes = utils.nms(boxes, 0.45, method='nms')

    original_image = cv2.imread(FLAGS.image)
    img = utils.draw_outputs(original_image, boxes)
    cv2.imwrite(FLAGS.output, img)
Ejemplo n.º 22
0
def rcnn_detect(imidx, imdb, rcnn_model):
    """Score cached region features and return per-class detections.

    Args:
        imidx: image index used to load the cached pool5 features.
        imdb: dataset object; only ``imdb.name`` is read here.
        rcnn_model: provides ``cache_name``, ``opts.layer``, ``classifier``
            and ``classes``.

    Returns:
        A list with one entry per class. Entry ``i`` holds the scored boxes
        (box columns followed by the class-``i`` score) kept by NMS at 0.3.
        Class 0, and classes that are never the arg-max, stay ``[]``.
    """
    d = rcnn_load_cached_pool5_features(rcnn_model.cache_name, imdb.name,
                                        imidx)
    d['feat'] = rcnn_pool5_to_fcX(d['feat'], rcnn_model.opts.layer, rcnn_model)
    scores = rcnn_model.classifier.predict_proba(d['feat'].astype('f'))

    # Each box is assigned to its highest-scoring class.
    scores_idx = np.argmax(scores, 1)
    num_classes = len(rcnn_model.classes)
    # `range` instead of `xrange`, which does not exist on Python 3.
    dets = [[] for _ in range(num_classes)]
    for i in range(1, num_classes):
        I = np.where(scores_idx == i)[0]
        if I.size == 0:
            continue
        class_scores = scores[I, i]
        scored_boxes = np.concatenate(
            (d['boxes'][I, :], class_scores.reshape((class_scores.size, 1))), 1)
        keep = nms(scored_boxes, 0.3)
        dets[i] = scored_boxes[keep, :]

    return dets
Ejemplo n.º 23
0
    def forward(self, input, thresh, anchors):
        """Detect boxes on a batch and apply per-image, per-class NMS.

        Args:
            input: batch tensor; ``input.size(0)`` is the number of images.
            thresh: confidence threshold handed to ``self._filter``.
            anchors: mapping with keys 13, 26 and 52 (one anchor set per
                feature-map size).

        Returns:
            A list with one tensor per image holding the boxes kept by NMS.
            An image without any surviving box contributes an empty tensor
            with the same column layout.
        """
        output_13, output_26, output_52 = self.net(input)

        idxs_13, vecs_13 = self._filter(output_13, thresh)
        boxes_13 = self._parase(idxs_13, vecs_13, 32, anchors[13])

        idxs_26, vecs_26 = self._filter(output_26, thresh)
        boxes_26 = self._parase(idxs_26, vecs_26, 16, anchors[26])

        idxs_52, vecs_52 = self._filter(output_52, thresh)
        boxes_52 = self._parase(idxs_52, vecs_52, 8, anchors[52])

        boxes_all = torch.cat([boxes_13, boxes_26, boxes_52],dim=0)

        # Result is indexed by image, then by box, then by box coordinate.
        last_boxes = []
        for n in range(input.size(0)):
            n_boxes = []
            boxes_n = boxes_all[boxes_all[:,6] == n]
            for cls in range(cfg.CLASS_NUM):
                boxes_c = boxes_n[boxes_n[:,5] == cls]
                if boxes_c.size(0) > 0:
                    n_boxes.extend(nms(boxes_c, 0.3))

            if n_boxes:
                last_boxes.append(torch.stack(n_boxes))
            else:
                # torch.stack raises on an empty list; keep the list aligned
                # with the batch by adding an empty slice instead.
                last_boxes.append(boxes_n[:0])

        return last_boxes
    def detect(self, im, conf_thresh=0.7):
        """Detect objects in a BGR image and return pixel-space corner boxes.

        The image is resized to ``self.__shape``, converted to RGB, scaled to
        [0, 1] and run through the network on CUDA. Boxes kept by ``nms`` are
        read as normalised (cx, cy, w, h), scaled to the original image size
        and clamped.

        Args:
            im: image array; ``shape[0]`` is the height, ``shape[1]`` the width.
            conf_thresh: confidence threshold passed to ``get_all_boxes``.

        Returns:
            List of ``[x1, y1, x2, y2]`` integer lists.
        """
        rgb = cv2.cvtColor(cv2.resize(im, self.__shape), cv2.COLOR_BGR2RGB)
        # HWC uint8 -> 1xCxHxW float in [0, 1].
        batch = torch.from_numpy(rgb.transpose(2, 0, 1)).float().div(255.0).unsqueeze(0)
        batch = batch.to(torch.device("cuda"))
        output = self.__net(batch)

        candidates = get_all_boxes(output,
                                   self.__shape,
                                   conf_thresh,
                                   self.__net.num_classes,
                                   use_cuda=True)[0]
        kept = nms(candidates, self.__nms_thresh)

        h, w = im.shape[0], im.shape[1]
        result = []
        for box in kept:
            left = int(round((box[0] - box[2] / 2.0) * w))
            top = int(round((box[1] - box[3] / 2.0) * h))
            right = int(round((box[0] + box[2] / 2.0) * w))
            bottom = int(round((box[1] + box[3] / 2.0) * h))

            # Clamp: lower bound for the top-left corner, upper bound for the
            # bottom-right corner.
            result.append([max(left, 0), max(top, 0),
                           min(right, w - 1), min(bottom, h - 1)])

        return result
Ejemplo n.º 25
0
 def filter_results(self, scores, boxes):
     """Threshold and NMS-filter the detections of the first batch item.

     Args:
         scores: indexed as ``scores[0][:, class_index]``. Class 0 is skipped.
         boxes: indexed as ``boxes[0][mask, :]``.

     Returns:
         ``(boxes, labels, probabilities)`` for the detections that survive
         the per-class score threshold and NMS.
     """
     # To avoid custom C++ extensions, NMS is implemented purely in Python.
     # The original author notes it is faster on the CPU, hence the transfer.
     cpu_device = torch.device("cpu")
     first_boxes = boxes[0].to(cpu_device)
     first_scores = scores[0].to(cpu_device)

     kept_box_probs = []
     kept_labels = []
     for class_index in range(1, first_scores.size(1)):
         class_probs = first_scores[:, class_index]
         confident = class_probs > self.score_threshold
         # Rows of (box..., probability) for this class.
         candidates = torch.cat(
             [first_boxes[confident, :], class_probs[confident].reshape(-1, 1)],
             dim=1)
         survivors = nms(candidates, self.nms_threshold)
         kept_box_probs.append(survivors)
         kept_labels.append(
             torch.full((survivors.size(0), ), class_index, dtype=torch.int64))

     merged = torch.cat(kept_box_probs)
     labels = torch.cat(kept_labels)
     return merged[:, :4], labels, merged[:, 4]
Ejemplo n.º 26
0
    def parse(self, idxs, vecs, t, anchors):
        """Convert filtered predictions into boxes and run NMS on them.

        Args:
            idxs: index tensor; column 0 is read as the image index, columns
                1 and 2 as the grid row and column, column 3 as the anchor
                index.
            vecs: prediction vectors; column 0 is the confidence, columns 1-2
                the centre offsets, columns 3-4 the log-scale sizes and the
                remaining columns the class scores.
            t: multiplier applied to (grid index + offset), i.e. the stride.
            anchors: anchor sizes, indexable as ``[anchor, 0/1]``.

        Returns:
            The result of ``utils.nms`` over rows of
            (image, conf, x1, y1, x2, y2, class), or an empty tensor when
            ``idxs`` has no rows.
        """
        if idxs.size(0) == 0:
            return torch.Tensor([])
        anchors = torch.Tensor(anchors)

        n = idxs[:, 0]  # image the box belongs to
        a = idxs[:, 3]  # anchor (proposal) index
        conf = vecs[:, 0]  # confidence

        # (grid index + offset) * stride; the author's example is 416 / 13
        cy = (idxs[:, 1].float() + vecs[:, 2]) * t  # centre y in the original image
        cx = (idxs[:, 2].float() + vecs[:, 1]) * t  # centre x in the original image

        w = anchors[a, 0] * torch.exp(vecs[:, 3])
        h = anchors[a, 1] * torch.exp(vecs[:, 4])
        x1 = cx - w / 2
        y1 = cy - h / 2
        x2 = cx + w / 2
        y2 = cy + h / 2
        name = vecs[:, 5:]

        if name.shape[0] == 0:
            name = name.reshape(-1)
        else:
            # Class index with the highest score, as float so it can be stacked.
            name = torch.argmax(name, dim=1).float()

        np_boxes = torch.stack([n.float(), conf, x1, y1, x2, y2, name],
                               dim=1).numpy()
        # NOTE(review): `cls_nms` is not defined in this block; presumably a
        # module-level threshold — confirm.
        nms = utils.nms(np_boxes, cls_nms, False)

        return nms
Ejemplo n.º 27
0
    def detect_pnet(self, image):
        """Gather P-Net boxes over an image pyramid and suppress duplicates.

        ``image`` must provide ``size`` and ``resize`` (presumably a PIL
        image — TODO confirm). Levels shrink by 0.7 while the shorter side is
        above 12. Every position with confidence above 0.6 becomes a box via
        ``self.offset_to_boxes``.

        Returns:
            ``utils.nms`` (i=0.5, isMin=False) over all boxes, or
            ``np.array([])`` when no box was found.
        """
        full_w, full_h = image.size
        shortest = min(full_w, full_h)
        scale = 1
        candidates = []
        current = image

        while shortest > 12:
            batch = transform(current)
            batch.unsqueeze_(0)
            batch = batch.to(self.device)

            cond, offset = self.pnet(batch)
            # Drop the autograd graph and move the first item to the CPU.
            conf_map = cond.detach()[0][0].cpu()
            offset_map = offset.detach()[0].cpu()

            hits = torch.nonzero(torch.gt(conf_map, 0.6))
            candidates.extend(
                self.offset_to_boxes(hit, conf_map[hit[0], hit[1]],
                                     offset_map, scale)
                for hit in hits)

            # Next, smaller pyramid level.
            scale *= 0.7
            next_w, next_h = int(full_w * scale), int(full_h * scale)
            shortest = min(next_w, next_h)
            current = current.resize((next_w, next_h))

        if not candidates:
            return np.array([])
        return utils.nms(np.array(candidates), i=0.5, isMin=False)
Ejemplo n.º 28
0
    def pNetDetect(self, imge):
        """Gather P-Net boxes over an image pyramid and suppress duplicates.

        ``imge`` must provide ``size`` and ``resize`` (presumably a PIL
        image — TODO confirm). Levels shrink by ``self.pScale`` while the
        shorter side is above 12. Positions with confidence above
        ``self.pCon`` become boxes via ``self.returnBox``.

        Returns:
            ``utils.nms(boxes, self.pNms)`` over the stacked boxes.
        """
        boxes = []
        w, h = imge.size

        minSideLen = min(w, h)
        scale = 1

        while minSideLen > 12:
            imgData = self.imgTransform(imge)
            imgData = imgData.unsqueeze(0)
            imgData = imgData.to(self.device)

            # The third network output is not used.
            cons, offsets, _ = self.pNet(imgData)
            idxs = torch.nonzero(torch.gt(cons[0][0], self.pCon))
            boxes.extend(self.returnBox(idxs, offsets[0], cons[0][0], scale))

            scale *= self.pScale

            _w = int(w * scale)
            _h = int(h * scale)

            imge = imge.resize((_w, _h))
            minSideLen = min(_w, _h)

            # Drop this level's tensors and force a collection before the
            # next level is processed.
            del imgData, cons, offsets, idxs, _
            gc.collect()

        # NOTE(review): `boxes` is stacked unconditionally; if no level
        # produced a box the list is empty here — confirm that case is
        # handled or cannot occur.
        boxes = torch.stack(boxes)
        return utils.nms(boxes, self.pNms)
Ejemplo n.º 29
0
    def __call__(self, loc, score, anchor, img_size, scale=1.):
        """Turn RPN outputs into a limited set of region proposals.

        Args:
            loc: predicted offsets, decoded against ``anchor`` by ``loc2bbox``.
            score: one score per anchor.
            anchor: anchor boxes.
            img_size: ``img_size[1]`` bounds columns 0 and 2 of the boxes,
                ``img_size[0]`` bounds columns 1 and 3.
            scale: multiplier for ``self.min_size``.

        Returns:
            ``torch.Tensor`` with at most ``n_post_nms`` proposals.
        """
        training = self.mode == "training"
        n_pre_nms = self.n_train_pre_nms if training else self.n_test_pre_nms
        n_post_nms = self.n_train_post_nms if training else self.n_test_post_nms

        # Convert the RPN predictions into proposal boxes.
        roi = loc2bbox(anchor, loc)

        # Clip the boxes so they do not extend past the image border.
        roi[:, 0:4:2] = np.clip(roi[:, 0:4:2], 0, img_size[1])
        roi[:, 1:4:2] = np.clip(roi[:, 1:4:2], 0, img_size[0])

        # Discard proposals whose width or height is below the minimum size.
        min_size = self.min_size * scale
        widths = roi[:, 2] - roi[:, 0]
        heights = roi[:, 3] - roi[:, 1]
        big_enough = np.where((heights >= min_size) & (widths >= min_size))[0]
        roi, score = roi[big_enough, :], score[big_enough]

        # Keep the best-scoring proposals, highest score first.
        ranked = score.ravel().argsort()[::-1]
        if n_pre_nms > 0:
            ranked = ranked[:n_pre_nms]

        roi = nms(roi[ranked, :], self.nms_thresh)
        return torch.Tensor(roi)[:n_post_nms]
Ejemplo n.º 30
0
def test():
    """Visual smoke test for ``YOLODataset``.

    Builds the dataset from the hard-coded COCO paths, iterates it one
    shuffled sample at a time, converts the targets yielded by the dataset to
    boxes with ``cells_to_bboxes``, filters them through ``nms`` and displays
    the result with ``plot_image``. Shapes and the final box list are printed
    along the way.
    """
    anchors = config.ANCHORS
    transform = config.test_transforms
    grid_sizes = [13, 26, 52]

    dataset = YOLODataset(
        "COCO/train.csv",
        "COCO/images/images/",
        "COCO/labels/labels_new/",
        # Hand over a separate list so the dataset does not share this local.
        S=list(grid_sizes),
        anchors=anchors,
        transform=transform,
    )

    # One grid size per scale, repeated to a (len(grid_sizes), 3, 2) tensor
    # before being combined with the anchors.
    grid_per_anchor = (
        torch.tensor(grid_sizes).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)
    )
    scaled_anchors = torch.tensor(anchors) / (1 / grid_per_anchor)
    loader = DataLoader(dataset=dataset, batch_size=1, shuffle=True)

    for x, y in loader:
        boxes = []

        # The loop count is read from dimension 1 of the first target.
        for i in range(y[0].shape[1]):
            anchor = scaled_anchors[i]
            print(anchor.shape)
            target = y[i]
            print(target.shape)
            boxes.extend(
                cells_to_bboxes(
                    target,
                    is_preds=False,
                    S=target.shape[2],
                    anchors=anchor,
                )[0]
            )

        # NOTE(review): iou_threshold=1 presumably leaves only the 0.7
        # threshold as an effective filter -- confirm against nms().
        boxes = nms(
            boxes, iou_threshold=1, threshold=0.7, box_format="midpoint")
        print(boxes)
        plot_image(x[0].permute(1, 2, 0).to("cpu"), boxes)
Ejemplo n.º 31
0
def rcnn_detect(imidx, imdb, rcnn_model):
    """Classify one image's cached boxes and apply per-class NMS.

    Cached pool5 features for image ``imidx`` of ``imdb`` are loaded, pushed
    through ``rcnn_pool5_to_fcX`` and scored with
    ``rcnn_model.classifier.predict_proba``. Each box is assigned to its
    arg-max class; for every class index from 1 upwards the assigned boxes
    are joined with their score for that class and filtered by ``nms`` at
    0.3.

    Args:
        imidx: Image identifier passed to the feature cache loader.
        imdb: Image database object; only ``imdb.name`` is read here.
        rcnn_model: Model object providing ``cache_name``, ``opts.layer``,
            ``classifier`` and ``classes``.

    Returns:
        A list with ``len(rcnn_model.classes)`` entries. Entry ``i`` is an
        array whose rows are a box from ``d['boxes']`` followed by its score
        for class ``i``. Entry 0, and any class that no box was assigned to,
        stays an empty list.
    """
    d = rcnn_load_cached_pool5_features(rcnn_model.cache_name, imdb.name, imidx)
    d['feat'] = rcnn_pool5_to_fcX(d['feat'], rcnn_model.opts.layer, rcnn_model)
    # Uncached alternative, kept for reference:
    # boxes = selective_search(img, ks = 500)
    # boxes = boxes.swapaxes(0, 1).swapaxes(2, 3)
    # feat = rcnn_extract_features(img, boxes, rcnn_model)
    # feat = rcnn_scale_features(feat, rcnn_model.training_opts.feat_norm_mean)
    scores = rcnn_model.classifier.predict_proba(d['feat'].astype('f'))
    # scores = feat * rcnn_model.detectors.W + rcnn_model.detectors.B

    scores_idx = np.argmax(scores, 1)
    num_classes = len(rcnn_model.classes)
    # `range` instead of `xrange`: identical iteration on Python 2, and it
    # does not raise NameError on Python 3.
    dets = [[] for _ in range(num_classes)]
    # Class index 0 is skipped (presumably the background class -- confirm).
    for i in range(1, num_classes):
        # Alternative selection rule: np.where(scores[:, i] > thresh)
        idx = np.where(scores_idx == i)[0]
        if idx.size == 0:
            continue
        # Append this class's score as an extra trailing column per box.
        class_scores = scores[idx, i].reshape(-1, 1)
        scored_boxes = np.concatenate((d['boxes'][idx, :], class_scores), 1)
        keep = nms(scored_boxes, 0.3)
        dets[i] = scored_boxes[keep, :]

    return dets
Ejemplo n.º 32
0
# Script: run 100 solver steps on one random input and, every 10th step,
# save a plot of the NMS-filtered predictions (red) and the boxes from
# `coder._generate_boxes` (green).
coder = Coder()

# A single random 224x224x3 array serves as the input image.
image = np.random.rand(224, 224, 3)
# NOTE(review): np.reshape reinterprets the element order; it does not move
# the channel axis to the front. If `data` is meant to be the channel-first
# version of `image`, a transpose would be required -- confirm intent.
data = np.reshape(image, (1, 3, 224, 224))
# Presumably one set of ground-truth boxes -- verify against Coder.
gt = coder._generate_boxes(1)
inputs = prepare_inputs(data, gt)

caffe.set_mode_gpu()
net = solver.net
utils.set_inputs(net, **inputs)
for step in range(100):
    # Advance the solver by exactly one iteration, then read the outputs.
    solver.step(1)
    delta = unpack_outputs(net.blobs['preds_reshape'].data)
    probs = unpack_outputs(net.blobs['final_probs'].data)

    # Rows are laid out as four decoded box values followed by column 1 of
    # `probs`; the row count is hard-coded to 100.
    bboxes = np.zeros((100, 5))
    bboxes[:, 0:4] = coder.decode(delta)
    bboxes[:, 4] = probs[:, 1]
    dets = utils.nms(bboxes)

    # Every 10th step write a snapshot named after the zero-padded step.
    if step % 10 == 0: 
        ax = utils.draw_image(image)
        # Boxes are multiplied by 224 before drawing (presumably converting
        # normalized coordinates to pixels -- confirm).
        utils.vis_bboxes(ax, dets * 224, 'red')
        utils.vis_bboxes(ax, gt * 224, 'green')
        plt.axis('off')
        plt.tight_layout()
        plt.savefig('%04d.png'%step)
        plt.close()