Beispiel #1
0
 def decoder(self,
             ims,
             anchors,
             cls_score,
             bbox_pred,
             thresh=0.6,
             nms_thresh=0.2,
             test_conf=None):
     """Turn raw head outputs into final detections for the first batch item.

     Boxes are decoded from ``anchors`` and ``bbox_pred`` (``mode='xywht'``),
     clipped against ``ims``, filtered by their best class score and then
     passed through NMS.  Only index 0 of the leading dimension is used.

     Args:
         ims: passed to ``clip_boxes`` as the clipping reference.
         anchors: anchor tensor, indexed as ``[batch, anchor, :]``.
         cls_score: class scores, indexed as ``[batch, anchor, class]``.
         bbox_pred: regression output handed to ``self.box_coder.decode``.
         thresh: minimum best-class score for a box to survive.
         nms_thresh: threshold forwarded to ``nms``.
         test_conf: when not ``None`` it replaces ``thresh``.

     Returns:
         ``[scores, classes, boxes]`` where every row of ``boxes`` is a kept
         decoded box followed by its anchor.  When nothing passes the score
         filter, placeholder zero tensors are returned instead.
     """
     score_floor = thresh if test_conf is None else test_conf

     decoded = self.box_coder.decode(anchors, bbox_pred, mode='xywht')
     decoded = clip_boxes(decoded, ims)

     # Best score over the class axis, kept as a trailing singleton dim.
     best_scores, _ = torch.max(cls_score, dim=2, keepdim=True)
     selected = (best_scores >= score_floor)[0, :, 0]
     if selected.sum() == 0:
         return [torch.zeros(1), torch.zeros(1), torch.zeros(1, 5)]

     best_scores = best_scores[:, selected, :]
     kept_anchors = anchors[:, selected, :]
     kept_cls = cls_score[:, selected, :]
     decoded = decoded[:, selected, :]

     # NMS works on rows of [box..., score] for batch item 0.
     nms_rows = torch.cat([decoded, best_scores], dim=2)[0, :, :]
     survivors = nms(nms_rows, nms_thresh)

     nms_scores, nms_class = kept_cls[0, survivors, :].max(dim=1)
     output_boxes = torch.cat(
         [decoded[0, survivors, :], kept_anchors[0, survivors, :]], dim=1)
     return [nms_scores, nms_class, output_boxes]
    def post_process(self, im, sim_ops, scale_factor=1):
        """
        Required hook for every network: post-process the raw network outputs.

        Reads class probabilities, box deltas and ROIs from ``sim_ops``
        (indices 1, 2 and 3), rebuilds and clips the predicted boxes, then
        runs per-class NMS and hands each class to ``self._vis_detections``
        for drawing on a fresh matplotlib axis.

        :param im: image array; its last axis is reordered with (2, 1, 0).
        :param sim_ops: sequence ``[cls_score, cls_prob, bbox_pred, rois]``.
        :param scale_factor: divisor applied to ROI columns 1..4.
        """
        im = im[:, :, (2, 1, 0)]

        # sim_ops[0] (cls_score) is read but not needed for visualisation.
        _, cls_prob, bbox_pred, rois = (sim_ops[k] for k in range(4))

        proposals = rois[:, 1:5] / scale_factor
        pred_boxes = bbox_transform_inv(proposals, bbox_pred, False)
        pred_boxes = self._clip_boxes(pred_boxes, im.shape)

        _, ax = plt.subplots(figsize=(12, 12))
        ax.imshow(im, aspect='equal')

        conf_thresh = 0.6
        nms_thresh = 0.4
        # Start at 1: entry 0 of self.classes is the background class.
        for cls_ind, cls in enumerate(self.classes[1:], start=1):
            cls_boxes = pred_boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
            cls_scores = cls_prob[:, cls_ind]
            dets = np.hstack(
                (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
            dets = dets[nms(dets, nms_thresh), :]
            self._vis_detections(im, cls, dets, ax, thresh=conf_thresh)
Beispiel #3
0
def im_detect(model, src, target_sizes, use_gpu=True, conf=None):
    """Detect on ``src`` at one or several target sizes.

    With a single size the result of ``single_scale_detect`` is returned
    unchanged.  With several sizes the non-empty per-scale results are stacked
    and a final NMS (threshold 0.1) is applied across scales, using columns
    2..6 as the box and column 1 as the score.

    Args:
        model: detector forwarded to ``single_scale_detect``.
        src: input forwarded to ``single_scale_detect``.
        target_sizes: an int or a sequence of ints.
        use_gpu: forwarded to ``single_scale_detect``.
        conf: forwarded to ``single_scale_detect``.

    Returns:
        Detection rows; ``np.zeros((0, 7))`` when every scale came back empty
        in the multi-scale case.
    """
    sizes = [target_sizes] if isinstance(target_sizes, int) else target_sizes

    if len(sizes) == 1:
        return single_scale_detect(model,
                                   src,
                                   target_size=sizes[0],
                                   use_gpu=use_gpu,
                                   conf=conf)

    per_scale = []
    for size in sizes:
        scale_dets = single_scale_detect(model,
                                         src,
                                         target_size=size,
                                         use_gpu=use_gpu,
                                         conf=conf)
        if scale_dets.shape[0] != 0:
            per_scale.append(scale_dets)

    if not per_scale:
        return np.zeros((0, 7))

    merged = np.vstack(per_scale)
    # NMS expects [box..., score]: box is columns 2..6, score is column 1.
    nms_rows = np.hstack((merged[:, 2:7], merged[:, 1][:, np.newaxis]))
    nms_rows = nms_rows.astype(np.float32, copy=False)
    survivors = nms(nms_rows, 0.1)
    return merged[survivors, :]
Beispiel #4
0
    def __call__(self, image):
        """
        Detect objects by running the network on every chip of ``image`` and
        merging the per-chip results with per-label NMS.

        :param image: rgb image
        :return: {'label_name':[x1,y1,x2,y2,score],...}
        """
        box_parts = [np.empty((0, 4))]
        score_parts = [np.empty((0, self.labels_numb))]

        for chip, origin in self.__chips__(image):
            chip_boxes, chip_scores = self.__net__(chip)
            # Move chip-local corners into image coordinates (in place).
            chip_boxes += [float(origin[0]), float(origin[1]),
                           float(origin[0]), float(origin[1])]
            box_parts.append(chip_boxes)
            score_parts.append(chip_scores)

        boxes = np.vstack(box_parts)
        scores = np.vstack(score_parts)

        # Per-label score filter followed by NMS; label 0 is skipped.
        results = {}
        for label_idx in range(1, self.labels_numb):
            selected = np.where(scores[:, label_idx] > self.thresh)[0]
            if len(selected) == 0:
                continue
            label_dets = np.hstack(
                (boxes[selected],
                 scores[selected, label_idx][:, np.newaxis]))
            label_dets = label_dets.astype(np.float32, copy=False)
            survivors = nms(label_dets, 0.45, force_cpu=0)
            results[self.labels_name[label_idx]] = label_dets[survivors, :]
        return results
    def predict(self, img, threshold=0.6):
        """Run the detector on one image and return NMS-filtered detections.

        Args:
            img: an image array, or a path string that is loaded with
                ``cv2.imread``.
            threshold: NOTE(review): accepted but currently unused; the score
                cut-off and the NMS threshold are both hard-coded to 0.45.
                Left as is so existing results do not change.

        Returns:
            Tuple ``(bboxes_out, label_text, classes_out, scores_out, c_dets)``:
            boxes as ``[x1, y1, x2, y2]`` lists scaled to image size, class
            names for the ids found in ``self.categories``, class ids, scores,
            and the detection array of the last class processed (an empty
            ``(0, 5)`` array when that class had no detections).
        """
        if isinstance(img, str):
            img = cv2.imread(img)
        boxes, scores = self.sess.run(self.net.get_output(),
                                      feed_dict={'input:0': img})
        scale = [img.shape[1], img.shape[0], img.shape[1], img.shape[0]]
        boxes = boxes[0]
        scores = scores[0]
        # scale each detection back up to the image
        boxes *= scale

        bboxes_out = []
        scores_out = []
        classes_out = []
        # Defined up front so the return value exists even when the class
        # loop does not run at all.
        c_dets = np.empty((0, 5), dtype=np.float32)
        for j in range(1, self.num_classes + 1):
            inds = np.where(scores[:, j] > 0.45)[0]
            c_dets = np.hstack(
                (boxes[inds], scores[inds, j][:, np.newaxis])).astype(
                    np.float32, copy=False)
            if len(inds) == 0:
                # Nothing to suppress; do not hand an empty array to nms.
                continue
            keep = nms(c_dets, 0.45, force_cpu=True)
            c_dets = c_dets[keep, :]

            for det in c_dets:
                bboxes_out.append([det[0], det[1], det[2], det[3]])
                scores_out.append(det[4])
                classes_out.append(j)

        # Ids missing from self.categories are skipped, so label_text can be
        # shorter than classes_out.
        label_text = [self.categories[cls_id]['name']
                      for cls_id in classes_out
                      if cls_id in self.categories]
        return bboxes_out, label_text, classes_out, scores_out, c_dets
Beispiel #6
0
def nms_process(num_classes, i, scores, boxes, cfg, min_thresh, all_boxes,
                max_per_image):
    """Fill ``all_boxes[class][i]`` with NMS-filtered detections of image ``i``.

    For every foreground class (1 .. num_classes - 1) the boxes scoring above
    ``min_thresh`` are suppressed with ``nms`` and truncated to
    ``cfg.test_cfg.keep_per_class`` rows.  Afterwards, if ``max_per_image`` is
    positive and the image holds more detections than that, every class is
    trimmed to the rows whose score reaches the ``max_per_image``-th best
    score of the image.

    Args:
        num_classes: number of classes including background (class 0).
        i: image index used as the second key into ``all_boxes``.
        scores: array indexed as ``[box, class]``.
        boxes: array indexed by box.
        cfg: config object; only read when a class has candidates.
        min_thresh: score cut-off applied before NMS.
        all_boxes: nested container updated in place.
        max_per_image: cap on detections per image; ``<= 0`` disables it.
    """
    foreground = range(1, num_classes)  # class 0 is background

    for cls_id in foreground:
        picked = np.where(scores[:, cls_id] > min_thresh)[0]
        if len(picked) == 0:
            all_boxes[cls_id][i] = np.empty([0, 5], dtype=np.float32)
            continue

        cls_dets = np.hstack(
            (boxes[picked], scores[picked, cls_id][:, np.newaxis]))
        cls_dets = cls_dets.astype(np.float32, copy=False)

        use_soft_nms = cfg.test_cfg.soft_nms
        survivors = nms(cls_dets, cfg.test_cfg.iou, force_cpu=use_soft_nms)
        # keep only the highest boxes
        survivors = survivors[:cfg.test_cfg.keep_per_class]
        all_boxes[cls_id][i] = cls_dets[survivors, :]

    if max_per_image <= 0:
        return

    image_scores = np.hstack(
        [all_boxes[cls_id][i][:, -1] for cls_id in foreground])
    if len(image_scores) <= max_per_image:
        return

    cutoff = np.sort(image_scores)[-max_per_image]
    for cls_id in foreground:
        current = all_boxes[cls_id][i]
        strong = np.where(current[:, -1] >= cutoff)[0]
        all_boxes[cls_id][i] = current[strong, :]
Beispiel #7
0
def get_proposal(cls_score_pred, bbox_pred, image_raw_size, anchor_list,
                 num_anchors, nms_thresh=0.7, pre_nms_topN=12000,
                 post_nms_topN=2000):
    """Turn region-proposal outputs into a ranked, NMS-filtered proposal list.

    Args:
        cls_score_pred: 4-D score array; only the channels from
            ``num_anchors`` onward on the last axis are used (presumably the
            foreground scores - confirm against the network head).
        bbox_pred: box regression output, reshaped to ``(-1, 4)``.
        image_raw_size: forwarded to ``bbox_inv``.
        anchor_list: anchors forwarded to ``bbox_inv``.
        num_anchors: first channel index of the scores that are kept.
        nms_thresh: threshold passed to ``nms`` (previously fixed at 0.7).
        pre_nms_topN: candidates kept before NMS (previously fixed at 12000).
        post_nms_topN: proposals kept after NMS (previously fixed at 2000).

    Returns:
        ``(bois, scores)``: ``bois`` is ``(N, 5)`` float32 whose first column
        is all zeros followed by the 4 box values; ``scores`` is ``(N, 1)``.
    """
    nms_thresh = float(nms_thresh)

    cls_score_pred = cls_score_pred[:, :, :, num_anchors:]
    scores = cls_score_pred.reshape(-1)
    bbox_pred = bbox_pred.reshape(-1, 4)
    # map the regression output back onto the anchors
    bboxes = bbox_inv(anchor_list, bbox_pred, image_raw_size)

    # keep the best candidates before NMS
    bois, scores = get_top_n(bboxes, scores, top=pre_nms_topN)

    bois = bois.reshape(-1, 4).astype(np.float32)
    scores = scores.reshape(-1, 1).astype(np.float32)
    keep = nms(np.hstack((bois, scores)), nms_thresh)
    keep = keep[:post_nms_topN]

    bois = bois[keep]
    scores = scores[keep]

    # Prepend a zero column to every proposal.
    zeros = np.zeros((bois.shape[0], 1), dtype=np.float32)
    bois = np.hstack((zeros, bois))

    return bois, scores
Beispiel #8
0
def im_detect(img, net, detector, transform, thresh=0.01, det_size=512,
              roi_offset=(1100, 700)):
    """Detect objects in one image and return per-class NMS-filtered rows.

    Each prior is assigned its best-scoring class; priors whose best class is
    background (id 0) or whose score is not above ``thresh`` are dropped
    before per-class NMS (at most 50 detections per class are kept).

    Args:
        img: input image handed to ``transform``.
        net: network called on the transformed, CUDA-resident input.
        detector: object whose ``forward(output)`` returns ``(boxes, scores)``.
        transform: callable; element 0 of its result is the network input.
        thresh: minimum score of a detection.
        det_size: factor applied to the box coordinates after they have been
            clamped to ``[0, 1]`` (previously fixed at 512).
        roi_offset: ``(x, y)`` offset added to both box corners after scaling
            (previously fixed at ``(1100, 700)``).

    Returns:
        Array with rows ``[x1, y1, x2, y2, score, class_id]``.  When nothing
        is detected an empty ``(0, 6)`` array is returned.  The previous
        emptiness checks used ``len(mask)``, which is the number of priors,
        so that branch was never taken and ``None`` was returned instead.
    """
    with torch.no_grad():
        t0 = time.time()
        x = transform(img)[0].unsqueeze(0)
        x = x.cuda()
        t1 = time.time()
        output = net(x)
        boxes, scores = detector.forward(output)
        t2 = time.time()

        max_conf, max_id = scores[0].topk(1, 1, True, True)
        pos = max_id > 0
        if not pos.any():
            return np.empty((0, 6))
        boxes = boxes[0][pos.view(-1, 1).expand(len(pos), 4)].view(-1, 4)
        scores = max_conf[pos].view(-1, 1)
        max_id = max_id[pos].view(-1, 1)

        inds = scores > thresh
        if not inds.any():
            return np.empty((0, 6))
        boxes = boxes[inds.view(-1, 1).expand(len(inds), 4)].view(-1, 4)
        scores = scores[inds].view(-1, 1)
        max_id = max_id[inds].view(-1, 1)

        c_dets = torch.cat((boxes, scores, max_id.float()), 1).cpu().numpy()

        # NMS is run independently for every class present in the image.
        per_class = []
        for cls in np.unique(c_dets[:, -1]):
            cls_rows = np.where(c_dets[:, -1] == cls)[0]
            image_pred_class = c_dets[cls_rows, :]
            keep = nms(image_pred_class[:, :5],
                       cfg.TEST.NMS_OVERLAP,
                       force_cpu=False)
            keep = keep[:50]
            per_class.append(image_pred_class[keep, :])
        output = np.concatenate(per_class, axis=0)

        # Clamp the normalised corners, then map them to pixel coordinates.
        output[:, 0:2][output[:, 0:2] < 0] = 0
        output[:, 2:4][output[:, 2:4] > 1] = 1
        scale = np.array([det_size, det_size, det_size, det_size])
        output[:, :4] = output[:, :4] * scale
        offset = np.array(roi_offset)
        output[:, :2] += offset
        output[:, 2:4] += offset

        t3 = time.time()
        print("transform_t:", round(t1 - t0, 3), "detect_time:",
              round(t2 - t1, 3), "nms_time:", round(t3 - t2, 3))

    return output
Beispiel #9
0
 def facebox_detect(self, img_raw):
     """Detect faces plus per-face coordinate predictions in ``img_raw``.

     The image is converted to float32, mean-subtracted and run through
     ``self.model``.  Boxes and coordinates are decoded, scaled to pixels,
     filtered by confidence, ranked, suppressed with NMS and truncated.
     Detections containing any negative value are discarded.

     Args:
         img_raw: image array of shape (height, width, channels).

     Returns:
         Array whose rows are ``[box (4), score, coords...]``.
     """
     cfg = self.cfg
     img = np.float32(img_raw)
     im_height, im_width, _ = img.shape
     scale = torch.Tensor([im_width, im_height, im_width, im_height])    # w, h, w, h
     scale_coords = torch.Tensor(np.tile([im_width, im_height], 5))

     img -= (104, 117, 123)
     img = torch.from_numpy(img.transpose(2, 0, 1)).unsqueeze(0)
     img = img.to(self.device)
     scale = scale.to(self.device)
     scale_coords = scale_coords.to(self.device)

     loc, conf, coords = self.model(img)  # forward pass
     print("bbbb", loc.shape, conf.shape, coords.shape)

     priors = PriorBox(cfg, image_size=(im_height, im_width)).forward()
     priors = priors.to(self.device)
     boxes = decode(loc.data.squeeze(0), priors.data, cfg['variance'])
     coords = decode_f(coords, cfg['variance'])
     boxes = boxes * scale
     coords = coords * scale_coords
     # .data detaches coords before the numpy conversion
     coords = coords.data.squeeze(0).cpu().numpy()
     boxes = boxes.cpu().numpy()
     scores = conf.data.cpu().numpy()[:, 1]

     # drop low scores
     confident = np.where(scores > cfg['confidence_threshold'])[0]
     boxes, scores, coords = boxes[confident], scores[confident], coords[confident]

     # best top_k candidates go into NMS
     ranking = scores.argsort()[::-1][:cfg['top_k']]
     boxes, scores, coords = boxes[ranking], scores[ranking], coords[ranking]

     dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
     survivors = nms(dets, cfg['nms_threshold'], False)
     dets = dets[survivors, :]
     coords = coords[survivors, :]

     # cap the number of results after NMS
     boxes_score = dets[:cfg['keep_top_k'], :]
     coords = coords[:cfg['keep_top_k'], :]

     # discard detections with any negative entry
     has_negative = (boxes_score < 0).any(axis=1)
     valid = ~has_negative
     return np.hstack((boxes_score[valid], coords[valid]))
Beispiel #10
0
def get_results(prediction, confidence, num_classes, nms_conf = 0.4):
    """Filter raw predictions by objectness and per-class NMS.

    Args:
        prediction: tensor indexed as ``[batch, box, attr]`` where the
            attributes are ``cx, cy, w, h, objectness`` followed by
            ``num_classes`` class scores.  The input tensor is not modified.
        confidence: objectness threshold; rows at or below it are dropped.
        num_classes: number of class-score columns starting at column 5.
        nms_conf: threshold forwarded to ``nms``.

    Returns:
        Tensor with rows ``[batch_idx, x1, y1, x2, y2, objectness,
        class_score, class_idx]``, or the integer ``0`` when nothing is
        detected.  Previously an uninitialised tensor was returned in that
        case.
    """
    conf_mask = (prediction[:, :, 4] > confidence).float().unsqueeze(2)
    prediction = prediction * conf_mask
    if torch.nonzero(prediction[:, :, 4]).numel() == 0:
        return 0

    # Convert (cx, cy, w, h) to corner form (x1, y1, x2, y2).
    centre = prediction[:, :, :2]
    half_size = prediction[:, :, 2:4] / 2
    prediction[:, :, :4] = torch.cat(
        (centre - half_size, centre + half_size), dim=2)

    detections = []
    for ind in range(prediction.size(0)):
        image_pred = prediction[ind]
        # Replace the class-score columns by (best score, best class index).
        max_conf, max_conf_idx = torch.max(
            image_pred[:, 5:5 + num_classes], 1)
        image_pred = torch.cat((image_pred[:, :5],
                                max_conf.float().unsqueeze(1),
                                max_conf_idx.float().unsqueeze(1)), 1)

        # Get rid of the rows zeroed by the confidence mask.
        non_zero_ind = torch.nonzero(image_pred[:, 4])
        image_pred_ = image_pred[non_zero_ind.squeeze(), :].view(-1, 7)
        if image_pred_.size(0) == 0:
            continue

        try:
            img_classes = unique(image_pred_[:, -1])
        except Exception:
            # Best effort, as before: skip an image whose classes cannot be
            # listed, but no longer swallow KeyboardInterrupt/SystemExit.
            continue

        for cls in img_classes:
            cls_mask = image_pred_ * (
                image_pred_[:, -1] == cls).float().unsqueeze(1)
            class_mask_ind = torch.nonzero(cls_mask[:, -2]).squeeze()

            image_pred_class = image_pred_[class_mask_ind].view(-1, 7)
            keep = nms(image_pred_class.cpu().numpy(), nms_conf,
                       force_cpu=True)
            image_pred_class = image_pred_class[keep]

            batch_ind = image_pred_class.new(
                image_pred_class.size(0), 1).fill_(ind)
            detections.append(torch.cat((batch_ind, image_pred_class), 1))

    if not detections:
        return 0
    return torch.cat(detections)
Beispiel #11
0
def test_net(net,img,name,detector,transform,priors,top_k=200,thresh=0.01):
    """Detect objects in ``img``, draw them and save the figure as EPS.

    Args:
        net: network, called as ``net(x, test=True)`` on a CUDA tensor.
        img: image array; it is passed through ``cv2.COLOR_BGR2RGB`` for
            display.
        name: image file name; the figure is written next to it with the
            extension replaced by ``.eps``.
        detector: object whose ``forward(out, priors)`` returns
            ``(boxes, scores)``.
        transform: callable producing the network input from ``img``.
        priors: prior boxes forwarded to the detector.
        top_k: NOTE(review): accepted but not used by this function.
        thresh: minimum class score of a detection.

    The function no longer fails with an unbound ``result`` when nothing is
    detected; the plain image is shown and saved instead.
    """
    import os

    scale = torch.Tensor([img.shape[1], img.shape[0],
                          img.shape[1], img.shape[0]])
    # Inference only: keep the forward pass inside no_grad as well.
    with torch.no_grad():
        x = transform(img).unsqueeze(0)
        x = x.cuda()
        scale = scale.cuda()
        out = net(x,test=True)
        boxes, scores = detector.forward(out, priors)

    boxes = boxes[0]
    scores = scores[0]

    boxes *= scale
    boxes = boxes.cpu().numpy()
    scores = scores.cpu().numpy()

    per_class = []
    for j in range(1, 21):
        inds = np.where(scores[:, j] > thresh)[0]
        if len(inds) == 0:
            continue
        c_dets = np.hstack(
            (boxes[inds], scores[inds, j][:, np.newaxis])).astype(
                np.float32, copy=False)
        keep = nms(c_dets, 0.45, force_cpu=True)
        c_dets = c_dets[keep, :]
        # Append the class id as a sixth column.
        cls_column = np.ones(c_dets.shape[0]) * j
        per_class.append(np.column_stack((c_dets, cls_column)))

    result = np.vstack(per_class) if per_class else np.empty((0, 6))

    rgb_image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    colors = plt.cm.hsv(np.linspace(0, 1, 21)).tolist()
    plt.imshow(rgb_image)
    currentAxis = plt.gca()

    for (x1, y1, x2, y2, s, cls) in result:
        x1 = int(x1)
        y1 = int(y1)
        x2 = int(x2)
        y2 = int(y2)
        cls = int(cls)
        title = "%s:%.2f" % (CLASSES[cls], s)
        coords = (x1, y1), x2 - x1 + 1, y2 - y1 + 1
        color = colors[cls]
        currentAxis.add_patch(plt.Rectangle(*coords, fill=False, edgecolor=color, linewidth=2))
        currentAxis.text(x1, y1, title, bbox={'facecolor': color, 'alpha': 0.5})
    plt.axis('off')
    # splitext only strips the final extension, so paths such as
    # "./out/img.jpg" no longer collapse to ".eps".
    plt.savefig(os.path.splitext(name)[0] + '.eps', format='eps', bbox_inches='tight')
    plt.show()
    def predict(self, img_name):
        """Detect faces in the image file ``img_name``.

        The image is loaded with OpenCV, mean-subtracted and passed through
        ``self.net``; the decoded boxes are scaled to pixels, filtered by
        ``self.confidence_threshold``, ranked, suppressed with NMS and
        truncated to ``self.keep_top_k`` rows.

        Args:
            img_name: path of the image to load.

        Returns:
            float32 array with rows ``[box (4), score]``.
        """
        resize = 1
        img = np.float32(cv2.imread(img_name, cv2.IMREAD_COLOR))
        if resize != 1:
            img = cv2.resize(img, None, None, fx=resize, fy=resize,
                             interpolation=cv2.INTER_LINEAR)
        im_height, im_width, _ = img.shape
        scale = torch.Tensor([im_width, im_height, im_width, im_height])

        img -= (104, 117, 123)
        net_input = torch.from_numpy(img.transpose(2, 0, 1)).unsqueeze(0)
        net_input = net_input.to(self.device)
        scale = scale.to(self.device)

        timers = {'forward_pass': Timer(), 'misc': Timer()}
        timers['forward_pass'].tic()
        loc, conf = self.net(net_input)  # forward pass
        timers['forward_pass'].toc()

        timers['misc'].tic()
        priors = PriorBox(cfg, image_size=(im_height, im_width)).forward()
        priors = priors.to(self.device)
        boxes = decode(loc.data.squeeze(0), priors.data, cfg['variance'])
        boxes = (boxes * scale / resize).cpu().numpy()
        scores = conf.data.cpu().numpy()[:, 1]

        # drop low scores
        confident = np.where(scores > self.confidence_threshold)[0]
        boxes, scores = boxes[confident], scores[confident]

        # best top_k candidates go into NMS
        ranking = scores.argsort()[::-1][:self.top_k]
        boxes, scores = boxes[ranking], scores[ranking]

        dets = np.hstack((boxes, scores[:, np.newaxis]))
        dets = dets.astype(np.float32, copy=False)
        survivors = nms(dets, self.nms_threshold, force_cpu=self.cpu)

        # cap the number of results after NMS
        dets = dets[survivors, :][:self.keep_top_k, :]
        timers['misc'].toc()

        return dets
Beispiel #13
0
    def predict(cls, input):
        """Run the detector on one image and return its detections.

        Args:
            input: image data convertible with ``np.array`` into a 3-D array
                whose last axis holds the channels; the channel order is
                reversed before preprocessing (presumably RGB -> BGR -
                confirm against the caller).

        Returns:
            dict with three parallel lists:
            ``'pos'`` - flattened box coordinates (4 values per detection),
            ``'cls_inds'`` - class index of every detection,
            ``'scores'`` - score of every detection.
            All three lists are empty when nothing is detected; previously
            that case raised ``IndexError``.
        """
        net, priors, _preprocess, detector = cls.get_model()
        image = np.array(input)[:, :, ::-1].copy()

        w, h = image.shape[1], image.shape[0]
        img = _preprocess(image).unsqueeze(0)
        if cfg.test_cfg.cuda:
            img = img.cuda()
        scale = torch.Tensor([w, h, w, h])
        out = net(img)
        boxes, scores = detector.forward(out, priors)
        boxes = (boxes[0] * scale).cpu().numpy()
        scores = scores[0].cpu().numpy()

        allboxes = []
        for j in range(1, cfg.model.m2det_config.num_classes):
            inds = np.where(scores[:, j] > cfg.test_cfg.score_threshold)[0]
            if len(inds) == 0:
                continue
            c_dets = np.hstack(
                (boxes[inds], scores[inds, j][:, np.newaxis])).astype(
                    np.float32, copy=False)
            soft_nms = cfg.test_cfg.soft_nms
            keep = nms(c_dets, cfg.test_cfg.iou, force_cpu=soft_nms)
            keep = keep[:cfg.test_cfg.keep_per_class]
            c_dets = c_dets[keep, :]
            # rows become [x1, y1, x2, y2, score, class]
            allboxes.extend(det.tolist() + [j] for det in c_dets)

        # reshape keeps the array 2-D (0 x 6) when there are no detections,
        # so the column slices below stay valid.
        allboxes = np.array(allboxes, dtype=np.float64).reshape(-1, 6)
        boxes = allboxes[:, :4]
        scores = allboxes[:, 4]
        cls_inds = allboxes[:, 5]

        response = {}
        response['pos'] = list(boxes.reshape(-1))
        response['cls_inds'] = list(cls_inds)
        response['scores'] = list(scores)

        return response
Beispiel #14
0
	def post_process(self, im, sim_ops, scale_factor=1):
		"""
		Required hook for every network: post-process the raw network outputs.

		Expected call:
		self.post_process(im, [cls_score, cls_prob, bbox_pred, rois], scale_factor)

		Dumps the raw inputs to stdout (debug output), converts the
		fixed-point ``cls_score`` and ``bbox_pred`` with
		``convert_to_float_py``, rebuilds and clips the predicted boxes and
		draws the per-class NMS results with ``self._vis_detections``.

		The body is indented with tabs only; the previous mix of tabs and
		spaces is rejected by Python 3 with ``TabError``.
		"""
		print("cls_score:\n")
		print(sim_ops[0])
		print("cls_prob:\n")
		print(sim_ops[1])
		print("bbox_pred:\n")
		print(sim_ops[2])
		print("rois:\n")
		print(sim_ops[3])
		print("scale_factor:\n")
		print(scale_factor)

		im = im[:, :, (2, 1, 0)]

		# cls_score is converted but not used further below.
		cls_score = sim_ops[0]
		cls_score = convert_to_float_py(cls_score, self._layer_map[77]['fl'])

		cls_prob = sim_ops[1]

		bbox_pred = sim_ops[2]
		bbox_pred = convert_to_float_py(bbox_pred, self._layer_map[78]['fl'])

		rois = sim_ops[3]
		boxes = rois[:, 1:5] / scale_factor

		# Scores come from cls_prob (marked as debug-only by the original author).
		scores = cls_prob
		box_deltas = bbox_pred
		pred_boxes = bbox_transform_inv(boxes, box_deltas, False)
		pred_boxes = self._clip_boxes(pred_boxes, im.shape)

		fig, ax = plt.subplots(figsize=(12, 12))
		ax.imshow(im, aspect='equal')
		CONF_THRESH = 0.6
		NMS_THRESH = 0.4
		for cls_ind, cls in enumerate(self.classes[1:]):
			cls_ind += 1  # because we skipped background
			cls_boxes = pred_boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]

			print("TL DEBUG, pred_boxes shape: %s, cls_boxes shape: %s, scores shape: %s, cls_scores index: %d\n" %(str(pred_boxes.shape),str(cls_boxes.shape),str(scores.shape), cls_ind))

			cls_scores = scores[:, cls_ind]
			print(cls_scores)
			dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
			keep = nms(dets, NMS_THRESH)
			dets = dets[keep, :]
			self._vis_detections(im, cls, dets, ax, thresh=CONF_THRESH)
		plt.show()
Beispiel #15
0
def detect_face(net, img, resize):
    """Detect faces in ``img`` at the given resize factor.

    Args:
        net: network returning a 3-tuple whose first two items are the
            location and confidence outputs; the third is handed to
            ``PriorBox``.
        img: image array of shape (height, width, channels).  It is no
            longer modified: the mean subtraction works on a float32 copy,
            which also makes integer images acceptable.
        resize: scale factor applied to the image before detection; box
            coordinates are divided by it again afterwards.

    Returns:
        float32 array with rows ``[box (4), score]``, limited to
        ``args.keep_top_k`` rows.
    """
    if resize != 1:
        img = cv2.resize(img,
                         None,
                         None,
                         fx=resize,
                         fy=resize,
                         interpolation=cv2.INTER_LINEAR)
    # astype always copies, so the in-place mean subtraction below cannot
    # leak into the caller's array (it did when resize == 1).
    img = img.astype(np.float32)
    im_height, im_width, _ = img.shape
    scale = torch.Tensor([im_width, im_height, im_width, im_height])
    img -= (104, 117, 123)
    img = img.transpose(2, 0, 1)
    img = torch.from_numpy(img).unsqueeze(0)
    if args.cuda:
        img = img.cuda()
        scale = scale.cuda()

    out = net(img)  # forward pass
    priorbox = PriorBox(cfg, out[2], (im_height, im_width), phase='test')
    priors = priorbox.forward()
    if args.cuda:
        priors = priors.cuda()
    loc, conf, _ = out
    print(loc.size(), conf.size())
    prior_data = priors.data
    boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
    boxes = boxes * scale / resize
    boxes = boxes.cpu().numpy()
    scores = conf.data.cpu().numpy()[:, 1]

    # ignore low scores
    inds = np.where(scores > args.confidence_threshold)[0]
    boxes = boxes[inds]
    scores = scores[inds]

    # keep top-K before NMS
    order = scores.argsort()[::-1][:args.top_k]
    boxes = boxes[order]
    scores = scores[order]

    # do NMS
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
    keep = nms(dets, args.nms_threshold, force_cpu=args.cpu)
    dets = dets[keep, :]

    # keep top-K after NMS
    dets = dets[:args.keep_top_k, :]
    return dets
def im_detect_batch(imgs,
                    img_info,
                    net,
                    detector,
                    thresh=0.01,
                    num_classes=10):
    """Detect objects on a batch of images and time every stage.

    Args:
        imgs: sequence of preprocessed images, stacked into one tensor.
        img_info: per-image ``(width, height)`` used to scale the boxes.
        net: network run on the CUDA batch.
        detector: object whose ``forward(output)`` returns
            ``(boxes, scores)`` with the batch on the first axis.
        thresh: minimum class score of a detection.
        num_classes: number of classes including background (class 0).

    Returns:
        ``(boxes_batch, fps_time, forward_time, detect_time, nms_time)`` where
        ``boxes_batch[class][image]`` holds rows ``[box (4), score]`` (at most
        50 per class); entries of class 0 stay empty lists.
    """
    num_images = len(imgs)
    boxes_batch = [[[] for _ in range(num_images)] for _ in range(num_classes)]
    with torch.no_grad():
        t_start = time.time()
        x = torch.from_numpy(np.array(imgs))
        print(x.shape)
        x = x.cuda()
        output = net(x)
        t_forward = time.time()
        boxes, scores = detector.forward(output)
        t_detect = time.time()

        for img_idx in range(boxes.size(0)):
            img_w, img_h = img_info[img_idx][0], img_info[img_idx][1]
            img_boxes = boxes[img_idx].cpu().numpy()
            img_scores = scores[img_idx].cpu().numpy()
            # scale the boxes to the image size, in place
            img_boxes *= np.array([img_w, img_h, img_w, img_h])

            for cls_id in range(1, num_classes):
                picked = np.where(img_scores[:, cls_id] > thresh)[0]
                if len(picked) == 0:
                    boxes_batch[cls_id][img_idx] = np.empty(
                        [0, 5], dtype=np.float32)
                    continue
                cls_dets = np.hstack(
                    (img_boxes[picked],
                     img_scores[picked, cls_id][:, np.newaxis]))
                cls_dets = cls_dets.astype(np.float32, copy=False)
                survivors = nms(cls_dets, cfg.TEST.NMS_OVERLAP,
                                force_cpu=True)
                survivors = survivors[:50]
                boxes_batch[cls_id][img_idx] = cls_dets[survivors, :]

        t_end = time.time()
        forward_time = t_forward - t_start
        detect_time = t_detect - t_forward
        nms_time = t_end - t_detect
        fps_time = t_end - t_start
        print(
            'im_detect: forward_time: {:.3f}s, detect_time {:.3f}s, nms_time: {:.3f}s,  fps_time: {:.3f}s'
            .format(forward_time, detect_time, nms_time, fps_time))
    return boxes_batch, fps_time, forward_time, detect_time, nms_time
Beispiel #17
0
    def forward_torch_nms(self, arm_loc_data, arm_conf_data, odm_loc_data, odm_conf_data, prior_data):
        """
        Deprecated.

        Decode two-stage (``arm_*`` then ``odm_*``) predictions into boxes and
        run per-class NMS, writing up to ``self.top_k`` detections per class.

        Args:
            arm_loc_data: (tensor) first-stage location predictions, decoded
                against ``prior_data`` to obtain the refined default boxes.
            arm_conf_data: (tensor) first-stage confidences; channels from
                index 1 onward are compared with ``self.objectness_threshold``.
            odm_loc_data: (tensor) second-stage location predictions.
            odm_conf_data: (tensor) second-stage class confidences.
                NOTE: modified in place - entries selected by the objectness
                mask are set to 0.
            prior_data: (tensor) prior boxes; ``size(0)`` is used as the
                number of priors.

        Shapes given by the previous docstring (not verified here):
        loc ``[batch, num_priors*4]``, conf ``[batch*num_priors, num_classes]``,
        priors ``[1, num_priors, 4]``.

        Returns:
            Tensor of shape ``(batch, num_classes, top_k, 5)`` whose rows are
            ``[score, box (4)]``; unused rows stay zero.
        """
        # The second-stage outputs are the ones that get decoded and scored.
        loc_data = odm_loc_data
        conf_data = odm_conf_data

        # Suppress class scores wherever the first stage is not confident
        # enough that an object is present (in-place write on conf_data).
        arm_object_conf = arm_conf_data.data[:, :, 1:]
        no_object_index = arm_object_conf <= self.objectness_threshold
        conf_data[no_object_index.expand_as(conf_data)] = 0

        num = loc_data.size(0)  # batch size
        num_priors = prior_data.size(0)
        output = torch.zeros(num, self.num_classes, self.top_k, 5)
        # Per image: one row of scores per class.
        conf_preds = conf_data.view(num, num_priors,
                                    self.num_classes).transpose(2, 1)

        # Decode predictions into bboxes.
        for i in range(num):
            # Stage 1: refine the priors; stage 2: decode against the result
            # after converting it with center_size.
            default = decode(arm_loc_data[i], prior_data, self.variance)
            default = center_size(default)
            decoded_boxes = decode(loc_data[i], default, self.variance)
            # For each class, perform nms
            conf_scores = conf_preds[i].clone()
            # Class 0 is skipped (presumably background).
            for cl in range(1, self.num_classes):
                c_mask = conf_scores[cl].gt(self.confidence_threshold)
                scores = conf_scores[cl][c_mask]
                if scores.size(0) == 0:
                    continue
                l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
                boxes = decoded_boxes[l_mask].view(-1, 4)
                # idx of highest scoring and non-overlapping boxes per class
                ids, count = nms(boxes, scores, self.nms_threshold, self.top_k)
                output[i, cl, :count] = \
                    torch.cat((scores[ids[:count]].unsqueeze(1),
                               boxes[ids[:count]]), 1)
        # Rank all detections of an image by score across classes.
        flt = output.contiguous().view(num, -1, 5)
        _, idx = flt[:, :, 0].sort(1, descending=True)
        _, rank = idx.sort(1)
        # NOTE(review): boolean-mask indexing returns a copy, so this fill_
        # does not appear to change `output`; the mask also selects the
        # best-ranked rows (rank < keep_top_k) rather than the rest. As
        # written the keep_top_k limit is not enforced - confirm the intent
        # before changing it, since callers currently get the unfiltered
        # result.
        flt[(rank < self.keep_top_k).unsqueeze(-1).expand_as(flt)].fill_(0)
        return output
    def predict(self, img):
        """Detect objects in ``img`` and return per-class detections.

        Args:
            img: image array of shape (height, width, channels).

        Returns:
            ``(all_boxes, total_time)``: ``all_boxes[class]`` holds float32
            rows ``[box (4), score]`` for every class from 1 upward (entry 0
            stays an empty list); ``total_time`` is the sum of the values
            returned by the detection and NMS timers.
        """
        timers = {'im_detect': Timer(), 'misc': Timer()}
        img_h, img_w = img.shape[0], img.shape[1]
        scale = torch.Tensor([img_w, img_h, img_w, img_h])

        with torch.no_grad():
            x = self.transform(img).unsqueeze(0)
            if self.cuda:
                x = x.cuda()
                scale = scale.cuda()

        timers['im_detect'].tic()
        out = net(x)  # forward pass
        boxes, scores = self.detection.forward(out, priors)
        detect_time = timers['im_detect'].toc()

        # only the first batch item is used
        boxes = boxes[0]
        scores = scores[0]
        # scale each detection back up to the image
        boxes *= scale
        boxes = boxes.cpu().numpy()
        scores = scores.cpu().numpy()

        timers['misc'].tic()
        all_boxes = [[] for _ in range(num_classes)]
        for cls_id in range(1, num_classes):
            picked = np.where(scores[:, cls_id] > self.thresh)[0]
            if len(picked) == 0:
                all_boxes[cls_id] = np.zeros([0, 5], dtype=np.float32)
                continue
            cls_dets = np.hstack(
                (boxes[picked], scores[picked, cls_id][:, np.newaxis]))
            cls_dets = cls_dets.astype(np.float32, copy=False)
            survivors = nms(cls_dets, 0.4, force_cpu=args.cpu)
            all_boxes[cls_id] = cls_dets[survivors, :]
        nms_time = timers['misc'].toc()

        total_time = detect_time + nms_time
        return all_boxes, total_time
Beispiel #19
0
    def get_bbox(self, img_raw):
        """Return integer face boxes found in ``img_raw``.

        The image is mean-subtracted on ``self.device`` and run through
        ``self.net``.  Decoded boxes are scaled to pixels, filtered (score
        above 0.05), ranked (best 5000), suppressed with NMS (0.3) and
        truncated to 750; detections scoring below 0.65 are then skipped.

        Args:
            img_raw: image data of shape (height, width, channels).

        Returns:
            List of ``(x1, y1, x2, y2)`` integer tuples.
        """
        device = self.device
        img = torch.FloatTensor(img_raw).to(device)
        im_height, im_width, _ = img.size()
        scale = torch.FloatTensor(
            [im_width, im_height, im_width, im_height]).to(device)
        img -= torch.FloatTensor((104, 117, 123)).to(device)
        batch = img.permute(2, 0, 1).unsqueeze(0)

        loc, conf = self.net(batch)  # forward pass

        priors = PriorBox(cfg, image_size=(im_height, im_width)).forward()
        priors = priors.to(device)
        boxes = decode(loc.data.squeeze(0), priors.data, cfg['variance'])
        boxes = (boxes * scale).cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

        # drop low scores
        confident = np.where(scores > 0.05)[0]
        boxes, scores = boxes[confident], scores[confident]

        # best 5000 candidates go into NMS
        ranking = scores.argsort()[::-1][:5000]
        boxes, scores = boxes[ranking], scores[ranking]

        dets = np.hstack((boxes, scores[:, np.newaxis]))
        dets = dets.astype(np.float32, copy=False)
        survivors = nms(dets, 0.3, force_cpu=False)

        # cap the number of results after NMS
        dets = dets[survivors, :][:750, :]

        def _int_box(det):
            # every entry (score included) goes through int(), as before
            as_ints = list(map(int, det))
            return (as_ints[0], as_ints[1], as_ints[2], as_ints[3])

        return [_int_box(det) for det in dets if not det[4] < 0.65]
    def facebox_detect(self, img_raw):
        """Detect faces in ``img_raw`` and return their boxes with scores.

        The image is converted to float32, mean-subtracted and run through
        ``self.model``.  Decoded boxes are scaled to pixels, filtered by
        confidence, ranked, suppressed with NMS and truncated; detections
        containing any negative value are discarded.

        Args:
            img_raw: image array of shape (height, width, channels).

        Returns:
            float32 array with rows ``[box (4), score]``.
        """
        cfg = self.cfg
        img = np.float32(img_raw)
        im_height, im_width, _ = img.shape
        scale = torch.Tensor([im_width, im_height, im_width, im_height])

        img -= (104, 117, 123)
        img = torch.from_numpy(img.transpose(2, 0, 1)).unsqueeze(0)
        img = img.to(self.device)
        scale = scale.to(self.device)

        loc, conf = self.model(img)  # forward pass

        priors = PriorBox(cfg, image_size=(im_height, im_width)).forward()
        priors = priors.to(self.device)
        boxes = decode(loc.data.squeeze(0), priors.data, cfg['variance'])
        boxes = (boxes * scale).cpu().numpy()
        scores = conf.data.cpu().numpy()[:, 1]

        # drop low scores
        confident = np.where(scores > cfg['confidence_threshold'])[0]
        boxes, scores = boxes[confident], scores[confident]

        # best top_k candidates go into NMS
        ranking = scores.argsort()[::-1][:cfg['top_k']]
        boxes, scores = boxes[ranking], scores[ranking]

        dets = np.hstack((boxes, scores[:, np.newaxis]))
        dets = dets.astype(np.float32, copy=False)
        survivors = nms(dets, cfg['nms_threshold'], False)

        # cap the number of results after NMS
        boxes_score = dets[survivors, :][:cfg['keep_top_k'], :]

        # discard detections with any negative entry
        has_negative = (boxes_score < 0).any(axis=1)
        return boxes_score[~has_negative]
Beispiel #21
0
def imgCallback(msg):
    """Image-message callback: run the detector and publish the result.

    The message is stored in the global ``captureImage``, converted to a
    BGR image via ``CvBridge``, passed through ``net``/``detector``, and the
    per-class detections that survive thresholding and NMS are handed to
    ``pub_result``.

    Args:
        msg: Incoming image message accepted by ``CvBridge.imgmsg_to_cv2``.
    """
    global captureImage
    captureImage = msg

    try:
        cv_img = CvBridge().imgmsg_to_cv2(captureImage, "bgr8")

        w, h = cv_img.shape[1], cv_img.shape[0]
        img = _preprocess(cv_img).unsqueeze(0)

        if cfg.test_cfg.cuda:
            img = img.cuda()

        # (w, h, w, h) multiplier applied to the detector's boxes.
        scale = torch.Tensor([w, h, w, h])
        out = net(img)

        boxes, scores = detector.forward(out, priors)
        boxes = (boxes[0] * scale).cpu().numpy()
        scores = scores[0].cpu().numpy()
        allboxes = []
        # Class 0 is skipped by the loop range.
        for j in range(1, cfg.model.m2det_config.num_classes):
            inds = np.where(scores[:, j] > cfg.test_cfg.score_threshold)[0]
            if len(inds) == 0:
                continue
            c_bboxes = boxes[inds]
            c_scores = scores[inds, j]
            c_dets = np.hstack(
                (c_bboxes, c_scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
            soft_nms = cfg.test_cfg.soft_nms
            keep = nms(c_dets, cfg.test_cfg.iou, force_cpu=soft_nms)
            keep = keep[:cfg.test_cfg.keep_per_class]
            c_dets = c_dets[keep, :]
            # Row layout: four box values, score, class index.
            allboxes.extend([_.tolist() + [j] for _ in c_dets])

        # reshape(-1, 6) keeps the array 2-D even when nothing was detected;
        # a bare np.array([]) is 1-D and the column slices below would raise
        # IndexError.
        allboxes = np.array(allboxes, dtype=np.float64).reshape(-1, 6)
        boxes = allboxes[:, :4]
        scores = allboxes[:, 4]
        cls_inds = allboxes[:, 5]

        pub_result(cv_img, boxes, scores, cls_inds)

    except CvBridgeError as e:
        print(e)
Beispiel #22
0
    def Predict(self, im_path, thresh=0.5, visualize=False, output_img_path="output.jpg"):
        """Run detection on an image file and write the annotated image.

        Args:
            im_path: Path of the image read with ``cv2.imread``.
            thresh: Threshold forwarded to ``self.draw_detection``.
            visualize: When true, show the annotated image in a window for
                two seconds.
            output_img_path: Destination path for the annotated image.
        """
        loop_start = time.time()
        image = cv2.imread(im_path, cv2.IMREAD_COLOR)
        w, h = image.shape[1], image.shape[0]
        img = self.system_dict["_preprocess"](image).unsqueeze(0)
        test_cfg = self.system_dict["cfg"].test_cfg
        if test_cfg.cuda:
            img = img.cuda()
        # (w, h, w, h) multiplier applied to the detector's boxes.
        scale = torch.Tensor([w, h, w, h])
        out = self.system_dict["net"](img)
        boxes, scores = self.system_dict["detector"].forward(out, self.system_dict["priors"])
        boxes = (boxes[0] * scale).cpu().numpy()
        scores = scores[0].cpu().numpy()
        allboxes = []
        # Class 0 is skipped by the loop range.
        for j in range(1, self.system_dict["num_classes"]):
            inds = np.where(scores[:, j] > test_cfg.score_threshold)[0]
            if len(inds) == 0:
                continue
            c_bboxes = boxes[inds]
            c_scores = scores[inds, j]
            c_dets = np.hstack((c_bboxes, c_scores[:, np.newaxis])).astype(
                np.float32, copy=False)
            keep = nms(c_dets, test_cfg.iou, force_cpu=test_cfg.soft_nms)
            keep = keep[:test_cfg.keep_per_class]
            c_dets = c_dets[keep, :]
            # Row layout: four box values, score, class index.
            allboxes.extend([_.tolist() + [j] for _ in c_dets])

        loop_time = time.time() - loop_start
        print("Inference time 2 - {} sec".format(loop_time))

        # reshape(-1, 6) keeps the array 2-D even when nothing was detected;
        # a bare np.array([]) is 1-D and the column slices below would raise
        # IndexError.
        allboxes = np.array(allboxes, dtype=np.float64).reshape(-1, 6)
        boxes = allboxes[:, :4]
        scores = allboxes[:, 4]
        cls_inds = allboxes[:, 5]
        im2show = self.draw_detection(image, boxes, scores, cls_inds, -1, thresh)

        # Shrink very tall results to a height of 1000 px, keeping aspect.
        if im2show.shape[0] > 1100:
            im2show = cv2.resize(im2show,
                                 (int(1000. * float(im2show.shape[1]) / im2show.shape[0]), 1000))
        if visualize:
            cv2.imshow('test', im2show)
            cv2.waitKey(2000)

        cv2.imwrite(output_img_path, im2show)
Beispiel #23
0
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
  """A simplified version compared to fast/er RCNN.

  For details please see the technical report.

  Applies the predicted deltas to ``anchors``, clips the resulting boxes to
  ``im_info[:2]``, keeps the highest-scoring ones, runs NMS and returns the
  surviving proposals.

  Args:
    rpn_cls_prob: 4-D score array; the channels from ``num_anchors`` onward
      along the last axis are used as scores.
    rpn_bbox_pred: Box deltas, reshaped here to (-1, 4).
    im_info: Sequence whose first two entries are passed to ``clip_boxes``.
    cfg_key: Key into ``cfg`` (``str`` or UTF-8 encoded ``bytes``).
    _feat_stride: Unused in this function.
    anchors: Anchor boxes passed to ``bbox_transform_inv``.
    num_anchors: Channel offset at which the used scores start.

  Returns:
    Tuple ``(blob, scores)``: ``blob`` is float32 with a leading column of
    zeros (batch index) followed by the proposal coordinates; ``scores`` is
    the matching (N, 1) score array.
  """
  if isinstance(cfg_key, bytes):
    cfg_key = cfg_key.decode('utf-8')
  pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
  post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
  nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

  # Get the scores and bounding boxes
  scores = rpn_cls_prob[:, :, :, num_anchors:]
  scores = scores.reshape((-1, 1))

  rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
  proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
  proposals = clip_boxes(proposals, im_info[:2])

  # Pick the top region proposals (descending score order)
  order = scores.ravel().argsort()[::-1]
  if pre_nms_topN > 0:
    order = order[:pre_nms_topN]
  proposals = proposals[order, :]
  scores = scores[order]

  # Non-maximal suppression
  keep = nms(np.hstack((proposals, scores)), nms_thresh)

  # Pick the top region proposals after NMS
  if post_nms_topN > 0:
    keep = keep[:post_nms_topN]
  proposals = proposals[keep, :]
  scores = scores[keep]

  # Only support single image as input, so every batch index is 0
  batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
  blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

  return blob, scores
Beispiel #24
0
    def predict_on_video(self, v_f):
        """Run detection on every frame of a video and display the result.

        Each frame is passed to ``self.predict_on_img``; for every class
        index from 1 upward, detections scoring above 0.6 go through NMS
        (threshold 0.6) and are drawn on the frame with their label.

        When a frame can no longer be read, ``'Done'`` is printed and the
        process exits with status 0, as before. The capture is now released
        on every exit path.

        Args:
            v_f: Video source accepted by ``cv2.VideoCapture``.
        """
        cap = cv2.VideoCapture(v_f)
        try:
            while cap.isOpened():
                ok, frame = cap.read()
                if not ok:
                    print('Done')
                    # Same effect as the former exit(0), without relying on
                    # the interactive-shell helper injected by ``site``.
                    raise SystemExit(0)

                boxes, scores = self.predict_on_img(frame)
                for j in range(1, self.num_classes):
                    # conf > 0.6
                    inds = np.where(scores[:, j] > 0.6)[0]
                    # np.where always returns an array (never None), so
                    # emptiness has to be tested on its size.
                    if inds.size == 0:
                        continue
                    c_bboxes = boxes[inds]
                    c_scores = scores[inds, j]
                    c_dets = np.hstack((c_bboxes, c_scores[:, np.newaxis])).astype(
                        np.float32, copy=False)
                    keep = nms(c_dets, 0.6)
                    c_bboxes = c_dets[keep, :][:, :4]

                    label = self.label_map_list[j - 1]
                    for box in c_bboxes:
                        # OpenCV drawing functions need integer coordinates.
                        x1, y1, x2, y2 = (int(v) for v in box)
                        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 1, 0)
                        cv2.putText(frame, label, (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                    (0, 255, 0),
                                    1, cv2.LINE_AA)
                cv2.imshow('rr', frame)
                cv2.waitKey(1)
        finally:
            cap.release()
    def detect_faces(self, img, resize=1.0):
        """Detect faces in ``img`` and return the detections kept after NMS.

        Args:
            img: Image array indexed as (height, width, channel). It is
                copied to float32 before mean subtraction, so the caller's
                array is left untouched and integer images are accepted.
            resize: Divisor applied to the scaled boxes (presumably the
                factor by which the caller resized the image; confirm
                against callers).

        Returns:
            2-D float32 array whose rows hold four box values followed by
            the score, limited to ``self.args.keep_top_k`` rows.
        """
        # Work on a private float32 copy: the in-place subtraction below
        # would otherwise modify the caller's image and depend on its dtype.
        img = np.array(img, dtype=np.float32)
        im_height, im_width, _ = img.shape
        # (w, h, w, h) multiplier applied to the decoded boxes.
        scale = torch.Tensor([im_width, im_height, im_width, im_height])
        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(self.device)
        scale = scale.to(self.device)

        loc, conf = self.net(img)  # forward pass
        priorbox = PriorBox(cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(self.device)
        prior_data = priors.data
        boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
        boxes = boxes * scale / resize
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

        # ignore low scores
        inds = np.where(scores > self.args.confidence_threshold)[0]
        boxes = boxes[inds]
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1][:self.args.top_k]
        boxes = boxes[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                                copy=False)
        keep = nms(dets, self.args.nms_threshold, force_cpu=self.args.cpu)
        dets = dets[keep, :]

        # keep top-K after NMS
        dets = dets[:self.args.keep_top_k, :]

        return dets
    def nms_process(self, network_output, scale, im_height, im_width) -> List[TrackingRegion]:
        """Turn raw network output into a list of face ``TrackingRegion``s.

        Boxes are decoded against freshly generated priors, multiplied by
        ``scale``, filtered by ``self.score_min``, limited to the 5000 best
        scores, passed through NMS (threshold 0.3) and capped at 750 rows.

        Args:
            network_output: Three-element sequence; the first two entries
                are used as location and confidence predictions and the
                third is forwarded to ``PriorBox``.
            scale: Multiplier applied to the decoded boxes.
            im_height: Image height passed to ``PriorBox``.
            im_width: Image width passed to ``PriorBox``.

        Returns:
            One ``TrackingRegion`` per kept detection, with its rectangle,
            confidence and ``data["class_id"] = "face"`` set.
        """
        priors = PriorBox(cfg, network_output[2], (im_height, im_width),
                          phase='test').forward()
        if self.use_gpu:
            priors = priors.cuda()
        loc, conf, _ = network_output
        decoded = decode(loc.data.squeeze(0), priors.data, cfg['variance'])
        boxes = (decoded * scale).cpu().numpy()
        scores = conf.data.cpu().numpy()[:, 1]

        # Discard candidates that do not exceed the minimum score.
        above_min = np.where(scores > self.score_min)[0]
        boxes, scores = boxes[above_min], scores[above_min]

        # Keep at most the 5000 best-scoring candidates before NMS.
        best_first = scores.argsort()[::-1][:5000]
        boxes, scores = boxes[best_first], scores[best_first]

        # NMS over (box, score) rows.
        dets = np.hstack((boxes, scores[:, np.newaxis]))
        dets = dets.astype(np.float32, copy=False)
        kept = nms(dets, 0.3, force_cpu=False)
        dets = dets[kept, :]

        # Keep at most 750 detections after NMS.
        dets = dets[:750, :]

        regions = []
        for row in dets:
            region = TrackingRegion()
            region.set_rect(left=row[0], top=row[1], right=row[2], bottom=row[3])
            region.confidence = row[4]
            region.data["class_id"] = "face"
            regions.append(region)

        return regions
Beispiel #27
0
    def _single_infer(self, img=None, save_to=''):
        """Run face detection on one image and return the kept detections.

        Args:
            img: Image array indexed as (height, width, channel).
            save_to: When non-empty and ``self.args.save_flag`` is set, the
                untouched copy of the image and the detections are passed
                to ``self._easy_vis`` together with this value.

        Returns:
            2-D float32 array whose rows hold four box values followed by
            the score, limited to ``self.args.keep_top_k`` rows.
        """
        img_copy = img.copy()
        im_height, im_width, _ = img.shape
        img = np.float32(img)
        with torch.no_grad():
            # (w, h, w, h) multiplier applied to the decoded boxes.
            scale = torch.Tensor([im_width, im_height, im_width,
                                  im_height]).to(self.device)
            img -= (104, 117, 123)
            img = img.transpose(2, 0, 1)
            # Use the same device as scale and priors; the decode and
            # multiply below require all operands on one device.
            img = torch.from_numpy(img).unsqueeze(0).to(self.device)
            loc, conf = self.model(img)  # forward pass
        priors = clib.PriorBox(img_size=(im_height,
                                         im_width)).forward().to(self.device)
        boxes = decode(loc.data.squeeze(0), priors.data)
        boxes = boxes * scale
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

        # ignore low scores
        inds = np.where(scores > self.args.conf_thres)[0]
        boxes, scores = boxes[inds], scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1][:self.args.top_k]
        boxes, scores = boxes[order], scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                                copy=False)
        keep = nms(dets, self.args.nms_thres, force_cpu=self.args.cpu)
        dets = dets[keep, :]

        # keep top-K after NMS
        dets = dets[:self.args.keep_top_k, :]

        if self.args.save_flag and len(save_to) > 0:
            self._easy_vis(img_copy, dets, save_to)
        return dets
    def detect(self, image):
        """Run the detector on ``image`` and draw the kept detections.

        Args:
            image: Image array indexed as (height, width, channel).

        Returns:
            The ``(infos, im2show)`` pair produced by ``draw_detection``.
        """
        w, h = image.shape[1], image.shape[0]
        img = self._preprocess(image).unsqueeze(0)
        if cfg.test_cfg.cuda:
            img = img.cuda()
        # (w, h, w, h) multiplier applied to the detector's boxes.
        scale = torch.Tensor([w, h, w, h])
        out = self.net(img)
        boxes, scores = self.detector.forward(out, self.priors)
        boxes = (boxes[0] * scale).cpu().numpy()
        scores = scores[0].cpu().numpy()
        allboxes = []
        # Class 0 is skipped by the loop range.
        for j in range(1, len(ch_labels)):
            inds = np.where(scores[:, j] > cfg.test_cfg.score_threshold)[0]
            if len(inds) == 0:
                continue
            c_bboxes = boxes[inds]
            c_scores = scores[inds, j]
            c_dets = np.hstack(
                (c_bboxes, c_scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
            soft_nms = cfg.test_cfg.soft_nms
            keep = nms(c_dets, cfg.test_cfg.iou, force_cpu=soft_nms)
            keep = keep[:cfg.test_cfg.keep_per_class]
            c_dets = c_dets[keep, :]
            # Row layout: four box values, score, class index.
            allboxes.extend([_.tolist() + [j] for _ in c_dets])

        # reshape(-1, 6) keeps the array 2-D even when nothing was detected;
        # a bare np.array([]) is 1-D and the column slices below would raise
        # IndexError.
        allboxes = np.array(allboxes, dtype=np.float64).reshape(-1, 6)
        boxes = allboxes[:, :4]
        scores = allboxes[:, 4]
        cls_inds = allboxes[:, 5]
        infos, im2show = draw_detection(image, boxes, scores, cls_inds, -1,
                                        args.thresh)
        return infos, im2show
Beispiel #29
0
def test_net(save_folder,
             net,
             detector,
             cuda,
             testset,
             transform,
             max_per_image=300,
             thresh=0.005):
    """Run the detector over ``testset``, save and evaluate the detections.

    Args:
        save_folder: Directory for ``detections.pkl``; created if missing.
        net: Network called as ``net(x=x, test=True)``.
        detector: Object whose ``forward(out, priors)`` returns boxes and
            scores.
        cuda: When true, inputs are moved to the GPU.
        testset: Dataset providing ``pull_image`` and
            ``evaluate_detections``.
        transform: Callable turning an image into a tensor.
        max_per_image: Maximum detections kept per image across all
            classes; values <= 0 disable the cap.
        thresh: Minimum class score for a detection to be considered.

    Returns:
        ``(APs, mAP)`` when ``args.dataset == 'VOC'`` and detections were
        freshly computed; otherwise ``None``.
    """
    if not os.path.exists(save_folder):
        # makedirs also creates missing parent directories.
        os.makedirs(save_folder)
    # dump predictions and assoc. ground truth to text file for now
    num_images = len(testset)
    num_classes = (21, 81)[args.dataset == 'COCO']
    # all_boxes[class][image] -> (N, 5) array of four box values + score
    all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]

    _t = {'im_detect': Timer(), 'misc': Timer()}
    det_file = os.path.join(save_folder, 'detections.pkl')

    if args.retest:
        # NOTE: pickle.load executes arbitrary code from the file; only use
        # detection files this program wrote itself.
        with open(det_file, 'rb') as f:
            all_boxes = pickle.load(f)
        print('Evaluating detections')
        testset.evaluate_detections(all_boxes, save_folder)
        return

    for i in range(num_images):
        img = testset.pull_image(i)
        x = Variable(transform(img).unsqueeze(0), volatile=True)
        if cuda:
            x = x.cuda()

        _t['im_detect'].tic()
        out = net(x=x, test=True)  # forward pass
        boxes, scores = detector.forward(out, priors)
        detect_time = _t['im_detect'].toc()
        boxes = boxes[0]
        scores = scores[0]

        boxes = boxes.cpu().numpy()
        scores = scores.cpu().numpy()
        # scale each detection back up to the image
        scale = torch.Tensor(
            [img.shape[1], img.shape[0], img.shape[1],
             img.shape[0]]).cpu().numpy()
        boxes *= scale

        _t['misc'].tic()

        # Class 0 is skipped by the loop range.
        for j in range(1, num_classes):
            inds = np.where(scores[:, j] > thresh)[0]
            if len(inds) == 0:
                all_boxes[j][i] = np.empty([0, 5], dtype=np.float32)
                continue
            c_bboxes = boxes[inds]
            c_scores = scores[inds, j]
            c_dets = np.hstack(
                (c_bboxes, c_scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)

            # The original selected force_cpu=False for every dataset.
            keep = nms(c_dets, 0.45, force_cpu=False)
            keep = keep[:50]
            c_dets = c_dets[keep, :]
            all_boxes[j][i] = c_dets
        if max_per_image > 0:
            # Cap total detections per image by raising the score threshold
            # to the max_per_image-th best score.
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in range(1, num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        nms_time = _t['misc'].toc()

        if i % 20 == 0:
            print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s'.format(
                i + 1, num_images, detect_time, nms_time))
            _t['im_detect'].clear()
            _t['misc'].clear()

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    if args.dataset == 'VOC':
        APs, mAP = testset.evaluate_detections(all_boxes, save_folder)
        return APs, mAP
    else:
        testset.evaluate_detections(all_boxes, save_folder)
Beispiel #30
0
def nms_detections(pred_boxes, scores, nms_thresh):
    """Return the indices kept by ``nms`` for the given boxes and scores.

    Each row of ``pred_boxes`` is extended with its score and the combined
    float32 array is handed to ``nms`` with ``nms_thresh``.
    """
    score_column = scores[:, np.newaxis]
    stacked = np.hstack((pred_boxes, score_column))
    return nms(stacked.astype(np.float32), nms_thresh)
Beispiel #31
0
    def forward(self, arguments, device=None, outputs_to_retain=None):
        """Generate region proposals from RPN scores and box deltas.

        Args:
            arguments: Sequence ``(scores, bbox_deltas, im_info)``. The
                scores must have a batch size of 1; their channels from
                ``self._num_anchors`` onward are used as foreground
                probabilities. ``im_info[0]`` holds the size information
                described in the DEBUG comment below.
            device: Unused in this method.
            outputs_to_retain: Empty or ``None`` selects the ``test_*``
                layer configuration; anything else selects ``train_*``.

        Returns:
            ``(None, proposals)`` where ``proposals`` has a leading batch
            axis of size 1 and is zero-padded to ``post_nms_topN`` rows
            when fewer proposals were found.
        """
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        # use potentially different number of proposals for training vs evaluation
        # (`not` also covers the default None, on which len() would raise)
        if not outputs_to_retain:
            # print("EVAL")
            pre_nms_topN = self._layer_config['test_pre_nms_topN']
            post_nms_topN = self._layer_config['test_post_nms_topN']
            nms_thresh = self._layer_config['test_nms_thresh']
            min_size = self._layer_config['test_min_size']
        else:
            pre_nms_topN = self._layer_config['train_pre_nms_topN']
            post_nms_topN = self._layer_config['train_post_nms_topN']
            nms_thresh = self._layer_config['train_nms_thresh']
            min_size = self._layer_config['train_min_size']

        bottom = arguments
        assert bottom[0].shape[0] == 1, \
            'Only single item batches are supported'

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
        scores = bottom[0][:, self._num_anchors:, :, :]
        bbox_deltas = bottom[1]
        im_info = bottom[2][0]

        if DEBUG:
            # im_info = (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height)
            # e.g.(1000, 1000, 1000, 600, 500, 300) for an original image of 600x300 that is scaled and padded to 1000x1000
            print ('im_size: ({}, {})'.format(im_info[0], im_info[1]))
            print ('scaled im_size: ({}, {})'.format(im_info[2], im_info[3]))
            print ('original im_size: ({}, {})'.format(im_info[4], im_info[5]))

        # 1. Generate proposals from bbox deltas and shifted anchors
        height, width = scores.shape[-2:]

        if DEBUG:
            print ('score map size: {}'.format(scores.shape))

        # Enumerate all shifts
        shift_x = np.arange(0, width) * self._feat_stride
        shift_y = np.arange(0, height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = self._anchors.reshape((1, A, 4)) + \
                  shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info)

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale. Original size = im_info[4:6], scaled size = im_info[2:4])
        cntk_image_scale = im_info[2] / im_info[4]
        keep = _filter_boxes(proposals, min_size * cntk_image_scale)
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort(kind='mergesort')[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        keep = nms(np.hstack((proposals, scores)), nms_thresh, use_gpu_nms=False)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep]

        # pad with zeros if too few rois were found
        num_found_proposals = proposals.shape[0]
        if num_found_proposals < post_nms_topN:
            if DEBUG:
                print("Only {} proposals generated in ProposalLayer".format(num_found_proposals))
            proposals_padded = np.zeros(((post_nms_topN,) + proposals.shape[1:]), dtype=np.float32)
            proposals_padded[:num_found_proposals, :] = proposals
            proposals = proposals_padded

        # Output rois blob
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        # for CNTK: add batch axis to output shape
        proposals.shape = (1,) + proposals.shape

        return None, proposals
Beispiel #32
0
def test_net(save_folder, net, detector, cuda, testset, transform, max_per_image=300, thresh=0.005):
    """Run the detector over ``testset``, save and evaluate the detections.

    Args:
        save_folder: Directory for ``detections.pkl``; created if missing.
        net: Network called as ``net(x)``.
        detector: Object whose ``forward(out, priors)`` returns boxes and
            scores.
        cuda: When true, inputs are moved to the GPU.
        testset: Dataset providing ``pull_image`` and
            ``evaluate_detections``.
        transform: Callable turning an image into a tensor.
        max_per_image: Maximum detections kept per image across all
            classes; values <= 0 disable the cap.
        thresh: Minimum class score for a detection to be considered.
    """
    if not os.path.exists(save_folder):
        # makedirs also creates missing parent directories.
        os.makedirs(save_folder)
    # dump predictions and assoc. ground truth to text file for now
    num_images = len(testset)
    num_classes = (21, 81)[args.dataset == 'COCO']
    # all_boxes[class][image] -> (N, 5) array of four box values + score
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(num_classes)]

    _t = {'im_detect': Timer(), 'misc': Timer()}
    det_file = os.path.join(save_folder, 'detections.pkl')

    if args.retest:
        # NOTE: pickle.load executes arbitrary code from the file; only use
        # detection files this program wrote itself.
        with open(det_file, 'rb') as f:
            all_boxes = pickle.load(f)
        print('Evaluating detections')
        testset.evaluate_detections(all_boxes, save_folder)
        return

    # NMS runs on the CPU for VOC only; the flag does not change per class.
    cpu = args.dataset == 'VOC'

    for i in range(num_images):
        img = testset.pull_image(i)
        x = Variable(transform(img).unsqueeze(0), volatile=True)
        if cuda:
            x = x.cuda()

        _t['im_detect'].tic()
        out = net(x)      # forward pass
        boxes, scores = detector.forward(out, priors)
        detect_time = _t['im_detect'].toc()
        boxes = boxes[0]
        scores = scores[0]

        boxes = boxes.cpu().numpy()
        scores = scores.cpu().numpy()
        # scale each detection back up to the image
        scale = torch.Tensor([img.shape[1], img.shape[0],
                             img.shape[1], img.shape[0]]).cpu().numpy()
        boxes *= scale

        _t['misc'].tic()

        # Class 0 is skipped by the loop range.
        for j in range(1, num_classes):
            inds = np.where(scores[:, j] > thresh)[0]
            if len(inds) == 0:
                all_boxes[j][i] = np.empty([0, 5], dtype=np.float32)
                continue
            c_bboxes = boxes[inds]
            c_scores = scores[inds, j]
            c_dets = np.hstack((c_bboxes, c_scores[:, np.newaxis])).astype(
                np.float32, copy=False)

            keep = nms(c_dets, 0.45, force_cpu=cpu)
            keep = keep[:50]
            c_dets = c_dets[keep, :]
            all_boxes[j][i] = c_dets
        if max_per_image > 0:
            # Cap total detections per image by raising the score threshold
            # to the max_per_image-th best score.
            image_scores = np.hstack([all_boxes[j][i][:, -1] for j in range(1, num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        nms_time = _t['misc'].toc()

        if i % 20 == 0:
            print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s'
                .format(i + 1, num_images, detect_time, nms_time))
            _t['im_detect'].clear()
            _t['misc'].clear()

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    testset.evaluate_detections(all_boxes, save_folder)
Beispiel #33
0
def nms_detections(pred_boxes, scores, nms_thresh):
    """Run ``nms`` on boxes paired with their scores.

    The scores are appended to ``pred_boxes`` as a trailing column, the
    result is cast to float32, and whatever ``nms`` returns for it at
    ``nms_thresh`` is returned unchanged.
    """
    box_score_rows = np.hstack((pred_boxes, scores[:, np.newaxis]))
    box_score_rows = box_score_rows.astype(np.float32)
    selected = nms(box_score_rows, nms_thresh)
    return selected