Example no. 1
    def decode(self, loc_preds, cls_preds, input_size):
        '''Decode outputs back to bounding box locations and class labels.

        Args:
          loc_preds: (tensor) predicted locations, sized [#anchors, 4].
          cls_preds: (tensor) predicted class labels, sized [#anchors, #classes].
          input_size: (int/tuple) model input size of (w,h).

        Returns:
          boxes: (tensor) decoded box locations, sized [#obj,4].
          labels: (tensor) class labels for each box, sized [#obj,].
        '''
        CLS_THRESH = 0.5
        NMS_THRESH = 0.5

        input_size = torch.Tensor([input_size,input_size]) if isinstance(input_size, int) \
                     else torch.Tensor(input_size)
        anchor_boxes = self._get_anchor_boxes(input_size)

        loc_xy = loc_preds[:,:2]
        loc_wh = loc_preds[:,2:]

        xy = loc_xy * anchor_boxes[:,2:] + anchor_boxes[:,:2]
        wh = loc_wh.exp() * anchor_boxes[:,2:]
        boxes = torch.cat([xy-wh/2, xy+wh/2], 1)  # [#anchors,4]

        score, labels = cls_preds.sigmoid().max(1)          # [#anchors,]
        ids = score > CLS_THRESH
        ids = ids.nonzero().squeeze()             # [#obj,]
        keep = box_nms(boxes[ids], score[ids], threshold=NMS_THRESH)
        return boxes[ids][keep], labels[ids][keep]
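
A note on box_nms: every decode variant in this listing delegates suppression
to a box_nms helper whose implementation lives elsewhere in each repository.
A minimal hard-NMS sketch over xyxy boxes, written here purely for reference
(assumption: greedy suppression in descending score order with an IoU cutoff):

    import torch

    def box_nms_sketch(boxes, scores, threshold=0.5):
        """Greedy hard NMS over [N,4] xyxy boxes; returns kept indices."""
        x1, y1, x2, y2 = boxes.unbind(1)
        areas = (x2 - x1) * (y2 - y1)
        order = scores.argsort(descending=True)
        keep = []
        while order.numel() > 0:
            i = order[0].item()
            keep.append(i)
            if order.numel() == 1:
                break
            rest = order[1:]
            # IoU of the top-scoring box with the remaining candidates
            xx1 = x1[rest].clamp(min=x1[i].item())
            yy1 = y1[rest].clamp(min=y1[i].item())
            xx2 = x2[rest].clamp(max=x2[i].item())
            yy2 = y2[rest].clamp(max=y2[i].item())
            inter = (xx2 - xx1).clamp(min=0) * (yy2 - yy1).clamp(min=0)
            iou = inter / (areas[i] + areas[rest] - inter)
            order = rest[iou <= threshold]
        return torch.tensor(keep, dtype=torch.long)
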
    def decode(self, loc_preds, cls_preds, input_size):
        CLS_THRESH = 0.05
        NMS_THRESH = 0.4

        if isinstance(input_size, int):
            input_size = torch.Tensor([input_size, input_size])
        else:
            input_size = torch.Tensor(input_size)

        anchor_boxes = self.get_anchor_boxes(input_size)
        std = Variable(self.std).cuda()
        loc_preds = loc_preds * std
        loc_xy = loc_preds.data.cpu()[:, :2]
        loc_wh = loc_preds.data.cpu()[:, 2:]
        xy = loc_xy * anchor_boxes[:, 2:] + anchor_boxes[:, :2]
        wh = loc_wh.exp() * anchor_boxes[:, 2:]
        boxes = torch.cat([xy, wh], 1)
        boxes = change_box_order(boxes, 'xywh2xyxy')
        cls_preds = F.softmax(cls_preds, 1)
        score, labels = cls_preds.max(1)
        ids = (labels > 0) & (score > CLS_THRESH)
        ids = ids.nonzero().squeeze()
        if len(ids.size()) == 0:
            return None, None, None
        ids = ids.data.cpu()
        
        keep = box_nms(boxes.cpu()[ids], score.data.cpu()[ids], threshold=NMS_THRESH)
        return boxes.cpu()[ids][keep], labels.data.cpu()[ids][keep], score.data.cpu()[ids][keep]
    def scaled_window_object_detector(self, in_img, scale_factor=1.1, min_neighbors=3, min_size=(30,30)):
        """This object detector is based on scaled detector window. It scales the detector window instead of
        scaling image. Dectector window pyramid is constructed instead of image pyramid
        """
        v_stride = 1
        h_stride = 1
        objs = []
        # convert to gray scale if the image is color 
        if(len(in_img.shape) == 3):
            gray_img = cv2.cvtColor(in_img, cv2.COLOR_BGR2GRAY)
        else:
            gray_img = in_img

        img_height = gray_img.shape[0]
        img_width = gray_img.shape[1]
        cur_win_width = self.win_width 
        cur_win_height = self.win_height

        # compute integral image. just one time process
        ii_img = cv2.integral(gray_img)
        print(ii_img.dtype)
        # initial scale 1 . ie. original detector size is used
        scale = 1.0

        # upscale the detector window and detect objects until window_size becomes more than one
        # of the image dimension
        while(cur_win_width < img_width and cur_win_height < img_height):
            # max possible window top left corner positions.
            x_max = img_width - cur_win_width + 1
            y_max = img_height - cur_win_height + 1
            print('current scale = {:f}'.format(scale))
            print('Detector height = {:d}, Detector width = {:d}'.format(cur_win_height, cur_win_width))
            for row in range(0, y_max, v_stride):
                for col in range(0, x_max, h_stride):
                    # print(row, col)
                    # detect if the current window contains any objects
                    win_pass = self._evaluate_window_scaled(col, row, scale, ii_img)
                    # record the window if it passes
                    if(win_pass):
                        objs.append(tuple([int(col),
                                     int(row),
                                     int(cur_win_width),
                                     int(cur_win_height)]))
 
            # upscale the detector window
            scale *= scale_factor
            cur_win_width = int(self.win_width*scale)
            cur_win_height = int(self.win_height*scale)
            # perform new detections on the rescaled image.

        print('No of boxes before NMS = {:d}'.format(len(objs)))
        # perform NMS 
        objs = box_nms(objs, 0.2)
        print('No of boxes after NMS = {:d}'.format(len(objs)))
        return objs
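
The detector above computes cv2.integral() once up front because any window
sum can then be read in constant time. A small self-contained check of that
identity (illustration only, not repository code):

    import cv2
    import numpy as np

    def window_sum(ii_img, x, y, w, h):
        # cv2.integral returns an (H+1, W+1) array with a zero first row and
        # column, so the sum over [y, y+h) x [x, x+w) is four lookups
        return (ii_img[y + h, x + w] - ii_img[y, x + w]
                - ii_img[y + h, x] + ii_img[y, x])

    img = np.arange(25, dtype=np.uint8).reshape(5, 5)
    ii = cv2.integral(img)
    assert window_sum(ii, 1, 1, 3, 3) == img[1:4, 1:4].sum()
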
    def detect_objects(self, in_img, scale_factor=1.1, min_neighbors=3, min_size=(30,30), max_size=()):
        """Detect objects using the LBP cascade classifier present in the given grayscale image.
        This has similar functionality as that of cv2.detectMultiScale() method
        """
        v_stride = 1
        h_stride = 1
        objs = []
        # convert to gray scale if the image is color 
        if(len(in_img.shape) == 3):
            gray_img = cv2.cvtColor(in_img, cv2.COLOR_BGR2GRAY)
        else:
            gray_img = in_img

        org_height = gray_img.shape[0]
        org_width = gray_img.shape[1]
        cur_width = org_width
        cur_height = org_height
        win_width = self.win_width
        win_height = self.win_height

        # initial scale 1 as we process  original image
        scale = 1.0
        # downscale image and detect objects until one of the image dimension
        # becomes less  than the window size
        while(cur_width > (win_width+1) and cur_height > (win_height+1)):
            # max possible window top left corner positions.
            x_max = cur_width - win_width + 1
            y_max = cur_height - win_height + 1
            # compute integral image
            ii_img = cv2.integral(gray_img)
            print('current scale = {:f}'.format(scale))
            for row in range(0, y_max, v_stride):
                for col in range(0, x_max, h_stride):
                    # detect if the current window contains any objects
                    win_pass = self._evaluate_window(col, row, ii_img)
                    # record the window if it passes
                    if(win_pass):
                        objs.append(tuple([int(col*scale),
                                     int(row*scale),
                                     int(scale*win_width),
                                     int(scale*win_height)]))
 
            # down scale the image
            cur_width = int(cur_width/scale_factor)
            cur_height = int(cur_height/scale_factor)
            scale *= scale_factor
            gray_img = cv2.resize(gray_img, dsize=(cur_width, cur_height), interpolation=cv2.INTER_LINEAR)
            # perform new detections on the rescaled image.

        print('No of boxes before NMS = {:d}'.format(len(objs)))
        # perform NMS 
        objs = box_nms(objs, 0.2)
        print('No of boxes after NMS = {:d}'.format(len(objs)))
        return objs
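
Both sliding-window detectors accept a min_neighbors argument but never use
it. In cv2.detectMultiScale, which detect_objects mirrors, that parameter is
realized by rectangle grouping; one way to get a similar effect is OpenCV's
cv2.groupRectangles (a sketch, not what this code currently does):

    import cv2

    # three near-identical (x, y, w, h) hits and one unsupported outlier
    rects = [(10, 10, 30, 30), (12, 11, 30, 30), (11, 12, 30, 30),
             (200, 200, 30, 30)]
    # keep only clusters supported by more than groupThreshold rectangles
    grouped, weights = cv2.groupRectangles(rects, groupThreshold=2, eps=0.2)
    print(grouped, weights)
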
Example no. 5
    def decode(self, loc_preds, cls_preds, input_size):
        '''Decode outputs back to bounding box locations and class labels.

        Args:
          loc_preds: (tensor) predicted locations, sized [#anchors, 4].
          cls_preds: (tensor) predicted class labels, sized [#anchors, #classes].
          input_size: (int/tuple) model input size of (w,h).

        Returns:
          boxes: (tensor) decoded box locations, sized [#obj,4].
          labels: (tensor) class labels for each box, sized [#obj,].
        '''
#         CLS_THRESH = 0.08
#         NMS_THRESH = 0.5
        NMS_THRESH = 0.2
        N_BBOXES = 200

        input_size = torch.Tensor([input_size,input_size]) if isinstance(input_size, int) \
                     else torch.Tensor(input_size)
        anchor_boxes = self._get_anchor_boxes(input_size).cuda()
#         anchor_boxes = self._get_anchor_boxes(input_size)

        loc_xy = loc_preds[:,:2]
        loc_wh = loc_preds[:,2:]

        xy = loc_xy * anchor_boxes[:,2:] + anchor_boxes[:,:2]
        wh = loc_wh.exp() * anchor_boxes[:,2:]
        boxes = torch.cat([xy-wh/2, xy+wh/2], 1)  # [#anchors,4] (x1, y1, x2, y2)

        score, labels = cls_preds.sigmoid().max(1)          # [#anchors,]
#         ids = score > CLS_THRESH
#         ids = ids.nonzero().squeeze()             # [#obj,]
        
        # keep only the N_BBOXES highest-scoring boxes
        numpy_score = score.cpu().numpy().astype(np.float64)
        rank_ids = np.argsort(numpy_score)[::-1]

        # slicing is safe even when fewer than N_BBOXES candidates exist
        choose_ids = rank_ids[:N_BBOXES].astype(np.int64)
        ids = torch.from_numpy(choose_ids).cuda()

        keep = box_nms(boxes[ids], score[ids], threshold=NMS_THRESH)
    
        return boxes[ids][keep], labels[ids][keep]
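
The numpy round trip in this variant (cpu -> argsort -> from_numpy -> cuda)
can stay on the GPU. A short torch.topk sketch of the same top-N selection,
with a stand-in score tensor:

    import torch

    score = torch.rand(1000)            # stand-in for the decoded scores
    N_BBOXES = 200
    k = min(N_BBOXES, score.numel())
    top_scores, ids = score.topk(k)     # sorted in descending order
    # boxes[ids] and top_scores can then feed box_nms exactly as above
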
Example no. 6
    def decode(self, loc_preds, cls_preds, input_size):
        '''Decode outputs back to bounding box locations and class labels.

        Args:
          loc_preds: (tensor) predicted locations, sized [#anchors, 4].
          cls_preds: (tensor) predicted class labels, sized [#anchors, #classes].
          input_size: (int/tuple) model input size of (w,h).

        Returns:
          boxes: (tensor) decoded box locations, sized [#obj,4].
          labels: (tensor) class labels for each box, sized [#obj,].
        '''
        CLS_THRESH = config.cls_threshold
        NMS_THRESH = config.nms_threshold

        input_size = torch.Tensor([input_size,input_size]) if isinstance(input_size, int) \
                     else torch.Tensor(input_size)
        anchor_boxes = self._get_anchor_boxes(input_size)

        loc_xy = loc_preds[:, :2]
        loc_wh = loc_preds[:, 2:]

        xy = loc_xy * anchor_boxes[:, 2:] + anchor_boxes[:, :2]
        wh = loc_wh.exp() * anchor_boxes[:, 2:]
        boxes = torch.cat([xy - wh / 2, xy + wh / 2], 1)  # [#anchors,4]
        """
        cl = cls_preds.sigmoid()
        idd = cl[:, 1] > 0.7
        sum = idd.sum()
        ids = idd == 1
        idss = ids.nonzero().squeeze()
        score, labels = cls_preds.sigmoid().max(1)
        ids = score > CLS_THRESH
        ids = ids.nonzero().squeeze() 
        sum = labels.sum()
        ids = labels == 1
        ids = ids.nonzero().squeeze()             # [#obj,]
        """
        cl = cls_preds.sigmoid()
        ids = cl[:, 1] > CLS_THRESH
        ids = ids.nonzero().squeeze()
        if ids.dim() == 0:
            return None
        pre_score = cl[ids, 1]
        pre_boxes = boxes[ids]

        keep = box_nms(pre_boxes, pre_score, threshold=NMS_THRESH)
        return boxes[ids][keep]  #,labels[ids][keep]
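
The ids.dim() == 0 guard above exists because nonzero().squeeze() collapses
a single match to a 0-dim tensor. A tiny demonstration, plus the squeeze(1)
form that sidesteps the problem:

    import torch

    mask = torch.tensor([False, True, False])
    ids = mask.nonzero().squeeze()
    print(ids.dim())              # 0: a scalar index, not a length-1 vector
    ids = mask.nonzero().squeeze(1)
    print(ids.shape)              # torch.Size([1]): always 1-D, safe to index
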
Example no. 7
    def decode(self, outputs, input_size):
        '''Transform predicted loc/conf back to real bbox locations and class labels.

        Args:
          outputs: (tensor) model outputs, sized [1,125,13,13].
          input_size: (int) model input size.

        Returns:
          boxes: (tensor) bbox locations, sized [#obj, 4].
          labels: (tensor) class labels, sized [#obj,1].
        '''
        fmsize = outputs.size(2)
        outputs = outputs.view(5, 25, fmsize, fmsize)

        loc_xy = outputs[:, :2, :, :]  # [5,2,13,13]
        grid_xy = meshgrid(fmsize,
                           swap_dims=True).view(fmsize, fmsize,
                                                2).permute(2, 0,
                                                           1)  # [2,13,13]
        box_xy = loc_xy.sigmoid() + grid_xy.expand_as(loc_xy)  # [5,2,13,13]

        loc_wh = outputs[:, 2:4, :, :]  # [5,2,13,13]
        anchor_wh = torch.Tensor(self.anchors).view(5, 2, 1, 1).expand_as(
            loc_wh)  # [5,2,13,13]
        box_wh = anchor_wh * loc_wh.exp()  # [5,2,13,13]

        boxes = torch.cat([box_xy - box_wh / 2, box_xy + box_wh / 2],
                          1)  # [5,4,13,13]
        boxes = boxes.permute(0, 2, 3, 1).contiguous().view(-1, 4)  # [845,4]

        iou_preds = outputs[:, 4, :, :].sigmoid()  # [5,13,13]
        cls_preds = outputs[:, 5:, :, :]  # [5,20,13,13]
        cls_preds = cls_preds.permute(0, 2, 3, 1).contiguous().view(-1, 20)
        cls_preds = softmax(cls_preds)  # [5*13*13,20]

        score = cls_preds * iou_preds.view(-1).unsqueeze(1).expand_as(
            cls_preds)  # [5*13*13,20]
        score = score.max(1)[0].view(-1)  # [5*13*13,]
        print(iou_preds.max())
        print(cls_preds.max())
        print(score.max())

        ids = (score > 0.5).nonzero().squeeze()
        keep = box_nms(boxes[ids], score[ids])
        return boxes[ids][keep] / fmsize
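
The meshgrid helper used above is not shown in this listing. A hypothetical
reconstruction consistent with how its result is consumed (viewed to
[fmsize, fmsize, 2] and added to the sigmoid of the xy offsets):

    import torch

    def meshgrid_sketch(size, swap_dims=False):
        # hypothetical: the cell coordinates of a size x size grid as a
        # [size*size, 2] float tensor, in (x, y) order when swap_dims=True
        y, x = torch.meshgrid(torch.arange(size), torch.arange(size),
                              indexing='ij')
        grid = torch.stack([x, y] if swap_dims else [y, x], dim=-1)
        return grid.reshape(-1, 2).float()
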
Example no. 8
    def decode(self, loc_preds, cls_preds, input_size):
        '''Decode outputs back to bounding box locations and class labels.

        Args:
          loc_preds: (tensor) predicted locations, sized [#anchors, 4].
          cls_preds: (tensor) predicted class labels, sized [#anchors, #classes].
          input_size: (int/tuple) model input size of (w,h).

        Returns:
          boxes: (tensor) decoded box locations, sized [#obj,4].
          labels: (tensor) class labels for each box, sized [#obj,].
        '''
        CLS_THRESH = 0.3
        NMS_THRESH = 0.4

        input_size = torch.Tensor([input_size,input_size]) if isinstance(input_size, int) \
                     else torch.Tensor(input_size)
        anchor_boxes = self._get_anchor_boxes(input_size)
        
        anchor_boxes = anchor_boxes.cuda()
        loc_xy = loc_preds[:,:,:2]
        loc_wh = loc_preds[:,:,2:]

        xy = loc_xy * anchor_boxes[:,2:] + anchor_boxes[:,:2]
        wh = loc_wh.exp() * anchor_boxes[:,2:]
        boxes = torch.cat([xy-wh/2, xy+wh/2], -1)  # [#anchors,4]
        
        try:
            score, labels = cls_preds.sigmoid().max(-1)
        except Exception:
            score, labels = cls_preds.unsqueeze(1).sigmoid().max(-1)
        # [#anchors,]
        ids = score > CLS_THRESH
        # ids = ids.nonzero()            # [#obj,]
        #print(len(boxes[ids]))
        _t = {'im_detect': Timer()}
        _t['im_detect'].tic()
        keep = box_nms(boxes[ids], score[ids], threshold=NMS_THRESH)
        print(_t['im_detect'].toc()*1000)
        return boxes[ids][keep], labels[ids][keep],score[ids][keep]
Example no. 9
    def decode(self, loc_preds, cls_preds, center_preds, input_size,
               cls_threshold, nms_threshold):
        '''Decode outputs back to bounding box locations and class labels.
        Args:
          loc_preds: (tensor) predicted locations, sized [#batch, #samples, 4].
          cls_preds: (tensor) predicted class labels, sized [#batch, #samples , #classes].
          center_preds: (tensor) predicted centerness, sized [#batch, #samples, 1].
          input_size: (int/tuple) model input size of (h, w).

        Returns:
          boxes: (tensor) decoded box locations, sized [#obj,4].
          labels: (tensor) class labels for each box, sized [#obj,].
        '''
        scores, labels = cls_preds.sigmoid().max(1)
        pos_ind = scores > cls_threshold
        if pos_ind.to(dtype=torch.int8).nonzero().numel() == 0:
            return [], [], []

        center_preds = center_preds.sigmoid()
        scores = scores[:, None] * center_preds
        # scores = scores[:, None]

        # locations = (#batch, #samples, 2(x-y coordinate))
        locations = self._get_pixel_locations(input_size, loc_preds.device)

        boxes = loc_preds[pos_ind]
        locations = locations[pos_ind]
        scores = scores[pos_ind]
        labels = labels[pos_ind]

        boxes[:, 0] = locations[:, 0] - boxes[:, 0]
        boxes[:, 1] = locations[:, 1] - boxes[:, 1]
        boxes[:, 2] = locations[:, 0] + boxes[:, 2]
        boxes[:, 3] = locations[:, 1] + boxes[:, 3]

        # nms mode = 0: soft-nms(linear), 1: soft-nms(gaussian), 2: hard-nms
        keep = box_nms(boxes, scores, nms_threshold=nms_threshold, mode=2)

        return boxes[keep], scores[keep], labels[keep]
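
This variant decodes FCOS-style distances rather than anchor offsets: each
positive pixel location (cx, cy) predicts (l, t, r, b) distances to the four
box sides. A toy numeric check of that mapping:

    import torch

    loc = torch.tensor([50., 60.])             # pixel location (x, y)
    ltrb = torch.tensor([10., 20., 30., 40.])  # predicted side distances
    box = torch.tensor([loc[0] - ltrb[0], loc[1] - ltrb[1],
                        loc[0] + ltrb[2], loc[1] + ltrb[3]])
    assert box.tolist() == [40., 40., 80., 100.]
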
Example no. 10
    def decode(self, loc_preds, cls_preds, input_size):
        CLS_THRESH = 0.05
        NMS_THRESH = 0.3

        if isinstance(input_size, int):
            input_size = torch.Tensor([input_size, input_size])
        else:
            input_size = torch.Tensor(input_size)

        anchor_boxes = self.get_anchor_boxes(input_size)
        loc_xy = loc_preds[:, :2]
        loc_wh = loc_preds[:, 2:]
        xy = loc_xy * anchor_boxes[:, 2:] + anchor_boxes[:, :2]
        wh = loc_wh.exp() * anchor_boxes[:, 2:]
        boxes = torch.cat([xy, wh], 1)
        boxes = change_box_order(boxes, 'xywh2xyxy')

        score, labels = cls_preds.max(1)
        ids = (score > CLS_THRESH) & (labels > 0)
        ids = ids.nonzero().squeeze()
        keep = box_nms(boxes[ids], score[ids], threshold=NMS_THRESH)
        return boxes[ids][keep], labels[ids][keep]
Example no. 11
    def decode(self, loc_preds, cls_preds, input_size):
        '''Decode outputs back to bounding box locations and class labels.

        Args:
          loc_preds: (tensor) predicted locations, sized [#anchors, 4].
          cls_preds: (tensor) predicted class labels, sized [#anchors, #classes].
          input_size: (int/tuple) model input size of (input_height, input_width).

        Returns:
          boxes: (tensor) decoded box locations, sized [#obj,4].
          labels: (tensor) class labels for each box, sized [#obj,].
        '''
        CLS_THRESH = 0.05
        NMS_THRESH = 0.5
        scale_factor = torch.Tensor([10,10,5,5])  # scale [tx,ty,tw,th]

        input_size = torch.Tensor([input_size,input_size]) if isinstance(input_size, int) \
                     else torch.Tensor(input_size)
        anchor_boxes = self._get_anchor_boxes(input_size)

        loc_preds /= scale_factor
        loc_xy = loc_preds[:,:2]
        loc_wh = loc_preds[:,2:]
        xy = loc_xy * anchor_boxes[:,2:] + anchor_boxes[:,:2]
        wh = loc_wh.exp() * anchor_boxes[:,2:]
        boxes = torch.cat([xy-wh/2, xy+wh/2], 1)  # [#anchors,4]
        boxes[:,0].clamp_(min=0)
        boxes[:,1].clamp_(min=0)
        boxes[:,2].clamp_(max=input_size[1])
        boxes[:,3].clamp_(max=input_size[0])

        score, labels = cls_preds.max(1)          # [#anchors,]
        ids = (score > CLS_THRESH) & (labels > 0)
        ids = ids.nonzero().squeeze()             # [#obj,]
        keep = box_nms(boxes[ids], score[ids], threshold=NMS_THRESH)
        return boxes[ids][keep], labels[ids][keep]
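
The [10,10,5,5] scale_factor is the usual SSD-style variance scaling, so the
encoder must multiply the raw offsets by the same constants this decoder
divides by. A round-trip sketch under that assumption:

    import torch

    scale = torch.tensor([10., 10., 5., 5.])
    anchor = torch.tensor([100., 100., 40., 20.])  # (cx, cy, w, h)
    gt = torch.tensor([110., 95., 50., 24.])       # target (cx, cy, w, h)
    # encode: scaled center offsets and log size ratios
    t = torch.cat([(gt[:2] - anchor[:2]) / anchor[2:],
                   (gt[2:] / anchor[2:]).log()]) * scale
    # decode, mirroring the method above
    d = t / scale
    xy = d[:2] * anchor[2:] + anchor[:2]
    wh = d[2:].exp() * anchor[2:]
    assert torch.allclose(torch.cat([xy, wh]), gt)
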
Example no. 12
            num_batch = loc_preds.shape[0]

            for iter_batch in range(num_batch):
                torch.cuda.synchronize()
                timer_post.tic()
                boxes, labels, scores = dataset.data_encoder.decode(
                    loc_preds=loc_preds[iter_batch],
                    cls_preds=cls_preds[iter_batch],
                    input_size=(img_size[1], img_size[0]),
                    cls_threshold=cls_th,
                    top_k=top_k)

                if len(boxes) > 0:
                    # nms mode = 0: soft-nms(linear), 1: soft-nms(gaussian), 2: hard-nms
                    keep = utils.box_nms(boxes,
                                         scores,
                                         nms_threshold=nms_th,
                                         mode=2)
                    boxes = boxes[keep]
                    scores = scores[keep]
                    labels = labels[keep]

                torch.cuda.synchronize()
                timer_post.toc()

                utils._write_results(result_dir, paths[iter_batch], boxes,
                                     scores, labels, dataset.class_idx_map,
                                     img_size, bbox_colormap)
    print()
    print(f'device: {device}')
    print(
        f'mean. elapsed time(inference): {timer_infer.average_time * 1000.:.4f}')
    def detect_objects(self,
                       in_img,
                       scale_factor=1.1,
                       min_neighbors=3,
                       min_size=(30, 30),
                       max_size=()):
        """Detect objects using the LBP cascade classifier present in the given grayscale image.
        This has similar functionality as that of cv2.detectMultiScale() method
        """
        v_stride = 1
        h_stride = 1
        objs = []
        # convert to gray scale if the image is color
        if (len(in_img.shape) == 3):
            gray_img = cv2.cvtColor(in_img, cv2.COLOR_BGR2GRAY)
        else:
            gray_img = in_img

        org_height = gray_img.shape[0]
        org_width = gray_img.shape[1]
        cur_width = org_width
        cur_height = org_height
        win_width = self.win_width
        win_height = self.win_height

        # initial scale 1 as we process  original image
        scale = 1.0
        # downscale image and detect objects until one of the image dimension
        # becomes less  than the window size
        while (cur_width > (win_width + 1) and cur_height > (win_height + 1)):
            # max possible window top left corner positions.
            x_max = cur_width - win_width + 1
            y_max = cur_height - win_height + 1
            # compute integral image
            ii_img = cv2.integral(gray_img)
            print('current scale = {:f}'.format(scale))
            for row in range(0, y_max, v_stride):
                for col in range(0, x_max, h_stride):
                    # detect if the current window contains any objects
                    win_pass = self._evaluate_window(col, row, ii_img)
                    # record the window if it passes
                    if (win_pass):
                        objs.append(
                            tuple([
                                int(col * scale),
                                int(row * scale),
                                int(scale * win_width),
                                int(scale * win_height)
                            ]))

            # down scale the image
            cur_width = int(cur_width / scale_factor)
            cur_height = int(cur_height / scale_factor)
            scale *= scale_factor
            gray_img = cv2.resize(gray_img,
                                  dsize=(cur_width, cur_height),
                                  interpolation=cv2.INTER_LINEAR)
            # perform new detections on the rescaled image.

        print('No of boxes before NMS = {:d}'.format(len(objs)))
        # perform NMS
        objs = box_nms(objs, 0.2)
        print('No of boxes after NMS = {:d}'.format(len(objs)))
        return objs
def iter_scan(scan,
              scan_array,
              patient_df,
              net,
              cube_size=64,
              stride=50,
              iou=0.01):
    scan_df = pd.DataFrame(columns=["scan_id", "z", "y", "x", "iou"])
    start_time = time.time()
    gt_boxes, gt_labels = annotation(patient_df)
    #print(gt_boxes, gt_labels)
    ais_gt_boxes, mia_gt_boxes = split_class(gt_boxes, gt_labels)
    #print(ais_gt_boxes, mia_gt_boxes)
    ais_locs = torch.FloatTensor(1, 6)
    ais_probs = torch.FloatTensor(1)

    mia_locs = torch.FloatTensor(1, 6)
    mia_probs = torch.FloatTensor(1)

    for z in range(0, scan_array.shape[0], stride):
        for y in range(0, scan_array.shape[1], stride):
            for x in range(0, scan_array.shape[2], stride):
                start_coord = torch.FloatTensor([z, y, x])
                end_coord = start_coord + torch.FloatTensor(
                    [cube_size, cube_size, cube_size])
                zmax = min(z + cube_size, scan_array.shape[0])
                ymax = min(y + cube_size, scan_array.shape[1])
                xmax = min(x + cube_size, scan_array.shape[2])
                cube_sample = np.zeros((cube_size, cube_size, cube_size),
                                       dtype=np.float32)
                cube_sample[:(zmax - z), :(ymax - y), :(xmax - x)] = \
                    scan_array[z:zmax, y:ymax, x:xmax]
                cube_sample = np.expand_dims(cube_sample, 0)
                cube_sample = np.expand_dims(cube_sample, 0)
                input_cube = Variable(torch.from_numpy(cube_sample).cuda())
                locs, clss = net(input_cube)
                locs = locs.data.cpu().squeeze()
                clss = clss.data.cpu().squeeze()
                ais_boxes, ais_scores, ais_labels, mia_boxes, mia_scores, mia_labels = DataEncoder(
                ).decode(locs, clss, [cube_size, cube_size, cube_size])
                if not isinstance(ais_boxes, int):
                    ais_boxes = calc_scan_coord(ais_boxes, start_coord)
                    ais_locs = torch.cat([ais_locs, ais_boxes], 0)
                    ais_probs = torch.cat([ais_probs, ais_scores], 0)

                if not isinstance(mia_boxes, int):
                    mia_boxes = calc_scan_coord(mia_boxes, start_coord)
                    mia_locs = torch.cat([mia_locs, mia_boxes], 0)
                    mia_probs = torch.cat([mia_probs, mia_scores], 0)

    end_time = time.time()
    run_time = end_time - start_time
    print(run_time)
    if not isinstance(ais_gt_boxes, int):
        ais_locs = ais_locs[1:, :]
        ais_probs = ais_probs[1:]
        ais_keep = box_nms(ais_locs, ais_probs)
        ais_locs = ais_locs[ais_keep]
        ais_probs = ais_probs[ais_keep]
        ais_count, best_ious = find_best_pred(ais_gt_boxes, ais_locs)
        ais_locs = change_box_order(ais_locs, "zyxzyx2zyxdhw")
        for i in range(ais_locs.size(0)):
            insert = {
                "scan_id": scan,
                "z": ais_locs[i, 0],
                "y": ais_locs[i, 1],
                "x": ais_locs[i, 2],
                "iou": best_ious[i]
            }
            la_df = pd.DataFrame(data=insert, index=["0"])
            scan_df = scan_df.append(la_df, ignore_index=True)

    else:
        ais_count = np.zeros(3)

    if not isinstance(mia_gt_boxes, int):
        mia_locs = mia_locs[1:, :]
        mia_probs = mia_probs[1:]
        mia_keep = box_nms(mia_locs, mia_probs)
        mia_locs = mia_locs[mia_keep]
        mia_probs = mia_probs[mia_keep]
        mia_count, best_ious = find_best_pred(mia_gt_boxes, mia_locs)
        for i in range(mia_locs.size(0)):
            insert = {
                "scan_id": scan,
                "z": mia_locs[i, 0],
                "y": mia_locs[i, 1],
                "x": mia_locs[i, 2],
                "iou": best_ious[i]
            }
            la_df = pd.DataFrame(data=insert, index=["0"])
            scan_df = scan_df.append(la_df, ignore_index=True)
    else:
        mia_count = np.zeros(3)

    return ais_count, mia_count, scan_df
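
calc_scan_coord is not shown in this listing. Given how iter_scan uses it, it
plausibly shifts boxes decoded in cube-local (z1,y1,x1,z2,y2,x2) coordinates
back into scan coordinates; a hypothetical sketch under that assumption:

    import torch

    def calc_scan_coord_sketch(boxes, start_coord):
        # hypothetical: add the cube's (z, y, x) start corner to both
        # endpoints of each [N, 6] zyxzyx box
        offset = torch.cat([start_coord, start_coord])  # [6]
        return boxes + offset
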
Example no. 15
    def decode(self, loc_preds, cls_preds, input_size):
        '''Decode outputs back to bounding box locations and class labels.

        Args:
          loc_preds: (tensor) predicted locations, sized [#anchors, 4].
          cls_preds: (tensor) predicted class labels, sized [#anchors, #classes].
          input_size: (int/tuple) model input size of (w,h).

        Returns:
          boxes: (tensor) decoded box locations, sized [#obj,4].
          labels: (tensor) class labels for each box, sized [#obj,].
        '''
        #if debug_flag:
        #    pdb.set_trace()
        CLS_THRESH = 0.3
        NMS_THRESH = 0.3

        input_size = torch.Tensor([input_size, input_size]) if isinstance(
            input_size, int) else torch.Tensor(input_size)
        anchor_boxes = self._get_anchor_boxes(input_size)

        loc_xy = loc_preds[:, :2]
        loc_wh = loc_preds[:, 2:]

        xy = loc_xy * anchor_boxes[:, 2:] + anchor_boxes[:, :2]
        wh = loc_wh.exp() * anchor_boxes[:, 2:]
        boxes = torch.cat([xy - wh / 2, xy + wh / 2], 1)  # [#anchors,4]

        score, labels = cls_preds.sigmoid().max(1)  # [#anchors,]
        ids = score > CLS_THRESH
        ids = ids.nonzero().squeeze()  # [#obj,]
        #print('\n\n*****', ids, '\n\n*****')
        if torch.numel(ids) > 1:
            keep = box_nms(boxes[ids], score[ids], threshold=NMS_THRESH)
            #pdb.set_trace()
        elif torch.numel(ids) == 1:
            #pdb.set_trace()
            keep = box_nms(boxes[ids].view(1, 4),
                           score[ids],
                           threshold=NMS_THRESH)
            if torch.numel(keep) == 1:
                return boxes[ids].view(1, 4), labels[ids], score
            return boxes[ids][keep], labels[ids], score
        elif torch.numel(ids) == 0:
            while (torch.numel(ids) == 0):
                if CLS_THRESH > 0.1:
                    CLS_THRESH -= 0.05
                else:
                    CLS_THRESH -= 0.01
                ids = score > CLS_THRESH
                ids = ids.nonzero().squeeze()
            if torch.numel(ids) > 1:
                keep = box_nms(boxes[ids], score[ids], threshold=NMS_THRESH)
            elif torch.numel(ids) == 1:
                keep = box_nms(boxes[ids].view(1, 4),
                               score[ids],
                               threshold=NMS_THRESH)
                if torch.numel(keep) == 1:
                    return boxes[ids].view(1, 4), labels[ids], score
                return boxes[ids][keep], labels[ids], score

        return boxes[ids][keep], labels[ids][keep], score
    def block_integral_object_detector(self,
                                       in_img,
                                       scale_factor=1.1,
                                       blk_height=60,
                                       blk_width=80,
                                       min_neighbors=3,
                                       min_size=(30, 30)):
        """This uses block integral image instead of full integral image. 
        """
        v_stride = 1
        h_stride = 1
        objs = []
        # convert to gray scale if the image is color
        if (len(in_img.shape) == 3):
            gray_img = cv2.cvtColor(in_img, cv2.COLOR_BGR2GRAY)
        else:
            gray_img = in_img

        org_height = gray_img.shape[0]
        org_width = gray_img.shape[1]
        cur_width = org_width
        cur_height = org_height
        win_width = self.win_width
        win_height = self.win_height
        blk_horz_stride = blk_width - win_width
        blk_vert_stride = blk_height - win_height

        # initial scale 1 as we process  original image
        scale = 1.0
        # downscale image and detect objects until one of the image dimension
        # becomes less  than the window size
        while (cur_width > (win_width + 1) and cur_height > (win_height + 1)):
            # max possible window top left corner positions.
            x_max = cur_width - win_width + 1
            y_max = cur_height - win_height + 1
            # extract a sliding image block and compute integral image on that.
            # detect the objects in the current block
            print('Current scale = {:f}'.format(scale))
            blk_y = 0
            while blk_y < y_max:
                blk_x = 0
                while blk_x < x_max:
                    print('Block position (y,x) = ({:d},{:d})'.format(
                        blk_y, blk_x))
                    # we cannot have full block in the edge of the image
                    max_blk_width = min(blk_width, cur_width - blk_x)
                    max_blk_height = min(blk_height, cur_height - blk_y)
                    # extract a block and
                    img_blk = gray_img[blk_y:blk_y + max_blk_height,
                                       blk_x:blk_x + max_blk_width]
                    ii_img = cv2.integral(img_blk)
                    # now use sliding window detector to find objects in the current block
                    for row in range(0, max_blk_height - win_height + 1,
                                     v_stride):
                        for col in range(0, max_blk_width - win_width + 1,
                                         h_stride):
                            # detect if the current window contains any objects
                            win_pass = self._evaluate_window(col, row, ii_img)
                            # record the window if it passes
                            if (win_pass):
                                objs.append(
                                    tuple([
                                        int((col + blk_x) * scale),
                                        int((row + blk_y) * scale),
                                        int(scale * win_width),
                                        int(scale * win_height)
                                    ]))
                    # slide the block horizontally
                    blk_x += blk_horz_stride
                # slide the block vertically
                blk_y += blk_vert_stride
            # down scale the image
            cur_width = int(cur_width / scale_factor)
            cur_height = int(cur_height / scale_factor)
            scale *= scale_factor
            gray_img = cv2.resize(gray_img,
                                  dsize=(cur_width, cur_height),
                                  interpolation=cv2.INTER_LINEAR)
            # perform new detections on the rescaled image.

        print('No of boxes before NMS = {:d}'.format(len(objs)))
        # perform NMS
        objs = box_nms(objs, 0.2)
        print('No of boxes after NMS = {:d}'.format(len(objs)))
        return objs
Example no. 17
 def decode(self,
            cls_preds,
            loc_preds,
            input_size=None,
            device=torch.device('cuda:0')):
     '''
     Convert the network outputs back into human-readable labels and boxes.
     Because cls_preds and loc_preds come straight from the network, their
     first dimension is the batch, so several images can be fed in at once.
     args:
         cls_preds: tensor, predicted label logits for every anchor, sized
             [batch, #anchors, #classes]
         loc_preds: tensor, predicted box-regression offsets for every
             anchor, sized [batch, #anchors, 4]; #anchors counts the
             anchors of all feature maps together
         input_size: int/tuple, size of the input image; may be None, in
             which case the input_size given when the YEncoder object was
             instantiated is used;
         device: the device that anchor_boxes lives on; defaults to the
             GPU and may need changing for testing or CPU-only prediction;
     returns:
         labels: list of tensors, each sized [#boxes_i, #classes], the
             per-class logits of the predicted boxes of one image;
         boxes: list of tensors, each sized [#boxes_i, 4], the locations
             (xmin, ymin, xmax, ymax) of the predicted boxes of one image
     '''
     # compute every anchor from the image size and the anchor settings
     if input_size is None:
         input_size = self.input_size
         anchor_boxes = self.anchor_boxes
         anchor_boxes = anchor_boxes.to(device)
     else:
         if len(input_size) != 2:
             raise ValueError('the TCT input_size is not 1920x1200, so it cannot be None')
         input_size = torch.tensor(input_size, dtype=torch.float)
         anchor_boxes = self._get_anchor_boxes(input_size)
         anchor_boxes = anchor_boxes.to(device)
     if cls_preds.dim() == 3:
         anchor_boxes = anchor_boxes.unsqueeze(0).expand_as(loc_preds)
     # pull out the predicted center offsets and width/height scalings
     loc_xy = loc_preds[..., :2]
     loc_wh = loc_preds[..., 2:]
     # combine the anchors with the regression outputs (every anchor is
     #   adjusted individually) to get each final predicted box location,
     #   then convert to xyxy mode
     xy = loc_xy * anchor_boxes[..., 2:] + anchor_boxes[..., :2]
     wh = loc_wh.exp() * anchor_boxes[..., 2:]
     boxes = torch.cat([xy - wh / 2, xy + wh / 2], 2)  # xyxy format
     # many predicted boxes spill outside the image; clamp them so they
     #   do not distort the IoU computation
     boxes[..., :2] = boxes[..., :2].clamp(min=0.)
     boxes[..., 2] = boxes[..., 2].clamp(max=input_size[0].item())
     boxes[..., 3] = boxes[..., 3].clamp(max=input_size[1].item())
     # sigmoid the predictions into probabilities and drop the boxes
     #   whose (maximum) score is low
     cls_preds = cls_preds.sigmoid()
     score, labels = cls_preds.max(2)
     ids = score > self.cls_thre
     # ids = ids.nonzero().squeeze()  # [#obj, ]
     result_boxes = []
     result_score = []
     for i in range(cls_preds.size(0)):
         obj_boxes, obj_score = boxes[i][ids[i]], score[i][ids[i]]
         objs_score = cls_preds[i][ids[i]]
         # run nms on the remaining boxes; what survives is the result
         keep = box_nms(obj_boxes, obj_score, threshold=self.nms_thre)
         result_boxes.append(obj_boxes[keep])
         result_score.append(objs_score[keep])
     # after nms each image ends up with a different number of predicted
     #   boxes, so they cannot all be stacked into one tensor (the box
     #   count would have to live on dim=1); a list is used instead
     # !! changed here: the scores of all classes are no longer returned,
     #   only the score of the predicted class and the predicted class
     return result_score, result_boxes
    def block_integral_object_detector(self, in_img, scale_factor=1.1, blk_height=60, blk_width=80, min_neighbors=3, min_size=(30,30)):
        """This uses block integral image instead of full integral image. 
        """
        v_stride = 1
        h_stride = 1
        objs = []
        # convert to gray scale if the image is color 
        if(len(in_img.shape) == 3):
            gray_img = cv2.cvtColor(in_img, cv2.COLOR_BGR2GRAY)
        else:
            gray_img = in_img

        org_height = gray_img.shape[0]
        org_width = gray_img.shape[1]
        cur_width = org_width
        cur_height = org_height
        win_width = self.win_width
        win_height = self.win_height
        blk_horz_stride = blk_width - win_width
        blk_vert_stride = blk_height - win_height

        # initial scale 1 as we process  original image
        scale = 1.0
        # downscale image and detect objects until one of the image dimension
        # becomes less  than the window size
        while(cur_width > (win_width+1) and cur_height > (win_height+1)):
            # max possible window top left corner positions.
            x_max = cur_width - win_width + 1
            y_max = cur_height - win_height + 1
            # extract a sliding image block and compute integral image on that.
            # detect the objects in the current block
            print('Current scale = {:f}'.format(scale))
            blk_y = 0
            while blk_y < y_max:
                blk_x = 0
                while blk_x < x_max:
                    print('Block position (y,x) = ({:d},{:d})'.format(blk_y, blk_x))
                    # we cannot have full block in the edge of the image
                    max_blk_width = min(blk_width, cur_width - blk_x)
                    max_blk_height = min(blk_height, cur_height - blk_y)
                    # extract a block and
                    img_blk = gray_img[blk_y:blk_y+max_blk_height, blk_x:blk_x+max_blk_width]
                    ii_img = cv2.integral(img_blk)
                    # now use sliding window detector to find objects in the current block
                    for row in range(0, max_blk_height-win_height+1, v_stride):
                        for col in range(0, max_blk_width-win_width+1, h_stride):
                            # detect if the current window contains any objects
                            win_pass = self._evaluate_window(col, row, ii_img)
                            # record the window if it passes
                            if(win_pass):
                                objs.append(tuple([int((col+blk_x)*scale),
                                     int((row+blk_y)*scale),
                                     int(scale*win_width),
                                     int(scale*win_height)]))
                    # slide the block horizontally
                    blk_x += blk_horz_stride
                # slide the block vertically        
                blk_y += blk_vert_stride
            # down scale the image
            cur_width = int(cur_width/scale_factor)
            cur_height = int(cur_height/scale_factor)
            scale *= scale_factor
            gray_img = cv2.resize(gray_img, dsize=(cur_width, cur_height), interpolation=cv2.INTER_LINEAR)
            # perform new detections on the rescaled image.

        print('No of boxes before NMS = {:d}'.format(len(objs)))
        # perform NMS 
        objs = box_nms(objs, 0.2)
        print('No of boxes after NMS = {:d}'.format(len(objs)))
        return objs
Example no. 19
    def decode(self, loc_preds, cls_preds, input_size):
        '''
        Decode outputs back to bounding box locations and class labels.

        Args:
          loc_preds: (tensor) predicted locations, sized [#anchors, 6]
          cls_preds: (tensor) predicted class labels, sized [#anchors, #classes]
          input_size: (int/tuple) model input size of (z, h, w)

        Returns:
          boxes: (tensor) decoded box locations, sized [#obj, 6]
          labels: (tensor) class labels for each box, sized [#obj,]
        '''
        CLS_THRESH = 0.75
        NMS_THRESH = 0.05
        if isinstance(input_size, int):
            input_size = torch.Tensor([input_size, input_size, input_size])
        else:
            input_size = torch.Tensor(input_size)
        anchor_boxes = self.get_anchor_boxes(input_size)

        loc_zyx = loc_preds[:, :3]
        loc_dhw = loc_preds[:, 3:]
        zyx = loc_zyx * anchor_boxes[:, 3:] + anchor_boxes[:, :3]
        dhw = loc_dhw.exp() * anchor_boxes[:, 3:]
        boxes = torch.cat([zyx - dhw / 2, zyx + dhw / 2], 1)  # [#anchors, 6]
        scores, labels = F.softmax(Variable(cls_preds), dim=1).data.max(
            1)  # [#anchors,] the best class for each anchor
        obj_idx = (labels > 0)
        obj_num = obj_idx.long().sum()
        if obj_num == 0:
            #print("Not found any object")
            return 0, 0, 0, 0, 0, 0
        else:
            obj_mask = obj_idx.unsqueeze(1).expand_as(boxes)
            masked_scores = scores[obj_idx]
            masked_labels = labels[obj_idx]
            #print(masked_scores, masked_labels)
            masked_boxes = boxes[obj_mask].view(-1, 6)
            ids = (masked_scores > CLS_THRESH)
            if ids.long().sum() == 0:
                return 0, 0, 0, 0, 0, 0
            else:
                box_ids = ids.unsqueeze(1).expand_as(masked_boxes)
                obj_boxes = masked_boxes[box_ids].view(-1, 6)
                obj_scores = masked_scores[ids]
                obj_labels = masked_labels[ids]

                ais_ids = (obj_labels == 1)
                if ais_ids.long().sum() == 0:
                    ais_pred_boxes = 0
                    ais_pred_scores = 0
                    ais_pred_labels = 0
                else:
                    # print(ais_ids.long().sum())
                    ais_ids = ais_ids.nonzero().squeeze()
                    #ais_masks = ais_ids.unsqueeze(1).expand_as(obj_boxes)
                    ais_labels = obj_labels[ais_ids]
                    ais_scores = obj_scores[ais_ids]
                    ais_boxes = obj_boxes[ais_ids]
                    #print(ais_boxes.size())
                    ais_keep = box_nms(ais_boxes,
                                       ais_scores,
                                       threshold=NMS_THRESH)
                    ais_pred_labels = ais_labels[ais_keep]
                    ais_pred_scores = ais_scores[ais_keep]
                    ais_pred_boxes = ais_boxes[ais_keep]

                mia_ids = (obj_labels == 2)
                if mia_ids.long().sum() == 0:
                    mia_pred_boxes = 0
                    mia_pred_scores = 0
                    mia_pred_labels = 0
                else:
                    mia_ids = mia_ids.nonzero().squeeze()
                    #mia_masks = mia_ids.unsqueeze(1).expand_as(obj_boxes)
                    mia_labels = obj_labels[mia_ids]
                    mia_scores = obj_scores[mia_ids]
                    mia_boxes = obj_boxes[mia_ids]
                    mia_keep = box_nms(mia_boxes,
                                       mia_scores,
                                       threshold=NMS_THRESH)
                    mia_pred_boxes = mia_boxes[mia_keep]
                    mia_pred_scores = mia_scores[mia_keep]
                    mia_pred_labels = mia_labels[mia_keep]
                    #keep = box_nms(masked_boxes[ids], masked_scores[ids], threshold=NMS_THRESH)
                return ais_pred_boxes, ais_pred_scores, ais_pred_labels, mia_pred_boxes, mia_pred_scores, mia_pred_labels
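
The boxes in this example are 3-D (z1,y1,x1,z2,y2,x2), so the NMS overlap is
a volume ratio rather than an area ratio. A minimal 3-D IoU sketch (an
assumption about what box_nms computes here, for illustration only):

    import torch

    def iou_3d(a, b):
        # a, b: [6] tensors in (z1, y1, x1, z2, y2, x2) order
        lo = torch.max(a[:3], b[:3])
        hi = torch.min(a[3:], b[3:])
        inter = (hi - lo).clamp(min=0).prod()
        vol_a = (a[3:] - a[:3]).prod()
        vol_b = (b[3:] - b[:3]).prod()
        return inter / (vol_a + vol_b - inter)
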
Example no. 20
    def decode(self, loc_preds, cls_preds, pad_data, input_size, ori_img_shape,
               img_idx):
        '''Decode outputs back to bounding box locations and class labels

        Args:
            loc_preds: (tensor) predicted locations, sized [#anchors, 4].
            cls_preds: (tensor) predicted class labels, sized [#anchors, #classes].
            input_size : (int/tuple) model input size of (w,h)

        Returns:
            boxes: (tensor) decoded box locations, sized [#obj, 4]
            labels: (tensor) class labels for each box, sized [#obj,]
        '''
        CONF_THRES = 0.05
        NMS_THRES = 0.5
        # pdb.set_trace()
        input_size = torch.Tensor([input_size[2], input_size[3]])
        pad_data = torch.Tensor([pad_data[3], pad_data[2]])
        anchor_boxes = self._get_anchor_boxes(pad_data)
        boxes_preds_obj = []
        score_obj = []
        labels_obj = []
        obj_idx = []

        for p in range(len(anchor_boxes)):
            loc_preds[p][img_idx][:, :2] *= 0.4
            loc_preds[p][img_idx][:, 2:] *= 0.8
            loc_xy_preds = loc_preds[p][img_idx][:, :2]
            loc_wh_preds = loc_preds[p][img_idx][:, 2:]

            xy_preds = loc_xy_preds * anchor_boxes[p][:, 2:].cuda() +\
                       anchor_boxes[p][:, :2].cuda()
            wh_preds = torch.exp(loc_wh_preds) * anchor_boxes[p][:, 2:].cuda()
            x1y1_preds = xy_preds - wh_preds / 2
            x1y1_preds_ori = torch.zeros(x1y1_preds.shape)
            x1y1_preds_ori[:,0] = x1y1_preds[:,0] * torch.Tensor([ori_img_shape[2]]).\
                             cuda() / torch.Tensor([input_size[0]]).cuda()
            x1y1_preds_ori[:,1] = x1y1_preds[:,1] * torch.Tensor([ori_img_shape[1]]).\
                             cuda() / torch.Tensor([input_size[1]]).cuda()
            x2y2_preds = xy_preds + wh_preds / 2
            x2y2_preds_ori = torch.zeros(x2y2_preds.shape)
            x2y2_preds_ori[:,0] = x2y2_preds[:,0] * torch.Tensor([ori_img_shape[2]]).\
                             cuda() / torch.Tensor([input_size[0]]).cuda()
            x2y2_preds_ori[:,1] = x2y2_preds[:,1] * torch.Tensor([ori_img_shape[1]]).\
                             cuda() / torch.Tensor([input_size[1]]).cuda()
            boxes_preds = torch.cat([x1y1_preds_ori, x2y2_preds_ori], 1)

            score, labels = cls_preds[p][img_idx].sigmoid().max(1)
            if self.loss_fn == 'sigmoid':
                obj_idx_p = score > CONF_THRES
            elif self.loss_fn == 'softmax':
                obj_idx_p = torch.mul(score > CONF_THRES, labels > 0)
            # if boxes_preds[obj_idx_p].shape[0] > 1000:
            #     boxes_preds_obj.append(boxes_preds[obj_idx_p][:1000])
            #     score_obj.append(score[obj_idx_p][:1000])
            #     labels_obj.append(labels[obj_idx_p][:1000])
            # else:
            boxes_preds_obj.append(boxes_preds[obj_idx_p])
            score_obj.append(score[obj_idx_p])
            labels_obj.append(labels[obj_idx_p])
            obj_idx.append(obj_idx_p)

        boxes_preds_all = torch.cat(boxes_preds_obj, 0)
        score_all = torch.cat(score_obj, 0)
        labels_all = torch.cat(labels_obj, 0)
        obj_idx_all = torch.cat(obj_idx, 0)
        if obj_idx_all.nonzero().shape[0] != 0:
            nms_boxes = box_nms(boxes_preds_all,
                                score_all,
                                threshold=NMS_THRES)
            return boxes_preds_all[nms_boxes], labels_all[
                nms_boxes], score_all[nms_boxes]
        else:
            return torch.tensor([]), torch.tensor([]), torch.tensor([])
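
Unlike the other variants, this decode also maps boxes from the padded
network-input coordinates back onto the original image by rescaling x and y
independently. A toy check of that step:

    import torch

    in_w, in_h, ori_w, ori_h = 640., 640., 1280., 960.
    x1y1 = torch.tensor([100., 200.])   # corner in network-input coords
    x1y1_ori = x1y1 * torch.tensor([ori_w / in_w, ori_h / in_h])
    assert x1y1_ori.tolist() == [200., 300.]
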
    def scaled_window_object_detector(self,
                                      in_img,
                                      scale_factor=1.1,
                                      min_neighbors=3,
                                      min_size=(30, 30)):
        """This object detector is based on scaled detector window. It scales the detector window instead of
        scaling image. Dectector window pyramid is constructed instead of image pyramid
        """
        v_stride = 1
        h_stride = 1
        objs = []
        # convert to gray scale if the image is color
        if (len(in_img.shape) == 3):
            gray_img = cv2.cvtColor(in_img, cv2.COLOR_BGR2GRAY)
        else:
            gray_img = in_img

        img_height = gray_img.shape[0]
        img_width = gray_img.shape[1]
        cur_win_width = self.win_width
        cur_win_height = self.win_height

        # compute integral image. just one time process
        ii_img = cv2.integral(gray_img)
        print(ii_img.dtype)
        # initial scale 1 . ie. original detector size is used
        scale = 1.0

        # upscale the detector window and detect objects until window_size becomes more than one
        # of the image dimension
        while (cur_win_width < img_width and cur_win_height < img_height):
            # max possible window top left corner positions.
            x_max = img_width - cur_win_width + 1
            y_max = img_height - cur_win_height + 1
            print('current scale = {:f}'.format(scale))
            print('Detector height = {:d}, Detector width = {:d}'.format(
                cur_win_height, cur_win_width))
            for row in range(0, y_max, v_stride):
                for col in range(0, x_max, h_stride):
                    # print(row, col)
                    # detect if the current window contains any objects
                    win_pass = self._evaluate_window_scaled(
                        col, row, scale, ii_img)
                    # record the window if it passes
                    if (win_pass):
                        objs.append(
                            tuple([
                                int(col),
                                int(row),
                                int(cur_win_width),
                                int(cur_win_height)
                            ]))

            # upscale the detector window
            scale *= scale_factor
            cur_win_width = int(self.win_width * scale)
            cur_win_height = int(self.win_height * scale)
            # perform new detections on the rescaled image.

        print('No of boxes before NMS = {:d}'.format(len(objs)))
        # perform NMS
        objs = box_nms(objs, 0.2)
        print('No of boxes after NMS = {:d}'.format(len(objs)))
        return objs