Example 1
    def encode(self, boxes, labels):
        '''Encode target bounding boxes and class labels.
        SSD coding rules:
          tx = (x - anchor_x) / anchor_w
          ty = (y - anchor_y) / anchor_h
          tw = log(w / anchor_w)
          th = log(h / anchor_h)
        Args:
          boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj,4].
          labels: (tensor) object class labels, sized [#obj,].
        Returns:
          loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
          cls_targets: (tensor) encoded class labels, sized [#anchors,].
        '''

        anchor_boxes = self.anchor_boxes
        ious = bbox_iou(anchor_boxes, boxes)
        max_ious, max_ids = ious.max(1)
        boxes = boxes[max_ids]

        boxes = change_bbox_order(boxes, 'xyxy2xywh')
        anchor_boxes = change_bbox_order(anchor_boxes, 'xyxy2xywh')

        loc_xy = (boxes[:, :2] - anchor_boxes[:, :2]) / anchor_boxes[:, 2:]
        loc_wh = torch.log(boxes[:, 2:] / anchor_boxes[:, 2:])
        loc_targets = torch.cat([loc_xy, loc_wh], 1)
        cls_targets = 1 + labels[max_ids]
        # cls_targets[max_ious<0.5] = 0
        # ignore = (max_ious>0.4) & (max_ious<0.5)  # ignore ious between [0.4,0.5]
        # cls_targets[ignore] = -1                  # mark ignored to -1
        return loc_targets, cls_targets
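
Both `bbox_iou` and `change_bbox_order` are helpers imported from elsewhere in the project; a minimal sketch of what they plausibly look like, matching the shapes used above (an assumption, not the project's actual code):

import torch

def change_bbox_order(boxes, order):
    '''Convert between (xmin,ymin,xmax,ymax) and (cx,cy,w,h) layouts.'''
    a, b = boxes[:, :2], boxes[:, 2:]
    if order == 'xyxy2xywh':
        return torch.cat([(a + b) / 2, b - a], 1)
    return torch.cat([a - b / 2, a + b / 2], 1)  # 'xywh2xyxy'

def bbox_iou(box1, box2):
    '''Pairwise IoU of two (xmin,ymin,xmax,ymax) sets: [N,4] x [M,4] -> [N,M].'''
    lt = torch.max(box1[:, None, :2], box2[:, :2])  # [N,M,2] overlap top-left
    rb = torch.min(box1[:, None, 2:], box2[:, 2:])  # [N,M,2] overlap bottom-right
    wh = (rb - lt).clamp(min=0)
    inter = wh[:, :, 0] * wh[:, :, 1]
    area1 = (box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1])
    area2 = (box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1])
    return inter / (area1[:, None] + area2 - inter)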
Example 2
 def tracker2object(self, boxes: List[OBSTACLE], th=0.5):  # List imported from typing
     n_b = len(boxes)
     n_o = len(self.obstacles)
     iou_mat = np.zeros((n_o, n_b))
     for i in range(n_o):
         for j in range(n_b):
             iou_mat[i, j] = bbox_iou(self.obstacles[i], boxes[j])
     count = min(n_b, n_o)
     used = []
     idmax = 0
     obstacles = []
     while count > 0:
         # pick the globally best remaining (obstacle, detection) pair
         r, k = np.unravel_index(np.argmax(iou_mat, axis=None),
                                 iou_mat.shape)
         if iou_mat[r, k] > th:
             used.append(k)
             obstacle = self.obstacles[r]
             box = boxes[k]
             if idmax < obstacle._id:
                 idmax = obstacle._id
             obstacle.update_box(box)
             obstacles.append(obstacle)
         # invalidate the matched row/column so neither can be picked again
         iou_mat[r, :] = -99
         iou_mat[:, k] = -99
         count = count - 1
     idx = range(n_b)
     idx = [elem for elem in idx if elem not in used]
     self.obstacles = obstacles
     for i, c in enumerate(idx):
         # dst  =  self.calculate_position(boxes[c])
         obstacle = OBSTACLE(boxes[c], i + idmax + 1)
         self.obstacles.append(obstacle)
     return
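
The `while` loop above is greedy assignment: repeatedly take the globally best remaining pair, then invalidate its row and column. A toy run of that idea (illustrative only):

import numpy as np

iou_mat = np.array([[0.8, 0.1],    # 3 tracked obstacles x 2 detections
                    [0.2, 0.6],
                    [0.0, 0.3]])
r, k = np.unravel_index(np.argmax(iou_mat, axis=None), iou_mat.shape)
print(r, k)            # (0, 0): the global maximum, 0.8
iou_mat[r, :] = -99    # invalidate obstacle 0 ...
iou_mat[:, k] = -99    # ... and detection 0
r, k = np.unravel_index(np.argmax(iou_mat, axis=None), iou_mat.shape)
print(r, k)            # (1, 1): next best remaining pair, 0.6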
Example 3
    def encode(self, boxes, labels):
        '''Encode target bounding boxes and class labels.
        SSD coding rules:
          tx = (x - anchor_x) / (variance[0]*anchor_w)
          ty = (y - anchor_y) / (variance[0]*anchor_h)
          tw = log(w / anchor_w) / variance[1]
          th = log(h / anchor_h) / variance[1]
        Args:
          boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj,4].
          labels: (tensor) object class labels, sized [#obj,].
        Returns:
          loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
          cls_targets: (tensor) encoded class labels, sized [#anchors,].
        '''
        def argmax(x):
            v, i = x.max(0)
            j = v.max(0)[1].item()
            return (i[j], j)

        default_boxes = self.default_boxes
        default_boxes = change_bbox_order(default_boxes, 'xywh2xyxy')

        ious = bbox_iou(default_boxes, boxes)
        index = torch.empty(len(default_boxes), dtype=torch.long).fill_(-1)
        masked_ious = ious.clone()
        while True:
            i, j = argmax(masked_ious)
            if masked_ious[i, j] < 1e-6:
                break
            index[i] = j
            masked_ious[i, :] = 0
            masked_ious[:, j] = 0

        mask = (index < 0) & (ious.max(1)[0] > 0.5)
        if mask.any():
            index[mask] = ious[mask].max(1)[1]

        boxes = boxes[index.clamp(min=0)]
        boxes = change_bbox_order(boxes, 'xyxy2xywh')
        default_boxes = change_bbox_order(default_boxes, 'xyxy2xywh')

        loc_xy = (boxes[:, :2] - default_boxes[:, :2]
                  ) / default_boxes[:, 2:] / self.variances[0]
        loc_wh = torch.log(
            boxes[:, 2:] / default_boxes[:, 2:]) / self.variances[1]
        loc_targets = torch.cat([loc_xy, loc_wh], dim=1)
        cls_targets = 1 + labels[index.clamp(min=0)]
        cls_targets[index < 0] = 0
        return loc_targets, cls_targets
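
A hedged sketch of the matching decode step, i.e. the inverse transform implied by the formulas above (the method is assumed, not shown in the source; `self.default_boxes` is in (cx,cy,w,h) order as stored above):

    def decode(self, loc_preds):
        '''Invert encode(): recover (xmin,ymin,xmax,ymax) from the
        variance-scaled offsets.'''
        xy = loc_preds[:, :2] * self.variances[0] \
            * self.default_boxes[:, 2:] + self.default_boxes[:, :2]
        wh = torch.exp(loc_preds[:, 2:] * self.variances[1]) \
            * self.default_boxes[:, 2:]
        return torch.cat([xy - wh / 2, xy + wh / 2], dim=1)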
Example 4
    def compare(self, data1, data2, thresh_iou):
        if data2['xmin'] <= data1['xmin'] <= data1['xmax'] <= data2['xmax'] \
           and data2['ymin'] <= data1['ymin'] <= data1['ymax'] <= data2['ymax']:
            return True
        if data1['xmin'] <= data2['xmin'] <= data2['xmax'] <= data1['xmax'] \
           and data1['ymin'] <= data2['ymin'] <= data2['ymax'] <= data1['ymax']:
            return True
        box1 = BoundBox(data1['xmin'], data1['ymin'], data1['xmax'], data1['ymax'])
        box2 = BoundBox(data2['xmin'], data2['ymin'], data2['xmax'], data2['ymax'])

        iou = bbox_iou(box1, box2)

        return iou > thresh_iou
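
A quick illustration of the two paths through `compare` (`matcher` is a hypothetical instance of the enclosing class):

    a = {'xmin': 10, 'ymin': 10, 'xmax': 50, 'ymax': 50}
    b = {'xmin': 12, 'ymin': 12, 'xmax': 48, 'ymax': 48}     # fully inside a
    c = {'xmin': 100, 'ymin': 100, 'xmax': 140, 'ymax': 140}

    matcher.compare(a, b, thresh_iou=0.5)  # True via the containment short-circuit
    matcher.compare(a, c, thresh_iou=0.5)  # False: disjoint boxes, IoU == 0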
Example 5
def random_crop(img, boxes, labels, min_scale=0.3, max_aspect_ratio=2.):
    '''
    :param img: (PIL.Image)
    :param boxes: (tensor) [N,4]
    :param labels: (tensor) [N,]
    :param min_scale: (float) minimal image width/height scale
    :param max_aspect_ratio: (float) maximum width/height aspect ratio
    :return:
        img, boxes, labels
    '''
    imw, imh = img.size
    params = [(0, 0, imw, imh)]  # candidate crop ROIs (x,y,w,h); default is the full image
    # try to sample one crop per min-IoU constraint, SSD-style
    for min_iou in (0, 0.1, 0.3, 0.5, 0.7, 0.9):
        for _ in range(100):
            scale = random.uniform(min_scale, 1)
            aspect_ratio = random.uniform(
                max(1 / max_aspect_ratio, scale * scale),
                min(max_aspect_ratio, 1 / (scale * scale)))
            w = int(imw * scale * math.sqrt(aspect_ratio))
            h = int(imh * scale / math.sqrt(aspect_ratio))

            x = random.randrange(imw - w) if imw > w else 0  # randrange(0) raises
            y = random.randrange(imh - h) if imh > h else 0

            roi = torch.FloatTensor([[x, y, x + w, y + h]])
            ious = bbox_iou(boxes, roi)
            if ious.min() >= min_iou:
                params.append((x, y, w, h))
                break

    x, y, w, h = random.choice(params)
    img = img.crop((x, y, x + w, y + h))

    center = (boxes[:, :2] + boxes[:, 2:]) / 2
    mask = (center[:, 0] >= x) & (center[:, 0] <= x + w) \
           & (center[:, 1] >= y) & (center[:, 1] <= y + h)
    if mask.any():
        boxes = boxes[mask] - torch.FloatTensor([x, y, x, y])
        boxes = bbox_clamp(boxes, 0, 0, w, h)
        labels = labels[mask]
    else:
        boxes = torch.FloatTensor([[0, 0, 0, 0]])
        labels = torch.LongTensor([0])
    return img, boxes, labels
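
Typical usage, as a minimal sketch (the blank image is just a stand-in, and `bbox_clamp` from the same module is assumed available):

from PIL import Image
import torch

img = Image.new('RGB', (640, 480))
boxes = torch.FloatTensor([[100, 100, 300, 300]])
labels = torch.LongTensor([1])
img, boxes, labels = random_crop(img, boxes, labels)
# boxes are now relative to the crop and clamped to its borders;
# objects whose center fell outside the crop are dropped.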
Example 6
def do_nms(boxes, nms_thresh):
    if len(boxes) > 0:
        nb_class = len(boxes[0].classes)
    else:
        return

    for c in range(nb_class):
        sorted_indices = np.argsort([-box.classes[c] for box in boxes])

        for i in range(len(sorted_indices)):
            index_i = sorted_indices[i]

            if boxes[index_i].classes[c] == 0: continue

            for j in range(i + 1, len(sorted_indices)):
                index_j = sorted_indices[j]

                if bbox_iou(boxes[index_i], boxes[index_j]) >= nms_thresh:
                    boxes[index_j].classes[c] = 0
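
`do_nms` suppresses in place by zeroing class scores rather than removing boxes; a typical follow-up (with a hypothetical `obj_thresh`) keeps only boxes that still carry some score:

do_nms(boxes, nms_thresh=0.45)
boxes = [box for box in boxes
         if any(score > obj_thresh for score in box.classes)]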
Example 7
    def update(self,
               pred_bboxes,
               pred_labels,
               pred_scores,
               gt_bboxes,
               gt_labels,
               gt_difficults=None):
        """Update internal buffer with latest prediction and gt pairs.

        Parameters
        ----------
        pred_bboxes : list of mxnet.NDArray or numpy.ndarray
            Prediction bounding boxes with shape `B, N, 4`.
            Where B is the size of mini-batch, N is the number of bboxes.
        pred_labels : list of mxnet.NDArray or numpy.ndarray
            Prediction bounding boxes labels with shape `B, N`.
        pred_scores : list of mxnet.NDArray or numpy.ndarray
            Prediction bounding boxes scores with shape `B, N`.
        gt_bboxes : list of mxnet.NDArray or numpy.ndarray
            Ground-truth bounding boxes with shape `B, M, 4`.
            Where B is the size of mini-batch, M is the number of ground-truths.
        gt_labels : list of mxnet.NDArray or numpy.ndarray
            Ground-truth bounding boxes labels with shape `B, M`.
        gt_difficults : list of mxnet.NDArray or numpy.ndarray, optional, default is None
            Ground-truth bounding boxes difficulty labels with shape `B, M`.

        """
        def as_numpy(a):
            """Convert a (list of) mx.NDArray into numpy.ndarray"""
            if isinstance(a, (list, tuple)):
                out = [
                    x.asnumpy() if isinstance(x, mx.nd.NDArray) else x
                    for x in a
                ]
                try:
                    out = np.concatenate(out, axis=0)
                except ValueError:
                    out = np.array(out)
                return out
            elif isinstance(a, mx.nd.NDArray):
                a = a.asnumpy()

            return a

        if gt_difficults is None:
            gt_difficults = [None for _ in as_numpy(gt_labels)]

        if isinstance(gt_labels, list):
            if len(gt_difficults) != len(gt_labels) * gt_labels[0].shape[0]:
                gt_difficults = [None] * len(gt_labels) * gt_labels[0].shape[0]

        for pred_bbox, pred_label, pred_score, gt_bbox, gt_label, gt_difficult in zip(
                *[
                    as_numpy(x) for x in [
                        pred_bboxes, pred_labels, pred_scores, gt_bboxes,
                        gt_labels, gt_difficults
                    ]
                ]):
            # strip padding -1 for pred and gt
            valid_pred = np.where(pred_label.flat >= 0)[0]
            pred_bbox = pred_bbox[valid_pred, :]
            pred_label = pred_label.flat[valid_pred].astype(int)
            pred_score = pred_score.flat[valid_pred]
            valid_gt = np.where(gt_label.flat >= 0)[0]
            gt_bbox = gt_bbox[valid_gt, :]
            gt_label = gt_label.flat[valid_gt].astype(int)
            if gt_difficult is None:
                gt_difficult = np.zeros(gt_bbox.shape[0])
            else:
                gt_difficult = gt_difficult.flat[valid_gt]

            for l in np.unique(
                    np.concatenate((pred_label, gt_label)).astype(int)):
                pred_mask_l = pred_label == l
                pred_bbox_l = pred_bbox[pred_mask_l]
                pred_score_l = pred_score[pred_mask_l]
                # sort by score
                order = pred_score_l.argsort()[::-1]
                pred_bbox_l = pred_bbox_l[order]
                pred_score_l = pred_score_l[order]

                # add by smher, 2019.11.19
                valid_pred_score = np.where(pred_score_l > self._score_thresh)
                pred_bbox_l = pred_bbox_l[valid_pred_score]
                pred_score_l = pred_score_l[valid_pred_score]

                gt_mask_l = gt_label == l
                gt_bbox_l = gt_bbox[gt_mask_l]
                gt_difficult_l = gt_difficult[gt_mask_l]

                self._n_pos[l] += np.logical_not(gt_difficult_l).sum()
                self._score[l].extend(pred_score_l)

                if len(pred_bbox_l) == 0:
                    continue
                if len(gt_bbox_l) == 0:
                    self._match[l].extend((0, ) * pred_bbox_l.shape[0])
                    continue

                # VOC evaluation follows integer typed bounding boxes.
                pred_bbox_l = pred_bbox_l.copy()
                pred_bbox_l[:, 2:] += 1
                gt_bbox_l = gt_bbox_l.copy()
                gt_bbox_l[:, 2:] += 1

                iou = bbox_iou(pred_bbox_l, gt_bbox_l)
                gt_index = iou.argmax(axis=1)
                # set -1 if there is no matching ground truth
                gt_index[iou.max(axis=1) < self.iou_thresh] = -1
                del iou

                selec = np.zeros(gt_bbox_l.shape[0], dtype=bool)
                for gt_idx in gt_index:
                    if gt_idx >= 0:
                        if gt_difficult_l[gt_idx]:
                            self._match[l].append(-1)
                        else:
                            if not selec[gt_idx]:
                                self._match[l].append(1)
                            else:
                                self._match[l].append(0)
                        selec[gt_idx] = True
                    else:
                        self._match[l].append(0)
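
`_match` accumulates 1 for a true positive, 0 for a false positive, and -1 for detections matched to difficult ground truth; together with `_score` and `_n_pos` that is enough for a per-class precision/recall curve. A rough standalone sketch of that downstream computation (not the library's own code):

import numpy as np

def pr_curve(scores, matches, n_pos):
    '''matches: 1 = true positive, 0 = false positive, -1 = ignored.'''
    order = np.argsort(scores)[::-1]   # rank detections by confidence
    m = np.asarray(matches)[order]
    m = m[m >= 0]                      # drop ignored detections
    tp = np.cumsum(m == 1)
    fp = np.cumsum(m == 0)
    recall = tp / n_pos
    precision = tp / np.maximum(tp + fp, 1)
    return precision, recall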
Example 8
    def __getitem__(self, idx):
        # get image input size, change every 10 batches
        net_h, net_w = self._get_net_size(idx)
        base_grid_h, base_grid_w = net_h//self.downsample, net_w//self.downsample

        # determine the first and the last indices of the batch
        l_bound = idx*self.batch_size
        r_bound = (idx+1)*self.batch_size

        if r_bound > len(self.instances):
            r_bound = len(self.instances)
            l_bound = r_bound - self.batch_size

        x_batch = np.zeros((r_bound - l_bound, net_h, net_w, 3))             # input images
        t_batch = np.zeros((r_bound - l_bound, 1, 1, 1,  self.max_box_per_image, 4))   # list of groundtruth boxes (allocated but unused in this variant)

        # initialize the inputs and the outputs
        yolo_1 = np.zeros((r_bound - l_bound, 1*base_grid_h,  1*base_grid_w, 3, 4+1+self.objects)) # desired network output 1
        yolo_2 = np.zeros((r_bound - l_bound, 2*base_grid_h,  2*base_grid_w, 3, 4+1+self.objects)) # desired network output 2
        yolo_3 = np.zeros((r_bound - l_bound, 4*base_grid_h,  4*base_grid_w, 3, 4+1+self.objects)) # desired network output 3
        yolos = [yolo_1, yolo_2, yolo_3]
        
        instance_count = 0
        true_box_index = 0

        # do the logic to fill in the inputs and the output
        for train_instance in self.instances[l_bound:r_bound]:
            # augment input image and fix object's position and size
            img, all_objs = self._aug_image(train_instance, net_h, net_w)
            
            for obj in all_objs:
                # find the best anchor box for this object
                max_anchor = None                
                max_index  = -1
                max_iou    = -1

                shifted_box = BoundBox(0, 0, obj['xmax']-obj['xmin'], obj['ymax']-obj['ymin'])    
                
                for i in range(len(ANC_VALS)):
                    anchor = BoundBox(0, 0, ANC_VALS[i][0], ANC_VALS[i][1])
                    iou    = bbox_iou(shifted_box, anchor)

                    if max_iou < iou:
                        max_anchor = anchor
                        max_index  = i
                        max_iou    = iou                
                
                # determine the yolo to be responsible for this bounding box
                yolo = yolos[max_index//3]
                grid_h, grid_w = yolo.shape[1:3]
                
                # determine the position of the bounding box on the grid
                center_x = .5*(obj['xmin'] + obj['xmax'])
                g_center_x = center_x / float(net_w) * grid_w # sigma(t_x) + c_x
                center_y = .5*(obj['ymin'] + obj['ymax'])
                g_center_y = center_y / float(net_h) * grid_h # sigma(t_y) + c_y
                
                # determine the sizes of the bounding box
                w = obj['xmax'] - obj['xmin']
                h = obj['ymax'] - obj['ymin']

                box = [center_x, center_y, w, h]

                # determine the index of the label
                obj_indx = self.labels.index(obj['name'])  

                # determine the location of the cell responsible for this object
                grid_x = int(np.floor(g_center_x))
                grid_y = int(np.floor(g_center_y))

                # assign ground truth x, y, w, h, confidence and class probs to y_batch
                yolo[instance_count, grid_y, grid_x, max_index%3, 0:4] = box
                yolo[instance_count, grid_y, grid_x, max_index%3, 4  ] = 1.
                yolo[instance_count, grid_y, grid_x, max_index%3, 5+obj_indx] = 1


            # assign input image to x_batch
            x_batch[instance_count] = img/255.

            # increase instance counter in the current batch
            instance_count += 1                 
                
        return x_batch, [yolo_1, yolo_2, yolo_3]
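
Note that this variant stores raw pixel (center_x, center_y, w, h) targets and orders `yolos` coarse-to-fine, unlike Examples 9 and 13 below, which store log-ratio w/h targets and reverse the list. The anchor-to-head mapping itself is plain integer division (assuming 9 anchors in ANC_VALS, three per scale):

for i in range(9):
    head, slot = i // 3, i % 3
    print(f"anchor {i} -> yolos[{head}], slot {slot}")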
Example 9
    def __getitem__(self, idx):
        net_h, net_w = self._get_net_size(idx)
        base_grid_h, base_grid_w = net_h // self.downsample, net_w // self.downsample

        l_bound = idx * self.batch_size
        r_bound = (idx + 1) * self.batch_size

        if r_bound > len(self.train_list):
            r_bound = len(self.train_list)
            l_bound = r_bound - self.batch_size

        x_batch = np.zeros((self.batch_size, net_h, net_w, 3))
        t_batch = np.zeros(
            (self.batch_size, 1, 1, 1, self.max_box_per_image, 4))

        yolo_1 = np.zeros(
            (self.batch_size, 1 * base_grid_h, 1 * base_grid_w,
             len(self.anchors) // 3, 4 + 1 + len(self.label_list)))
        yolo_2 = np.zeros(
            (self.batch_size, 2 * base_grid_h, 2 * base_grid_w,
             len(self.anchors) // 3, 4 + 1 + len(self.label_list)))
        yolo_3 = np.zeros(
            (self.batch_size, 4 * base_grid_h, 4 * base_grid_w,
             len(self.anchors) // 3, 4 + 1 + len(self.label_list)))

        yolos = [yolo_3, yolo_2, yolo_1]

        dummy_yolo_1 = np.zeros((self.batch_size, 1))
        dummy_yolo_2 = np.zeros((self.batch_size, 1))
        dummy_yolo_3 = np.zeros((self.batch_size, 1))

        true_box_index = 0

        for instance_count, train_instance in enumerate(
                self.train_list[l_bound:r_bound]):
            aug_img, aug_objs = self.augmentation(train_instance, net_h, net_w)

            for obj in aug_objs:
                max_anchor = None
                max_index = -1
                max_iou = -1

                shifted_box = BoundBox(0, 0, obj['xmax'] - obj['xmin'],
                                       obj['ymax'] - obj['ymin'])

                for i in range(len(self.anchors)):
                    anchor = self.anchors[i]
                    iou = bbox_iou(shifted_box, anchor)

                    if max_iou < iou:
                        max_anchor = anchor
                        max_index = i
                        max_iou = iou

                yolo = yolos[max_index // 3]
                grid_h, grid_w = yolo.shape[1:3]

                center_x = .5 * (obj['xmin'] + obj['xmax'])
                center_x = center_x / float(net_w) * grid_w
                center_y = .5 * (obj['ymin'] + obj['ymax'])
                center_y = center_y / float(net_h) * grid_h

                w = np.log(
                    (obj['xmax'] - obj['xmin']) / float(max_anchor.xmax))
                h = np.log(
                    (obj['ymax'] - obj['ymin']) / float(max_anchor.ymax))

                box = [center_x, center_y, w, h]

                obj_indx = self.label_list.index(obj['name'])

                grid_x = int(np.floor(center_x))
                grid_y = int(np.floor(center_y))

                yolo[instance_count, grid_y, grid_x, max_index % 3] = 0
                yolo[instance_count, grid_y, grid_x, max_index % 3, 0:4] = box
                yolo[instance_count, grid_y, grid_x, max_index % 3, 4] = 1.
                yolo[instance_count, grid_y, grid_x, max_index % 3,
                     5 + obj_indx] = 1

                true_box = [
                    center_x, center_y, obj['xmax'] - obj['xmin'],
                    obj['ymax'] - obj['ymin']
                ]
                t_batch[instance_count, 0, 0, 0, true_box_index] = true_box

                true_box_index += 1
                true_box_index = true_box_index % self.max_box_per_image

            x_batch[instance_count] = normalize(aug_img)

        return [x_batch, t_batch, yolo_1, yolo_2,
                yolo_3], [dummy_yolo_1, dummy_yolo_2, dummy_yolo_3]
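
The anchors here are `BoundBox(0, 0, anchor_w, anchor_h)`, so `.xmax`/`.ymax` stand in for anchor width/height, and `w`, `h` above are YOLOv3's regression targets. A quick sanity check of the round trip (toy numbers):

import numpy as np

anchor_w, gt_w = 90.0, 135.0
t_w = np.log(gt_w / anchor_w)                    # what the generator stores
assert np.isclose(anchor_w * np.exp(t_w), gt_w)  # truth_w = anchor_w * exp(t_w)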
Example 10
def random_crop_with_constraints(bbox,
                                 size,
                                 min_scale=0.3,
                                 max_scale=1,
                                 max_aspect_ratio=2,
                                 constraints=None,
                                 max_trial=50):
    """Crop an image randomly with bounding box constraints.

    This data augmentation is used in training of
    Single Shot Multibox Detector [#]_. More details can be found in
    data augmentation section of the original paper.
    .. [#] Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy,
       Scott Reed, Cheng-Yang Fu, Alexander C. Berg.
       SSD: Single Shot MultiBox Detector. ECCV 2016.

    Parameters
    ----------
    bbox : numpy.ndarray
        Numpy.ndarray with shape (N, 4+) where N is the number of bounding boxes.
        The second axis represents attributes of the bounding box.
        Specifically, these are :math:`(x_{min}, y_{min}, x_{max}, y_{max})`,
        we allow additional attributes other than coordinates, which stay intact
        during bounding box transformations.
    size : tuple
        Tuple of length 2 of image shape as (width, height).
    min_scale : float
        The minimum ratio between a cropped region and the original image.
        The default value is :obj:`0.3`.
    max_scale : float
        The maximum ratio between a cropped region and the original image.
        The default value is :obj:`1`.
    max_aspect_ratio : float
        The maximum aspect ratio of cropped region.
        The default value is :obj:`2`.
    constraints : iterable of tuples
        An iterable of constraints.
        Each constraint should be in :obj:`(min_iou, max_iou)` format.
        Setting :obj:`min_iou` or :obj:`max_iou` to :obj:`None` means no constraint.
        If this argument is :obj:`None`, :obj:`((0.1, None), (0.3, None),
        (0.5, None), (0.7, None), (0.9, None), (None, 1))` is used.
    max_trial : int
        Maximum number of trials for each constraint before giving up.

    Returns
    -------
    numpy.ndarray
        Cropped bounding boxes with shape :obj:`(M, 4+)` where M <= N.
    tuple
        Tuple of length 4 as (x_offset, y_offset, new_width, new_height).

    """
    # default params in paper
    if constraints is None:
        constraints = (
            (0.1, None),
            (0.3, None),
            (0.5, None),
            (0.7, None),
            (0.9, None),
            (None, 1),
        )

    w, h = size

    candidates = [(0, 0, w, h)]
    for min_iou, max_iou in constraints:
        min_iou = -np.inf if min_iou is None else min_iou
        max_iou = np.inf if max_iou is None else max_iou

        for _ in range(max_trial):
            scale = random.uniform(min_scale, max_scale)
            aspect_ratio = random.uniform(
                max(1 / max_aspect_ratio, scale * scale),
                min(max_aspect_ratio, 1 / (scale * scale)))
            crop_h = int(h * scale / np.sqrt(aspect_ratio))
            crop_w = int(w * scale * np.sqrt(aspect_ratio))

            crop_t = random.randrange(h - crop_h) if h > crop_h else 0  # randrange(0) raises
            crop_l = random.randrange(w - crop_w) if w > crop_w else 0
            crop_bb = np.array(
                (crop_l, crop_t, crop_l + crop_w, crop_t + crop_h))

            if len(bbox) == 0:
                top, bottom = crop_t, crop_t + crop_h
                left, right = crop_l, crop_l + crop_w
                return bbox, (left, top, right - left, bottom - top)

            iou = bbox_iou(bbox, crop_bb[np.newaxis])
            if min_iou <= iou.min() and iou.max() <= max_iou:
                top, bottom = crop_t, crop_t + crop_h
                left, right = crop_l, crop_l + crop_w
                candidates.append((left, top, right - left, bottom - top))
                break

    # random select one
    while candidates:
        crop = candidates.pop(np.random.randint(0, len(candidates)))
        new_bbox = bbox_crop(bbox, crop, allow_outside_center=False)
        if new_bbox.size < 1:
            continue
        new_crop = (crop[0], crop[1], crop[2], crop[3])
        return new_bbox, new_crop
    return bbox, (0, 0, w, h)
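
Example call, consistent with the signature above (toy boxes):

import numpy as np

bbox = np.array([[48., 240., 195., 371.],
                 [8., 12., 352., 498.]])
new_bbox, crop = random_crop_with_constraints(bbox, (500, 512))
# crop is (x_offset, y_offset, new_width, new_height); new_bbox holds the
# surviving boxes translated into the crop's coordinate frame.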
Example 11
    def __getitem__(self, idx):
        # get image input size, change every 10 batches
        base_grid_h, base_grid_w = self.net_h//self.downsample, self.net_w//self.downsample

        # determine the first and the last indices of the batch
        l_bound = idx*self.batch_size
        r_bound = (idx+1)*self.batch_size

        if r_bound > len(self.instances):
            r_bound = len(self.instances)
            l_bound = r_bound - self.batch_size

        x_batch = np.zeros((r_bound - l_bound, self.net_h, self.net_w, 3))             # input images

        # initialize the inputs and the outputs
        yolo_1 = np.zeros((r_bound - l_bound, 1*base_grid_h,  1*base_grid_w, 3, 4+1+self.objects)) # desired network output 1
        yolo_2 = np.zeros((r_bound - l_bound, 2*base_grid_h,  2*base_grid_w, 3, 4+1+self.objects)) # desired network output 2
        yolo_3 = np.zeros((r_bound - l_bound, 4*base_grid_h,  4*base_grid_w, 3, 4+1+self.objects)) # desired network output 3
        yolos = [yolo_1, yolo_2, yolo_3]
        
        instance_count = 0
        true_box_index = 0

        # do the logic to fill in the inputs and the output
        for train_instance in self.instances[l_bound:r_bound]:
            # augment input image and fix object's position and size
            img, all_objs = self._aug_image(train_instance, self.net_h, self.net_w)
            
            for obj in all_objs:
                # find the best anchor box for this object
                max_anchor = None                
                max_index  = -1
                max_iou    = -1

                shifted_box = BoundBox(0, 0, obj['xmax']-obj['xmin'], obj['ymax']-obj['ymin'])    
                
                for i in range(len(ANC_VALS)):
                    anchor = BoundBox(0, 0, ANC_VALS[i][0], ANC_VALS[i][1])
                    iou    = bbox_iou(shifted_box, anchor)

                    if max_iou < iou:
                        max_anchor = anchor
                        max_index  = i
                        max_iou    = iou                
                
                # determine the yolo to be responsible for this bounding box
                yolo = yolos[max_index//3]
                grid_h, grid_w = yolo.shape[1:3]
                
                # determine the position of the bounding box on the grid
                center_x = .5*(obj['xmin'] + obj['xmax'])
                g_center_x = center_x / float(self.net_w) * grid_w # sigma(t_x) + c_x
                center_y = .5*(obj['ymin'] + obj['ymax'])
                g_center_y = center_y / float(self.net_h) * grid_h # sigma(t_y) + c_y
                
                # determine the sizes of the bounding box
                w = obj['xmax'] - obj['xmin']
                h = obj['ymax'] - obj['ymin']

                box = [center_x, center_y, w, h]

                # determine the index of the label
                obj_indx = self.labels.index(obj['name'])  

                # determine the location of the cell responsible for this object
                grid_x = int(np.floor(g_center_x))
                grid_y = int(np.floor(g_center_y))

                # assign ground truth x, y, w, h, confidence and class probs to y_batch
                yolo[instance_count, grid_y, grid_x, max_index%3, 0:4] = box
                yolo[instance_count, grid_y, grid_x, max_index%3, 4  ] = 1.
                yolo[instance_count, grid_y, grid_x, max_index%3, 5+obj_indx] = 1


            # assign input image to x_batch
            x_batch[instance_count] = img/255.

            # increase instance counter in the current batch
            instance_count += 1                 
                
        return x_batch, [yolo_1, yolo_2, yolo_3]
Example 12
    def encode(self, boxes, labels, iou_threshold=0.5):
        target_list = list()
        objmask_list = list()
        noobjmask_list = list()
        for i, ori_anchors in enumerate(self.anchors):
            in_h = in_w = int(self.fm_size[i])
            # self.input_size[0] / in_w, self.input_size[1] / in_h
            w_fm_stride, h_fm_stride = self.input_size / in_w, self.input_size / in_h
            anchors = [(a_w / w_fm_stride, a_h / h_fm_stride)
                       for a_w, a_h in ori_anchors]
            num_anchors = len(anchors)
            obj_mask = torch.zeros(num_anchors, in_h, in_w)
            noobj_mask = torch.ones(num_anchors, in_h, in_w)
            tx = torch.zeros(num_anchors, in_h, in_w)
            ty = torch.zeros(num_anchors, in_h, in_w)
            tw = torch.zeros(num_anchors, in_h, in_w)
            th = torch.zeros(num_anchors, in_h, in_w)
            tconf = torch.zeros(num_anchors, in_h, in_w)
            tcls = torch.zeros(num_anchors, in_h, in_w, self.num_classes)

            for t in range(boxes.size(0)):
                # Convert the box to grid-relative center and size
                gx = (boxes[t, 0].item() + boxes[t, 2].item()) / (
                    2.0 * self.input_size) * in_w  # [0]
                gy = (boxes[t, 1].item() + boxes[t, 3].item()) / (
                    2.0 * self.input_size) * in_h  # [1]
                gw = (boxes[t, 2].item() -
                      boxes[t, 0].item()) / self.input_size * in_w  # [0]
                gh = (boxes[t, 3].item() -
                      boxes[t, 1].item()) / self.input_size * in_h  # [1]
                if gw * gh == 0 or gx >= in_w or gy >= in_h:
                    continue

                # Get grid box indices
                gi = int(gx)
                gj = int(gy)
                # Get shape of gt box
                gt_box = torch.FloatTensor([0, 0, gw, gh]).unsqueeze(0)
                # Get shape of anchor box
                anchor_shapes = torch.FloatTensor(
                    np.concatenate((np.zeros(
                        (num_anchors, 2)), np.array(anchors)), 1))
                # Calculate iou between gt and anchor shapes
                anch_ious = bbox_iou(gt_box, anchor_shapes)
                # Where the overlap is larger than threshold set mask to zero (ignore)
                noobj_mask[anch_ious[0] > iou_threshold] = 0
                # Find the best matching anchor box (as a plain int so it can
                # index both the `anchors` list and the target tensors)
                best_n = int(np.argmax(anch_ious))

                # Masks
                obj_mask[best_n, gj, gi] = 1
                # Coordinates
                tx[best_n, gj, gi] = gx - gi
                ty[best_n, gj, gi] = gy - gj
                # Width and height
                tw[best_n, gj, gi] = math.log(gw / anchors[best_n][0] + 1e-16)
                th[best_n, gj, gi] = math.log(gh / anchors[best_n][1] + 1e-16)
                # object
                tconf[best_n, gj, gi] = 1
                # One-hot encoding of label
                tcls[best_n, gj, gi, int(labels[t])] = 1

            obj_mask = obj_mask.view(-1, 1)
            noobj_mask = noobj_mask.view(-1, 1)
            tx = tx.view(-1, 1)
            ty = ty.view(-1, 1)
            tw = tw.view(-1, 1)
            th = th.view(-1, 1)
            tconf = tconf.view(-1, 1)
            tcls = tcls.view(-1, self.num_classes)
            target = torch.cat((tx, ty, tw, th, tconf, tcls), -1)
            target_list.append(target)
            objmask_list.append(obj_mask)
            noobjmask_list.append(noobj_mask)

        target = torch.cat(target_list, 0)
        obj_mask = torch.cat(objmask_list, 0)
        noobj_mask = torch.cat(noobjmask_list, 0)
        return target, torch.cat([obj_mask, noobj_mask], dim=1)
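
A sketch of how this encoder would be driven (`encoder` and its attributes `anchors`, `fm_size`, `input_size`, and `num_classes` are assumed to be set up elsewhere):

import torch

boxes = torch.FloatTensor([[100., 120., 200., 260.]])  # xyxy in input pixels
labels = torch.LongTensor([3])
target, masks = encoder.encode(boxes, labels)
# target rows cover every anchor cell across all scales,
# columns = (tx, ty, tw, th, tconf, one-hot classes);
# masks columns = (obj_mask, noobj_mask) for the loss.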
Example 13
    def __getitem__(self, idx):
        # get image input size, change every 10 batches
        # net_h, net_w are the input image height/width, randomly changed every 10 batches
        net_h, net_w = self._get_net_size(idx)
        # height/width of the 32x-downsampled feature map
        base_grid_h, base_grid_w = net_h//self.downsample, net_w//self.downsample

        # determine the first and the last indices of the batch
        l_bound = idx*self.batch_size
        r_bound = (idx+1)*self.batch_size

        # this adjustment of the last, short batch feels questionable
        if r_bound > len(self.instances):
            r_bound = len(self.instances)
            l_bound = r_bound - self.batch_size

        # prepare samples: one batch of input images
        x_batch = np.zeros((r_bound - l_bound, net_h, net_w, 3))             # input images
        # all object boxes of each image, shape=(batch, 1, 1, 1, max objects per image, 4 coords)
        t_batch = np.zeros((r_bound - l_bound, 1, 1, 1,  self.max_box_per_image, 4))   # list of groundtruth boxes

        # initialize the inputs and the outputs, for the 32x/16x/8x-downsampled output feature maps respectively
        # [batch_size, grid height, grid width, 3 anchors, 4 box coords + 1 confidence + number of classes]
        yolo_1 = np.zeros((r_bound - l_bound, 1*base_grid_h,  1*base_grid_w, len(self.anchors)//3, 4+1+len(self.labels))) # desired network output 1
        yolo_2 = np.zeros((r_bound - l_bound, 2*base_grid_h,  2*base_grid_w, len(self.anchors)//3, 4+1+len(self.labels))) # desired network output 2
        yolo_3 = np.zeros((r_bound - l_bound, 4*base_grid_h,  4*base_grid_w, len(self.anchors)//3, 4+1+len(self.labels))) # desired network output 3

        # 8x/16x/32x downsampling corresponds to the anchor priors [55,69, 75,234, 133,240,   136,129, 142,363, 203,290,   228,184, 285,359, 341,260]
        yolos = [yolo_3, yolo_2, yolo_1]

        dummy_yolo_1 = np.zeros((r_bound - l_bound, 1))
        dummy_yolo_2 = np.zeros((r_bound - l_bound, 1))
        dummy_yolo_3 = np.zeros((r_bound - l_bound, 1))

        instance_count = 0  # which image within the batch
        true_box_index = 0  # which object within the image

        # do the logic to fill in the inputs and the output
        for train_instance in self.instances[l_bound:r_bound]:
            # augment input image and fix object's position and size
            img, all_objs = self._aug_image(train_instance, net_h, net_w)
            
            for obj in all_objs:
                # find the best anchor box for this object
                max_anchor = None  # the anchor with the highest IoU
                max_index  = -1     # index of that anchor
                max_iou    = -1

                shifted_box = BoundBox(0, 
                                       0,
                                       obj['xmax']-obj['xmin'],                                                
                                       obj['ymax']-obj['ymin'])    
                
                for i in range(len(self.anchors)):
                    anchor = self.anchors[i]
                    iou    = bbox_iou(shifted_box, anchor)

                    if max_iou < iou:
                        max_anchor = anchor
                        max_index  = i
                        max_iou    = iou                
                
                # determine the yolo to be responsible for this bounding box
                # among the three scales, yolo is the feature-map tensor that owns the best-matching anchor
                yolo = yolos[max_index//3]
                grid_h, grid_w = yolo.shape[1:3]
                
                # determine the position of the bounding box on the grid
                # the box center is mapped onto the feature-map grid; its value equals the desired prediction sigma(t_x) + c_x, sigma(t_y) + c_y
                center_x = .5*(obj['xmin'] + obj['xmax'])
                center_x = center_x / float(net_w) * grid_w # desired prediction sigma(t_x) + c_x = center_x
                center_y = .5*(obj['ymin'] + obj['ymax'])
                center_y = center_y / float(net_h) * grid_h # desired prediction sigma(t_y) + c_y = center_y
                
                # determine the sizes of the bounding box
                w = np.log((obj['xmax'] - obj['xmin']) / float(max_anchor.xmax)) # t_w; note truth_w = anchor_w * exp(t_w)
                h = np.log((obj['ymax'] - obj['ymin']) / float(max_anchor.ymax)) # t_h; note truth_h = anchor_h * exp(t_h)

                box = [center_x, center_y, w, h]

                # determine the index of the label
                obj_indx = self.labels.index(obj['name'])  

                # determine the location of the cell responsible for this object
                grid_x = int(np.floor(center_x))
                grid_y = int(np.floor(center_y))

                # assign ground truth x, y, w, h, confidence and class probs to y_batch
                # max_index % 3 selects the best-matching anchor; exactly one anchor is responsible for each object
                yolo[instance_count, grid_y, grid_x, max_index%3]      = 0
                yolo[instance_count, grid_y, grid_x, max_index%3, 0:4] = box      # box coordinates
                yolo[instance_count, grid_y, grid_x, max_index%3, 4  ] = 1.       # box confidence
                yolo[instance_count, grid_y, grid_x, max_index%3, 5+obj_indx] = 1 # object class

                # assign the true box to t_batch. true_box's x/y are feature-map coordinates (e.g. on a 13x13 grid); its w/h are the object's size in the original image
                true_box = [center_x, center_y, obj['xmax'] - obj['xmin'], obj['ymax'] - obj['ymin']]
                t_batch[instance_count, 0, 0, 0, true_box_index] = true_box
                # since instance_count already separates images, resetting true_box_index to 0 per image would suffice; it accumulates across the whole batch here, and it is unclear whether that is intentional
                true_box_index += 1
                true_box_index  = true_box_index % self.max_box_per_image    

            # assign input image to x_batch
            if self.norm is not None:
                x_batch[instance_count] = self.norm(img)
            else:
                # plot image and bounding boxes for sanity check
                for obj in all_objs:
                    cv2.rectangle(img, (obj['xmin'],obj['ymin']), (obj['xmax'],obj['ymax']), (255,0,0), 3)
                    cv2.putText(img, obj['name'], 
                                (obj['xmin']+2, obj['ymin']+12), 
                                0, 1.2e-3 * img.shape[0], 
                                (0,255,0), 2)
                
                x_batch[instance_count] = img

            # increase instance counter in the current batch
            instance_count += 1                 
                
        return [x_batch, t_batch, yolo_1, yolo_2, yolo_3], [dummy_yolo_1, dummy_yolo_2, dummy_yolo_3]
Example 14
def write_results_half(prediction,
                       confidence,
                       num_classes,
                       nms=True,
                       nms_conf=0.4):
    conf_mask = (prediction[:, :, 4] > confidence).half().unsqueeze(2)
    prediction = prediction * conf_mask

    try:
        ind_nz = torch.nonzero(prediction[:, :, 4]).transpose(0,
                                                              1).contiguous()
    except Exception:
        return 0

    box_a = prediction.new(prediction.shape)
    box_a[:, :, 0] = (prediction[:, :, 0] - prediction[:, :, 2] / 2)
    box_a[:, :, 1] = (prediction[:, :, 1] - prediction[:, :, 3] / 2)
    box_a[:, :, 2] = (prediction[:, :, 0] + prediction[:, :, 2] / 2)
    box_a[:, :, 3] = (prediction[:, :, 1] + prediction[:, :, 3] / 2)
    prediction[:, :, :4] = box_a[:, :, :4]

    batch_size = prediction.size(0)

    output = prediction.new(1, prediction.size(2) + 1)
    write = False

    for ind in range(batch_size):
        # select the image from the batch
        image_pred = prediction[ind]

        # Get the class having the maximum score and the index of that class;
        # drop the num_classes softmax scores and append the max class score
        # and its class index instead
        max_conf, max_conf_score = torch.max(image_pred[:, 5:5 + num_classes],
                                             1)
        max_conf = max_conf.half().unsqueeze(1)
        max_conf_score = max_conf_score.half().unsqueeze(1)
        seq = (image_pred[:, :5], max_conf, max_conf_score)
        image_pred = torch.cat(seq, 1)

        # Get rid of the zero entries
        non_zero_ind = (torch.nonzero(image_pred[:, 4]))
        try:
            image_pred_ = image_pred[non_zero_ind.squeeze(), :]
        except Exception:
            continue

        # Get the various classes detected in the image
        img_classes = unique(image_pred_[:, -1].long()).half()

        # We will do NMS classwise
        for cls in img_classes:
            # get the detections with one particular class
            cls_mask = image_pred_ * (image_pred_[:, -1]
                                      == cls).half().unsqueeze(1)
            class_mask_ind = torch.nonzero(cls_mask[:, -2]).squeeze()

            image_pred_class = image_pred_[class_mask_ind]

            # sort the detections so that the entry with the maximum objectness
            # confidence is at the top
            conf_sort_index = torch.sort(image_pred_class[:, 4],
                                         descending=True)[1]
            image_pred_class = image_pred_class[conf_sort_index]
            idx = image_pred_class.size(0)

            # if nms has to be done
            if nms:
                # For each detection
                for i in range(idx):
                    # Get the IoUs of all boxes that come after the one we are
                    # looking at in the loop
                    try:
                        ious = bbox_iou(image_pred_class[i].unsqueeze(0),
                                        image_pred_class[i + 1:])
                    except ValueError:
                        break

                    except IndexError:
                        break

                    # Zero out all the detections that have IoU > threshold
                    iou_mask = (ious < nms_conf).half().unsqueeze(1)
                    image_pred_class[i + 1:] *= iou_mask

                    # Keep only the non-zero entries
                    non_zero_ind = torch.nonzero(
                        image_pred_class[:, 4]).squeeze()
                    image_pred_class = image_pred_class[non_zero_ind]

            # Concatenate the batch_id of the image to the detection to
            # identify which image the detection corresponds to. We use a flat
            # structure to hold ALL the detections from the batch: the batch
            # dimension is flattened and each row carries an extra batch column
            batch_ind = image_pred_class.new(image_pred_class.size(0),
                                             1).fill_(ind)
            seq = batch_ind, image_pred_class

            if not write:
                output = torch.cat(seq, 1)
                write = True
            else:
                out = torch.cat(seq, 1)
                output = torch.cat((output, out))

    return output
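
A hedged sketch of calling it on raw half-precision YOLO output (`model` and `batch` are placeholders):

prediction = model(batch.half())   # [B, num_boxes, 5 + num_classes], fp16
detections = write_results_half(prediction, confidence=0.5,
                                num_classes=80, nms=True, nms_conf=0.4)
# each row: (batch index, x1, y1, x2, y2, objectness,
#            max class score, class index)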
Example 15
    def update(self,
               pred_bboxes,
               pred_labels,
               pred_scores,
               gt_bboxes,
               gt_labels,
               gt_difficults=None):
        def as_numpy(a):
            """Convert a (list of) mx.NDArray into numpy.ndarray"""
            if isinstance(a, (list, tuple)):
                out = [
                    x.asnumpy() if isinstance(x, mx.nd.NDArray) else x
                    for x in a
                ]
                try:
                    out = np.concatenate(out, axis=0)
                except ValueError:
                    out = np.array(out)
                return out
            elif isinstance(a, mx.nd.NDArray):
                a = a.asnumpy()
            return a

        if gt_difficults is None:
            gt_difficults = [None for _ in as_numpy(gt_labels)]


        for pred_bbox, pred_label, pred_score, gt_bbox, gt_label, gt_difficult in zip(
                *[
                    as_numpy(x) for x in [
                        pred_bboxes, pred_labels, pred_scores, gt_bboxes,
                        gt_labels, gt_difficults
                    ]
                ]):
            # strip padding -1 for pred and gt
            valid_pred = np.where(pred_label.flat >= 0)[0]
            pred_bbox = pred_bbox[valid_pred, :]
            pred_label = pred_label.flat[valid_pred].astype(int)
            pred_score = pred_score.flat[valid_pred]
            valid_gt = np.where(gt_label.flat >= 0)[0]
            gt_bbox = gt_bbox[valid_gt, :]
            gt_label = gt_label.flat[valid_gt].astype(int)
            if gt_difficult is None:
                gt_difficult = np.zeros(gt_bbox.shape[0])
            else:
                gt_difficult = gt_difficult.flat[valid_gt]

            for l in np.unique(
                    np.concatenate((pred_label, gt_label)).astype(int)):
                pred_mask_l = pred_label == l
                pred_bbox_l = pred_bbox[pred_mask_l]
                pred_score_l = pred_score[pred_mask_l]
                # sort by score
                order = pred_score_l.argsort()[::-1]
                pred_bbox_l = pred_bbox_l[order]
                pred_score_l = pred_score_l[order]
                # keep only predictions whose score exceeds the score threshold
                pred_score_l_idx = np.where(
                    pred_score_l >= self.score_thresh)[0]
                pred_bbox_l = pred_bbox_l[pred_score_l_idx]
                pred_score_l = pred_score_l[pred_score_l_idx]

                gt_mask_l = gt_label == l
                gt_bbox_l = gt_bbox[gt_mask_l]
                gt_difficult_l = gt_difficult[gt_mask_l]

                self._n_pos[l] += np.logical_not(gt_difficult_l).sum()
                self._score[l].extend(pred_score_l)

                if len(pred_bbox_l) == 0:
                    continue
                pred_bbox_l = pred_bbox_l.copy()
                pred_bbox_l[:, 2:] += 1
                gt_bbox_l = gt_bbox_l.copy()
                gt_bbox_l[:, 2:] += 1
                iou = bbox_iou(pred_bbox_l, gt_bbox_l)
                for iou_thresh in self.ovp_thresh:
                    iou_thresh_key = str(iou_thresh)
                    if len(gt_bbox_l) == 0:
                        self._match[iou_thresh_key][l].extend(
                            (0, ) * pred_bbox_l.shape[0])
                        continue

                    # VOC evaluation follows integer typed bounding boxes.

                    gt_index = iou.argmax(axis=1)
                    # set -1 if there is no matching ground truth
                    gt_index[iou.max(axis=1) < iou_thresh] = -1

                    selec = np.zeros(gt_bbox_l.shape[0], dtype=bool)
                    for gt_idx in gt_index:
                        if gt_idx >= 0:
                            if gt_difficult_l[gt_idx]:
                                self._match[iou_thresh_key][l].append(-1)
                            else:
                                if not selec[gt_idx]:
                                    self._match[iou_thresh_key][l].append(1)
                                else:
                                    self._match[iou_thresh_key][l].append(0)
                            selec[gt_idx] = True
                        else:
                            self._match[iou_thresh_key][l].append(0)
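
Compared with Example 7, the match buffer here is keyed by IoU threshold, so one pass over the detections fills every operating point. A minimal sketch of reading it back (`metric` and `l` are placeholders; attribute names as used above):

for iou_thresh in metric.ovp_thresh:        # e.g. [0.5, 0.75, ...]
    flags = metric._match[str(iou_thresh)][l]
    # flags per detection of class l: 1 = TP, 0 = FP, -1 = ignored,
    # ready for the same precision/recall computation as in Example 7.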
Example 16
    def __getitem__(self, idx):
        # get image input size, change every 10 batches
        net_h, net_w = self._get_net_size(idx)
        base_grid_h, base_grid_w = net_h//self.downsample, net_w//self.downsample

        # determine the first and the last indices of the batch
        l_bound = idx*self.batch_size
        r_bound = (idx+1)*self.batch_size

        if r_bound > len(self.instances):
            r_bound = len(self.instances)
            l_bound = r_bound - self.batch_size

        x_batch = np.zeros((r_bound - l_bound, net_h, net_w, 3))             # input images
        t_batch = np.zeros((r_bound - l_bound, 1, 1, 1,  self.max_box_per_image, 4))   # list of groundtruth boxes

        # initialize the inputs and the outputs
        yolo_1 = np.zeros((r_bound - l_bound, 1*base_grid_h,  1*base_grid_w, len(self.anchors)//3, 4+1+len(self.labels))) # desired network output 1
        yolo_2 = np.zeros((r_bound - l_bound, 2*base_grid_h,  2*base_grid_w, len(self.anchors)//3, 4+1+len(self.labels))) # desired network output 2
        yolo_3 = np.zeros((r_bound - l_bound, 4*base_grid_h,  4*base_grid_w, len(self.anchors)//3, 4+1+len(self.labels))) # desired network output 3
        yolos = [yolo_3, yolo_2, yolo_1]

        dummy_yolo_1 = np.zeros((r_bound - l_bound, 1))
        dummy_yolo_2 = np.zeros((r_bound - l_bound, 1))
        dummy_yolo_3 = np.zeros((r_bound - l_bound, 1))
        
        instance_count = 0
        true_box_index = 0

        # do the logic to fill in the inputs and the output
        for train_instance in self.instances[l_bound:r_bound]:
            # augment input image and fix object's position and size
            img, all_objs = self._aug_image(train_instance, net_h, net_w)
            
            for obj in all_objs:
                # find the best anchor box for this object
                max_anchor = None                
                max_index  = -1
                max_iou    = -1

                shifted_box = BoundBox(0, 
                                       0,
                                       obj['xmax']-obj['xmin'],                                                
                                       obj['ymax']-obj['ymin'])    
                
                for i in range(len(self.anchors)):
                    anchor = self.anchors[i]
                    iou    = bbox_iou(shifted_box, anchor)

                    if max_iou < iou:
                        max_anchor = anchor
                        max_index  = i
                        max_iou    = iou                
                
                # determine the yolo to be responsible for this bounding box
                yolo = yolos[max_index//3]
                grid_h, grid_w = yolo.shape[1:3]
                
                # determine the position of the bounding box on the grid
                center_x = .5*(obj['xmin'] + obj['xmax'])
                center_x = center_x / float(net_w) * grid_w # sigma(t_x) + c_x
                center_y = .5*(obj['ymin'] + obj['ymax'])
                center_y = center_y / float(net_h) * grid_h # sigma(t_y) + c_y
                
                # determine the sizes of the bounding box
                w = np.log((obj['xmax'] - obj['xmin']) / float(max_anchor.xmax)) # t_w
                h = np.log((obj['ymax'] - obj['ymin']) / float(max_anchor.ymax)) # t_h

                box = [center_x, center_y, w, h]

                # determine the index of the label
                obj_indx = self.labels.index(obj['name'])  

                # determine the location of the cell responsible for this object
                grid_x = int(np.floor(center_x))
                grid_y = int(np.floor(center_y))

                # assign ground truth x, y, w, h, confidence and class probs to y_batch
                yolo[instance_count, grid_y, grid_x, max_index%3]      = 0
                yolo[instance_count, grid_y, grid_x, max_index%3, 0:4] = box
                yolo[instance_count, grid_y, grid_x, max_index%3, 4  ] = 1.
                yolo[instance_count, grid_y, grid_x, max_index%3, 5+obj_indx] = 1

                # assign the true box to t_batch
                true_box = [center_x, center_y, obj['xmax'] - obj['xmin'], obj['ymax'] - obj['ymin']]
                t_batch[instance_count, 0, 0, 0, true_box_index] = true_box

                true_box_index += 1
                true_box_index  = true_box_index % self.max_box_per_image    

            # assign input image to x_batch
            if self.norm is not None:
                x_batch[instance_count] = self.norm(img)
            else:
                # plot image and bounding boxes for sanity check
                for obj in all_objs:
                    cv2.rectangle(img, (obj['xmin'],obj['ymin']), (obj['xmax'],obj['ymax']), (255,0,0), 3)
                    cv2.putText(img, obj['name'], 
                                (obj['xmin']+2, obj['ymin']+12), 
                                0, 1.2e-3 * img.shape[0], 
                                (0,255,0), 2)
                
                x_batch[instance_count] = img

            # increase instance counter in the current batch
            instance_count += 1                 
                
        return [x_batch, t_batch, yolo_1, yolo_2, yolo_3], [dummy_yolo_1, dummy_yolo_2, dummy_yolo_3]