Beispiel #1
0
def get_targets(pred_boxes, pred_conf, pred_cls, targets, anchors, num_anchors,
                num_classes, in_h, ignore_thres, img_dim):
    """Build grid-shaped training targets and count correct predictions.

    Every non-zero row ``targets[b, t]`` is read as ``(label, x, y, w, h)``.
    The four box values are multiplied by ``in_h`` to express them in grid
    units (so they are presumably normalised to [0, 1] -- confirm with the
    caller). Each ground-truth box is assigned to the grid cell holding its
    centre and to the anchor whose shape overlaps it most.

    Args:
        pred_boxes: predicted boxes, indexed ``[batch, anchor, row, col]``.
        pred_conf: predicted objectness, indexed the same way.
        pred_cls: predicted class scores, indexed the same way.
        targets: ground-truth tensor indexed ``[batch, box, 5]``.
        anchors: sequence of ``(w, h)`` anchor shapes, ``num_anchors`` long.
        num_anchors: number of anchors.
        num_classes: number of classes (size of the one-hot class target).
        in_h: side length of the square grid.
        ignore_thres: anchor IoU above which ``conf_mask`` is zeroed.
        img_dim: not used by this function.

    Returns:
        ``(counter, correct, mask, conf_mask, tx, ty, tw, th, tconf, tcls)``
        where ``counter`` is the number of non-empty ground-truth rows and
        ``correct`` the number whose matched prediction has IoU > 0.5, the
        right class and a confidence > 0.5.
    """
    batch_size = targets.size(0)
    grid_shape = (batch_size, num_anchors, in_h, in_h)

    mask = torch.zeros(*grid_shape)
    conf_mask = torch.zeros(*grid_shape)
    tx = torch.zeros(*grid_shape)
    ty = torch.zeros(*grid_shape)
    tw = torch.zeros(*grid_shape)
    th = torch.zeros(*grid_shape)
    tconf = torch.ByteTensor(*grid_shape).fill_(0)
    tcls = torch.ByteTensor(*grid_shape, num_classes).fill_(0)

    n_gt = 0
    n_correct = 0

    for b in range(batch_size):
        for t in range(targets.shape[1]):
            # All-zero rows are padding.
            if targets[b, t].sum() == 0:
                continue
            n_gt += 1

            # Box in grid units.
            gx = targets[b, t, 1] * in_h
            gy = targets[b, t, 2] * in_h
            gw = targets[b, t, 3] * in_h
            gh = targets[b, t, 4] * in_h

            # Cell that contains the box centre.
            col = int(gx)
            row = int(gy)

            # Compare shapes only: both the box and the anchors sit at (0, 0).
            shape_box = torch.FloatTensor(np.array([0, 0, gw, gh])).unsqueeze(0)
            anchor_boxes = torch.FloatTensor(
                np.concatenate((np.zeros((num_anchors, 2)), np.array(anchors)),
                               1))
            anchor_ious = bbox_iou(shape_box, anchor_boxes, True)

            # NOTE(review): conf_mask starts at zero, so this assignment cannot
            # change any value -- verify whether it was meant to start at one.
            conf_mask[b, anchor_ious > ignore_thres, row, col] = 0

            best = np.argmax(anchor_ious)
            true_box = torch.FloatTensor(np.array([gx, gy, gw, gh])).unsqueeze(0)
            matched_pred = pred_boxes[b, best, row, col].unsqueeze(0)

            mask[b, best, row, col] = 1
            conf_mask[b, best, row, col] = 1

            # Centre offset inside the cell.
            tx[b, best, row, col] = gx - col
            ty[b, best, row, col] = gy - row

            # Log-scale size relative to the chosen anchor.
            tw[b, best, row, col] = math.log(gw / anchors[best][0] + 1e-16)
            th[b, best, row, col] = math.log(gh / anchors[best][1] + 1e-16)

            target_label = int(targets[b, t, 0])
            tcls[b, best, row, col, target_label] = 1
            tconf[b, best, row, col] = 1

            # Bookkeeping: was the prediction at the responsible slot right?
            iou = bbox_iou(true_box, matched_pred)
            pred_label = torch.argmax(pred_cls[b, best, row, col])
            score = pred_conf[b, best, row, col]
            if iou > 0.5 and pred_label == target_label and score > 0.5:
                n_correct += 1

    return n_gt, n_correct, mask, conf_mask, tx, ty, tw, th, tconf, tcls
def get_ground_truth(boxes):
    """Encode boxes into a grid-shaped ground-truth array.

    Relies on module-level settings defined elsewhere in the file:
    ``grid_h``, ``grid_w``, ``num_box``, ``num_classes``, ``image_w``,
    ``image_h``, ``grid_size`` and ``anchor_boxes``.

    Args:
        boxes: iterable of ``(x, y, w, h)`` boxes. ``(x, y)`` is treated as
            the top-left corner, since half the size is added to obtain the
            centre. Units are presumably pixels of the network input --
            confirm with the caller.

    Returns:
        float32 array of shape
        ``(grid_h, grid_w, num_box, 4 + 1 + num_classes)``. For the cell and
        anchor responsible for a box, channel 0 is 1.0, channels 1:5 hold
        ``(center_x, center_y, w, h)`` in grid units and channel 5 is 1.0.
    """
    gt = np.zeros((grid_h, grid_w, num_box, 4 + 1 + num_classes),
                  dtype=np.float32)

    for bx, by, bw, bh in boxes:
        # Box centre expressed in grid-cell units.
        center_x = (bx + bw / 2.) / float(image_w / grid_w)
        center_y = (by + bh / 2.) / float(image_h / grid_h)

        # Clip to the grid: a centre on the right/bottom image border would
        # otherwise index one past the last cell (IndexError), and a negative
        # centre would silently wrap around to the last cell.
        cell_x = int(np.clip(np.floor(center_x), 0.0, grid_w - 1))
        cell_y = int(np.clip(np.floor(center_y), 0.0, grid_h - 1))

        center_w = bw / grid_size
        center_h = bh / grid_size
        box = [center_x, center_y, center_w, center_h]

        # Find the anchor that best predicts this box; only shapes are
        # compared, so the box is shifted to the origin.
        best_anchor = -1
        max_iou = -1
        shifted_box = BoundBox(0, 0, center_w, center_h)
        for i, anchor in enumerate(anchor_boxes):
            iou = bbox_iou(shifted_box, anchor)
            if max_iou < iou:
                best_anchor = i
                max_iou = iou

        # Assign confidence, (x, y, w, h) and the class flag.
        gt[cell_y, cell_x, best_anchor, 0] = 1.0
        gt[cell_y, cell_x, best_anchor, 1:5] = box
        gt[cell_y, cell_x, best_anchor, 5] = 1.0
    return gt
Beispiel #3
0
def comput_loss(proc_pred,
                annotations_gt,
                targets,
                iou_th=0.5,
                giou_ratio=0.5):
    """Accumulate box, class and objectness losses over a batch.

    Args:
        proc_pred: predictions indexed ``[image, prediction, channel]``.
            Channels 0:4 are converted with ``xywh2xyxy``, channel 4 is the
            objectness score and channels 5: are the class scores.
        annotations_gt: per-image iterable of ground-truth boxes.
        targets: class targets, looked up with the position ``i`` of the
            ground-truth box inside its image.
        iou_th: minimum IoU for a prediction to count as matching a box.
        giou_ratio: weight of the IoU inside the objectness target
            ``(1 - giou_ratio) + giou_ratio * iou``.

    Returns:
        ``(loss, loss_print)`` where ``loss = 2 * box + cls + 2 * obj`` and
        ``loss_print`` is a dict with the detached components under the keys
        ``box``, ``pred`` and ``obj``.
    """
    boxloss = torch.zeros(1, dtype=torch.float32)
    closs = torch.zeros(1, dtype=torch.float32)
    objloss = torch.zeros(1, dtype=torch.float32)

    for j in range(len(proc_pred)):
        for i, gt in enumerate(annotations_gt[j]):
            # IoU of this ground-truth box against every prediction.
            ious = bbox_iou(gt.float(), xywh2xyxy(proc_pred[j, :, :4]).float())
            # Indices of the predictions that overlap enough to matter.
            pertinent = torch.where(ious > iou_th)[0]

            if len(pertinent):
                # torch.max returns a position inside the filtered subset;
                # map it back through `pertinent` to index the full list.
                best_id = pertinent[torch.max(ious[pertinent], 0)[1]]
                best_bb = proc_pred[j, best_id, :]
                closs += pred_criterion(best_bb[5:].unsqueeze(0),
                                        torch.tensor(targets[i]))
                boxloss += (1 - ious[pertinent]).mean()

            # Objectness target blends a constant with the (detached) IoU.
            trgt_objectness = (
                1 - giou_ratio) + giou_ratio * ious.detach().clamp(0)
            objloss += obj_criterion(proc_pred[j, ..., 4], trgt_objectness)

    loss = 2 * boxloss + closs + 2 * objloss
    loss_print = dict(box=boxloss.detach(),
                      pred=closs.detach(),
                      obj=objloss.detach())
    return loss, loss_print
Beispiel #4
0
def validate(model):
    """Return the mean IoU of the top-scoring box over sampled frame pairs.

    Draws 20 rounds of 30 sequences from ``args.vot2018`` (a module-level
    object), picks one pair of frames per sequence, runs the model on each
    pair and averages the IoU between the best-scoring decoded box and the
    ground-truth box.

    NOTE(review): uses ``xrange``, so this is Python 2 code. It also calls
    ``.cuda()`` unconditionally and therefore needs a GPU.
    """

    # Anchor table used to decode the regression output below; the meaning of
    # the individual arguments is defined by generate_anchor (not shown here).
    anchor = generate_anchor(8, [8, ], [0.33, 0.5, 1, 2, 3], 17)

    prec1 = 0
    model = model.eval()
    transform=transforms.Compose([
                       transforms.ToTensor(),transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225]),
                   ])
    lines = []
    for k in xrange(20):
        # 30 distinct sequences, kept in their original order.
        all_sample = [ args.vot2018[i] for i in sorted(random.sample(xrange(len(args.vot2018)), 30)) ]

        nSamples = len(all_sample)
        for i in xrange(nSamples):

            sequence = all_sample[i]
            ran_id = random.randint(0,len(sequence)-1)

            # Re-draw (possibly from another sampled sequence) until the
            # chosen track has at least two frames.
            while len(sequence[ran_id])<2:

                        sequence = all_sample[random.randint(0,nSamples-1)]

                        ran_id = random.randint(0,len(sequence)-1)

            track_obj = sequence[ran_id]

            # Two independently drawn frames of the same track; they may
            # coincide.
            ran_f1 = random.randint(0,len(track_obj)-1)

            ran_f2 = random.randint(0,len(track_obj)-1)
            lines.append([track_obj[ran_f1],track_obj[ran_f2]])
        # Shuffles everything collected so far, once per round.
        random.shuffle(lines)

    for line in lines:

        z,x,gt_box,regression_target,conf_target= load_data(line,0)

        inpz = transform(z)
        inpx = transform(x)
        score, delta = model(inpz.unsqueeze(0).cuda(),inpx.unsqueeze(0).cuda())

        # Flatten to (4, N) offsets and (N,) scores; row 1 of the softmax is
        # kept (presumably the foreground class -- confirm with the model).
        delta = delta.permute(1, 2, 3, 0).contiguous().view(4, -1).data.cpu().numpy()
        score = F.softmax(score.permute(1, 2, 3, 0).contiguous().view(2, -1), dim=0).data[1, :].cpu().numpy()
        # Decode offsets against the anchors: anchor columns 0/1 are added to
        # the scaled x/y offsets, columns 2/3 scale the exponentiated w/h.
        delta[0, :] = delta[0, :] * anchor[:, 2] + anchor[:, 0]
        delta[1, :] = delta[1, :] * anchor[:, 3] + anchor[:, 1]
        delta[2, :] = np.exp(delta[2, :]) * anchor[:, 2]
        delta[3, :] = np.exp(delta[3, :]) * anchor[:, 3]
        best_pscore_id = np.argmax(score)
        target = delta[:, best_pscore_id]
        prec1 += bbox_iou(target, gt_box, False)
    # 600 == 20 rounds * 30 pairs, i.e. len(lines).
    prec1 = prec1/600







    return prec1
Beispiel #5
0
def compute_batch_info(outputs, labels, iou_thres):
    """Collect true-positive flags, scores and labels for each image.

    ``outputs`` is expected to be the per-image list produced after NMS, so
    ``len(outputs)`` is the batch size and an entry may be ``None`` when an
    image has no detections. Each non-None entry has one row per detection:
    columns 0:4 are the box, column 4 the confidence and the last column the
    predicted class.

    ``labels`` has one row per ground-truth box: column 0 is the image index
    inside the batch, column 1 the class and the remaining columns the box.

    Returns:
        A list with one ``[tp, pred_conf, pred_cls]`` entry per image that had
        detections; ``tp`` is a NumPy array of 0/1 flags.
    """
    batch_metrics = []
    for img_idx, output in enumerate(outputs):
        # Images without any detection contribute nothing.
        if output is None:
            continue

        pred_boxes = output[:, :4]
        pred_conf = output[:, 4]
        pred_cls = output[:, -1]

        # One true-positive flag per detection.
        tp = np.zeros(pred_boxes.shape[0])

        # Ground truth belonging to this image, without the image-index column.
        annotations = labels[labels[:, 0] == img_idx][:, 1:]
        if len(annotations):
            img_labels = annotations[:, 0]
            img_boxes = annotations[:, 1:]
            detected_boxes = []

            for pred_idx, (pred_box, pred_label) in enumerate(
                    zip(pred_boxes, pred_cls)):
                # Stop as soon as every ground-truth box has been matched.
                if len(detected_boxes) == len(annotations):
                    break

                # A class that is absent from the image cannot be a hit.
                if pred_label not in img_labels:
                    continue

                iou, box_idx = bbox_iou(pred_box.unsqueeze(0),
                                        img_boxes).max(0)
                # Each ground-truth box may be claimed only once.
                if iou > iou_thres and box_idx not in detected_boxes:
                    tp[pred_idx] = 1
                    detected_boxes.append(box_idx)

        if labels.is_cuda:
            pred_conf = pred_conf.detach().cpu()
            pred_cls = pred_cls.detach().cpu()
        batch_metrics.append([tp, pred_conf, pred_cls])
    return batch_metrics
Beispiel #6
0
    def _calc_ious(self, anchor, bbox, inside_index):
        """Match anchors and ground-truth boxes by IoU.

        Returns ``(argmax_ious, max_ious, gt_argmax_ious)``: for every anchor
        the index of its best box and that IoU, plus the row indices of all
        anchors that reach the maximum IoU of some box.
        """
        # Pairwise IoU, indexed [anchor, box].
        ious = bbox_iou(anchor, bbox)
        anchor_rows = np.arange(len(inside_index))
        box_cols = np.arange(ious.shape[1])

        # Best box for each anchor.
        argmax_ious = ious.argmax(axis=1)
        max_ious = ious[anchor_rows, argmax_ious]

        # Best IoU reached for each box ...
        best_anchor_per_box = ious.argmax(axis=0)
        gt_max_ious = ious[best_anchor_per_box, box_cols]
        # ... and every anchor that attains it (argmax alone drops ties).
        gt_argmax_ious = np.where(ious == gt_max_ious)[0]

        return argmax_ious, max_ious, gt_argmax_ious
def compute_metrics(gt,
                    img_shape,
                    noise_size=5,
                    noise_position=5,
                    create_bbox_proba=0.5,
                    destroy_bbox_proba=0.5,
                    k=10):
    """Perturb ground-truth boxes and score the result against the original.

    1. Add size and position noise to the ground-truth bounding boxes, then
       randomly create and destroy boxes.
    2. Compute F-score, IoU and MAP@k of the perturbed boxes against ``gt``.

    :param gt: list of GT bounding boxes
    :param img_shape: Original image shape
    :param noise_size: Factor passed as ``noise_size_factor``
    :param noise_position: Factor passed as ``noise_position_factor``
    :param create_bbox_proba: Proba of creating Bboxes
    :param destroy_bbox_proba: Proba of destroying Bboxes
    :param k: Map at k
    :return: Noisy Bboxes, Fscore, IoU, MaP
    """
    # Both kinds of noise are always enabled; only their factors are tunable.
    noisy = u.add_noise_to_bboxes(gt,
                                  img_shape,
                                  noise_size=True,
                                  noise_size_factor=noise_size,
                                  noise_position=True,
                                  noise_position_factor=noise_position)

    # Randomly add spurious boxes, then randomly drop boxes.
    noisy = u.create_bboxes(noisy, img_shape, prob=create_bbox_proba)
    noisy = u.destroy_bboxes(noisy, prob=destroy_bbox_proba)

    # F-score of GT against the modified boxes.
    # ToDo: Add dependency on frame number
    true_pos, false_neg, false_pos = evalf.performance_accumulation_window(
        noisy, gt)
    fscore = u.fscore(true_pos, false_neg, false_pos)

    # IoU of each GT box against the modified box at the same position.
    iou = [u.bbox_iou(noisy[idx], gt[idx]) for idx, _ in enumerate(gt)]

    # MAP@k of GT against the modified boxes.
    map_at_k = u.mapk(noisy, gt, k)

    return (noisy, fscore, iou, map_at_k)
Beispiel #8
0
def test(model, test_loader, config):
    """Test the model during training.

    Runs the model over ``test_loader``, matches every ground-truth box to
    its best-overlapping detection and prints precision, recall and F-score.

    Args:
        model: network to evaluate; it is switched to eval mode.
        test_loader: iterable yielding ``(data, target)`` batches. Each
            ``target[i]`` is viewed as rows of 5 values; column 0 is used as
            the class and columns 1:5 as the box, with zero-padded rows
            marking the end of the valid entries.
        config: dict providing ``num_classes``, ``anchors``, ``conf_thresh``,
            ``nms_thresh`` and ``iou_thresh``.

    Returns:
        None. The metrics are printed.
    """

    def truths_length(truth):
        """Return how many leading rows of ``truth`` are real boxes."""
        for k in range(len(truth)):
            if truth[k][1] == 0:
                return k
        # No padding row at all: every row is a real box. Without this the
        # helper returned None and `total += num_gts` raised TypeError.
        return len(truth)

    model.eval()
    num_classes = config['num_classes']
    anchors = config['anchors']
    # `anchors` is a flat list of (w, h) pairs.
    num_anchors = len(anchors) // 2
    conf_thresh = config['conf_thresh']
    nms_thresh = config['nms_thresh']
    iou_thresh = config['iou_thresh']
    eps = 1e-5
    total = 0.
    proposals = 0.
    correct = 0.

    for batch_idx, (data, target) in enumerate(test_loader):
        data = data.cuda()
        # NOTE(review): `volatile` only disables autograd on PyTorch < 0.4;
        # newer versions ignore it -- consider torch.no_grad() once the
        # minimum supported version is known.
        data = Variable(data, volatile=True)
        output = model(data).data
        all_boxes = get_region_boxes(output, conf_thresh, num_classes, anchors,
                                     num_anchors)
        for i in range(output.size(0)):
            boxes = all_boxes[i]
            boxes = nms(boxes, nms_thresh)
            truths = target[i].view(-1, 5)
            num_gts = truths_length(truths)

            total += num_gts
            # Every detection above the confidence threshold is a proposal.
            for box in boxes:
                if box[4] > conf_thresh:
                    proposals += 1
            for l in range(num_gts):
                # Ground truth in the same 7-value layout as a detection.
                box_gt = [truths[l][1], truths[l][2], truths[l][3],
                          truths[l][4], 1., 1., truths[l][0]]
                best_iou = 0
                best_j = -1
                for j in range(len(boxes)):
                    iou = bbox_iou(box_gt, boxes[j], x1y1x2y2=False)
                    if iou > best_iou:
                        best_j = j
                        best_iou = iou
                # Correct only if overlap is sufficient and the class agrees.
                if best_iou > iou_thresh and boxes[best_j][6] == box_gt[6]:
                    correct += 1

    precision = 1. * correct / (proposals + eps)
    recall = 1. * correct / (total + eps)
    fscore = 2. * precision * recall / (precision + recall + eps)
    print('precision: {}, recall: {}, fscore: {}'.format(
        precision, recall, fscore))
Beispiel #9
0
    def get_target(self, target, anchors, in_w, in_h, ignore_threshold):
        """Build grid-shaped training targets for one detection scale.

        Every non-zero row ``target[b, t]`` is read as ``(label, x, y, w, h)``;
        x/w are scaled by ``in_w`` and y/h by ``in_h`` to grid units (so the
        values are presumably normalised to [0, 1] -- confirm with caller).

        Returns:
            ``(mask, noobj_mask, tx, ty, tw, th, tconf, tcls)``. All are
            indexed ``[batch, anchor, row, col]``; ``tcls`` has an extra
            trailing class axis.
        """
        bs = target.size(0)
        grid_shape = (bs, self.num_anchors, in_h, in_w)

        mask = torch.zeros(*grid_shape, requires_grad=False)
        noobj_mask = torch.ones(*grid_shape, requires_grad=False)
        tx = torch.zeros(*grid_shape, requires_grad=False)
        ty = torch.zeros(*grid_shape, requires_grad=False)
        tw = torch.zeros(*grid_shape, requires_grad=False)
        th = torch.zeros(*grid_shape, requires_grad=False)
        tconf = torch.zeros(*grid_shape, requires_grad=False)
        tcls = torch.zeros(*grid_shape, self.num_classes, requires_grad=False)

        for b in range(bs):
            for t in range(target.shape[1]):
                # All-zero rows are padding.
                if target[b, t].sum() == 0:
                    continue

                # Box expressed in grid units.
                gx = target[b, t, 1] * in_w
                gy = target[b, t, 2] * in_h
                gw = target[b, t, 3] * in_w
                gh = target[b, t, 4] * in_h

                # Cell containing the box centre.
                col = int(gx)
                row = int(gy)

                # Shape-only comparison: box and anchors both sit at (0, 0).
                shape_box = torch.FloatTensor(
                    np.array([0, 0, gw, gh])).unsqueeze(0)
                anchor_boxes = torch.FloatTensor(
                    np.concatenate((np.zeros((self.num_anchors, 2)),
                                    np.array(anchors)), 1))
                anchor_ious = bbox_iou(shape_box, anchor_boxes)

                # Anchors that overlap well are excluded from the no-object
                # term for this cell.
                noobj_mask[b, anchor_ious > ignore_threshold, row, col] = 0

                # The best-fitting anchor becomes responsible for the box.
                best = np.argmax(anchor_ious)
                mask[b, best, row, col] = 1

                # Centre offset inside the cell.
                tx[b, best, row, col] = gx - col
                ty[b, best, row, col] = gy - row

                # Log-scale size relative to the chosen anchor.
                tw[b, best, row, col] = math.log(gw/anchors[best][0] + 1e-16)
                th[b, best, row, col] = math.log(gh/anchors[best][1] + 1e-16)

                # Objectness and one-hot class.
                tconf[b, best, row, col] = 1
                tcls[b, best, row, col, int(target[b, t, 0])] = 1

        return mask, noobj_mask, tx, ty, tw, th, tconf, tcls
Beispiel #10
0
def build_targets(target, anchors, num_anchors, nH, nW, max_boxes=50):
    """Build grid-shaped training targets from flat, zero-padded labels.

    Args:
        target: tensor indexed ``[batch, k]``; every 5 consecutive values are
            read as ``(class, x, y, w, h)``. x/w are scaled by ``nW`` and y/h
            by ``nH``. A row ends at the first box whose x value is 0.
        anchors: flat sequence of anchor values, ``len(anchors) // num_anchors``
            per anchor; the first two of each group are used as width/height.
        num_anchors: number of anchors.
        nH: grid height.
        nW: grid width.
        max_boxes: maximum number of boxes read per image (default 50, the
            value that used to be hard-coded).

    Returns:
        ``(nGT, mask, tx, ty, tw, th, tconf, tcls)`` where ``nGT`` is the
        number of boxes read and the tensors are indexed
        ``[batch, anchor, row, col]``. ``tconf`` holds the IoU with the
        chosen anchor and ``tcls`` the class value.
    """
    nB = target.size(0)
    nA = num_anchors
    # Integer division: the result is used as a list index, and `/` yields a
    # float on Python 3 (TypeError when indexing `anchors`).
    anchor_step = len(anchors) // num_anchors
    mask = torch.zeros(nB, nA, nH, nW)
    tx = torch.zeros(nB, nA, nH, nW)
    ty = torch.zeros(nB, nA, nH, nW)
    tw = torch.zeros(nB, nA, nH, nW)
    th = torch.zeros(nB, nA, nH, nW)
    tconf = torch.zeros(nB, nA, nH, nW)
    tcls = torch.zeros(nB, nA, nH, nW)

    nGT = 0
    for b in range(nB):
        for t in range(max_boxes):
            base = t * 5
            # A zero x coordinate marks the start of the padding.
            if target[b][base + 1] == 0:
                break
            nGT = nGT + 1

            gx = target[b][base + 1] * nW
            gy = target[b][base + 2] * nH
            w = target[b][base + 3] * nW
            h = target[b][base + 4] * nH
            # Cell that contains the box centre.
            i = int(gx)
            j = int(gy)

            # Pick the anchor whose shape overlaps the box most; both are
            # placed at the origin so only width/height matter.
            # NOTE(review): if no anchor has IoU > 0, best_n stays -1 and the
            # last anchor is used -- confirm this is acceptable.
            best_iou = 0.0
            best_n = -1
            gt_box = [0, 0, w, h]
            for n in range(nA):
                anchor_box = [
                    0, 0, anchors[anchor_step * n],
                    anchors[anchor_step * n + 1]
                ]
                iou = bbox_iou(anchor_box, gt_box, x1y1x2y2=False)
                if iou > best_iou:
                    best_iou = iou
                    best_n = n

            mask[b][best_n][j][i] = 1
            # Centre offset inside the cell.
            tx[b][best_n][j][i] = gx - i
            ty[b][best_n][j][i] = gy - j
            # Log-scale size relative to the chosen anchor.
            tw[b][best_n][j][i] = math.log(w / anchors[anchor_step * best_n])
            th[b][best_n][j][i] = math.log(h /
                                           anchors[anchor_step * best_n + 1])
            tconf[b][best_n][j][i] = best_iou
            tcls[b][best_n][j][i] = target[b][base]

    return nGT, mask, tx, ty, tw, th, tconf, tcls
Beispiel #11
0
    def encode(self, bboxes, labels, input_size):
        '''Turn ground-truth boxes and labels into per-anchor targets.

        Regression targets follow the Faster RCNN box coder:
          tx = (x - anchor_x) / anchor_w
          ty = (y - anchor_y) / anchor_h
          tw = log(w / anchor_w)
          th = log(h / anchor_h)

        Args:
          bboxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj, 4].
          labels: (tensor) object class labels, sized [#obj,].
          input_size: (int/tuple) model input size of (w,h).

        Returns:
          reg_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
          cls_targets: (tensor) encoded class labels, sized [#anchors,];
            0 below the IoU band, -1 (ignored) inside it.
        '''
        input_size = torch.Tensor(input_size)
        anchor_bboxes = self._get_anchor_boxes(input_size)

        # Anchors come as (xc, yc, w, h); IoU needs (x1, y1, x2, y2).
        anchor_xy = anchor_bboxes[:, :2]
        anchor_wh = anchor_bboxes[:, 2:]
        anchor_corners = torch.cat(
            [anchor_xy - anchor_wh / 2, anchor_xy + anchor_wh / 2], 1)

        # [anchor#, object#] IoU; keep the best object for every anchor.
        ious = bbox_iou(anchor_corners, bboxes)
        max_ious, max_ids = ious.max(1)
        matched = bboxes[max_ids]

        # Matched boxes back to (xc, yc, w, h).
        top_left = matched[:, :2]
        bottom_right = matched[:, 2:]
        matched = torch.cat(
            [(top_left + bottom_right) / 2, bottom_right - top_left + 1], 1)

        loc_xy = (matched[:, :2] - anchor_bboxes[:, :2]) / anchor_bboxes[:, 2:]
        loc_wh = torch.log(matched[:, 2:] / anchor_bboxes[:, 2:])
        reg_targets = torch.cat([loc_xy, loc_wh], 1)

        cls_targets = labels[max_ids]
        # Everything below the upper threshold is background ...
        cls_targets[max_ious < self.config.max_iou] = 0
        # ... except the band between the thresholds, which is ignored (-1).
        in_band = (max_ious > self.config.min_iou) & (
            max_ious < self.config.max_iou)
        cls_targets[in_band] = -1

        # The best anchor of every object always receives that object's label.
        _, best_anchor_ids = ious.max(0)
        cls_targets[best_anchor_ids] = labels
        return reg_targets, cls_targets
Beispiel #12
0
        def get_item(self, img):
            """Load one image and encode its annotations as a target grid.

            Returns ``(image, y_anntn, true_box)``: ``y_anntn`` is indexed
            ``[grid_x, grid_y, anchor, channel]`` with the box in channels
            0:4, the confidence in channel 4 and a one-hot class from channel
            5 on; ``true_box`` lists the raw boxes, wrapping around after
            ``self.max_image_box`` entries.
            """
            image, image_size = self.get_img(img['file_name'])

            # Per-cell target grid and flat list of true boxes.
            y_anntn = np.zeros(shape=(
                self.grid_width, self.grid_height, self.max_grid_box,
                4+1+self.num_categories))
            true_box = np.zeros(shape=(1, 1, 1,  self.max_image_box, 4))

            annIds = self.dataset.getAnnIds(imgIds=img['id'])
            annotations = self.dataset.loadAnns(annIds)

            slot = 0
            for annotation in annotations:
                box = self.get_box(annotation, image_size=image_size)
                x, y, w, h = box

                # Cell that contains the box position.
                grid_x = int(np.floor(x))
                grid_y = int(np.floor(y))

                # Pick the anchor whose shape overlaps the box most
                # (first one wins on ties).
                best_anchor, max_iou = -1, -1
                for idx, anchor in enumerate(self.anchors):
                    iou = bbox_iou([0, 0, w, h], anchor)
                    if max_iou < iou:
                        best_anchor, max_iou = idx, iou

                cat_id = self.cat_ids[annotation['category_id']]
                y_anntn[grid_x, grid_y, best_anchor, 0:4] = box
                y_anntn[grid_x, grid_y, best_anchor, 4] = 1.0
                y_anntn[grid_x, grid_y, best_anchor, 5 + cat_id] = 1.0

                # Record the raw box; the slot index wraps around.
                true_box[0, 0, 0, slot] = box
                slot = (slot + 1) % self.max_image_box

            return image, y_anntn, true_box
    def _calc_ious(
        self,
        anchor,
        bbox,
    ):
        """Match anchors and ground-truth boxes by IoU.

        Returns ``(argmax_ious, max_ious, gt_argmax_ious)``: for every anchor
        the index of its best box and that IoU, plus the row indices of all
        anchors that reach the maximum IoU of some box.
        """
        # Pairwise IoU, [n_anchor, n_bbox]. According to the original note,
        # anchors crossing the image border were removed before this call.
        ious = utils.bbox_iou(anchor, bbox)
        n_anchor, n_bbox = ious.shape[0], ious.shape[1]

        # For each anchor: index and IoU of the closest box, [n_anchor,].
        argmax_ious = ious.argmax(axis=1)
        max_ious = ious[np.arange(n_anchor), argmax_ious]

        # For each box: IoU of the closest anchor, [n_bbox,].
        closest_anchor = ious.argmax(axis=0)
        gt_max_ious = ious[closest_anchor, np.arange(n_bbox)]

        # argmax keeps only the first maximum, so look up every anchor that
        # attains a per-box maximum; which box it belongs to is irrelevant.
        gt_argmax_ious = np.where(ious == gt_max_ious)[0]

        return argmax_ious, max_ious, gt_argmax_ious
Beispiel #14
0
def best_anchor_box(box):
    """Return the index of the configured anchor that best fits ``box``.

    Only ``box[2]`` and ``box[3]`` are used: the box and every anchor are
    placed at the origin before their IoU is computed. ``config.ANCHORS`` is
    read as a flat list of value pairs. Returns -1 when there are no anchors.
    """
    shifted_box = BoundingBox(0, 0, box[2], box[3])

    # Rebuild anchor boxes from the flat pair list.
    anchors = []
    for k in range(len(config.ANCHORS) // 2):
        anchors.append(
            BoundingBox(0, 0, config.ANCHORS[2 * k], config.ANCHORS[2 * k + 1]))

    # Keep the first anchor with the highest IoU.
    best_anchor, max_iou = -1, -1
    for idx, anchor in enumerate(anchors):
        iou = bbox_iou(shifted_box, anchor)
        if max_iou < iou:
            best_anchor, max_iou = idx, iou

    return best_anchor
Beispiel #15
0
def get_ground_truth(coco, imgId):
    """Encode the annotations of one image into a ground-truth grid.

    Relies on module-level settings defined elsewhere in the file:
    ``grid_h``, ``grid_w``, ``num_box``, ``num_classes``, ``image_w``,
    ``image_h``, ``grid_size``, ``anchor_boxes`` and ``catId2idx``.

    Args:
        coco: annotation API object providing ``getAnnIds`` and ``loadAnns``.
        imgId: id of the image whose annotations are encoded.

    Returns:
        float32 array of shape
        ``(grid_h, grid_w, num_box, 4 + 1 + num_classes)``. For the cell and
        anchor responsible for a box, channel 0 is 1.0, channels 1:5 hold
        ``(center_x, center_y, w, h)`` in grid units and channel
        ``5 + class index`` is 1.0.
    """
    gt = np.zeros((grid_h, grid_w, num_box, 4 + 1 + num_classes),
                  dtype=np.float32)

    annotation_ids = coco.getAnnIds(imgIds=[imgId])
    for anno in coco.loadAnns(ids=annotation_ids):
        category_id = anno['category_id']

        # Boxes are scaled by the image size (so they are presumably stored
        # normalised -- confirm against the dataset).
        bx, by, bw, bh = anno['bbox']
        bx = 1.0 * bx * image_w
        by = 1.0 * by * image_h
        bw = 1.0 * bw * image_w
        bh = 1.0 * bh * image_h

        # Centre and size in grid-cell units.
        center_x = (bx + bw / 2.) / grid_size
        center_y = (by + bh / 2.) / grid_size
        center_w = bw / grid_size
        center_h = bh / grid_size
        box = [center_x, center_y, center_w, center_h]

        # Responsible cell, clipped so it always lies inside the grid.
        cell_x = int(np.clip(np.floor(center_x), 0.0, (grid_w - 1)))
        cell_y = int(np.clip(np.floor(center_y), 0.0, (grid_h - 1)))

        # Anchor whose shape overlaps the box most (first one wins on ties).
        shifted_box = BoundBox(0, 0, center_w, center_h)
        best_anchor, max_iou = -1, -1
        for idx, anchor in enumerate(anchor_boxes):
            iou = bbox_iou(shifted_box, anchor)
            if max_iou < iou:
                best_anchor, max_iou = idx, iou

        # Confidence, (x, y, w, h) and the class flag.
        gt[cell_y, cell_x, best_anchor, 0] = 1.0
        gt[cell_y, cell_x, best_anchor, 1:5] = box
        gt[cell_y, cell_x, best_anchor, 5 + catId2idx[category_id]] = 1.0
    return gt
    def __init__(self, num_parts=24):
        """Load per-frame detections and follow one object through them.

        Reads the directory listing of ``data/breatheless`` and the pickled
        detections in ``data/breatheless.pkl``. The object with the largest
        box in the first frame is selected; in every frame the detection
        with the highest IoU against the previously selected box is kept.

        Args:
            num_parts: stored on the instance as ``self.num_parts``.

        After construction ``self.data`` maps each frame's base file name to
        a dict with the selected ``"pred"`` and ``"bbox"`` entries.
        """
        self.default = -3
        self.num_parts = num_parts

        self.fs = sorted(os.listdir("data/breatheless"))
        self.data = {}

        # NOTE(review): unpickling can execute arbitrary code; only load this
        # file from a trusted source.
        # The context manager closes the file handle, which was leaked before.
        with open("data/breatheless.pkl", "rb") as fh:
            pkl_data = pkl.load(fh)
        # Sort by filename
        pkl_data = sorted(pkl_data, key=lambda d: d["file_name"])

        # Choose the object with largest bounding box in the first frame
        best = {"i": 0, "area": 0}
        for i, bbox in enumerate(pkl_data[0]["pred_boxes_XYXY"]):
            x0, y0, x1, y1 = [e.item() for e in bbox]
            area = bbox_area(x0, y0, x1, y1)
            if area > best["area"]:
                best = {"i": i, "area": area}

        # Greedy tracking: in each frame keep the detection that overlaps
        # the previous frame's selection most (index 0 if nothing overlaps).
        prev_bbox = pkl_data[0]["pred_boxes_XYXY"][best["i"]]
        for d in pkl_data:

            best = {"i": 0, "iou": 0}
            for i, bbox in enumerate(d["pred_boxes_XYXY"]):
                iou = bbox_iou(prev_bbox, bbox)
                if iou > best["iou"]:
                    best = {"i": i, "iou": iou}

            fname = d["file_name"].split("/")[-1]
            self.data[fname] = {
                "pred": d["pred_densepose"][best["i"]],
                "bbox": d["pred_boxes_XYXY"][best["i"]]
            }

            prev_bbox = d["pred_boxes_XYXY"][best["i"]]
Beispiel #17
0
    def __call__(self, images, annotations, shapes, aug=True):
        """Assemble one training/evaluation batch with three target grids.

        Args:
            images: iterable of images; each is resized to its entry in
                ``shapes`` before further processing.
            annotations: iterable of JSON strings, one per image.
            shapes: iterable of shapes; ``shape[1]`` and ``shape[0]`` are
                passed to ``cv2.resize`` as width and height.
            aug: when True the image goes through ``self._aug_image`` and the
                network size comes from ``self._get_net_size()``; otherwise
                ``self._raw_image`` and the configured input size are used.

        Returns:
            ``[x_batch, t_batch, yolo_1, yolo_2, yolo_3]``, followed by
            ``images, annotations, shapes`` when ``aug`` is False.
        """
        # get image input size (how often it changes is decided by
        # _get_net_size, which is not shown here)
        if aug:
            self.idx += 1
            net_h, net_w = self._get_net_size()
        else:
            net_h, net_w = self.config['model']['input_size'], self.config[
                'model']['input_size']

        # coarsest grid; the other two scales are 2x and 4x finer
        base_grid_h, base_grid_w = net_h // self.down_sample, net_w // self.down_sample

        x_batch = np.zeros((self.batch_size, net_h, net_w, 3),
                           dtype=np.float32)
        t_batch = np.zeros(
            (self.batch_size, 1, 1, 1, self.max_box_per_image, 4),
            dtype=np.float32)

        # initialize the inputs and the outputs; each scale holds a third of
        # the anchors and 4 box values + 1 confidence + one slot per label
        yolo_1 = np.zeros((self.batch_size, 1 * base_grid_h, 1 * base_grid_w,
                           len(self.anchors) // 3, 4 + 1 + len(self.labels)),
                          dtype=np.float32)
        yolo_2 = np.zeros((self.batch_size, 2 * base_grid_h, 2 * base_grid_w,
                           len(self.anchors) // 3, 4 + 1 + len(self.labels)),
                          dtype=np.float32)
        yolo_3 = np.zeros((self.batch_size, 4 * base_grid_h, 4 * base_grid_w,
                           len(self.anchors) // 3, 4 + 1 + len(self.labels)),
                          dtype=np.float32)
        # ordered so that anchor index // 3 selects the grid: anchors 0-2 map
        # to the finest grid (yolo_3), the last three to the coarsest (yolo_1)
        yolos = [yolo_3, yolo_2, yolo_1]

        instance_count = 0
        # NOTE(review): this index is never reset per image, so it keeps
        # counting (modulo max_box_per_image) across the whole batch
        true_box_index = 0

        # do the logic to fill in the inputs and the output
        for img, ann, shape in zip(images, annotations, shapes):
            ann = json.loads(ann)
            img = cv2.resize(img, (shape[1], shape[0]))
            # augment input image and fix object's position and size
            if aug:
                img, all_objs = self._aug_image(img, ann, net_h, net_w)
            else:
                img, all_objs = self._raw_image(img, ann, net_h, net_w)

            for obj in all_objs:
                # find the best anchor box for this object
                max_anchor = None
                max_index = -1
                max_iou = -1
                # not only max iou anchor but also larger than threshold anchors are positive.
                positive_anchors = []
                positive_threshold = 0.3

                # only the shape is compared, so the box is moved to the origin
                shifted_box = BoundBox(0, 0, obj['xmax'] - obj['xmin'],
                                       obj['ymax'] - obj['ymin'])

                for i in range(len(self.anchors)):
                    anchor = self.anchors[i]
                    iou = bbox_iou(shifted_box, anchor)

                    if max_iou < iou:
                        max_anchor = anchor
                        max_index = i
                        max_iou = iou
                    if iou > positive_threshold:
                        positive_anchors.append([i, anchor])
                # fall back to the single best anchor when none passes the
                # threshold
                if not positive_anchors:
                    positive_anchors.append([max_index, max_anchor])

                # note: this loop rebinds max_index / max_anchor
                for max_index, max_anchor in positive_anchors:
                    # determine the yolo to be responsible for this bounding box
                    yolo = yolos[max_index // 3]
                    grid_h, grid_w = yolo.shape[1:3]

                    # determine the position of the bounding box on the grid
                    center_x = .5 * (obj['xmin'] + obj['xmax'])
                    center_x = center_x / float(
                        net_w) * grid_w  # sigma(t_x) + c_x
                    center_y = .5 * (obj['ymin'] + obj['ymax'])
                    center_y = center_y / float(
                        net_h) * grid_h  # sigma(t_y) + c_y

                    # determine the sizes of the bounding box, as the log
                    # ratio to the anchor's xmax / ymax
                    w = np.log((obj['xmax'] - obj['xmin']) /
                               float(max_anchor.xmax))  # t_w
                    h = np.log((obj['ymax'] - obj['ymin']) /
                               float(max_anchor.ymax))  # t_h

                    box = [center_x, center_y, w, h]

                    # determine the index of the label
                    obj_indx = self.labels.index(obj['name'])

                    # determine the location of the cell responsible for this object
                    grid_x = int(np.floor(center_x))
                    grid_y = int(np.floor(center_y))

                    # assign ground truth x, y, w, h, confidence and class probs to y_batch
                    # (the slot is cleared first, discarding any earlier object)
                    yolo[instance_count, grid_y, grid_x, max_index % 3] = 0
                    yolo[instance_count, grid_y, grid_x, max_index % 3,
                         0:4] = box
                    yolo[instance_count, grid_y, grid_x, max_index % 3, 4] = 1.
                    yolo[instance_count, grid_y, grid_x, max_index % 3,
                         5 + obj_indx] = 1

                    # assign the true box to t_batch: centre in grid units,
                    # width/height in the units of obj['xmax'] / obj['ymax']
                    true_box = [
                        center_x, center_y, obj['xmax'] - obj['xmin'],
                        obj['ymax'] - obj['ymin']
                    ]
                    t_batch[instance_count, 0, 0, 0, true_box_index] = true_box

                    true_box_index += 1
                    true_box_index = true_box_index % self.max_box_per_image

            # assign input image to x_batch
            # NOTE(review): with aug=True and self.norm None neither branch
            # runs and the image row stays all zeros -- confirm this is intended
            if aug and self.norm is not None:
                x_batch[instance_count] = self.norm(img)
            elif not aug:
                x_batch[instance_count] = img
            # increase instance counter in the current batch
            instance_count += 1

        output = [x_batch, t_batch, yolo_1, yolo_2, yolo_3]
        if not aug:
            output += [images, annotations, shapes]
        return output
Beispiel #18
0
    def build_targets(self, pred_boxes, target, anchors, nA, nH, nW):
        """Build the training masks and regression targets for one output scale.

        Args:
            pred_boxes: Predicted boxes, one per row, flattened so that row
                ``b * nA * nH * nW + a * nH * nW + j * nW + i`` belongs to
                sample ``b``, anchor ``a`` and grid cell ``(j, i)``.
            target: Label tensor with the batch on dim 0. Each sample is
                reshaped to rows of five values ``(class, x, y, w, h)``; the
                first row whose ``x`` equals 0 ends that sample's list.
            anchors: ``(nA, anchor_step)`` tensor of anchor sizes.
            nA: Number of anchors.
            nH: Grid height.
            nW: Grid width.

        Returns:
            ``(nGT, nRecall, nRecall75, obj_mask, noobj_mask, coord_mask,
            tcoord, tconf, tcls)``. ``nGT`` counts the ground-truth boxes
            processed; ``nRecall`` / ``nRecall75`` count those whose matched
            prediction has IoU above 0.5 / 0.75.
        """
        nB = target.size(0)
        anchor_step = anchors.size(1)  # anchors[nA][anchor_step]
        noobj_mask = torch.ones(nB, nA, nH, nW)
        obj_mask = torch.zeros(nB, nA, nH, nW)
        coord_mask = torch.zeros(nB, nA, nH, nW)
        tcoord = torch.zeros(4, nB, nA, nH, nW)
        tconf = torch.zeros(nB, nA, nH, nW)
        tcls = torch.zeros(nB, nA, nH, nW, self.num_classes)

        nAnchors = nA * nH * nW
        nPixels = nH * nW
        nGT = 0
        nRecall = 0
        nRecall75 = 0

        # it works faster on CPU than on GPU.
        anchors = anchors.to("cpu")
        # Anchors as origin-anchored boxes. Identical for every ground truth,
        # so build the tensor once instead of once per label row.
        anchor_boxes = torch.cat((torch.zeros(nA, anchor_step), anchors),
                                 1).t()

        for b in range(nB):
            cur_pred_boxes = pred_boxes[b * nAnchors:(b + 1) * nAnchors].t()
            cur_ious = torch.zeros(nAnchors)
            tbox = target[b].view(-1, 5).to("cpu")
            # At most 50 labels are read per sample, but never index past the
            # rows that actually exist.
            max_boxes = min(50, tbox.size(0))

            # Pass 1: best IoU of every prediction against any ground truth;
            # predictions above the threshold are excluded from the no-object
            # mask.
            for t in range(max_boxes):
                if tbox[t][1] == 0:
                    break
                gx, gy = tbox[t][1] * nW, tbox[t][2] * nH
                gw = tbox[t][3] * self.net_width
                gh = tbox[t][4] * self.net_height
                cur_gt_boxes = torch.FloatTensor([gx, gy, gw,
                                                  gh]).repeat(nAnchors, 1).t()
                cur_ious = torch.max(
                    cur_ious,
                    multi_bbox_ious(cur_pred_boxes,
                                    cur_gt_boxes,
                                    x1y1x2y2=False))
            ignore_ix = (cur_ious > self.ignore_thresh).view(nA, nH, nW)
            noobj_mask[b][ignore_ix] = 0

            # Pass 2: assign each ground truth to the anchor whose shape
            # overlaps it best, in the cell that contains its centre.
            for t in range(max_boxes):
                if tbox[t][1] == 0:
                    break
                nGT += 1
                gx, gy = tbox[t][1] * nW, tbox[t][2] * nH
                gw = tbox[t][3] * self.net_width
                gh = tbox[t][4] * self.net_height
                gw, gh = gw.float(), gh.float()
                gi, gj = int(gx), int(gy)

                tmp_gt_boxes = torch.FloatTensor([0, 0, gw, gh]).repeat(nA,
                                                                        1).t()
                _, best_n = torch.max(
                    multi_bbox_ious(anchor_boxes, tmp_gt_boxes,
                                    x1y1x2y2=False), 0)

                gt_box = torch.FloatTensor([gx, gy, gw, gh])
                pred_box = pred_boxes[b * nAnchors + best_n * nPixels +
                                      gj * nW + gi]
                iou = bbox_iou(gt_box, pred_box, x1y1x2y2=False)

                obj_mask[b][best_n][gj][gi] = 1
                noobj_mask[b][best_n][gj][gi] = 0
                # Coordinate-loss weight: 2 minus the product of the raw label
                # width and height.
                coord_mask[b][best_n][gj][gi] = 2. - tbox[t][3] * tbox[t][4]
                tcoord[0][b][best_n][gj][gi] = gx - gi
                tcoord[1][b][best_n][gj][gi] = gy - gj
                tcoord[2][b][best_n][gj][gi] = math.log(gw /
                                                        anchors[best_n][0])
                tcoord[3][b][best_n][gj][gi] = math.log(gh /
                                                        anchors[best_n][1])
                tcls[b][best_n][gj][gi][int(tbox[t][0])] = 1
                tconf[b][best_n][gj][gi] = iou if self.rescore else 1.

                if iou > 0.5:
                    nRecall += 1
                    if iou > 0.75:
                        nRecall75 += 1

        return nGT, nRecall, nRecall75, obj_mask, noobj_mask, coord_mask, tcoord, tconf, tcls
Beispiel #19
0
 def __getitem__(self, idx):
     """Build batch number ``idx``.

     Each image is augmented with ``self.aug_image``. Every object with a
     positive extent and a label listed in ``config['LABELS']`` is written
     into the grid cell containing its centre, under the anchor whose shape
     overlaps it best.

     Returns:
         ``([x_batch, b_batch], y_batch)``: the images, the per-image buffer
         of ground-truth boxes, and the per-cell / per-anchor targets laid
         out as ``(x, y, w, h, objectness, one-hot class)``. Box values are
         expressed in grid-cell units.
     """
     cfg = self.config
     l_bound = idx * cfg['BATCH_SIZE']
     r_bound = (idx + 1) * cfg['BATCH_SIZE']
     if r_bound > len(self.images):
         # Final batch: move the window back rather than shrinking it.
         r_bound = len(self.images)
         l_bound = r_bound - cfg['BATCH_SIZE']
     n_items = r_bound - l_bound

     # Pixel size of one grid cell along each axis.
     cell_w = float(cfg['IMAGE_W']) / cfg['GRID_W']
     cell_h = float(cfg['IMAGE_H']) / cfg['GRID_H']

     x_batch = np.zeros((n_items, cfg['IMAGE_H'], cfg['IMAGE_W'], 3))
     b_batch = np.zeros((n_items, 1, 1, 1, cfg['TRUE_BOX_BUFFER'], 4))
     y_batch = np.zeros((n_items, cfg['GRID_H'], cfg['GRID_W'],
                         cfg['BOX'], 4 + 1 + len(cfg['LABELS'])))

     for instance_count, train_instance in enumerate(
             self.images[l_bound:r_bound]):
         img, all_objs = self.aug_image(train_instance, jitter=self.jitter)
         true_box_index = 0
         for obj in all_objs:
             if not (obj['xmax'] > obj['xmin']
                     and obj['ymax'] > obj['ymin']
                     and obj['name'] in cfg['LABELS']):
                 continue

             center_x = .5 * (obj['xmin'] + obj['xmax']) / cell_w
             center_y = .5 * (obj['ymin'] + obj['ymax']) / cell_h
             grid_x = int(np.floor(center_x))
             grid_y = int(np.floor(center_y))
             # Skip centres outside the grid. The lower bound matters: a
             # negative index would silently wrap to the opposite border.
             if not (0 <= grid_x < cfg['GRID_W']
                     and 0 <= grid_y < cfg['GRID_H']):
                 continue

             obj_indx = cfg['LABELS'].index(obj['name'])
             center_w = (obj['xmax'] - obj['xmin']) / cell_w
             center_h = (obj['ymax'] - obj['ymin']) / cell_h
             box = [center_x, center_y, center_w, center_h]

             # Pick the anchor with the highest IoU against the box moved
             # to the origin; ties keep the earliest anchor.
             best_anchor = -1
             max_iou = -1
             shifted_box = BoundBox(0, 0, center_w, center_h)
             for i, anchor in enumerate(self.anchors):
                 iou = bbox_iou(shifted_box, anchor)
                 if max_iou < iou:
                     best_anchor = i
                     max_iou = iou

             y_batch[instance_count, grid_y, grid_x, best_anchor, 0:4] = box
             y_batch[instance_count, grid_y, grid_x, best_anchor, 4] = 1.
             y_batch[instance_count, grid_y, grid_x, best_anchor,
                     5 + obj_indx] = 1
             b_batch[instance_count, 0, 0, 0, true_box_index] = box
             # The true-box buffer is circular: extra boxes overwrite the
             # oldest slots.
             true_box_index = (true_box_index + 1) % cfg['TRUE_BOX_BUFFER']

         if self.norm is not None:
             x_batch[instance_count] = self.norm(img)
         else:
             # No normaliser configured: draw the boxes and labels onto the
             # raw image before storing it.
             for obj in all_objs:
                 if obj['xmax'] > obj['xmin'] and obj['ymax'] > obj['ymin']:
                     cv2.rectangle(img[:, :, ::-1],
                                   (obj['xmin'], obj['ymin']),
                                   (obj['xmax'], obj['ymax']),
                                   (255, 0, 0), 3)
                     cv2.putText(img[:, :, ::-1], obj['name'],
                                 (obj['xmin'] + 2, obj['ymin'] + 12), 0,
                                 1.2e-3 * img.shape[0], (0, 255, 0), 2)
             x_batch[instance_count] = img

     return [x_batch, b_batch], y_batch
    def get_generator(self):
        """Yield ``([x_batch, b_batch], y_batch)`` training batches forever.

        Images are taken in the order returned by
        ``randomize_imgs(self.images)`` and augmented with ``self.aug_image``.
        Every object with a positive extent and a label listed in
        ``config['LABELS']`` is written into the grid cell containing its
        centre, under the anchor whose shape overlaps it best. Box values are
        in grid-cell units and the class index is stored in slot 5 of
        ``y_batch``.
        """
        self.randomized_imgs = randomize_imgs(self.images)
        num_img = len(self.randomized_imgs)

        total_count = 0
        batch_count = 0

        x_batch = np.zeros((self.config['BATCH_SIZE'], self.config['IMAGE_H'],
                            self.config['IMAGE_W'], 3))  # input images
        b_batch = np.zeros(
            (self.config['BATCH_SIZE'], 1, 1, 1,
             self.config['TRUE_BOX_BUFFER'], 4)
        )  # buffer of TRUE_BOX_BUFFER ground-truth boxes per image
        # NOTE(review): the first batch uses a last dimension of 4 + 1 + 1
        # while later batches use 5 + CLASS (see the re-allocation below).
        # Only slots 0..5 are written here; confirm which shape the consumer
        # expects before unifying them.
        y_batch = np.zeros((self.config['BATCH_SIZE'], self.config['GRID_H'],
                            self.config['GRID_W'], self.config['BOX'],
                            4 + 1 + 1))  # desired network output

        while True:
            if total_count < num_img:
                train_instance = self.randomized_imgs[total_count]

                # augment input image and fix object's position and size
                img, all_objs = self.aug_image(train_instance,
                                               jitter=self.jitter)

                # Pixel size of one grid cell along each axis.
                cell_w = float(self.config['IMAGE_W']) / self.config['GRID_W']
                cell_h = float(self.config['IMAGE_H']) / self.config['GRID_H']

                # construct output from object's x, y, w, h
                true_box_index = 0

                for obj in all_objs:
                    if not (obj['xmax'] > obj['xmin']
                            and obj['ymax'] > obj['ymin']
                            and obj['name'] in self.config['LABELS']):
                        continue

                    center_x = .5 * (obj['xmin'] + obj['xmax']) / cell_w
                    center_y = .5 * (obj['ymin'] + obj['ymax']) / cell_h

                    grid_x = int(np.floor(center_x))
                    grid_y = int(np.floor(center_y))

                    # Skip centres outside the grid; a negative index would
                    # silently wrap to the opposite border.
                    if not (0 <= grid_x < self.config['GRID_W']
                            and 0 <= grid_y < self.config['GRID_H']):
                        continue

                    obj_indx = self.config['LABELS'].index(obj['name'])

                    center_w = (obj['xmax'] -
                                obj['xmin']) / cell_w  # unit: grid cell
                    # Height is scaled by the vertical cell size; scaling it
                    # by the horizontal one is wrong whenever
                    # IMAGE_H/GRID_H differs from IMAGE_W/GRID_W.
                    center_h = (obj['ymax'] -
                                obj['ymin']) / cell_h  # unit: grid cell

                    box = [center_x, center_y, center_w, center_h]

                    # find the anchor that best predicts this box
                    best_anchor = -1
                    max_iou = -1

                    shifted_box = BoundBox(0, 0, center_w, center_h)

                    for i, anchor in enumerate(self.anchors):
                        iou = bbox_iou(shifted_box, anchor)

                        if max_iou < iou:
                            best_anchor = i
                            max_iou = iou

                    # assign ground truth x, y, w, h, confidence and class
                    # index to y_batch
                    y_batch[batch_count, grid_y, grid_x, best_anchor,
                            0:4] = box
                    y_batch[batch_count, grid_y, grid_x, best_anchor,
                            4] = 1.
                    y_batch[batch_count, grid_y, grid_x, best_anchor,
                            5] = obj_indx

                    # assign the true box to b_batch
                    b_batch[batch_count, 0, 0, 0, true_box_index] = box

                    true_box_index += 1
                    true_box_index = true_box_index % self.config[
                        'TRUE_BOX_BUFFER']

                # assign input image to x_batch
                if self.norm:
                    x_batch[batch_count] = normalize(img)
                else:
                    x_batch[batch_count] = img

                    # plot image and bounding boxes for sanity check
                    for obj in all_objs:
                        if obj['xmax'] > obj['xmin'] and obj['ymax'] > obj[
                                'ymin']:
                            cv2.rectangle(img[:, :, ::-1],
                                          (obj['xmin'], obj['ymin']),
                                          (obj['xmax'], obj['ymax']),
                                          (255, 0, 0), 3)
                            cv2.putText(img[:, :, ::-1], obj['name'],
                                        (obj['xmin'] + 2, obj['ymin'] + 12), 0,
                                        1.2e-3 * img.shape[0], (0, 255, 0), 2)
                    plt.figure(figsize=(10, 10))
                    plt.imshow(img)
                    plt.show()

                # increase instance counter in current batch
                batch_count += 1

            total_count += 1
            if total_count >= num_img:
                total_count = 0
                if self.shuffle:
                    self.randomized_imgs = randomize_imgs(self.images)

            if batch_count >= self.config['BATCH_SIZE']:
                yield [x_batch, b_batch], y_batch

                # Start the next batch with fresh arrays. b_batch must be
                # re-allocated too: reusing it would mutate the array just
                # handed to the consumer and carry stale boxes forward.
                x_batch = np.zeros(
                    (self.config['BATCH_SIZE'], self.config['IMAGE_H'],
                     self.config['IMAGE_W'], 3))
                b_batch = np.zeros(
                    (self.config['BATCH_SIZE'], 1, 1, 1,
                     self.config['TRUE_BOX_BUFFER'], 4))
                y_batch = np.zeros(
                    (self.config['BATCH_SIZE'], self.config['GRID_H'],
                     self.config['GRID_W'], self.config['BOX'],
                     5 + self.config['CLASS']))

                batch_count = 0

                if self.shuffle:
                    self.randomized_imgs = randomize_imgs(self.images)
    def build_targets(self, pred_boxes, target, nH, nW):
        """Build the training masks and regression targets for a region layer.

        Args:
            pred_boxes: Predicted boxes, one per row, flattened so that row
                ``b * nA * nH * nW + a * nH * nW + j * nW + i`` belongs to
                sample ``b``, anchor ``a`` and grid cell ``(j, i)``.
            target: Label tensor with the batch on dim 0. Each sample is
                reshaped to rows of five values ``(class, x, y, w, h)``; the
                first row whose ``x`` equals 0 ends that sample's list.
            nH: Grid height.
            nW: Grid width.

        Returns:
            ``(nGT, nRecall, obj_mask, noobj_mask, coord_mask, tcoord, tconf,
            tcls)``. ``nGT`` counts the ground-truth boxes processed and
            ``nRecall`` those whose matched prediction has IoU above 0.5.
        """
        nB = target.size(0)
        nA = self.num_anchors
        noobj_mask = torch.ones(nB, nA, nH, nW)
        obj_mask = torch.zeros(nB, nA, nH, nW)
        coord_mask = torch.zeros(nB, nA, nH, nW)
        tcoord = torch.zeros(4, nB, nA, nH, nW)
        tconf = torch.zeros(nB, nA, nH, nW)
        tcls = torch.zeros(nB, nA, nH, nW)

        nAnchors = nA * nH * nW
        nPixels = nH * nW
        nGT = 0  # number of ground truth
        nRecall = 0
        # it works faster on CPU than on GPU.
        anchors = self.anchors.to("cpu")
        # Anchors as origin-anchored boxes. Identical for every ground truth,
        # so build the tensor once instead of once per label row.
        anchor_boxes = torch.cat((torch.zeros(nA, 2), anchors), 1).t()

        if self.seen < 12800:
            # Early in training every cell gets a weak pull of its x/y
            # offsets towards 0.5.
            tcoord[0].fill_(0.5)
            tcoord[1].fill_(0.5)
            coord_mask.fill_(0.01)
            # initial w, h == 0 means log(1)==0, s.t, anchor is equal to ground truth.

        for b in range(nB):
            cur_pred_boxes = pred_boxes[b * nAnchors:(b + 1) * nAnchors].t()
            cur_ious = torch.zeros(nAnchors)
            tbox = target[b].view(-1, 5).to("cpu")
            # At most 50 labels are read per sample, but never index past the
            # rows that actually exist.
            max_boxes = min(50, tbox.size(0))

            # Pass 1: best IoU of every prediction against any ground truth;
            # predictions above the threshold are excluded from the no-object
            # mask.
            for t in range(max_boxes):
                if tbox[t][1] == 0:
                    break
                gx, gw = [i * nW for i in (tbox[t][1], tbox[t][3])]
                gy, gh = [i * nH for i in (tbox[t][2], tbox[t][4])]
                cur_gt_boxes = torch.FloatTensor([gx, gy, gw, gh]).repeat(nAnchors, 1).t()
                cur_ious = torch.max(cur_ious, multi_bbox_ious(cur_pred_boxes, cur_gt_boxes, x1y1x2y2=False))
            ignore_ix = (cur_ious > self.thresh).view(nA, nH, nW)
            noobj_mask[b][ignore_ix] = 0

            # Pass 2: assign each ground truth to its best-matching anchor in
            # the cell that contains its centre.
            for t in range(max_boxes):
                if tbox[t][1] == 0:
                    break
                nGT += 1
                gx, gw = [i * nW for i in (tbox[t][1], tbox[t][3])]
                gy, gh = [i * nH for i in (tbox[t][2], tbox[t][4])]
                gw, gh = gw.float(), gh.float()
                gi, gj = int(gx), int(gy)

                tmp_gt_boxes = torch.FloatTensor([0, 0, gw, gh]).repeat(nA, 1).t()
                tmp_ious = multi_bbox_ious(anchor_boxes, tmp_gt_boxes, x1y1x2y2=False)
                best_iou, best_n = torch.max(tmp_ious, 0)

                if self.anchor_step == 4:  # this part is not tested.
                    tmp_ious_mask = (tmp_ious == best_iou)
                    if tmp_ious_mask.sum() > 0:
                        gt_pos = torch.FloatTensor([gi, gj, gx, gy]).repeat(nA, 1).t()
                        an_pos = anchor_boxes[4:6]  # anchor_boxes are consisted of [0 0 aw ah ax ay]
                        dist = pow(((gt_pos[0] + an_pos[0]) - gt_pos[2]), 2) + pow(((gt_pos[1] + an_pos[1]) - gt_pos[3]), 2)
                        # Push anchors that do not share the best IoU out of
                        # contention. Use ~mask: "1 - mask" raises on bool
                        # tensors in current PyTorch.
                        dist[~tmp_ious_mask] = 10000
                        _, best_n = torch.min(dist, 0)

                gt_box = torch.FloatTensor([gx, gy, gw, gh])
                pred_box = pred_boxes[b * nAnchors + best_n * nPixels + gj * nW + gi]
                iou = bbox_iou(gt_box, pred_box, x1y1x2y2=False)

                obj_mask[b][best_n][gj][gi] = 1
                noobj_mask[b][best_n][gj][gi] = 0
                # Coordinate-loss weight: 2 minus the product of the raw label
                # width and height.
                coord_mask[b][best_n][gj][gi] = 2. - tbox[t][3] * tbox[t][4]
                tcoord[0][b][best_n][gj][gi] = gx - gi
                tcoord[1][b][best_n][gj][gi] = gy - gj
                tcoord[2][b][best_n][gj][gi] = math.log(gw / anchors[best_n][0])
                tcoord[3][b][best_n][gj][gi] = math.log(gh / anchors[best_n][1])
                tcls[b][best_n][gj][gi] = tbox[t][0]
                tconf[b][best_n][gj][gi] = iou if self.rescore else 1.
                if iou > 0.5:
                    nRecall += 1

        return nGT, nRecall, obj_mask, noobj_mask, coord_mask, tcoord, tconf, tcls
Beispiel #22
0
    def __getitem__(self, idx):
        """Build batch number ``idx``.

        Each entry of ``self.dictionaries`` in the batch window is augmented
        with ``self.aug_image`` and normalised once with ``self.normalize``.
        Every object with a positive extent and a label in ``self.labels`` is
        written into the grid cell containing its centre, under the anchor
        whose shape overlaps it best.

        Returns:
            ``([x_batch, b_batch], y_batch)``: the images, the per-image
            buffer of ground-truth boxes, and the per-cell / per-anchor
            targets. Box values are expressed in grid-cell units. The arrays
            always have ``BATCH_SIZE`` rows; rows past the end of the data
            stay zero.
        """
        x_batch = np.zeros(
            (BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, self.n_channels))
        b_batch = np.zeros((BATCH_SIZE, 1, 1, 1, self.max_obj, 4))

        y_batch = np.zeros(
            (BATCH_SIZE, GRID_H, GRID_W, self.nb_anchors,
             4 + 1 + self.num_classes()))  # desired network output

        current_batch = self.dictionaries[idx * BATCH_SIZE:(idx + 1) *
                                          BATCH_SIZE]

        # Pixel size of one grid cell along each axis.
        cell_w = float(IMAGE_SIZE) / GRID_W
        cell_h = float(IMAGE_SIZE) / GRID_H

        for instance_num, instance in enumerate(current_batch):
            img, object_annotations = self.aug_image(instance,
                                                     jitter=self.jitter)

            # Normalise and store the image exactly once per instance. Doing
            # this inside the object loop re-normalised images holding
            # several objects and left images without a valid object blank.
            x_batch[instance_num] = self.normalize(img)

            obj_num = 0

            for obj in object_annotations:
                if not (obj['xmax'] > obj['xmin']
                        and obj['ymax'] > obj['ymin']
                        and obj['name'] in self.labels):
                    continue

                # Box centre, scaled from pixels to grid-cell units.
                center_x = .5 * (obj['xmin'] + obj['xmax']) / cell_w
                center_y = .5 * (obj['ymin'] + obj['ymax']) / cell_h

                # Cell that contains the centre.
                grid_x = int(np.floor(center_x))
                grid_y = int(np.floor(center_y))

                # Skip centres outside the grid; a negative index would
                # silently wrap to the opposite border.
                if not (0 <= grid_x < GRID_W and 0 <= grid_y < GRID_H):
                    continue

                center_w = (obj['xmax'] - obj['xmin']) / cell_w
                center_h = (obj['ymax'] - obj['ymin']) / cell_h

                box = [center_x, center_y, center_w, center_h]

                # find the anchor that best predicts this box
                best_anchor = -1
                max_iou = -1

                shifted_box = [0, 0, center_w, center_h]

                for i, anchor in enumerate(self.anchors):
                    iou = bbox_iou(shifted_box, anchor)

                    if max_iou < iou:
                        best_anchor = i
                        max_iou = iou

                b_batch[instance_num, 0, 0, 0, obj_num] = box
                y_batch[instance_num, grid_y, grid_x, best_anchor,
                        0:4] = box
                y_batch[instance_num, grid_y, grid_x, best_anchor,
                        4] = 1.
                # NOTE(review): the class flag always lands in slot 5,
                # whatever obj['name'] is, although the last axis reserves
                # num_classes() slots. Fine for a single class; confirm the
                # intent for multi-class data.
                y_batch[instance_num, grid_y, grid_x, best_anchor,
                        5] = 1

                # The true-box buffer is circular: extra boxes overwrite the
                # oldest slots.
                obj_num += 1
                obj_num %= self.max_obj

        return [x_batch, b_batch], y_batch
Beispiel #23
0
    def val(self):
        """Return the mean per-image average precision over the first 10 validation batches.

        Each batch is run through ``self.net`` and
        ``utils.non_max_suppression``. For every image the detections are
        ranked by the value in column 4 and matched against the annotations
        whose width is non-zero; a detection counts as correct when its best
        overlapping annotation passes ``self.iou_thres``, has the same class
        and has not been matched before.
        """
        ap_scores = []
        for batch_idx, (images, targets) in enumerate(self.val_data_loader):
            if batch_idx == 10:
                break
            images = self.to_var(images)
            targets = self.to_var(targets)

            with torch.no_grad():
                raw_output = self.net(images)
                output = utils.non_max_suppression(raw_output,
                                                   80,
                                                   conf_thres=self.conf_thres,
                                                   nms_thres=self.nms_thres)

            for sample_i in range(targets.size(0)):
                # Rows with zero width are padding, not real annotations.
                annotations = targets[sample_i, targets[sample_i, :, 3] != 0]
                n_annotations = annotations.size(0)
                detections = output[sample_i]

                if detections is None:
                    # Nothing detected: score 0 if there was something to
                    # find, otherwise skip the image entirely.
                    if n_annotations != 0:
                        ap_scores.append(0)
                    continue

                # Rank detections by decreasing score.
                detections = detections[np.argsort(-detections[:, 4])]

                if n_annotations == 0:
                    # Every detection on an empty image is a false positive.
                    hits = [0] * len(detections)
                else:
                    hits = []
                    # Annotations are (class, cx, cy, w, h); convert them to
                    # corner form and scale to pixels.
                    target_boxes = torch.FloatTensor(annotations[:, 1:].shape)
                    half_w = annotations[:, 3] / 2
                    half_h = annotations[:, 4] / 2
                    target_boxes[:, 0] = (annotations[:, 1] - half_w)
                    target_boxes[:, 1] = (annotations[:, 2] - half_h)
                    target_boxes[:, 2] = (annotations[:, 1] + half_w)
                    target_boxes[:, 3] = (annotations[:, 2] + half_h)
                    target_boxes *= self.image_size

                    matched = []
                    for *pred_bbox, conf, obj_conf, obj_pred in detections:
                        pred_bbox = torch.FloatTensor(pred_bbox).view(1, -1)
                        iou = utils.bbox_iou(pred_bbox, target_boxes)
                        best_i = np.argmax(iou)
                        # Correct only if overlap, class and first-match
                        # conditions all hold.
                        if (iou[best_i] > self.iou_thres
                                and obj_pred == annotations[best_i, 0]
                                and best_i not in matched):
                            hits.append(1)
                            matched.append(best_i)
                        else:
                            hits.append(0)

                # Cumulative true / false positives down the ranking.
                hit_flags = np.array(hits)
                miss_flags = 1 - hit_flags
                false_positives = np.cumsum(miss_flags)
                true_positives = np.cumsum(hit_flags)

                # Recall and precision at every rank.
                if n_annotations:
                    recall = true_positives / n_annotations
                else:
                    recall = true_positives
                precision = true_positives / np.maximum(
                    true_positives + false_positives,
                    np.finfo(np.float64).eps)

                ap_scores.append(utils.compute_ap(recall, precision))

        return np.mean(ap_scores)
Beispiel #24
0
    def __getitem__(self, idx):
        """Build batch number ``idx``.

        Each image is augmented with ``self.aug_image``. Every object with a
        positive extent and a label listed in ``config['LABELS']`` is written
        into the grid cell containing its centre, under the anchor whose
        shape overlaps it best.

        Returns:
            ``([x_batch, b_batch], y_batch)``: the images (3 channels when
            ``config['IMAGE_C']`` is 3, otherwise 1), the per-image buffer of
            ground-truth boxes, and the per-cell / per-anchor targets laid out
            as ``(x, y, w, h, objectness, one-hot class)``. Box values are
            expressed in grid-cell units.
        """
        config = self.config
        batch_size = config['BATCH_SIZE']
        l_bound = idx * batch_size
        r_bound = (idx + 1) * batch_size

        if r_bound > len(self.images):
            # Final batch: move the window back rather than shrinking it.
            r_bound = len(self.images)
            l_bound = r_bound - batch_size

        n_items = r_bound - l_bound
        n_channels = 3 if config['IMAGE_C'] == 3 else 1

        # Size of one grid cell in pixels, e.g. 416 / 13 = 32 for a 416 x 416
        # image on a 13 x 13 grid.
        cell_w = float(config['IMAGE_W']) / config['GRID_W']
        cell_h = float(config['IMAGE_H']) / config['GRID_H']

        # input images
        x_batch = np.zeros(
            (n_items, config['IMAGE_H'], config['IMAGE_W'], n_channels))
        # buffer of TRUE_BOX_BUFFER ground-truth boxes per image
        b_batch = np.zeros((n_items, 1, 1, 1, config['TRUE_BOX_BUFFER'], 4))
        # desired network output: grid x anchors x (4 box values + confidence
        # + one slot per label)
        y_batch = np.zeros((n_items, config['GRID_H'], config['GRID_W'],
                            config['BOX'], 4 + 1 + len(config['LABELS'])))

        for instance_count, train_instance in enumerate(
                self.images[l_bound:r_bound]):
            # augment input image and fix object's position and size
            img, all_objs = self.aug_image(train_instance, jitter=self.jitter)

            # construct output from object's x, y, w, h
            true_box_index = 0

            for obj in all_objs:
                if not (obj['xmax'] > obj['xmin']
                        and obj['ymax'] > obj['ymin']
                        and obj['name'] in config['LABELS']):
                    continue

                # Box centre, scaled from pixels to grid-cell units.
                center_x = .5 * (obj['xmin'] + obj['xmax']) / cell_w
                center_y = .5 * (obj['ymin'] + obj['ymax']) / cell_h

                # Grid cell that contains the centre of the true box.
                grid_x = int(np.floor(center_x))
                grid_y = int(np.floor(center_y))

                # Skip centres outside the grid. The lower bound matters: a
                # negative index would silently wrap to the opposite border.
                if not (0 <= grid_x < config['GRID_W']
                        and 0 <= grid_y < config['GRID_H']):
                    continue

                obj_indx = config['LABELS'].index(obj['name'])

                # Width and height in grid-cell units.
                center_w = (obj['xmax'] - obj['xmin']) / cell_w
                center_h = (obj['ymax'] - obj['ymin']) / cell_h

                box = [center_x, center_y, center_w, center_h]

                # find the anchor that best predicts this box
                best_anchor = -1
                max_iou = -1

                shifted_box = BoundBox(0, 0, center_w, center_h)

                for i, anchor in enumerate(self.anchors):
                    iou = bbox_iou(shifted_box, anchor)

                    if max_iou < iou:
                        best_anchor = i
                        max_iou = iou

                # assign ground truth x, y, w, h, confidence and class probs
                # to y_batch
                y_batch[instance_count, grid_y, grid_x, best_anchor, 0:4] = box
                y_batch[instance_count, grid_y, grid_x, best_anchor, 4] = 1.
                y_batch[instance_count, grid_y, grid_x, best_anchor,
                        5 + obj_indx] = 1

                # assign the true box to b_batch; the buffer is circular
                b_batch[instance_count, 0, 0, 0, true_box_index] = box
                true_box_index = (true_box_index +
                                  1) % config['TRUE_BOX_BUFFER']

            # assign input image to x_batch
            if self.norm is not None:
                x_batch[instance_count] = self.norm(img)
            else:
                # plot image and bounding boxes for sanity check
                for obj in all_objs:
                    if obj['xmax'] > obj['xmin'] and obj['ymax'] > obj['ymin']:
                        cv2.rectangle(img[:, :, ::-1],
                                      (obj['xmin'], obj['ymin']),
                                      (obj['xmax'], obj['ymax']),
                                      (255, 0, 0), 3)
                        cv2.putText(img[:, :, ::-1], obj['name'],
                                    (obj['xmin'] + 2, obj['ymin'] + 12),
                                    0, 1.2e-3 * img.shape[0],
                                    (0, 255, 0), 2)

                x_batch[instance_count] = img

        return [x_batch, b_batch], y_batch
Beispiel #25
0
    def __getitem__(self, idx):
        """Build the network inputs and dummy targets for one instance.

        The instance at ``idx`` is augmented, each ground-truth box is matched
        to the anchor with the highest IoU, and the box is encoded into the
        YOLO output map that owns that anchor.

        Args:
            idx: Index into ``self.instances``.

        Returns:
            ``([x_batch, gt_batch, yolo_1, yolo_2, yolo_3],
            [dummy_yolo_1, dummy_yolo_2, dummy_yolo_3])``. ``x_batch`` is the
            augmented image with a leading batch axis, passed once through
            ``self.norm``; ``gt_batch`` holds up to ``self.max_box_per_image``
            raw boxes; the ``yolo_*`` arrays are the encoded targets at 1x, 2x
            and 4x the base grid; the dummies are ``(1, 1)`` zero arrays.
        """
        train_instance = self.instances[idx]
        # augment input image and fix object's position and size
        augmented = self._aug_image(train_instance)

        net_h, net_w = augmented['image'].shape[0:2]
        x_batch = np.expand_dims(augmented['image'], 0)

        base_grid_h = net_h // self.downsample
        base_grid_w = net_w // self.downsample

        # buffer of ground-truth boxes
        gt_batch = np.zeros((1, 1, 1, 1, self.max_box_per_image, 4))

        # initialize the desired network outputs, one per grid resolution
        anchor_slots = len(self.anchors) // 3
        cell_depth = 4 + 1 + len(self.labels)
        yolo_1 = np.zeros(
            (1, 1 * base_grid_h, 1 * base_grid_w, anchor_slots, cell_depth))
        yolo_2 = np.zeros(
            (1, 2 * base_grid_h, 2 * base_grid_w, anchor_slots, cell_depth))
        yolo_3 = np.zeros(
            (1, 4 * base_grid_h, 4 * base_grid_w, anchor_slots, cell_depth))
        # anchor index // 3 selects the map, so the finest grid comes first
        yolos = [yolo_3, yolo_2, yolo_1]

        dummy_yolo_1 = np.zeros((1, 1))
        dummy_yolo_2 = np.zeros((1, 1))
        dummy_yolo_3 = np.zeros((1, 1))

        true_box_index = 0

        for idx2, (xmin, ymin, xmax, ymax) in enumerate(augmented['bboxes']):
            xmin = int(xmin)
            xmax = int(xmax)
            ymin = int(ymin)
            ymax = int(ymax)

            # find the best anchor box for this object; only the box size
            # matters, so compare with the box shifted to the origin
            max_anchor = None
            max_index = -1
            max_iou = -1

            shifted_box = BoundBox(0, 0, xmax - xmin, ymax - ymin)

            for i, anchor in enumerate(self.anchors):
                iou = bbox_iou(shifted_box, anchor)

                if max_iou < iou:
                    max_anchor = anchor
                    max_index = i
                    max_iou = iou

            # determine the yolo to be responsible for this bounding box
            yolo = yolos[max_index // 3]
            grid_h, grid_w = yolo.shape[1:3]

            # determine the position of the bounding box on the grid
            center_x = .5 * (xmin + xmax)
            center_x = center_x / float(net_w) * grid_w  # sigma(t_x) + c_x
            center_y = .5 * (ymin + ymax)
            center_y = center_y / float(net_h) * grid_h  # sigma(t_y) + c_y

            # determine the sizes of the bounding box
            w = np.log((xmax - xmin) / float(max_anchor.xmax))  # t_w
            h = np.log((ymax - ymin) / float(max_anchor.ymax))  # t_h

            box = [center_x, center_y, w, h]

            # determine the index of the label
            obj_indx = augmented['category_id'][idx2]

            # determine the location of the cell responsible for this object
            grid_x = int(np.floor(center_x))
            grid_y = int(np.floor(center_y))

            # assign ground truth x, y, w, h, confidence and class probs;
            # the slot is cleared first so a later box replaces an earlier one
            anchor_slot = max_index % 3
            yolo[0, grid_y, grid_x, anchor_slot] = 0
            yolo[0, grid_y, grid_x, anchor_slot, 0:4] = box
            yolo[0, grid_y, grid_x, anchor_slot, 4] = 1.
            yolo[0, grid_y, grid_x, anchor_slot, 5 + obj_indx] = 1

            # assign the true box to gt_batch; the buffer wraps around once
            # max_box_per_image boxes have been written
            true_box = [center_x, center_y, xmax - xmin, ymax - ymin]
            gt_batch[0, 0, 0, 0, true_box_index] = true_box

            true_box_index += 1
            true_box_index = true_box_index % self.max_box_per_image

        # Normalise the image exactly once, independent of the number of
        # boxes (doing it inside the loop re-normalised it for every box and
        # skipped it entirely for images without boxes).
        x_batch = self.norm(x_batch)

        return [x_batch, gt_batch, yolo_1, yolo_2,
                yolo_3], [dummy_yolo_1, dummy_yolo_2, dummy_yolo_3]
Beispiel #26
0
    def __getitem__(self, idx):
        """Return ``[x_batch, b_batch], y_batch`` for the image at ``idx``.

        The image is augmented, every valid box whose category is listed in
        ``self.config['LABELS']`` is converted to grid-cell units, matched to
        the anchor with the highest IoU and written into ``y_batch``; the
        same box is also stored in the ``b_batch`` buffer.

        Args:
            idx: Index into ``self.images``.

        Returns:
            ``[x_batch, b_batch], y_batch`` where ``x_batch`` is the
            normalised image with a leading batch axis.
        """
        cfg = self.config
        sample = self.aug_image(self.images[idx])
        image = sample['image']
        x_batch = np.expand_dims(image, 0)
        net_h, net_w = image.shape[0:2]

        # buffer of up to TRUE_BOX_BUFFER ground-truth boxes
        b_batch = np.zeros((1, 1, 1, 1, cfg['TRUE_BOX_BUFFER'], 4))
        # desired network output
        y_batch = np.zeros((1, net_h // 32, net_w // 32, cfg['BOX'],
                            4 + 1 + len(cfg['LABELS'])))

        next_slot = 0

        for box_no, (xmin, ymin, xmax, ymax) in enumerate(sample['bboxes']):
            # skip degenerate boxes
            if not (xmax > xmin and ymax > ymin):
                continue
            # skip categories that are not configured
            category = sample['category_id'][box_no]
            if category not in cfg['LABELS']:
                continue

            # size of one grid cell in pixels
            cell_w = float(cfg['IMAGE_W']) / cfg['GRID_W']
            cell_h = float(cfg['IMAGE_H']) / cfg['GRID_H']

            # box centre in grid-cell units
            center_x = .5 * (xmin + xmax) / cell_w
            center_y = .5 * (ymin + ymax) / cell_h

            grid_x = int(np.floor(center_x))
            grid_y = int(np.floor(center_y))

            # skip boxes whose centre lies past the last grid cell
            if grid_x >= cfg['GRID_W'] or grid_y >= cfg['GRID_H']:
                continue

            obj_indx = category

            # box size in grid-cell units
            center_w = (xmax - xmin) / cell_w
            center_h = (ymax - ymin) / cell_h

            box = [center_x, center_y, center_w, center_h]

            # pick the anchor with the highest IoU against the box shifted
            # to the origin (first one wins on ties)
            best_anchor = -1
            best_iou = -1
            probe = BoundBox(0, 0, center_w, center_h)
            for anchor_no, anchor in enumerate(self.anchors):
                overlap = bbox_iou(probe, anchor)
                if best_iou < overlap:
                    best_anchor = anchor_no
                    best_iou = overlap

            # ground truth x, y, w, h, confidence and class one-hot
            cell = y_batch[0, grid_y, grid_x, best_anchor]
            cell[0:4] = box
            cell[4] = 1.
            cell[5 + obj_indx] = 1

            # remember the raw box; the buffer wraps around when full
            b_batch[0, 0, 0, 0, next_slot] = box
            next_slot = (next_slot + 1) % cfg['TRUE_BOX_BUFFER']

        # normalise the input image
        x_batch = self.norm(x_batch)

        return [x_batch, b_batch], y_batch
Beispiel #27
0
    def __getitem__(self, idx):
        """Assemble batch number ``idx`` for the three-scale YOLO network.

        Each instance in the batch is augmented to the network size returned
        by ``self._get_net_size(idx)``; every object is matched to the anchor
        with the highest IoU and encoded into the output map owning it.

        Args:
            idx: Batch index.

        Returns:
            ``([x_batch, gt_batch, yolo_1, yolo_2, yolo_3],
            [dummy_yolo_1, dummy_yolo_2, dummy_yolo_3])``; the first axis of
            every array is the number of instances in the batch.
        """
        # network input size for this batch
        net_h, net_w = self._get_net_size(idx)
        base_grid_h = net_h // self.downsample
        base_grid_w = net_w // self.downsample

        # determine the first and the last indices of the batch
        l_bound = idx * self.batch_size
        r_bound = (idx + 1) * self.batch_size

        if r_bound > len(self.instances):
            # Last batch: slide the window back so it stays full. Clamp at 0
            # so a dataset smaller than one batch does not produce a negative
            # start index (which would slice from the end of the list and
            # leave rows of the batch empty).
            r_bound = len(self.instances)
            l_bound = max(0, r_bound - self.batch_size)

        batch_len = r_bound - l_bound

        x_batch = np.zeros((batch_len, net_h, net_w, 3))  # input images
        # buffer of ground-truth boxes
        gt_batch = np.zeros((batch_len, 1, 1, 1, self.max_box_per_image, 4))

        # initialize the desired network outputs, one per grid resolution
        anchor_slots = len(self.anchors) // 3
        cell_depth = 4 + 1 + len(self.labels)
        yolo_1 = np.zeros((batch_len, 1 * base_grid_h, 1 * base_grid_w,
                           anchor_slots, cell_depth))
        yolo_2 = np.zeros((batch_len, 2 * base_grid_h, 2 * base_grid_w,
                           anchor_slots, cell_depth))
        yolo_3 = np.zeros((batch_len, 4 * base_grid_h, 4 * base_grid_w,
                           anchor_slots, cell_depth))
        # anchor index // 3 selects the map, so the finest grid comes first
        yolos = [yolo_3, yolo_2, yolo_1]

        dummy_yolo_1 = np.zeros((batch_len, 1))
        dummy_yolo_2 = np.zeros((batch_len, 1))
        dummy_yolo_3 = np.zeros((batch_len, 1))

        # NOTE: the write position in the box buffer is shared by the whole
        # batch (it is not reset per image) and wraps at max_box_per_image.
        true_box_index = 0

        # do the logic to fill in the inputs and the output
        batch_instances = self.instances[l_bound:r_bound]
        for instance_count, train_instance in enumerate(batch_instances):
            # augment input image and fix object's position and size
            img, all_objs = self._aug_image(train_instance, net_h, net_w)

            for obj in all_objs:
                obj_w = obj['xmax'] - obj['xmin']
                obj_h = obj['ymax'] - obj['ymin']

                # find the best anchor box for this object; only the size
                # matters, so compare with the box shifted to the origin
                max_anchor = None
                max_index = -1
                max_iou = -1

                shifted_box = BoundBox(0, 0, obj_w, obj_h)

                for i, anchor in enumerate(self.anchors):
                    iou = bbox_iou(shifted_box, anchor)

                    if max_iou < iou:
                        max_anchor = anchor
                        max_index = i
                        max_iou = iou

                # determine the yolo to be responsible for this bounding box
                yolo = yolos[max_index // 3]
                grid_h, grid_w = yolo.shape[1:3]

                # determine the position of the bounding box on the grid
                center_x = .5 * (obj['xmin'] + obj['xmax'])
                center_x = center_x / float(net_w) * grid_w  # sigma(t_x) + c_x
                center_y = .5 * (obj['ymin'] + obj['ymax'])
                center_y = center_y / float(net_h) * grid_h  # sigma(t_y) + c_y

                # determine the sizes of the bounding box
                w = np.log(obj_w / float(max_anchor.xmax))  # t_w
                h = np.log(obj_h / float(max_anchor.ymax))  # t_h

                box = [center_x, center_y, w, h]

                # determine the index of the label
                obj_indx = self.labels.index(obj['name'])

                # determine the location of the cell responsible for this object
                grid_x = int(np.floor(center_x))
                grid_y = int(np.floor(center_y))

                # assign ground truth x, y, w, h, confidence and class probs;
                # the slot is cleared first so a later box replaces an
                # earlier one
                anchor_slot = max_index % 3
                yolo[instance_count, grid_y, grid_x, anchor_slot] = 0
                yolo[instance_count, grid_y, grid_x, anchor_slot, 0:4] = box
                yolo[instance_count, grid_y, grid_x, anchor_slot, 4] = 1.
                yolo[instance_count, grid_y, grid_x, anchor_slot,
                     5 + obj_indx] = 1

                # assign the true box to gt_batch
                true_box = [center_x, center_y, obj_w, obj_h]
                gt_batch[instance_count, 0, 0, 0, true_box_index] = true_box

                true_box_index += 1
                true_box_index = true_box_index % self.max_box_per_image

            # assign input image to x_batch
            if self.norm is not None:
                x_batch[instance_count] = self.norm(img)
            else:
                # plot image and bounding boxes for sanity check
                for obj in all_objs:
                    cv2.rectangle(img, (obj['xmin'], obj['ymin']),
                                  (obj['xmax'], obj['ymax']), (255, 0, 0), 3)
                    cv2.putText(img, obj['name'],
                                (obj['xmin'] + 2, obj['ymin'] + 12), 0,
                                1.2e-3 * img.shape[0], (0, 255, 0), 2)

                x_batch[instance_count] = img

        return [x_batch, gt_batch, yolo_1, yolo_2,
                yolo_3], [dummy_yolo_1, dummy_yolo_2, dummy_yolo_3]
Beispiel #28
0
    def build_target(self, pred_boxes, pred_conf, pred_cls, target, anchors,
                     num_anchors, num_classes, grid_size, ignore_thres,
                     img_dim):
        """Build per-cell training targets and masks from ground truth.

        Args:
          pred_boxes: Tensor read as ``pred_boxes[b, anchor, gj, gi]``;
            documented size (batchsize, num_anchors, grid_size, grid_size, 4).
          pred_conf: Tensor read as ``pred_conf[b, anchor, gj, gi]``.
          pred_cls: Tensor read as ``pred_cls[b, anchor, gj, gi]``; the argmax
            of that entry is taken as the predicted label.
          target: Tensor, size (batchsize, max_obj, 5). Column 0 is the class
            label, columns 1-4 are x, y, w, h and are multiplied by
            ``grid_size`` (presumably normalised to [0, 1] -- TODO confirm).
            Rows that sum to zero are padding and are skipped.
          anchors: sequence of (width, height) pairs, one per anchor.
          num_anchors: int, number of anchors.
          num_classes: int, number of classes.
          grid_size: width/height of the feature map the predict layer
            applies on.
          ignore_thres: anchors whose shape IoU with a ground-truth box
            exceeds this value are removed from ``conf_mask`` in that cell.
          img_dim: int, input image's width/height (not used here).
        Return:
          Tuple ``(nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf,
          tcls)``. ``nGT`` counts non-padding targets, ``nCorrect`` those
          whose best-anchor prediction has IoU > 0.5, the right label and
          confidence > 0.5. All tensors have size (batchsize, num_anchors,
          grid_size, grid_size); ``tcls`` has a trailing class axis.
        """
        nB = target.size(0)  # batchsize
        nA = num_anchors
        nC = num_classes
        nG = grid_size
        # mask is written as mask[b, best_n, gj, gi] below, so it needs the
        # full per-cell shape; a (nB, nA) tensor fails on the first target.
        mask = torch.zeros(nB, nA, nG, nG)
        conf_mask = torch.ones(nB, nA, nG, nG)
        tx = torch.zeros(nB, nA, nG, nG)
        ty = torch.zeros(nB, nA, nG, nG)
        tw = torch.zeros(nB, nA, nG, nG)
        th = torch.zeros(nB, nA, nG, nG)
        tconf = torch.ByteTensor(nB, nA, nG, nG).fill_(0)
        tcls = torch.ByteTensor(nB, nA, nG, nG, nC).fill_(0)

        # Anchor boxes placed at the origin: rows of (0, 0, w, h). They do
        # not depend on the target, so build them once.
        anchor_shapes = torch.FloatTensor(
            np.concatenate((np.zeros((len(anchors), 2)), np.array(anchors)),
                           1))

        nGT = 0
        nCorrect = 0
        for b in range(nB):
            for t in range(target.shape[1]):
                if target[b, t].sum() == 0:
                    # pad
                    continue
                nGT += 1
                # Scale the box to grid units
                gx = target[b, t, 1] * nG
                gy = target[b, t, 2] * nG
                gw = target[b, t, 3] * nG
                gh = target[b, t, 4] * nG
                # Get grid box indices
                gi = int(gx)
                gj = int(gy)
                # Shape of the gt box, placed at the origin
                gt_box = torch.FloatTensor(np.array([0, 0, gw,
                                                     gh])).unsqueeze(0)

                # Calculate iou between gt and anchor shapes
                anch_ious = bbox_iou(gt_box, anchor_shapes)
                # Where the overlap is larger than threshold set mask to
                # zero (ignore)
                conf_mask[b, anch_ious > ignore_thres, gj, gi] = 0
                # Find the best matching anchor box
                best_n = np.argmax(anch_ious)
                # Get ground truth box
                gt_box = torch.FloatTensor(np.array([gx, gy, gw,
                                                     gh])).unsqueeze(0)
                # Get the best prediction
                pred_box = pred_boxes[b, best_n, gj, gi].unsqueeze(0)
                # Masks: the best anchor is responsible for this object
                mask[b, best_n, gj, gi] = 1
                conf_mask[b, best_n, gj, gi] = 1
                # Coordinates: offset of the centre inside its cell
                tx[b, best_n, gj, gi] = gx - gi
                ty[b, best_n, gj, gi] = gy - gj
                # Width and height: log ratio to the anchor size; the
                # epsilon keeps log() finite for a zero-sized box
                tw[b, best_n, gj,
                   gi] = math.log(gw / anchors[best_n][0] + 1e-16)
                th[b, best_n, gj,
                   gi] = math.log(gh / anchors[best_n][1] + 1e-16)
                # One-hot encoding of label
                target_label = int(target[b, t, 0])
                tcls[b, best_n, gj, gi, target_label] = 1
                tconf[b, best_n, gj, gi] = 1

                # Calculate iou between ground truth and best matching
                # prediction
                iou = bbox_iou(gt_box, pred_box, x1y1x2y2=False)
                pred_label = torch.argmax(pred_cls[b, best_n, gj, gi])
                score = pred_conf[b, best_n, gj, gi]
                if iou > 0.5 and pred_label == target_label and score > 0.5:
                    nCorrect += 1

        return nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls
    def __getitem__(self, idx):
        """Assemble batch number ``idx`` for the single-scale YOLO network.

        Every image in the batch is augmented; each valid object whose name
        is in ``self.config['LABELS']`` is converted to grid-cell units,
        matched to the anchor with the highest IoU and written into
        ``y_batch``. The same box is stored in the per-image ``b_batch``
        buffer.

        Args:
            idx: Batch index.

        Returns:
            ``[x_batch, b_batch], y_batch``; the first axis of every array is
            the number of images in the batch.
        """
        l_bound = idx * self.config['BATCH_SIZE']
        r_bound = (idx + 1) * self.config['BATCH_SIZE']

        if r_bound > len(self.images):
            # Last batch: slide the window back so it stays full. Clamp at 0
            # so a dataset smaller than one batch does not produce a negative
            # start index (which would slice from the end of the list and
            # leave rows of the batch empty).
            r_bound = len(self.images)
            l_bound = max(0, r_bound - self.config['BATCH_SIZE'])

        batch_len = r_bound - l_bound

        # input images
        x_batch = np.zeros((batch_len, self.config['IMAGE_H'],
                            self.config['IMAGE_W'], 3))
        # buffer of up to TRUE_BOX_BUFFER ground-truth boxes per image
        b_batch = np.zeros(
            (batch_len, 1, 1, 1, self.config['TRUE_BOX_BUFFER'], 4))
        # desired network output
        y_batch = np.zeros(
            (batch_len, self.config['GRID_H'], self.config['GRID_W'],
             self.config['BOX'], 4 + 1 + len(self.config['LABELS'])))

        batch_images = self.images[l_bound:r_bound]
        for instance_count, train_instance in enumerate(batch_images):
            # augment input image and fix object's position and size
            img, all_objs = self.aug_image(train_instance, jitter=self.jitter)

            # construct output from object's x, y, w, h
            true_box_index = 0

            for obj in all_objs:
                if not (obj['xmax'] > obj['xmin']
                        and obj['ymax'] > obj['ymin']
                        and obj['name'] in self.config['LABELS']):
                    continue

                # size of one grid cell in pixels
                cell_w = float(self.config['IMAGE_W']) / self.config['GRID_W']
                cell_h = float(self.config['IMAGE_H']) / self.config['GRID_H']

                # box centre in grid-cell units
                center_x = .5 * (obj['xmin'] + obj['xmax']) / cell_w
                center_y = .5 * (obj['ymin'] + obj['ymax']) / cell_h

                grid_x = int(np.floor(center_x))
                grid_y = int(np.floor(center_y))

                # ignore objects whose centre lies past the last grid cell
                if not (grid_x < self.config['GRID_W']
                        and grid_y < self.config['GRID_H']):
                    continue

                obj_indx = self.config['LABELS'].index(obj['name'])

                center_w = (obj['xmax'] - obj['xmin']) / cell_w  # unit: grid cell
                center_h = (obj['ymax'] - obj['ymin']) / cell_h  # unit: grid cell

                box = [center_x, center_y, center_w, center_h]

                # find the anchor that best predicts this box; only the size
                # matters, so compare with the box shifted to the origin
                best_anchor = -1
                max_iou = -1

                shifted_box = BoundBox(0, 0, center_w, center_h)

                for i, anchor in enumerate(self.anchors):
                    iou = bbox_iou(shifted_box, anchor)

                    if max_iou < iou:
                        best_anchor = i
                        max_iou = iou

                # assign ground truth x, y, w, h, confidence and class probs
                # to y_batch
                y_batch[instance_count, grid_y, grid_x, best_anchor, 0:4] = box
                y_batch[instance_count, grid_y, grid_x, best_anchor, 4] = 1.
                y_batch[instance_count, grid_y, grid_x, best_anchor,
                        5 + obj_indx] = 1

                # assign the true box to b_batch; the buffer wraps when full
                b_batch[instance_count, 0, 0, 0, true_box_index] = box

                true_box_index += 1
                true_box_index = true_box_index % self.config['TRUE_BOX_BUFFER']

            # assign input image to x_batch
            if self.norm is not None:
                x_batch[instance_count] = self.norm(img)
            else:
                # plot image and bounding boxes for sanity check
                # NOTE(review): the drawing targets a channel-reversed view
                # of img; whether it reaches img itself depends on how cv2
                # handles non-contiguous views -- confirm.
                for obj in all_objs:
                    if obj['xmax'] > obj['xmin'] and obj['ymax'] > obj['ymin']:
                        cv2.rectangle(img[:, :, ::-1],
                                      (obj['xmin'], obj['ymin']),
                                      (obj['xmax'], obj['ymax']),
                                      (255, 0, 0), 3)
                        cv2.putText(img[:, :, ::-1], obj['name'],
                                    (obj['xmin'] + 2, obj['ymin'] + 12),
                                    0, 1.2e-3 * img.shape[0],
                                    (0, 255, 0), 2)

                x_batch[instance_count] = img

        return [x_batch, b_batch], y_batch
Beispiel #30
0
    def __getitem__(self, idx):
        """Assemble batch number ``idx`` with box, class and pose targets.

        Every image in the batch is augmented; each valid object whose name
        is in ``self.config['LABELS']`` is converted to grid-cell units,
        matched to the anchor with the highest IoU and written into
        ``y_batch`` together with its ``pose_x``/``pose_y``/``pose_z`` values.

        The last axis of ``y_batch`` is laid out as
        ``[x, y, w, h, confidence, <CLASS class slots>, pose_x, pose_y,
        pose_z]``.

        Args:
            idx: Batch index.

        Returns:
            ``[x_batch, b_batch], y_batch``; the first axis of every array is
            the number of images in the batch.
        """
        l_bound = idx * self.config['BATCH_SIZE']
        r_bound = (idx + 1) * self.config['BATCH_SIZE']

        if r_bound > len(self.images):
            # Last batch: slide the window back so it stays full. Clamp at 0
            # so a dataset smaller than one batch does not produce a negative
            # start index (which would slice from the end of the list and
            # leave rows of the batch empty).
            r_bound = len(self.images)
            l_bound = max(0, r_bound - self.config['BATCH_SIZE'])

        batch_len = r_bound - l_bound

        # input images
        x_batch = np.zeros((batch_len, self.config['IMAGE_H'],
                            self.config['IMAGE_W'], 3))
        # buffer of up to TRUE_BOX_BUFFER ground-truth boxes per image
        b_batch = np.zeros(
            (batch_len, 1, 1, 1, self.config['TRUE_BOX_BUFFER'], 4))
        # desired network output
        y_batch = np.zeros(
            (batch_len, self.config['GRID_H'], self.config['GRID_W'],
             self.config['BOX'], 4 + 1 + 3 + self.config['CLASS']))

        # The three pose values go right after the class slots. For
        # CLASS == 1 this is the slice 6:9, i.e. the former hard-coded "6:";
        # for more classes the hard-coded slice had the wrong length and
        # overlapped the class slots.
        pose_start = 5 + self.config['CLASS']
        pose_stop = pose_start + 3

        batch_images = self.images[l_bound:r_bound]
        for instance_count, train_instance in enumerate(batch_images):
            # augment input image and fix object's position and size
            img, all_objs = self.aug_image(train_instance, jitter=self.jitter)

            # construct output from object's x, y, w, h
            true_box_index = 0

            for obj in all_objs:
                if not (obj['xmax'] > obj['xmin']
                        and obj['ymax'] > obj['ymin']
                        and obj['name'] in self.config['LABELS']):
                    continue

                # size of one grid cell in pixels
                cell_w = float(self.config['IMAGE_W']) / self.config['GRID_W']
                cell_h = float(self.config['IMAGE_H']) / self.config['GRID_H']

                # box centre in grid-cell units
                center_x = .5 * (obj['xmin'] + obj['xmax']) / cell_w
                center_y = .5 * (obj['ymin'] + obj['ymax']) / cell_h

                grid_x = int(np.floor(center_x))
                grid_y = int(np.floor(center_y))

                # ignore objects whose centre lies past the last grid cell
                if not (grid_x < self.config['GRID_W']
                        and grid_y < self.config['GRID_H']):
                    continue

                obj_indx = self.config['LABELS'].index(obj['name'])

                center_w = (obj['xmax'] - obj['xmin']) / cell_w  # unit: grid cell
                center_h = (obj['ymax'] - obj['ymin']) / cell_h  # unit: grid cell

                box = [center_x, center_y, center_w, center_h]

                # find the anchor that best predicts this box; only the size
                # matters, so compare with the box shifted to the origin
                best_anchor = -1
                max_iou = -1

                shifted_box = BoundBox(0, 0, center_w, center_h)

                for i, anchor in enumerate(self.anchors):
                    iou = bbox_iou(shifted_box, anchor)

                    if max_iou < iou:
                        best_anchor = i
                        max_iou = iou

                # assign ground truth x, y, w, h, confidence, class probs
                # and pose to y_batch
                y_batch[instance_count, grid_y, grid_x, best_anchor,
                        0:4] = box
                y_batch[instance_count, grid_y, grid_x, best_anchor,
                        4] = 1.
                y_batch[instance_count, grid_y, grid_x, best_anchor,
                        5 + obj_indx] = 1
                y_batch[instance_count, grid_y, grid_x, best_anchor,
                        pose_start:pose_stop] = [
                            obj['pose_x'], obj['pose_y'], obj['pose_z']
                        ]

                # assign the true box to b_batch; the buffer wraps when full
                b_batch[instance_count, 0, 0, 0, true_box_index] = box

                true_box_index += 1
                true_box_index = true_box_index % self.config[
                    'TRUE_BOX_BUFFER']

            # assign input image to x_batch
            if self.norm is not None:
                x_batch[instance_count] = self.norm(img)
            else:
                # plot image and bounding boxes for sanity check
                # NOTE(review): the drawing targets a channel-reversed view
                # of img; whether it reaches img itself depends on how cv2
                # handles non-contiguous views -- confirm.
                for obj in all_objs:
                    if obj['xmax'] > obj['xmin'] and obj['ymax'] > obj['ymin']:
                        cv2.rectangle(img[:, :, ::-1],
                                      (obj['xmin'], obj['ymin']),
                                      (obj['xmax'], obj['ymax']), (255, 0, 0),
                                      3)
                        cv2.putText(img[:, :, ::-1], obj['name'],
                                    (obj['xmin'] + 2, obj['ymin'] + 12), 0,
                                    1.2e-3 * img.shape[0], (0, 255, 0), 2)

                x_batch[instance_count] = img

        return [x_batch, b_batch], y_batch
    def __call__(
        self,
        roi,
        bbox,
        label,
    ):
        """Sample RoIs and assign ground-truth labels and box offsets.

        At most ``self.n_sample`` RoIs are drawn from ``roi``. Up to
        ``round(self.n_sample * self.pos_ratio)`` of them are foreground
        (best IoU with a ground-truth box >= ``self.pos_iou_thresh``); the
        remainder is filled with background RoIs whose best IoU lies in
        ``[self.neg_iou_thresh_lo, self.neg_iou_thresh_hi)``. Each sampled
        RoI gets the label of its best-matching ground-truth box shifted by
        one (0 is reserved for background) and the offsets/scales computed by
        ``utils.bbox2loc`` towards that box.

        Unlike some reference implementations, the ground-truth boxes are
        not appended to ``roi`` here, and the offsets are returned without
        mean/std normalisation.

        Args:
            roi (array): Regions of interest to sample from, shape (R, 4).
            bbox (array): Ground-truth bounding boxes, shape (R', 4).
            label (array): Ground-truth labels, shape (R',), values in
                [0, L - 1] where L is the number of foreground classes.

        Returns:
            (array, array, array):

            * **sample_roi**: the sampled RoIs, shape (S, 4).
            * **gt_roi_loc**: offsets and scales that map each sampled RoI
              onto its ground-truth box, shape (S, 4).
            * **gt_roi_label**: labels of the sampled RoIs, shape (S,),
              values in [0, L]; 0 is background.

            S can be smaller than ``self.n_sample`` when too few RoIs
            qualify as either foreground or background.
        """
        # unpacking also rejects a bbox array that is not 2-D
        _num_gt, _ = bbox.shape

        # upper bound on the number of foreground samples
        max_positives = int(np.round(self.n_sample * self.pos_ratio))

        # IoU of every RoI (from the RPN) with every ground-truth box
        overlaps = utils.bbox_iou(roi, bbox)
        # per RoI: index of the best-matching ground-truth box ...
        gt_assignment = overlaps.argmax(axis=1)
        # ... and the IoU with it
        best_overlap = overlaps.max(axis=1)

        # label 0 is background, so every foreground class moves up by one
        gt_roi_label = label[gt_assignment] + 1

        # Foreground: RoIs with IoU >= pos_iou_thresh, randomly thinned to
        # the allowed maximum.
        pos_index = np.where(best_overlap >= self.pos_iou_thresh)[0]
        if len(pos_index) > max_positives:
            pos_index = np.random.choice(pos_index,
                                         size=max_positives,
                                         replace=False)
        num_pos = len(pos_index)

        # Background: RoIs with IoU in [neg_iou_thresh_lo, neg_iou_thresh_hi),
        # randomly thinned to whatever is left of the sample budget.
        below_hi = best_overlap < self.neg_iou_thresh_hi
        at_least_lo = best_overlap >= self.neg_iou_thresh_lo
        neg_index = np.where(below_hi & at_least_lo)[0]
        max_negatives = self.n_sample - num_pos
        if len(neg_index) > max_negatives:
            neg_index = np.random.choice(neg_index,
                                         size=max_negatives,
                                         replace=False)

        # Positives first, negatives after; positives keep their class label
        # and negatives are relabelled as background.
        keep_index = np.append(pos_index, neg_index)
        gt_roi_label = gt_roi_label[keep_index]
        gt_roi_label[num_pos:] = 0
        sample_roi = roi[keep_index]

        # Regression ground truth: the four corrections that move each
        # sampled RoI onto its assigned ground-truth box.
        matched_bbox = bbox[gt_assignment[keep_index]]
        gt_roi_loc = utils.bbox2loc(sample_roi, matched_bbox)

        return sample_roi, gt_roi_loc, gt_roi_label