Example #1
        def _compute_targets(ex_rois, gt_rois):
            """Compute bounding-box regression targets for an image."""

            assert ex_rois.shape[0] == gt_rois.shape[0]
            assert ex_rois.shape[1] == 4
            assert gt_rois.shape[1] >= 5

            # this bbox_transform expects torch tensors, so convert in and then back to numpy
            return bbox_transform(torch.from_numpy(ex_rois),
                                  torch.from_numpy(gt_rois[:, :4])).numpy()
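
All of these examples call some variant of bbox_transform. For orientation, a minimal NumPy sketch of the standard Faster R-CNN delta parameterization (an illustration of the usual convention, not the exact helper each snippet imports), assuming both inputs are (N, 4) corner boxes (x1, y1, x2, y2):

import numpy as np

def bbox_transform(ex_rois, gt_rois):
    # regression targets (dx, dy, dw, dh) mapping ex_rois onto gt_rois
    ex_w = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
    ex_h = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
    ex_cx = ex_rois[:, 0] + 0.5 * ex_w
    ex_cy = ex_rois[:, 1] + 0.5 * ex_h

    gt_w = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
    gt_h = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
    gt_cx = gt_rois[:, 0] + 0.5 * gt_w
    gt_cy = gt_rois[:, 1] + 0.5 * gt_h

    dx = (gt_cx - ex_cx) / ex_w   # normalized center offsets
    dy = (gt_cy - ex_cy) / ex_h
    dw = np.log(gt_w / ex_w)      # log-space size ratios
    dh = np.log(gt_h / ex_h)
    return np.vstack((dx, dy, dw, dh)).transpose()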
Example #2
def produce_batch(image_file, true_boxes):
    image = Image.open(image_file).resize((image_size, image_size),
                                          Image.NEAREST)
    data = asarray(image) / 255.0
    del image

    proposals, anchor_probs = generate_proposals(data)
    del data

    # Non-maximum suppression
    keep = py_cpu_nms(np.hstack((proposals, anchor_probs)), NSM_THRESHOLD)
    if post_nms_N > 0:
        keep = keep[:post_nms_N]
    proposals = proposals[keep, :]
    anchor_probs = anchor_probs[keep]

    # RCNN proposals
    #proposals = np.vstack( (proposals, true_boxes) )
    # enlarged_bboxes is assumed to be defined in the enclosing scope
    overlaps = bbox_overlaps(proposals, enlarged_bboxes)
    which_box = overlaps.argmax(axis=1)
    proposal_max_overlaps = overlaps.max(axis=1)

    # subsample foreground and background
    fg_inds = np.where(proposal_max_overlaps >= FG_THRESHOLD_RCNN)[0]
    fg_rois_in_image = min(int(BATCH_SIZE / (1 + BG_FG_FRAC_RCNN)),
                           fg_inds.size)
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=fg_rois_in_image, replace=False)

    bg_inds = np.where((proposal_max_overlaps < BG_THRESH_HI)
                       & (proposal_max_overlaps >= BG_THRESH_LO))[0]
    bg_rois_in_image = min(fg_rois_in_image, bg_inds.size)
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=bg_rois_in_image, replace=False)

    keep_inds = np.append(fg_inds, bg_inds)
    np.random.shuffle(keep_inds)

    # Select sampled values from various arrays:
    rois = proposals[keep_inds]  # The chosen rois
    # Scores of chosen rois (fg=1, bg=0)
    new_scores = np.zeros(len(proposals))
    new_scores[fg_inds] = 1
    roi_scores = new_scores[keep_inds].reshape(-1, 1)
    # targets
    targets = np.zeros((len(proposals), 4))
    targets[fg_inds] = bbox_transform(proposals[fg_inds],
                                      true_boxes[which_box[fg_inds]])
    targets = targets[keep_inds]

    return rois, targets, roi_scores
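
Several examples also depend on a bbox_overlaps helper returning the pairwise IoU matrix. A minimal NumPy sketch of that contract (the snippets usually import a compiled Cython version; this equivalent is only for reference):

import numpy as np

def bbox_overlaps(boxes, query_boxes):
    # pairwise IoU between boxes (N, 4) and query_boxes (K, 4) -> (N, K)
    area_b = ((boxes[:, 2] - boxes[:, 0] + 1) *
              (boxes[:, 3] - boxes[:, 1] + 1))
    area_q = ((query_boxes[:, 2] - query_boxes[:, 0] + 1) *
              (query_boxes[:, 3] - query_boxes[:, 1] + 1))
    # intersection width/height via broadcasting
    iw = (np.minimum(boxes[:, None, 2], query_boxes[None, :, 2]) -
          np.maximum(boxes[:, None, 0], query_boxes[None, :, 0]) + 1).clip(min=0)
    ih = (np.minimum(boxes[:, None, 3], query_boxes[None, :, 3]) -
          np.maximum(boxes[:, None, 1], query_boxes[None, :, 1]) + 1).clip(min=0)
    inter = iw * ih
    return inter / (area_b[:, None] + area_q[None, :] - inter)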
Example #3
def _compute_targets(ex_rois, gt_rois, labels):
    """Compute bounding-box regression targets for an image."""

    assert ex_rois.shape[0] == gt_rois.shape[0]
    assert ex_rois.shape[1] == 4
    assert gt_rois.shape[1] == 4

    targets = bbox_transform(ex_rois, gt_rois)
    if False:  # cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        # Optionally normalize targets by a precomputed mean and stdev
        targets = ((targets - np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS))
                   / np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS))
    return np.hstack(
        (labels[:, np.newaxis], targets)).astype(np.float32, copy=False)
Example #4
        def _compute_targets(ex_rois, gt_rois, label):
            """Compute bounding-box regression targets for an image."""
            # Inputs are torch tensors

            assert ex_rois.shape[0] == gt_rois.shape[0]
            assert ex_rois.shape[1] == 4
            assert gt_rois.shape[1] == 4

            targets = bbox_transform(ex_rois, gt_rois)
            if self.config['train_bbox_normalize_targets_precomputed']:
                # Optionally normalize targets by a precomputed mean and stdev
                means = self.config['train_bbox_normalize_means']
                stds = self.config['train_bbox_normalize_stds']
                targets = ((targets - targets.new(means)) / targets.new(stds))
            return torch.cat([label.unsqueeze(1), targets], 1)
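
When targets are normalized with precomputed means and stds as above, the predicted deltas have to be de-normalized before boxes are decoded at inference time. A small sketch of that inverse step (the function name is illustrative, not from the repo):

import torch

def denormalize_deltas(deltas, means, stds):
    # undo the (targets - means) / stds normalization applied during training
    means = deltas.new_tensor(means)
    stds = deltas.new_tensor(stds)
    return deltas * stds + means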
Example #5
def define_bbox(pred_bbox_delta, ANCHOR_BOX):
    delta_x, delta_y, delta_w, delta_h = torch.unbind(
        pred_bbox_delta, dim=2)
    # set_anchors(mc, scale)

    anchor_x = ANCHOR_BOX[:, 0]
    anchor_y = ANCHOR_BOX[:, 1]
    anchor_w = ANCHOR_BOX[:, 2]
    anchor_h = ANCHOR_BOX[:, 3]

    box_center_x = anchor_x + delta_x * anchor_w
    box_center_y = anchor_y + delta_y * anchor_h
    # box_width = anchor_w * util.safe_exp(delta_w, EXP_THRESH)
    # box_height = anchor_h * util.safe_exp(delta_h, EXP_THRESH)
    box_width = anchor_w * torch.exp(delta_w)
    box_height = anchor_h * torch.exp(delta_h)

    xmins, ymins, xmaxs, ymaxs = util.bbox_transform(
        [box_center_x, box_center_y, box_width, box_height])

    # the clipping below is done in NumPy on the CPU side
    xmins = xmins.cpu().detach().numpy()
    ymins = ymins.cpu().detach().numpy()
    xmaxs = xmaxs.cpu().detach().numpy()
    ymaxs = ymaxs.cpu().detach().numpy()

    # The max x position is IMAGE_WIDTH - 1 since we use zero-based
    # pixel coordinates. Same for y.
    xmins = np.minimum(np.maximum(0.0, xmins), IMAGE_WIDTH - 1.0)
    ymins = np.minimum(np.maximum(0.0, ymins), IMAGE_HEIGHT - 1.0)
    xmaxs = np.maximum(np.minimum(IMAGE_WIDTH - 1.0, xmaxs), 0.0)
    ymaxs = np.maximum(np.minimum(IMAGE_HEIGHT - 1.0, ymaxs), 0.0)

    det_boxes = torch.transpose(
        torch.stack(util.bbox_transform_inv(
            torch.from_numpy(np.stack([xmins, ymins, xmaxs, ymaxs])).float())),
        1, 2)  # this transpose is not needed for a hardware implementation
    return det_boxes
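
Note that util.bbox_transform here follows SqueezeDet's center-size convention rather than the delta parameterization sketched under Example #1, and the commented-out util.safe_exp caps the exponent so large deltas cannot overflow. A rough sketch of what such helpers typically look like (an assumption modeled on the SqueezeDet utilities, not verified against this repo's util module):

import torch

def bbox_transform(bbox):
    # [cx, cy, w, h] -> [xmin, ymin, xmax, ymax]
    cx, cy, w, h = bbox
    return [cx - 0.5 * w, cy - 0.5 * h, cx + 0.5 * w, cy + 0.5 * h]

def bbox_transform_inv(bbox):
    # [xmin, ymin, xmax, ymax] -> [cx, cy, w, h]
    xmin, ymin, xmax, ymax = bbox
    w = xmax - xmin + 1.0
    h = ymax - ymin + 1.0
    return [xmin + 0.5 * w, ymin + 0.5 * h, w, h]

def safe_exp(w, thresh):
    # exp(w), linearized above `thresh` so very large deltas cannot overflow
    return torch.where(w > thresh,
                       torch.exp(torch.tensor(thresh)) * (w - thresh + 1.0),
                       torch.exp(w))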
Example #6
    def get_rpn_targets(self, targets):
        """
        :param targets: (N, x1, y1, x2, y1, C) targets
        :return: rpn_labels (batch_size, 1), rpn_bbox_targets (batch_size, 4), keep (batch_size)
        indices at which the batch was sampled)
        """
        all_anchor_boxes = self.all_anchor_boxes
        # anchor_boxes = self.filter_anchor_boxes(all_anchor_boxes)
        overlaps = get_overlaps(all_anchor_boxes, targets)
        labels = self.get_anchor_box_labels(overlaps)
        batch_labels, batch_anchor_boxes, batch_overlaps, keep = self.sample_batch(
            labels, all_anchor_boxes, overlaps)

        # assign anchors to targets
        anchor_assignments = np.argmax(batch_overlaps, axis=1)

        # compute target bbox deltas for rpn regressor head (256, 4)
        bbox_targets = bbox_transform(batch_anchor_boxes,
                                      targets[anchor_assignments])
        bbox_targets = torch.from_numpy(bbox_targets)
        rpn_labels = torch.from_numpy(batch_labels).long()  # loss expects int64 labels
        batch_indices = torch.from_numpy(keep).long()
        return rpn_labels, bbox_targets, batch_indices  # rpn indices to keep for loss
Example #7
    def get_targets(self, proposal_boxes, targets):
        """
        Arguments:
            proposal_boxes (Tensor) : (# proposal boxes , 4)
            targets: (N, 5)

        Return:
            labels (Ndarray) : (256,)
            bbox_deltas[:, :-1] : (256, 4)
            batch_indices for targets that were sampled
        """
        if not self.test:
            height, width = self.feature_map_dim[2:]
            indices = filter_cross_boundary_boxes(proposal_boxes,
                                                  (height * 16, width * 16))
            proposal_boxes = proposal_boxes[indices]
        targets_batch, proposals_batch, batch_indices = self.foreground_sample(
            proposal_boxes, targets)
        bbox_deltas = bbox_transform(proposals_batch, targets_batch)
        labels_batch = targets_batch[:, -1]
        labels_batch = torch.from_numpy(labels_batch).long()
        bbox_deltas = torch.from_numpy(bbox_deltas).float()
        batch_indices = torch.from_numpy(batch_indices).long()
        return labels_batch, bbox_deltas, batch_indices
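
filter_cross_boundary_boxes is not shown in this snippet; a plausible NumPy sketch of what it does, inferred from the call above (a hypothetical reconstruction, assuming array inputs):

import numpy as np

def filter_cross_boundary_boxes(boxes, im_shape):
    # indices of (x1, y1, x2, y2) boxes lying completely inside an (H, W) image
    h, w = im_shape
    return np.where((boxes[:, 0] >= 0) & (boxes[:, 1] >= 0) &
                    (boxes[:, 2] < w) & (boxes[:, 3] < h))[0]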
Example #8
def produce_batch(filepath, gt_boxes, scale):
    img = load_img(filepath)
    img_width = np.shape(img)[1] * scale[1]
    img_height = np.shape(img)[0] * scale[0]
    img = img.resize((int(img_width), int(img_height)))
    # feed the image to the pretrained model and get the feature map
    img = img_to_array(img)
    img = np.expand_dims(img, axis=0)
    feature_map = pretrained_model.predict(img)
    height = np.shape(feature_map)[1]
    width = np.shape(feature_map)[2]
    num_feature_map = width * height
    # calculate output w, h strides
    w_stride = img_width / width
    h_stride = img_height / height
    # generate base anchors according to the output stride.
    # base anchors are 9 anchors w.r.t. a tile (0, 0, w_stride-1, h_stride-1)
    base_anchors = generate_anchors(w_stride, h_stride)
    # slice tiles according to image size and stride.
    # each 1x1x1532 feature-map cell maps to one tile.
    shift_x = np.arange(0, width) * w_stride
    shift_y = np.arange(0, height) * h_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()
    # apply base anchors to all tiles, giving num_feature_map*9 anchors.
    all_anchors = (base_anchors.reshape((1, 9, 4)) + shifts.reshape(
        (1, num_feature_map, 4)).transpose((1, 0, 2)))
    total_anchors = num_feature_map * 9
    all_anchors = all_anchors.reshape((total_anchors, 4))
    # only keep anchors inside the image + border.
    border = 0
    inds_inside = np.where((all_anchors[:, 0] >= -border)
                           & (all_anchors[:, 1] >= -border)
                           & (all_anchors[:, 2] < img_width + border)  # width
                           & (all_anchors[:, 3] < img_height + border)  # height
                           )[0]
    anchors = all_anchors[inds_inside]
    # calculate overlaps of each anchor with each gt box:
    # a matrix with shape [len(anchors) x len(gt_boxes)]
    overlaps = bbox_overlaps(anchors, gt_boxes)
    # find the gt box with the biggest overlap for each anchor,
    # and the overlap ratio. result (len(anchors),)
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    # find the anchor with the biggest overlap for each gt box,
    # and the overlap ratio. result (len(gt_boxes),)
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
    # labels: 1=fg / 0=bg / -1=ignore
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)
    # set positive labels, as defined in the paper, Sec. 3.1.2:
    # We assign a positive label to two kinds of anchors: (i) the
    # anchor/anchors with the highest Intersection-over-Union
    # (IoU) overlap with a ground-truth box, or (ii) an
    # anchor that has an IoU overlap higher than 0.7 with any gt box
    labels[gt_argmax_overlaps] = 1
    labels[max_overlaps >= .7] = 1
    # set negative labels
    labels[max_overlaps <= .3] = 0
    # subsample positive labels if we have too many
    #     num_fg = int(RPN_FG_FRACTION * RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    #     if len(fg_inds) > num_fg:
    #         disable_inds = npr.choice(
    #             fg_inds, size=(len(fg_inds) - num_fg), replace=False)
    #         labels[disable_inds] = -1
    # subsample negative labels if we have too many
    num_bg = int(len(fg_inds) * BG_FG_FRAC)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=(len(bg_inds) - num_bg),
                                  replace=False)
        labels[disable_inds] = -1
    # k is the number of anchors per feature-map location
    batch_inds = inds_inside[labels != -1]
    batch_inds = (batch_inds / k).astype(int)
    full_labels = unmap(labels, total_anchors, inds_inside, fill=-1)
    batch_label_targets = full_labels.reshape(-1, 1, 1, 1 * k)[batch_inds]
    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    # bbox_targets = bbox_transform(anchors, gt_boxes[argmax_overlaps, :]
    pos_anchors = all_anchors[inds_inside[labels == 1]]
    bbox_targets = bbox_transform(pos_anchors,
                                  gt_boxes[argmax_overlaps, :][labels == 1])
    bbox_targets = unmap(bbox_targets,
                         total_anchors,
                         inds_inside[labels == 1],
                         fill=0)
    batch_bbox_targets = bbox_targets.reshape(-1, 1, 1, 4 * k)[batch_inds]
    padded_fcmap = np.pad(feature_map, ((0, 0), (1, 1), (1, 1), (0, 0)),
                          mode='constant')
    padded_fcmap = np.squeeze(padded_fcmap)
    batch_tiles = []
    for ind in batch_inds:
        x = ind % width
        y = int(ind / width)
        fc_3x3 = padded_fcmap[y:y + 3, x:x + 3, :]
        batch_tiles.append(fc_3x3)
    return (np.asarray(batch_tiles), batch_label_targets.tolist(),
            batch_bbox_targets.tolist())
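
The unmap helper used above scatters values computed for the inside anchors back onto the full anchor set; a sketch matching the _unmap helper found in py-faster-rcnn:

import numpy as np

def unmap(data, count, inds, fill=0):
    # map a subset of items (data) back to the original set of size `count`
    if len(data.shape) == 1:
        ret = np.empty((count,), dtype=np.float32)
        ret.fill(fill)
        ret[inds] = data
    else:
        ret = np.empty((count,) + data.shape[1:], dtype=np.float32)
        ret.fill(fill)
        ret[inds, :] = data
    return ret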
Example #9
    def __anchor_target_layer(self, rpn_cls_score, gt_boxes, im_info,
                              feat_stride, anchor, A):
        allowed_border = 0
        total_anchors = anchor.shape[0]
        height, width = rpn_cls_score.shape[1:3]

        inds_inside = np.where((anchor[:, 0] >= allowed_border) &
                               (anchor[:, 1] >= allowed_border) &
                               (anchor[:, 2] < im_info[1] + allowed_border) &
                               (anchor[:, 3] < im_info[0] + allowed_border))[0]

        anchors = anchor[inds_inside, :]
        labels = np.empty((len(inds_inside),), dtype=np.float32)
        labels.fill(-1)

        overlaps = bbox_overlaps(anchors, gt_boxes)
        # best gt box for each anchor
        argmax_overlap = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlap]
        # best anchor for each gt box
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        labels[max_overlaps < self._threshold_for_label_zero] = 0
        labels[gt_argmax_overlaps] = 1
        labels[max_overlaps > self._threshold_for_label_one] = 1

        fg_index = np.where(labels == 1)[0]
        bg_index = np.where(labels == 0)[0]
        fg_index_len = len(fg_index)
        bg_index_len = len(bg_index)
        # keep a fixed 1:3 fg:bg ratio by disabling the surplus side
        if 3 * fg_index_len > bg_index_len:
            disable_inds = np.random.choice(
                fg_index, size=(3 * fg_index_len - bg_index_len), replace=False)
        else:
            disable_inds = np.random.choice(
                bg_index, size=(bg_index_len - 3 * fg_index_len), replace=False)
        labels[disable_inds] = -1

        bbox_targets = bbox_transform(anchors, gt_boxes[argmax_overlap, :])
        bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
        bbox_inside_weights[labels == 1, :] = np.array([1.0, 1.0, 1.0, 1.0])

        bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
        num_example = np.sum(labels >= 0)
        positive_weight = np.ones((1, 4)) * 1.0 / num_example
        negative_weight = np.ones((1, 4)) * 1.0 / num_example

        bbox_outside_weights[labels == 1, :] = positive_weight
        bbox_outside_weights[labels == 0, :] = negative_weight

        labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
        bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
        bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors,
                                     inds_inside, fill=0)
        bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors,
                                      inds_inside, fill=0)


        # labels
        labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
        labels = labels.reshape((1, 1, A * height, width))

        # bbox_targets
        bbox_targets = bbox_targets.reshape((1, height, width, A * 4))

        # bbox_inside_weights
        bbox_inside_weights = bbox_inside_weights.reshape((1, height, width, A * 4))

        # bbox_outside_weights
        bbox_outside_weights = bbox_outside_weights.reshape((1, height, width, A * 4))


        return labels, bbox_targets, bbox_inside_weights, bbox_outside_weights
Example #10
def produce_batch(feature_map, gt_boxes, h_w=None, category=None):
    height = np.shape(feature_map)[1]
    width = np.shape(feature_map)[2]
    num_feature_map = width * height

    w_stride = h_w[1] / width
    h_stride = h_w[0] / height
    #base anchors are 9 anchors wrt a tile (0,0,w_stride-1,h_stride-1)
    base_anchors = generate_anchors(w_stride, h_stride)
    shift_x = np.arange(0, width) * w_stride
    shift_y = np.arange(0, height) * h_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()

    all_anchors = (base_anchors.reshape((1, anchors_num, 4)) + shifts.reshape(
        (1, num_feature_map, 4)).transpose((1, 0, 2)))
    total_anchors = num_feature_map * anchors_num
    all_anchors = all_anchors.reshape((total_anchors, 4))
    # run the trained RPN to predict scores and deltas
    res = rpn_model.query_cnn(feature_map)
    scores = res[0]
    scores = scores.reshape(-1, 1)
    deltas = res[1]
    deltas = np.reshape(deltas, (-1, 4))
    # convert the deltas into absolute box coordinates and drop anchors outside the image
    proposals = bbox_transform_inv(all_anchors, deltas)
    proposals = clip_boxes(proposals, (h_w[0], h_w[1]))
    # remove small boxes; here the threshold is 40 pixels
    keep = filter_boxes(proposals, small_box_threshold)
    proposals = proposals[keep, :]
    scores = scores[keep]

    # sort scores and keep only the top 6000.
    pre_nms_topN = 6000
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]
    # apply NMS to the top 6000, then keep the top 300
    post_nms_topN = 300
    keep = py_cpu_nms(np.hstack((proposals, scores)), 0.7)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]
    # also add the ground-truth boxes to the proposals
    proposals = np.vstack((proposals, gt_boxes))
    # calculate overlaps of proposal and gt_boxes
    overlaps = bbox_overlaps(proposals, gt_boxes)
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    # labels = gt_labels[gt_assignment] #?

    # subsample
    fg_inds = np.where(max_overlaps >= FG_THRESH)[0]
    fg_rois_per_this_image = min(int(BATCH * FG_FRAC), fg_inds.size)
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds,
                             size=fg_rois_per_this_image,
                             replace=False)
    bg_inds = np.where((max_overlaps < BG_THRESH_HI)
                       & (max_overlaps >= BG_THRESH_LO))[0]
    bg_rois_per_this_image = BATCH - fg_rois_per_this_image
    bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds,
                             size=bg_rois_per_this_image,
                             replace=False)
    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    # labels = labels[keep_inds]
    rois = proposals[keep_inds]
    gt_rois = gt_boxes[gt_assignment[keep_inds]]
    targets = bbox_transform(rois, gt_rois)  # deltas from sampled rois to their gt boxes
    rois_num = targets.shape[0]
    batch_box = np.zeros((rois_num, 200, 4))
    for i in range(rois_num):
        batch_box[i, category] = targets[i]
    batch_box = np.reshape(batch_box, (rois_num, -1))
    # get gt category
    batch_categories = np.zeros((rois_num, 200, 1))
    for i in range(rois_num):
        batch_categories[i, category] = 1
    batch_categories = np.reshape(batch_categories, (rois_num, -1))
    return rois, batch_box, batch_categories
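
clip_boxes, used while decoding the proposals above, just clamps corner coordinates to the image; a sketch of the usual implementation (following py-faster-rcnn's version):

import numpy as np

def clip_boxes(boxes, im_shape):
    # clamp (x1, y1, x2, y2) boxes to lie inside an (H, W) image
    boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)  # x1
    boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)  # y1
    boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)  # x2
    boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)  # y2
    return boxes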
Example #11
def produce_batch(filepath, gt_boxes, h_w, category):
    img = load_img(filepath)
    # NOTE: 'scale' is not a parameter of this function; it is assumed to be
    # defined in the enclosing scope (compare Example #8, which takes it as an argument)
    img_width = np.shape(img)[1] * scale[1]
    img_height = np.shape(img)[0] * scale[0]
    img = img.resize((int(img_width), int(img_height)))
    # feed the image to the pretrained model and get the feature map
    img = img_to_array(img)
    img = np.expand_dims(img, axis=0)
    feature_map = pretrained_model.predict(img)
    height = np.shape(feature_map)[1]
    width = np.shape(feature_map)[2]
    num_feature_map = width * height
    # calculate output w, h strides
    w_stride = h_w[1] / width
    h_stride = h_w[0] / height
    # generate base anchors according to the output stride.
    # base anchors are 9 anchors w.r.t. a tile (0, 0, w_stride-1, h_stride-1)
    base_anchors = generate_anchors(w_stride, h_stride)
    # slice tiles according to image size and stride.
    # each 1x1x1532 feature-map cell maps to one tile.
    shift_x = np.arange(0, width) * w_stride
    shift_y = np.arange(0, height) * h_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()
    # apply base anchors to all tiles, giving num_feature_map*9 anchors.
    all_anchors = (base_anchors.reshape((1, 9, 4)) + shifts.reshape(
        (1, num_feature_map, 4)).transpose((1, 0, 2)))
    total_anchors = num_feature_map * 9
    all_anchors = all_anchors.reshape((total_anchors, 4))
    # feed feature map to pretrained RPN model, get proposal labels and bboxes.
    res = rpn_model.predict(feature_map)
    scores = res[0]
    scores = scores.reshape(-1, 1)
    deltas = res[1]
    deltas = np.reshape(deltas, (-1, 4))
    # transform proposals into bbox values (x1, y1, x2, y2)
    proposals = bbox_transform_inv(all_anchors, deltas)
    proposals = clip_boxes(proposals, (h_w[0], h_w[1]))
    # remove small boxes; the threshold here is 40 pixels
    keep = filter_boxes(proposals, 40)
    proposals = proposals[keep, :]
    scores = scores[keep]

    # sort scores and keep only the top 6000.
    pre_nms_topN = 6000
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]
    # apply NMS to the top 6000, then keep the top 300
    post_nms_topN = 300
    keep = py_cpu_nms(np.hstack((proposals, scores)), 0.7)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]
    # add gt_boxes to proposals.
    proposals = np.vstack((proposals, gt_boxes))
    # calculate overlaps of proposal and gt_boxes
    overlaps = bbox_overlaps(proposals, gt_boxes)
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    # labels = gt_labels[gt_assignment] #?

    # subsample
    fg_inds = np.where(max_overlaps >= FG_THRESH)[0]
    fg_rois_per_this_image = min(int(BATCH * FG_FRAC), fg_inds.size)
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds,
                             size=fg_rois_per_this_image,
                             replace=False)
    bg_inds = np.where((max_overlaps < BG_THRESH_HI)
                       & (max_overlaps >= BG_THRESH_LO))[0]
    bg_rois_per_this_image = BATCH - fg_rois_per_this_image
    bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds,
                             size=bg_rois_per_this_image,
                             replace=False)
    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    # labels = labels[keep_inds]
    rois = proposals[keep_inds]
    gt_rois = gt_boxes[gt_assignment[keep_inds]]
    targets = bbox_transform(rois, gt_rois)  # deltas from sampled rois to their gt boxes
    rois_num = targets.shape[0]
    batch_box = np.zeros((rois_num, 200, 4))
    for i in range(rois_num):
        batch_box[i, category] = targets[i]
    batch_box = np.reshape(batch_box, (rois_num, -1))
    # get gt category
    batch_categories = np.zeros((rois_num, 200, 1))
    for i in range(rois_num):
        batch_categories[i, category] = 1
    batch_categories = np.reshape(batch_categories, (rois_num, -1))
    return rois, batch_box, batch_categories
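
py_cpu_nms is the classic greedy non-maximum suppression; for reference, a NumPy version along the lines of what these examples call:

import numpy as np

def py_cpu_nms(dets, thresh):
    # dets is (N, 5): rows of (x1, y1, x2, y2, score)
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # IoU of the best remaining box with all the others
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)
        # drop everything that overlaps the kept box too much
        order = order[np.where(ovr <= thresh)[0] + 1]
    return keep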
Example #12
    def conf_loss(self, y_true, y_pred):
        """
        squeezeDet loss function for object detection and classification
        :param y_true: ground truth with shape [batchsize, #anchors, classes+8+labels]
        :param y_pred:
        :return: a tensor of the conf loss
        """

        #handle for config
        mc = self.config

        #calculate non padded entries
        n_outputs = mc.CLASSES + 1 + 4

        #slice and reshape network output
        y_pred = y_pred[:, :, 0:n_outputs]
        y_pred = K.reshape(y_pred, (mc.BATCH_SIZE, mc.N_ANCHORS_HEIGHT, mc.N_ANCHORS_WIDTH, -1))


        # slice y_true
        input_mask = y_true[:, :, 0]
        input_mask = K.expand_dims(input_mask, axis=-1)
        box_input = y_true[:, :, 1:5]

        # number of objects, used to normalize bbox and classification loss
        num_objects = K.sum(input_mask)


        # before computing the losses we need to slice the network outputs

        # number of class probabilities, n classes for each anchor
        num_class_probs = mc.ANCHOR_PER_GRID * mc.CLASSES

        # number of confidence scores, one for each anchor + class probs
        num_confidence_scores = mc.ANCHOR_PER_GRID + num_class_probs

        # slice the confidence scores and put them through a sigmoid for probabilities
        pred_conf = K.sigmoid(
            K.reshape(
                y_pred[:, :, :, num_class_probs:num_confidence_scores],
                [mc.BATCH_SIZE, mc.ANCHORS]
            )
        )

        # slice the remaining bounding-box deltas
        pred_box_delta = K.reshape(
            y_pred[:, :, :, num_confidence_scores:],
            [mc.BATCH_SIZE, mc.ANCHORS, 4]
        )

        # compute boxes
        det_boxes = utils.boxes_from_deltas(pred_box_delta, mc)

        # unstack is not available in the pure Keras backend
        unstacked_boxes_pred = []
        unstacked_boxes_input = []

        for i in range(4):
            unstacked_boxes_pred.append(det_boxes[:, :, i])
            unstacked_boxes_input.append(box_input[:, :, i])

        # compute the ious
        ious = utils.tensor_iou(utils.bbox_transform(unstacked_boxes_pred),
                                utils.bbox_transform(unstacked_boxes_input),
                                input_mask,
                                mc)

        # reshape input for correct broadcasting
        input_mask = K.reshape(input_mask, [mc.BATCH_SIZE, mc.ANCHORS])

        # confidence score loss
        conf_loss = K.mean(
            K.sum(
                K.square(ious - pred_conf)
                * (input_mask * mc.LOSS_COEF_CONF_POS / num_objects
                   + (1 - input_mask) * mc.LOSS_COEF_CONF_NEG / (mc.ANCHORS - num_objects)),
                axis=[1]
            ),
        )

        return conf_loss
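
utils.tensor_iou is not shown here; conceptually it computes an elementwise IoU over the unstacked corner tensors, masked so only anchors responsible for an object contribute. A rough standalone sketch of that idea (not the repo's exact function):

from keras import backend as K

def iou_from_corners(box1, box2, epsilon=1e-16):
    # box1/box2 are [xmin, ymin, xmax, ymax] lists of tensors, as produced
    # by utils.bbox_transform above
    xmin = K.maximum(box1[0], box2[0])
    ymin = K.maximum(box1[1], box2[1])
    xmax = K.minimum(box1[2], box2[2])
    ymax = K.minimum(box1[3], box2[3])
    intersection = K.maximum(0.0, xmax - xmin) * K.maximum(0.0, ymax - ymin)
    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    return intersection / (area1 + area2 - intersection + epsilon)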
Example #13
def produce_batch(image_file, true_boxes):

    image_name = image_file.replace('.jpg', '').replace(trainDIR, '')
    image = Image.open(image_file).resize((image_size, image_size), Image.NEAREST)
    data = asarray(image) / 255.0
    del image
    feature_map = pretrained_model.predict(
        data.reshape(-1, data.shape[0], data.shape[1], data.shape[2]))
    del data

    feature_size = feature_map.shape[1]
    feature_stride = int(image_size / feature_size)
    number_feature_points = feature_size * feature_size
    shift = np.arange(0, feature_size) * feature_stride
    shift_x, shift_y = np.meshgrid(shift, shift)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()
    base_anchors = generate_anchors(feature_stride, feature_stride,
                                    ratios=ANCHOR_RATIOS, scales=ANCHOR_SCALES)
    all_anchors = (base_anchors.reshape((1, anchor_number, 4)) +
                   shifts.reshape((1, number_feature_points, 4)).transpose((1, 0, 2)))
    total_anchor_number = anchor_number * number_feature_points
    all_anchors = all_anchors.reshape((total_anchor_number, 4))

    # only keep anchors inside the image + border.
    border = 0  # could also be FILTER_SIZE x feature_stride
    inds_inside = np.where(
            (all_anchors[:, 0] >= -border) &
            (all_anchors[:, 1] >= -border) &
            (all_anchors[:, 2] < image_size + border) &
            (all_anchors[:, 3] < image_size + border)
    )[0]
    anchors = all_anchors[inds_inside]
    useful_anchor_number = len(inds_inside)


    overlaps = bbox_overlaps(anchors, true_boxes)

    which_box = overlaps.argmax(axis=1)  # which true box has the most overlap with each anchor?
    anchor_max_overlaps = overlaps[np.arange(overlaps.shape[0]), which_box]

    which_anchor = overlaps.argmax(axis=0)  # which anchor has the most overlap with each true box?
    box_max_overlaps = overlaps[which_anchor, np.arange(overlaps.shape[1])]
    which_anchor_v2 = np.where(overlaps == box_max_overlaps)[0]

    labels = np.empty((useful_anchor_number,), dtype=np.float32)
    labels.fill(-1)

    labels[which_anchor_v2] = 1
    labels[anchor_max_overlaps >= FG_THRESHOLD] = 1
    labels[anchor_max_overlaps <= BG_THRESHOLD] = 0

    fg_inds = np.where(labels == 1)[0]
    bg_inds = np.where(labels == 0)[0]

    num_fg = int(BATCH_SIZE / (1 + BG_FG_FRAC))
    if len(fg_inds) > num_fg:
        disable_inds = np.random.choice(fg_inds, size=(len(fg_inds) - num_fg),
                                        replace=False)
        labels[disable_inds] = -1
    fg_inds = np.where(labels == 1)[0]

    num_bg = int(len(fg_inds) * BG_FG_FRAC)
    if len(bg_inds) > num_bg:
        disable_inds = np.random.choice(bg_inds, size=(len(bg_inds) - num_bg),
                                        replace=False)
        labels[disable_inds] = -1
    bg_inds = np.where(labels == 0)[0]

    anchor_batch_inds = inds_inside[labels != -1]
    np.random.shuffle(anchor_batch_inds)
    feature_batch_inds = (anchor_batch_inds / anchor_number).astype(int)

    pad_size = int((FILTER_SIZE - 1) / 2)
    padded_fcmap = np.pad(feature_map,
                          ((0, 0), (pad_size, pad_size), (pad_size, pad_size), (0, 0)),
                          mode='constant')
    padded_fcmap = np.squeeze(padded_fcmap)
    batch_tiles = []
    for ind in feature_batch_inds:
        # (x, y) is the feature-map point addressed by this batch index
        x = ind % feature_size
        y = int(ind / feature_size)
        fc_snip = padded_fcmap[y:y + FILTER_SIZE, x:x + FILTER_SIZE, :]
        batch_tiles.append(fc_snip)

    # unmap produces a label array over all anchors, with -1 filled in for the
    # anchors that were originally removed for being out of bounds.
    full_labels = unmap(labels, total_anchor_number, inds_inside, fill=-1)
    batch_labels = full_labels.reshape(-1, 1, 1, 1 * anchor_number)[feature_batch_inds]

    targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    pos_anchors = all_anchors[inds_inside[labels == 1]]  # positive anchors
    targets = bbox_transform(pos_anchors, true_boxes[which_box, :][labels == 1])
    targets = unmap(targets, total_anchor_number, inds_inside[labels == 1], fill=0)
    batch_targets = targets.reshape(-1, 1, 1, 4 * anchor_number)[feature_batch_inds]

    return np.asarray(batch_tiles), batch_labels.tolist(), batch_targets.tolist()
Example #14
    def rpn_targets(self, all_anchors, im, gt):
        total_anchors = all_anchors.shape[0]
        gt_boxes = gt['boxes']

        height, width = im.size()[-2:]
        # only keep anchors inside the image
        _allowed_border = 0
        inds_inside = np.where(
            (all_anchors[:, 0] >= -_allowed_border)
            & (all_anchors[:, 1] >= -_allowed_border)
            & (all_anchors[:, 2] < width + _allowed_border) &  # width
            (all_anchors[:, 3] < height + _allowed_border)  # height
        )[0]

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]
        if anchors.shape[0] == 0:
            print('{0}x{1} -> {2}'.format(height, width, total_anchors))
            return None, None

        # label: 1 is positive, 0 is negative, -1 is don't care
        labels = np.empty((len(inds_inside), ), dtype=np.float32)
        labels.fill(-1)

        # overlaps between the anchors and the gt boxes
        # overlaps (ex, gt)
        overlaps = bbox_overlaps(torch.from_numpy(anchors), gt_boxes).numpy()
        gt_boxes = gt_boxes.numpy()
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < self.negative_overlap] = 0

        # fg label: for each gt, anchor with highest overlap
        labels[gt_argmax_overlaps] = 1

        # fg label: above threshold IOU
        labels[max_overlaps >= self.positive_overlap] = 1

        # subsample positive labels if we have too many
        num_fg = int(self.fg_fraction * self.batch_size)
        fg_inds = np.where(labels == 1)[0]
        if len(fg_inds) > num_fg:
            disable_inds = npr.choice(fg_inds,
                                      size=(len(fg_inds) - num_fg),
                                      replace=False)
            labels[disable_inds] = -1

        # subsample negative labels if we have too many
        num_bg = self.batch_size - np.sum(labels == 1)
        bg_inds = np.where(labels == 0)[0]
        if len(bg_inds) > num_bg:
            disable_inds = npr.choice(bg_inds,
                                      size=(len(bg_inds) - num_bg),
                                      replace=False)
            labels[disable_inds] = -1

        bbox_targets = bbox_transform(anchors, gt_boxes[argmax_overlaps, :])

        # map up to original set of anchors
        labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
        bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)

        return labels, bbox_targets
Example #15
    def __anchor_target_layer(self, rpn_cls_score, gt_boxes, im_info,
                              feat_stride, anchor, A):
        allowed_border = 0
        total_anchors = anchor.shape[0]
        height, width = rpn_cls_score.shape[1:3]

        inds_inside = np.where((anchor[:, 0] >= allowed_border)
                               & (anchor[:, 1] >= allowed_border)
                               & (anchor[:, 2] < im_info[1] + allowed_border) &
                               (anchor[:, 3] < im_info[0] + allowed_border))[0]

        anchors = anchor[inds_inside, :]
        labels = np.empty((len(inds_inside), ), dtype=np.float32)
        labels.fill(-1)

        overlaps = bbox_overlaps(anchors, gt_boxes)
        argmax_overlap = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlap]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        labels[max_overlaps < 0.3] = 0
        labels[gt_argmax_overlaps] = 1

        bbox_targets = bbox_transform(anchors, gt_boxes[argmax_overlap, :])
        bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
        bbox_inside_weights[labels == 1, :] = np.array([1.0, 1.0, 1.0, 1.0])

        bbox_outside_weights = np.zeros((len(inds_inside), 4),
                                        dtype=np.float32)
        num_example = np.sum(labels >= 0)
        positive_weight = np.ones((1, 4)) * 1.0 / num_example
        negative_weight = np.ones((1, 4)) * 1.0 / num_example

        bbox_outside_weights[labels == 1, :] = positive_weight
        bbox_outside_weights[labels == 0, :] = negative_weight

        labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
        bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
        bbox_inside_weights = _unmap(bbox_inside_weights,
                                     total_anchors,
                                     inds_inside,
                                     fill=0)
        bbox_outside_weights = _unmap(bbox_outside_weights,
                                      total_anchors,
                                      inds_inside,
                                      fill=0)

        # labels
        labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
        labels = labels.reshape((1, 1, A * height, width))

        # bbox_targets
        bbox_targets = bbox_targets.reshape((1, height, width, A * 4))

        # bbox_inside_weights
        bbox_inside_weights = bbox_inside_weights.reshape(
            (1, height, width, A * 4))

        # bbox_outside_weights
        bbox_outside_weights = bbox_outside_weights.reshape(
            (1, height, width, A * 4))

        return labels, bbox_targets, bbox_inside_weights, bbox_outside_weights
Example #16
def produce_batch(filepath, gt_boxes, w_h):
    # load the feature map first
    feature_map = np.load(filepath)["fc"]
    # feature-map height and width; their product is the number of spatial positions
    height = np.shape(feature_map)[1]
    width = np.shape(feature_map)[2]
    num_feature_map = width * height
    # divide the image dimensions by the feature-map dimensions to get the strides
    img_width = w_h[0]
    img_height = w_h[1]
    w_stride = img_width / width
    h_stride = img_height / height
    # compute anchors from the stride
    # base anchors are k anchors w.r.t. a tile (0, 0, w_stride-1, h_stride-1)
    # base_anchors = generate_anchors(w_stride, h_stride, scales=np.asarray([1, 2, 4]))
    base_anchors = generate_anchors(16, 16, ratios=[0.5, 1], scales=np.asarray([1, 2, 8, 16]))
    # slice tiles according to image size and stride.
    shift_x = np.arange(0, width) * w_stride
    shift_y = np.arange(0, height) * h_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)  # all x and y coordinates of the grid points
    # the (x, y, x, y) offsets are repeated because base_anchors are in
    # (x1, y1, x2, y2) form, so both corners shift together
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose()

    # adding a (1, k, 4) array to a (num_feature_map, 1, 4) array broadcasts
    # to (num_feature_map, k, 4)
    all_anchors = (base_anchors.reshape((1, k, 4)) + shifts.reshape((1, num_feature_map, 4)).transpose((1, 0, 2)))
    total_anchors = num_feature_map * k
    all_anchors = all_anchors.reshape((total_anchors, 4))
    # only keep anchors inside the image + border.
    border = 0
    inds_inside = np.where(
            (all_anchors[:, 0] >= -border) &
            (all_anchors[:, 1] >= -border) &
            (all_anchors[:, 2] < img_width + border) &   # width
            (all_anchors[:, 3] < img_height + border)    # height
    )[0]
    anchors = all_anchors[inds_inside]
    if len(anchors) == 0:
        return None, None, None
    # calculate overlaps of each anchor with each gt box:
    # a matrix with shape [len(anchors) x len(gt_boxes)]
    overlaps = bbox_overlaps(anchors, gt_boxes)
    # find the gt box with the biggest overlap for each anchor,
    # and the overlap ratio. result (len(anchors),)
    argmax_overlaps = overlaps.argmax(axis=1)  # which gt box scores highest for each anchor
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    # find the anchor with the biggest overlap for each gt box,
    # and the overlap ratio. result (len(gt_boxes),)
    gt_argmax_overlaps = overlaps.argmax(axis=0)  # which anchor is closest to each gt box
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
    # labels for the anchors inside the image: 1=fg / 0=bg / -1=ignore
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)
    # following the paper, only two kinds of anchors get a positive label:
    # (1) for each gt box, the anchor with the highest IoU
    # (2) any anchor whose IoU with some gt box exceeds 0.7
    labels[gt_argmax_overlaps] = 1
    labels[max_overlaps >= .7] = 1
    # set negative labels
    labels[max_overlaps <= .3] = 0
    # subsample positive labels if we have too many
    # num_fg = int(RPN_FG_FRACTION * RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    # if len(fg_inds) > num_fg:
    #     disable_inds = npr.choice(
    #         fg_inds, size=(len(fg_inds) - num_fg), replace=False)
    #     labels[disable_inds] = -1
    # subsample negative labels if we have too many
    num_bg = int(len(fg_inds) * BG_FG_FRAC)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        # too much background: randomly mark the surplus as ignore
        disable_inds = npr.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1
    # from here on we build the batch; batch_inds holds every anchor that is not ignored
    batch_inds = inds_inside[labels != -1]
    # anchors are laid out k per feature-map location, so dividing by k
    # recovers the feature-map position an anchor belongs to
    batch_inds = (batch_inds / k).astype(int)
    # labels mapped back onto the full anchor set
    full_labels = unmap(labels, total_anchors, inds_inside, fill=-1)
    # batch_label_targets is one 1x1xk slab per batch position
    batch_label_targets = full_labels.reshape(-1, 1, 1, 1 * k)[batch_inds]

    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    # bbox_targets = bbox_transform(anchors, gt_boxes[argmax_overlaps, :]
    # anchors labelled as foreground
    pos_anchors = all_anchors[inds_inside[labels == 1]]
    # normalize?
    bbox_targets = bbox_transform(pos_anchors, gt_boxes[argmax_overlaps, :][labels == 1])
    bbox_targets = unmap(bbox_targets, total_anchors, inds_inside[labels == 1], fill=0)
    batch_bbox_targets = bbox_targets.reshape(-1, 1, 1, 4 * k)[batch_inds]
    # pad the second and third axes of feature_map with one cell on each side
    padded_fcmap = np.pad(feature_map, ((0, 0), (1, 1), (1, 1), (0, 0)), mode='constant')
    # drop the singleton axes of padded_fcmap; the result should be 3-D
    padded_fcmap = np.squeeze(padded_fcmap)
    batch_tiles = []
    for ind in batch_inds:
        x = ind % width
        y = int(ind / width)
        fc_3x3 = padded_fcmap[y:y + 3, x:x + 3, :]
        batch_tiles.append(fc_3x3)
    return np.asarray(batch_tiles), batch_label_targets.tolist(), batch_bbox_targets.tolist()
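
generate_anchors enumerates aspect ratios and scales around one base tile. A simplified NumPy sketch of the idea (the real helper centers and rounds slightly differently):

import numpy as np

def generate_anchors_simple(base_w, base_h, ratios=(0.5, 1, 2), scales=(8, 16, 32)):
    # anchors (x1, y1, x2, y2) centered on the tile (0, 0, base_w-1, base_h-1)
    cx, cy = (base_w - 1) / 2.0, (base_h - 1) / 2.0
    anchors = []
    for r in ratios:
        for s in scales:
            # keep the base area while changing the aspect ratio, then scale
            w = base_w * s * np.sqrt(1.0 / r)
            h = base_h * s * np.sqrt(r)
            anchors.append([cx - 0.5 * (w - 1), cy - 0.5 * (h - 1),
                            cx + 0.5 * (w - 1), cy + 0.5 * (h - 1)])
    return np.array(anchors)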
Example #17
    def loss_without_regularization(self, y_true, y_pred):
        """
        squeezeDet loss function for object detection and classification
        :param y_true: ground truth with shape [batchsize, #anchors, classes+8+labels]
        :param y_pred: network predictions
        :return: a tensor of the total loss
        """

        # handle to the config
        mc = self.config

        # slice y_true
        input_mask = y_true[:, :, 0]
        input_mask = K.expand_dims(input_mask, axis=-1)
        box_input = y_true[:, :, 1:5]
        box_delta_input = y_true[:, :, 5:9]
        labels = y_true[:, :, 9:]

        # number of objects, used to normalize bbox and classification loss
        num_objects = K.sum(input_mask)


        # before computing the losses we need to slice the network outputs
        pred_class_probs, pred_conf, pred_box_delta = utils.slice_predictions(y_pred, mc)

        # compute boxes
        det_boxes = utils.boxes_from_deltas(pred_box_delta, mc)

        # unstack is not available in the pure Keras backend
        unstacked_boxes_pred = []
        unstacked_boxes_input = []

        for i in range(4):
            unstacked_boxes_pred.append(det_boxes[:, :, i])
            unstacked_boxes_input.append(box_input[:, :, i])

        # compute the ious
        ious = utils.tensor_iou(utils.bbox_transform(unstacked_boxes_pred),
                                utils.bbox_transform(unstacked_boxes_input),
                                input_mask,
                                mc)


        # cross-entropy: q * -log(p) + (1-q) * -log(1-p)
        # add a small value to the log to prevent it from blowing up

        # compute class loss; the mask restricts the loss to anchors that
        # are responsible for an object
        class_loss = K.sum((labels * (-K.log(pred_class_probs + mc.EPSILON))
                 + (1 - labels) * (-K.log(1 - pred_class_probs + mc.EPSILON)))
                * input_mask * mc.LOSS_COEF_CLASS) / num_objects

        # bounding box loss
        bbox_loss = (K.sum(mc.LOSS_COEF_BBOX * K.square(input_mask * (pred_box_delta - box_delta_input))) / num_objects)

        # reshape input for correct broadcasting
        input_mask = K.reshape(input_mask, [mc.BATCH_SIZE, mc.ANCHORS])

        # confidence score loss
        conf_loss = K.mean(
            K.sum(
                K.square(ious - pred_conf)
                * (input_mask * mc.LOSS_COEF_CONF_POS / num_objects
                   + (1 - input_mask) * mc.LOSS_COEF_CONF_NEG / (mc.ANCHORS - num_objects)),
                axis=[1]
            ),
        )

        # add up the losses above
        total_loss = class_loss + conf_loss + bbox_loss

        return total_loss