import cv2

# parse_xml, compute_ious and the selectivesearch wrapper are project-local helpers.


def parse_annotation_jpeg(annotation_path, jpeg_path, gs):
    """
    Collect positive and negative samples (bounding boxes annotated as difficult are ignored).
    Positive samples: region proposals with IoU >= 0.5 against a ground-truth box.
    Negative samples: IoU greater than 0 and less than 0.5; to further limit their number,
    a negative proposal must also be larger than 1/5 of the largest ground-truth box.
    """
    img = cv2.imread(jpeg_path)

    selectivesearch.config(gs, img, strategy='q')
    # compute region proposals
    rects = selectivesearch.get_rects(gs)
    # parse the ground-truth bounding boxes
    bndboxs = parse_xml(annotation_path)

    # size of the largest ground-truth box
    maximum_bndbox_size = 0
    for bndbox in bndboxs:
        xmin, ymin, xmax, ymax = bndbox
        bndbox_size = (ymax - ymin) * (xmax - xmin)
        if bndbox_size > maximum_bndbox_size:
            maximum_bndbox_size = bndbox_size

    # IoU of each region proposal against the ground-truth boxes
    iou_list = compute_ious(rects, bndboxs)

    positive_list = list()
    negative_list = list()
    for i in range(len(iou_list)):
        xmin, ymin, xmax, ymax = rects[i]
        rect_size = (ymax - ymin) * (xmax - xmin)

        iou_score = iou_list[i]
        if iou_score >= 0.5:
            # positive sample
            positive_list.append(rects[i])
        elif 0 < iou_score < 0.5 and rect_size > maximum_bndbox_size / 5.0:
            # negative sample
            negative_list.append(rects[i])

    return positive_list, negative_list
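
The gs argument is the OpenCV selective-search segmentation object that the project-local selectivesearch wrapper configures and queries. A minimal, hypothetical driver for the function above, assuming gs is created directly via opencv-contrib (the sample file names are placeholders):

if __name__ == '__main__':
    # requires opencv-contrib-python for the cv2.ximgproc module
    gs = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()
    positives, negatives = parse_annotation_jpeg('sample.xml', 'sample.jpg', gs)
    print('positive proposals: %d, negative proposals: %d' % (len(positives), len(negatives)))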
Example 2
def parse_annotation_jpeg(annotation_path, jpeg_path, gs):
    """
    获取正负样本(注:忽略属性difficult为True的标注边界框)
    正样本:候选建议与标注边界框IoU大于等于0.5 + 标注边界框
    负样本:IoU大于0.1,小于0.5
    """
    img = cv2.imread(jpeg_path)

    selectivesearch.config(gs, img, strategy='q')
    # compute region proposals
    rects = selectivesearch.get_rects(gs)
    # parse the ground-truth bounding boxes
    bndboxs = parse_xml(annotation_path)

    # IoU of each region proposal against the ground-truth boxes
    iou_list = compute_ious(rects, bndboxs)

    positive_list = list()
    negative_list = list()
    for i in range(len(iou_list)):
        iou_score = iou_list[i]
        if iou_score >= 0.5:
            # positive sample
            positive_list.append(rects[i])
        elif 0.1 <= iou_score < 0.5:
            # negative sample
            negative_list.append(rects[i])

    # add the ground-truth boxes themselves to the positive list
    positive_list.extend(bndboxs)

    return positive_list, negative_list
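
Both versions above treat iou_list[i] as a single score per proposal, so compute_ious is assumed to return, for every region proposal, its maximum IoU over all ground-truth boxes. A minimal sketch under that assumption, with boxes given as (xmin, ymin, xmax, ymax) tuples:

def compute_ious(rects, bndboxs):
    # For each proposal, return its maximum IoU over all ground-truth boxes.
    iou_list = []
    for (xmin, ymin, xmax, ymax) in rects:
        rect_area = (xmax - xmin) * (ymax - ymin)
        best_iou = 0.0
        for (gxmin, gymin, gxmax, gymax) in bndboxs:
            # intersection rectangle
            ixmin, iymin = max(xmin, gxmin), max(ymin, gymin)
            ixmax, iymax = min(xmax, gxmax), min(ymax, gymax)
            inter = max(ixmax - ixmin, 0) * max(iymax - iymin, 0)
            union = rect_area + (gxmax - gxmin) * (gymax - gymin) - inter
            best_iou = max(best_iou, inter / union if union > 0 else 0.0)
        iou_list.append(best_iou)
    return iou_list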
Example 3
    def _anchors_target_level(self, anchors_level, gt_boxes, num_classes):

        gt_boxes = tf.cast(gt_boxes, tf.float32)

        anchors_shape = tf.shape(anchors_level)
        height, width, num_anchors, _ = anchors_shape
        labels = tf.zeros(shape=(height, width, num_anchors, num_classes))

        # 1. discard invalid (zero-padded) gt boxes.
        valid_gt = trim_zeros(gt_boxes)

        if len(valid_gt) == 0:
            return labels, tf.zeros_like(anchors_level), \
                   tf.zeros(shape=(height, width, num_anchors)), tf.zeros(shape=(height, width, num_anchors))

        ious = compute_ious(anchors_level, valid_gt[..., 0:4])
        gt_class = tf.cast(valid_gt[..., 4], tf.int64)

        # (h, w, 9)
        ious_max = tf.reduce_max(ious, axis=-1)
        ious_argmax = tf.argmax(ious, axis=-1)

        # 2. if max IoU >= positive threshold, the anchor is assigned to that ground-truth box.
        # (num_pos, 3)
        pos_index = tf.where(ious_max >= 0.5)
        class_id = tf.gather(gt_class, tf.gather_nd(ious_argmax, pos_index))
        class_id = tf.expand_dims(class_id, axis=-1)
        positive_class_index = tf.concat([pos_index, class_id], axis=-1)

        num_positive = tf.shape(positive_class_index)[0]

        labels = tf.tensor_scatter_nd_update(labels, positive_class_index,
                                             tf.ones(shape=(num_positive, )))

        # 3. if max IoU < negative threshold, the anchor is assigned to background.
        neg_index = tf.where(ious_max < 0.4)

        # 4. transform the matched ground-truth boxes into regression deltas
        ious_argmax = tf.reshape(ious_argmax, (height * width * num_anchors, ))
        valid_gt_box = valid_gt[..., 0:4]
        ious_argmax_box = tf.gather(valid_gt_box, ious_argmax)
        ious_argmax_box = tf.reshape(ious_argmax_box,
                                     (height, width, num_anchors, 4))
        delta = box2delta(anchors_level, ious_argmax_box, self.target_means,
                          self.target_stds)

        # 5.create label weights and box weights
        label_weights = tf.zeros(shape=(height, width, num_anchors))
        box_weights = tf.zeros(shape=(height, width, num_anchors))

        label_weights = tf.tensor_scatter_nd_update(
            label_weights, pos_index, tf.ones(shape=(num_positive, )))

        num_negative = tf.shape(neg_index)[0]
        label_weights = tf.tensor_scatter_nd_update(
            label_weights, neg_index, tf.ones(shape=(num_negative, )))

        box_weights = tf.tensor_scatter_nd_update(
            box_weights, pos_index, tf.ones(shape=(num_positive, )))

        return labels, delta, label_weights, box_weights
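
box2delta above is assumed to implement the standard box-regression encoding: center and size offsets of the matched ground-truth box relative to the anchor, normalized by target_means / target_stds. A sketch under that assumption (the (y1, x1, y2, x2) coordinate ordering is a guess and may differ from the project's convention):

import tensorflow as tf

def box2delta(anchors, gt_boxes, target_means, target_stds):
    # anchors, gt_boxes: (..., 4) tensors, assumed to be in (y1, x1, y2, x2) order.
    anchors = tf.cast(anchors, tf.float32)
    gt_boxes = tf.cast(gt_boxes, tf.float32)

    ay1, ax1, ay2, ax2 = tf.unstack(anchors, axis=-1)
    gy1, gx1, gy2, gx2 = tf.unstack(gt_boxes, axis=-1)

    ah, aw = ay2 - ay1, ax2 - ax1
    acy, acx = ay1 + 0.5 * ah, ax1 + 0.5 * aw
    gh, gw = gy2 - gy1, gx2 - gx1
    gcy, gcx = gy1 + 0.5 * gh, gx1 + 0.5 * gw

    # (dy, dx, dh, dw), then normalize with the provided means and stds.
    delta = tf.stack([(gcy - acy) / ah,
                      (gcx - acx) / aw,
                      tf.math.log(gh / ah),
                      tf.math.log(gw / aw)], axis=-1)
    means = tf.convert_to_tensor(target_means, dtype=tf.float32)
    stds = tf.convert_to_tensor(target_stds, dtype=tf.float32)
    return (delta - means) / stds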
Example 4
    def compute_loss(self, x, y, y_hat, step) -> (torch.Tensor, defaultdict, defaultdict):
        loss_config = self.config['loss']

        total_losses = []
        scalar_summaries = defaultdict(float)
        list_summaries = defaultdict(list)
        batch_size = self.config['batch_size'] \
            if self.training else self.config['eval_batch_size']

        for batch_idx in range(batch_size):
            # select the points belonging to this scene within the sparse batch
            scene_idxs = (x.C[:, 0] == batch_idx) \
                .nonzero().squeeze(dim=1)
            if scene_idxs.shape[0] <= 0:
                continue

            # unravel objects: embeddings and instance labels for this scene
            embs = y_hat[scene_idxs]
            gt_insts = y[scene_idxs, 1]
            # skip scenes that contain only background points (instance id 0)
            if gt_insts.sum() <= 0.:
                continue
            num_insts = gt_insts.max()

            inst_emb_means = []
            inst_idxs = []
            inter_losses = []
            dist2mean = []  # compute average distance to instance mean

            for inst in range(1, num_insts + 1):
                # mask of the points belonging to this instance
                single_inst_idxs = (gt_insts == inst)
                # skip instance ids that have no points
                if single_inst_idxs.sum() == 0:
                    continue
                inst_idxs.append(single_inst_idxs)
                inst_embs = embs[single_inst_idxs]  # Tensor of N x D
                inst_emb_mean = inst_embs.mean(dim=0)
                inst_emb_means.append(inst_emb_mean)

                # compute inter_loss
                inst_dists = torch.norm(
                    inst_embs - inst_emb_mean.unsqueeze(dim=0),
                    dim=1
                )  # Tensor of N
                inter_losses.append(torch.relu(inst_dists - loss_config['delta_inter']).mean())
                dist2mean.append(inst_dists)

            # inter loss
            inter_losses = torch.stack(inter_losses)
            num_inst_points = torch.tensor([x.shape[0] for x in inst_idxs])

            # weight loss by p, s.t. 0 <= p <= 1
            # if p == 0, equal weighting to each instance
            # if p == 1, equal weighting to per point
            # exclude bg from inter losses
            inter_loss_weight = num_inst_points.float() \
                .pow(loss_config['inter_chill']).to(self.device)
            inter_loss_weight = inter_loss_weight / inter_loss_weight.sum()
            inter_loss = torch.dot(inter_losses, inter_loss_weight)

            # intra_loss
            inst_emb_means = torch.stack(inst_emb_means, dim=0)
            pair_dist_mean = pairwise_distance(inst_emb_means)
            pair_dist_mean = torch.sqrt(torch.relu(pair_dist_mean))  # relu clips tiny negative values before sqrt
            hinge_dist_pair = torch.relu(2 * loss_config['delta_intra'] - pair_dist_mean)
            intra_loss = hinge_dist_pair.sum() - torch.diag(hinge_dist_pair).sum()

            # delete for memory efficiency
            del pair_dist_mean
            del hinge_dist_pair

            # background alone or a single instance: the intra loss is undefined
            if num_insts <= 1:
                intra_loss = torch.tensor(0.).to(self.device)
            else:
                intra_loss /= 2 * num_insts * (num_insts - 1)

            # reg_loss
            reg_loss = torch.norm(inst_emb_means, dim=1).mean()

            # sum all the losses
            eff_inter_loss = loss_config['gamma_inter'] * inter_loss
            eff_intra_loss = loss_config['gamma_intra'] * intra_loss
            eff_reg_loss = loss_config['gamma_reg'] * reg_loss
            total_loss = eff_inter_loss + eff_intra_loss + eff_reg_loss
            total_losses.append(total_loss)

            if torch.isnan(total_loss):
                __import__('pdb').set_trace()

            # add losses to summaries
            mode = 'train' if self.training else 'val'
            loss_prefix = 'loss/{}/'.format(mode)
            raw_prefix = loss_prefix + 'raw/'
            eff_prefix = loss_prefix + 'eff/'
            ratio_prefix = loss_prefix + 'ratio/'
            iou_prefix = 'iou/{}/'.format(mode)
            dist_prefix = 'dist/{}/'.format(mode)

            # add total loss
            scalar_summaries[loss_prefix + 'total'] += total_loss.item()

            # add raw loss
            scalar_summaries[raw_prefix + 'inter_loss'] += inter_loss.item()
            scalar_summaries[raw_prefix + 'intra_loss'] += intra_loss.item()
            scalar_summaries[raw_prefix + 'reg_loss'] += reg_loss.item()

            # add eff loss
            scalar_summaries[eff_prefix + 'inter_loss'] += eff_inter_loss.item()
            scalar_summaries[eff_prefix + 'intra_loss'] += eff_intra_loss.item()
            scalar_summaries[eff_prefix + 'reg_loss'] += eff_reg_loss.item()

            # add loss ratio
            if total_loss.item() != 0:
                scalar_summaries[ratio_prefix + 'inter_loss'] \
                    += eff_inter_loss.item() / total_loss.item()
                scalar_summaries[ratio_prefix + 'intra_loss'] \
                    += eff_intra_loss.item() / total_loss.item()
                scalar_summaries[ratio_prefix + 'reg_loss'] \
                    += eff_reg_loss.item() / total_loss.item()

            # add dist2mean
            dist2mean = torch.cat(dist2mean)  # Tensor of shape N
            scalar_summaries[dist_prefix + 'dist_to_mean'] \
                += dist2mean.mean().item()  # without bg
            eff_dist2mean = dist2mean.clone()
            eff_dist2mean = eff_dist2mean[dist2mean > loss_config['delta_inter']]
            scalar_summaries[dist_prefix + 'eff_dist_to_mean'] \
                += eff_dist2mean.mean().item()

            # bg dist2_emb_mean
            bg_embs = embs[gt_insts == 0]
            if bg_embs.nelement() != 0:
                bg_dist2emb_mean = pairwise_distance(bg_embs, inst_emb_means)
                bg_dist2emb_mean = bg_dist2emb_mean.min(dim=1).values  # bg to nearest embedding mean
                scalar_summaries[dist_prefix + 'bg_dist_to_emb_mean'] \
                    += bg_dist2emb_mean.mean().item()
                list_summaries[dist_prefix + 'bg_dist_to_emb_mean_hist'] += bg_dist2emb_mean.cpu().tolist()

            if (not self.training) or (self.training and ((step + 1) % self.config['summary_step'] == 0)):
                list_summaries[loss_prefix + 'inter_loss_weight'] += inter_loss_weight.tolist()

                # use ground truth mean for debugging
                if num_insts > 1:
                    inst_mean_seeds = [(inst_emb_means[i], i) for i in range(inst_emb_means.shape[0])]
                    ious = compute_ious(inst_mean_seeds, embs, inst_idxs, self.config['emb_thres'])
                    scalar_summaries[iou_prefix + 'mean_sample/mean'] += sum(ious) / float(len(ious))
                    scalar_summaries[iou_prefix + 'mean_sample/max'] += max(ious)
                    scalar_summaries[iou_prefix + 'mean_sample/min'] += min(ious)
                    list_summaries[iou_prefix + 'mean_sample'] += ious

        scalar_summaries = {k: (float(v) / batch_size) for (k, v) in scalar_summaries.items()}
        loss = torch.stack(total_losses).mean()
        return loss, scalar_summaries, list_summaries
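
pairwise_distance is called both with one argument (instance means against themselves) and with two (background embeddings against the instance means), and the caller applies relu and sqrt to the result, so it is assumed to return squared Euclidean distances. A minimal sketch under that assumption:

import torch

def pairwise_distance(a, b=None):
    # Squared Euclidean distance between every row of a and every row of b
    # (b defaults to a); small negatives from floating-point cancellation are
    # left for the caller's relu to clip before the sqrt.
    b = a if b is None else b
    a_sq = (a ** 2).sum(dim=1, keepdim=True)      # (N, 1)
    b_sq = (b ** 2).sum(dim=1, keepdim=True).t()  # (1, M)
    return a_sq + b_sq - 2.0 * a @ b.t()          # (N, M)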